Ejemplo n.º 1
0
def test_multidiscrete_identity(alg):
    '''
    Test if the algorithm (with an mlp policy)
    can learn an identity transformation (i.e. return observation as an action)
    on a multi-discrete identity environment.

    :param alg: key into ``learn_kwargs``; also passed to
        ``get_learn_function`` to obtain the learn function under test.
    '''

    # Copy before merging: calling .update() directly on learn_kwargs[alg]
    # would permanently write common_kwargs into the shared module-level
    # dict and leak settings into every later test using the same alg.
    kwargs = dict(learn_kwargs[alg])
    kwargs.update(common_kwargs)

    def learn_fn(e):
        return get_learn_function(alg)(env=e, **kwargs)

    def env_fn():
        return MultiDiscreteIdentityEnv((3, 3), episode_len=100)

    # 0.9 is the threshold handed to simple_test (presumably the minimum
    # reward needed to pass -- confirm against simple_test).
    simple_test(env_fn, learn_fn, 0.9)
Ejemplo n.º 2
0
def test_continuous_identity(alg):
    '''
    Test if the algorithm (with an mlp policy)
    can learn an identity transformation (i.e. return observation as an action)
    to a required precision

    :param alg: key into ``learn_kwargs``; also passed to
        ``get_learn_function`` to obtain the learn function under test.
    '''

    # Build a private merged dict instead of updating learn_kwargs[alg] in
    # place, so the shared per-algorithm configuration is left untouched
    # for other tests.
    kwargs = dict(learn_kwargs[alg])
    kwargs.update(common_kwargs)

    def learn_fn(e):
        return get_learn_function(alg)(env=e, **kwargs)

    def env_fn():
        return BoxIdentityEnv((1, ), episode_len=100)

    # The threshold is negative here (-0.1); presumably the reward of this
    # environment is a negated error -- confirm against BoxIdentityEnv.
    simple_test(env_fn, learn_fn, -0.1)
Ejemplo n.º 3
0
def test_mnist(alg):
    '''
    Test if the algorithm can learn to classify MNIST digits.
    Uses CNN policy.

    :param alg: key into ``learn_args``; also passed to
        ``get_learn_function`` to obtain the learn function under test.
    '''

    # Work on a copy: updating learn_args[alg] directly would mutate the
    # shared module-level configuration and make later tests depend on
    # the order in which tests ran.
    kwargs = dict(learn_args[alg])
    kwargs.update(common_kwargs)

    learn = get_learn_function(alg)

    def learn_fn(e):
        return learn(env=e, **kwargs)

    def env_fn():
        return MnistEnv(episode_len=100)

    # 0.6 is the threshold handed to simple_test (presumably the minimum
    # reward needed to pass -- confirm against simple_test).
    simple_test(env_fn, learn_fn, 0.6)
Ejemplo n.º 4
0
def test_fixed_sequence(alg, rnn):
    '''
    Test if the algorithm (with a given recurrent policy)
    can learn on FixedSequenceEnv (10 actions, episodes of length 5).

    :param alg: key into ``learn_kwargs``; also passed to
        ``get_learn_function`` to obtain the learn function under test.
    :param rnn: network name forwarded to the learn function as ``network``.
    '''

    # Copy before merging so the shared learn_kwargs[alg] dict is not
    # permanently modified by this test.
    kwargs = dict(learn_kwargs[alg])
    kwargs.update(common_kwargs)

    # ppo2 is given the 'ppo_'-prefixed variant of any '*lstm' network name
    # (presumably a ppo2-specific implementation -- confirm in the network
    # registry).
    if alg == 'ppo2' and rnn.endswith('lstm'):
        rnn = 'ppo_' + rnn

    def env_fn():
        return FixedSequenceEnv(n_actions=10, episode_len=5)

    def learn(e):
        return get_learn_function(alg)(env=e, network=rnn, **kwargs)

    # 0.7 is the threshold handed to simple_test (presumably the minimum
    # reward needed to pass -- confirm against simple_test).
    simple_test(env_fn, learn, 0.7)