Exemple #1
0
def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
    np.random.seed(0)
    np_random.seed(0)

    env = DummyVecEnv([env_fn])

    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True)).as_default():
        tf.set_random_seed(0)

        model = learn_fn(env)

        sum_rew = 0
        done = True

        for i in range(n_trials):
            if done:
                obs = env.reset()
                state = model.initial_state

            if state is not None:
                a, v, state, _ = model.step(obs, S=state, M=[False])
            else:
                a, v, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(a)
            sum_rew += float(rew)

        print("Reward in {} trials is {}".format(n_trials, sum_rew))
        assert sum_rew > min_reward_fraction * n_trials, \
            'sum of rewards {} is less than {} of the total number of trials {}'.format(sum_rew, min_reward_fraction, n_trials)
def test_identity(learn_func):
    '''
    Test if the algorithm (with a given policy) 
    can learn an identity transformation (i.e. return observation as an action)
    '''
    np.random.seed(0)
    np_random.seed(0)
    random.seed(0)

    env = DummyVecEnv([lambda: IdentityEnv(10)])

    with tf.Graph().as_default(), tf.Session().as_default():
        tf.set_random_seed(0)
        model = learn_func(env)

        N_TRIALS = 1000
        sum_rew = 0
        obs = env.reset()
        for i in range(N_TRIALS):
            obs, rew, done, _ = env.step(model.step(obs)[0])
            sum_rew += rew

        assert sum_rew > 0.9 * N_TRIALS
Exemple #3
0
def test_identity(learn_func):
    '''
    Test if the algorithm (with a given policy) 
    can learn an identity transformation (i.e. return observation as an action)
    '''
    np.random.seed(0)
    np_random.seed(0)
    random.seed(0)

    env = DummyVecEnv([lambda: IdentityEnv(10)])

    with tf.Graph().as_default(), tf.Session().as_default():
        tf.set_random_seed(0)
        model = learn_func(env)

        N_TRIALS = 1000
        sum_rew = 0
        obs = env.reset()
        for i in range(N_TRIALS):
            obs, rew, done, _ = env.step(model.step(obs)[0])
            sum_rew += rew

        assert sum_rew > 0.9 * N_TRIALS