예제 #1
0
# Open a TensorFlow session and enter it by hand rather than via `with`
# (presumably so it stays active for the rest of the script -- confirm).
sess = tf.Session()
sess.__enter__()

# Load the saved control policy; it is passed to the simulator as `sut`.
data = joblib.load("../CartPole/ControlPolicy/itr_5.pkl")
sut = data['policy']
reward_function = ASTRewardS()

# Build the simulator around the loaded policy.
simulator = CartpoleSimulator(
    sut=sut,
    max_path_length=max_path_length,
    use_seed=False,
    nd=1,
)

# Construct the AST environment first, then wrap it in TfEnv.
env = ASTEnv(
    open_loop=open_loop,
    simulator=simulator,
    fixed_init_state=True,
    s_0=[0.0, 0.0, 0.0 * math.pi / 180, 0.0],
    reward_function=reward_function,
)
env = TfEnv(env)

# Training
# Training
# Results file: a 'step_count' column followed by one 'reward <i>' column for
# each i in range(top_k). The csv module requires files it writes to be opened
# with newline=''; without it, platforms that translate '\n' on write produce
# '\r\r\n' row endings (blank lines between rows).
with open(osp.join(args.log_dir, 'total_result.csv'), mode='w', newline='') as csv_file:
    fieldnames = ['step_count'] + ['reward ' + str(i) for i in range(top_k)]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()

    for trial in range(args.n_trial):
        # Create the logger
        log_dir = args.log_dir + '/' + str(trial)
예제 #2
0
tf.set_random_seed(seed)
with tf.Session() as sess:
    # Environment: load the saved policy and pass it to the simulator as `sut`.
    data = joblib.load("../CartPole/ControlPolicy/itr_5.pkl")
    sut = data['policy']
    reward_function = ASTRewardS()

    simulator = CartpoleSimulator(
        sut=sut,
        max_path_length=100,
        use_seed=False,
    )
    # Build the AST environment and wrap it in TfEnv in a single expression.
    env = TfEnv(
        ASTEnv(
            open_loop=False,
            simulator=simulator,
            fixed_init_state=True,
            s_0=[0.0, 0.0, 0.0 * math.pi / 180, 0.0],
            reward_function=reward_function,
        )
    )

    # Policy: MLP configured with hidden sizes (64, 32) and a tanh output
    # nonlinearity, built against the wrapped environment's spec.
    policy = DeterministicMLPPolicy(
        name='ast_agent',
        env_spec=env.spec,
        hidden_sizes=(64, 32),
        output_nonlinearity=tf.nn.tanh,
    )

    # Run an initializer covering only the variables the policy reports.
    params = policy.get_params()
    init_policy_vars = tf.variables_initializer(params)
    sess.run(init_policy_vars)

    # Instantiate the garage objects
    baseline = ZeroBaseline(env_spec=env.spec)
    # Disabled alternative optimizer, kept for reference:
    # optimizer = ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5))