# --- TF session setup ---
# NOTE(review): the session is entered manually instead of via `with`, so it is
# never closed in this fragment — presumably torn down elsewhere; confirm.
sess = tf.Session()
sess.__enter__()

# Instantiate the env
# NOTE(review): joblib.load unpickles the saved control policy — only safe for
# trusted checkpoint files.
data = joblib.load("../CartPole/ControlPolicy/itr_5.pkl")
sut = data['policy']
reward_function = ASTRewardS()
# `max_path_length` comes from outside this fragment — TODO confirm its source.
simulator = CartpoleSimulator(sut=sut,
                              max_path_length=max_path_length,
                              use_seed=False,
                              nd=1)
env = TfEnv(ASTEnv(
    open_loop=open_loop,
    simulator=simulator,
    fixed_init_state=True,
    # Initial state: cart position/velocity 0, pole angle 0 degrees (the
    # `0.0 * math.pi / 180` term is a degrees-to-radians template), tip velocity 0.
    s_0=[0.0, 0.0, 0.0 * math.pi / 180, 0.0],
    reward_function=reward_function,
))

# Training
# One CSV row per step count, with the top-k episode rewards as columns.
with open(osp.join(args.log_dir, 'total_result.csv'), mode='w') as csv_file:
    fieldnames = ['step_count']
    for i in range(top_k):
        fieldnames.append('reward ' + str(i))
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()

    # NOTE(review): the trial loop is nested inside the `with open(...)` block so
    # `writer` stays usable while trials run — confirm against the original layout.
    for trial in range(args.n_trial):
        # Create the logger
        log_dir = args.log_dir + '/' + str(trial)
# Seed TF's graph-level RNG so each trial is reproducible.
# NOTE(review): `seed` is defined outside this fragment — confirm its source.
tf.set_random_seed(seed)
with tf.Session() as sess:
    # Create env
    # NOTE(review): joblib.load unpickles the saved control policy — only safe
    # for trusted checkpoint files.
    data = joblib.load("../CartPole/ControlPolicy/itr_5.pkl")
    sut = data['policy']
    reward_function = ASTRewardS()
    simulator = CartpoleSimulator(sut=sut, max_path_length=100, use_seed=False)
    env = ASTEnv(
        open_loop=False,
        simulator=simulator,
        fixed_init_state=True,
        # Initial state: cart position/velocity 0, pole angle 0 degrees (the
        # `0.0 * math.pi / 180` term is a degrees-to-radians template),
        # tip velocity 0.
        s_0=[0.0, 0.0, 0.0 * math.pi / 180, 0.0],
        reward_function=reward_function,
    )
    env = TfEnv(env)

    # Create policy
    policy = DeterministicMLPPolicy(
        name='ast_agent',
        env_spec=env.spec,
        hidden_sizes=(64, 32),
        output_nonlinearity=tf.nn.tanh,
    )
    # Initialize only the policy's own variables (not the whole graph).
    params = policy.get_params()
    sess.run(tf.variables_initializer(params))

    # Instantiate the garage objects
    baseline = ZeroBaseline(env_spec=env.spec)
    # optimizer = ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5))