コード例 #1
0
from Algo import parallel_sampler
# Bring up the sampler (n_parallel=1) and seed it with 0 before any of the
# policy / algorithm objects are constructed.
parallel_sampler.initialize(n_parallel=1)
parallel_sampler.set_seed(0)

# The policy is configured from the wrapped environment's ``params`` object;
# the baseline only needs the environment spec.
policy = QMDPPolicy(
    env_spec=env.spec,
    name="QMDP",
    qmdp_param=env._wrapped_env.params,
)
baseline = LinearFeatureBaseline(env_spec=env.spec)

with tf.Session() as sess:
    # Settings forwarded verbatim to VPG_t as keyword arguments. They are
    # collected here so the constructor call below stays short; insertion
    # order matches the order in which they are passed.
    vpg_options = dict(
        batch_size=2048,
        # Path length cap is read from the environment's own parameters.
        max_path_length=env._wrapped_env.params['traj_limit'],
        n_itr=20000,
        discount=0.95,
        step_size=0.01,
        record_rewards=True,
        transfer=False,
        env_path=log_dir + '/TrainEnv',
        env_num=500,
        env_keep_itr=10,
    )

    algo = VPG_t(env=env, policy=policy, baseline=baseline, **vpg_options)

    # Training runs inside the session opened above.
    algo.train(sess)