Esempio n. 1
0
def run_task(*_):
    initial_goal = np.array([0.6, -0.1, 0.80])

    rospy.init_node('trpo_real_sawyer_pnp_exp', anonymous=True)

    pnp_env = TheanoEnv(
        PickAndPlaceEnv(initial_goal,
                        initial_joint_pos=INITIAL_ROBOT_JOINT_POS,
                        simulated=False))

    rospy.on_shutdown(pnp_env.shutdown)

    pnp_env.initialize()

    env = pnp_env

    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=100,
        discount=0.99,
        step_size=0.01,
        plot=False,
        force_batch_sampler=True,
    )
    algo.train()
        logger.set_snapshot_gap(args.snapshot_gap)
        logger.log_parameters_lite(params_log_file, args)
        if trial > 0:
            old_log_dir = args.log_dir + '/' + str(trial - 1)
            logger.pop_prefix()
            logger.remove_text_output(osp.join(old_log_dir, 'text.txt'))
            logger.remove_tabular_output(osp.join(old_log_dir, 'process.csv'))
        logger.add_text_output(text_log_file)
        logger.add_tabular_output(tabular_log_file)
        logger.push_prefix("[" + args.exp_name + '_trial ' + str(trial) + "]")

        np.random.seed(trial)

        params = policy.get_params()
        sess.run(tf.variables_initializer(params))
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        # optimizer = ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5))

        top_paths = BPQ.BoundedPriorityQueue(top_k)
        algo = TRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=args.batch_size,
            step_size=args.step_size,
            n_itr=args.n_itr,
            store_paths=True,
            # optimizer= optimizer,
            max_path_length=max_path_length,
            top_paths=top_paths,
            plot=False,