    def test_finite_difference_hvp_2x2_non_diagonal(self, a_val, b_val, x_val,
                                                    y_val, vector):
        """Test Hessian-vector product for a function with two variables whose Hessian
        is non-diagonal.
        """
        a_val = [a_val]
        b_val = [b_val]
        vector = np.array([vector], dtype=np.float32)

        policy = HelperPolicy(n_vars=2)
        params = policy.get_params()
        x, y = params[0], params[1]
        a = tf.constant(a_val)
        b = tf.constant(b_val)
        f = a * (x**3) + b * (y**3) + (x**2) * y + (y**2) * x

        expected_hessian = compute_hessian(f, [x, y])
        expected_hvp = tf.matmul(vector, expected_hessian)
        reg_coeff = 1e-5
        hvp = FiniteDifferenceHvp(base_eps=1)

        self.sess.run(tf.compat.v1.global_variables_initializer())
        self.sess.run(x.assign([x_val]))
        self.sess.run(y.assign([y_val]))
        hvp.update_hvp(f, policy, (a, b), reg_coeff)
        hx = hvp.build_eval((np.array(a_val), np.array(b_val)))
        computed_hvp = hx(vector[0])
        expected_hvp = expected_hvp.eval()
        assert np.allclose(computed_hvp, expected_hvp)
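
For reference, the Hessian this test checks against can be written out by hand. A minimal NumPy sketch of it follows; the helper name `analytic_hessian` is ours, not part of the test suite:

import numpy as np

def analytic_hessian(a, b, x, y):
    # Second derivatives of f = a*x**3 + b*y**3 + x**2*y + y**2*x:
    #   f_xx = 6*a*x + 2*y,  f_yy = 6*b*y + 2*x,  f_xy = f_yx = 2*x + 2*y
    return np.array([[6.0 * a * x + 2.0 * y, 2.0 * x + 2.0 * y],
                     [2.0 * x + 2.0 * y, 6.0 * b * y + 2.0 * x]])

# The off-diagonal entries are non-zero, which is what makes this a useful
# test case; the expected HVP above is then vector @ analytic_hessian(...).
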
    def test_finite_difference_hvp(self):
        """Test Hessian-vector product for a function with one variable."""
        policy = HelperPolicy(n_vars=1)
        x = policy.get_params()[0]
        a_val = np.array([5.0])
        # `a` is only a graph input here; its actual value (a_val) is supplied
        # through build_eval() below.
        a = tf.constant([0.0])
        f = a * (x**2)
        expected_hessian = 2 * a_val
        vector = np.array([10.0])
        expected_hvp = expected_hessian * vector
        reg_coeff = 1e-5
        hvp = FiniteDifferenceHvp()

        self.sess.run(tf.compat.v1.global_variables_initializer())
        hvp.update_hvp(f, policy, (a, ), reg_coeff)
        hx = hvp.build_eval(np.array([a_val]))
        computed_hvp = hx(vector)
        assert np.allclose(computed_hvp, expected_hvp)
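
The point of FiniteDifferenceHvp is to approximate the Hessian-vector product without ever forming the Hessian, by differencing gradients along the direction of the vector. A minimal NumPy sketch of that idea (a central-difference illustration under our own assumptions, not the garage implementation):

import numpy as np

def fd_hvp(grad_fn, theta, v, eps=1e-5):
    # H v ≈ (∇f(θ + εv) − ∇f(θ − εv)) / (2ε)
    return (grad_fn(theta + eps * v) - grad_fn(theta - eps * v)) / (2 * eps)

# For f(x) = a * x**2 with a = 5, ∇f(x) = 2*a*x, so H v = 2*a*v.
grad_fn = lambda x: 2.0 * 5.0 * x
print(fd_hvp(grad_fn, np.array([1.0]), np.array([10.0])))  # ~[100.], matching expected_hvp above
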
    def run_task(snapshot_config, *_):

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                with LocalTFRunner(
                        snapshot_config=snapshot_config, max_cpus=4, sess=sess) as local_runner:
                    # Instantiate the example classes
                    sim = ExampleAVSimulator(**sim_args)
                    reward_function = ExampleAVReward(**reward_args)
                    spaces = ExampleAVSpaces(**spaces_args)

                    # Create the environment
                    if 'id' in env_args:
                        env_args.pop('id')
                    env = TfEnv(normalize(ASTEnv(simulator=sim,
                                                 reward_function=reward_function,
                                                 spaces=spaces,
                                                 **env_args
                                                 )))

                    # Instantiate the garage objects
                    policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)

                    baseline = LinearFeatureBaseline(env_spec=env.spec, **baseline_args)

                    optimizer = ConjugateGradientOptimizer
                    optimizer_args = {'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)}

                    algo = PPO(env_spec=env.spec,
                               policy=policy,
                               baseline=baseline,
                               optimizer=optimizer,
                               optimizer_args=optimizer_args,
                               **algo_args)

                    sampler_cls = ASTVectorizedSampler

                    local_runner.setup(
                        algo=algo,
                        env=env,
                        sampler_cls=sampler_cls,
                        sampler_args={"open_loop": False,
                                      "sim": sim,
                                      "reward_function": reward_function,
                                      'n_envs': n_parallel})

                    # Run the experiment
                    local_runner.train(**runner_args)
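
The snippet above only defines run_task; how it gets launched is outside the excerpt. With the garage release these examples target, the usual entry point is garage.experiment.run_experiment, roughly as sketched below (all argument values here are illustrative assumptions, not taken from the original experiment config):

from garage.experiment import run_experiment

# Hypothetical launch of the run_task defined above.
run_experiment(
    run_task,
    snapshot_mode='last',
    seed=1,
    n_parallel=4,
    log_dir='./data/ast_ppo',
)
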
    def test_pickleable(self):
        policy = HelperPolicy(n_vars=1)
        x = policy.get_params()[0]
        a_val = np.array([5.0])
        a = tf.constant([0.0])
        f = a * (x**2)
        vector = np.array([10.0])
        reg_coeff = 1e-5
        hvp = FiniteDifferenceHvp()

        self.sess.run(tf.compat.v1.global_variables_initializer())
        hvp.update_hvp(f, policy, (a, ), reg_coeff)
        hx = hvp.build_eval(np.array([a_val]))
        before_pickle = hx(vector)

        hvp = pickle.loads(pickle.dumps(hvp))
        hvp.update_hvp(f, policy, (a, ), reg_coeff)
        hx = hvp.build_eval(np.array([a_val]))
        after_pickle = hx(vector)
        assert np.equal(before_pickle, after_pickle)
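
HelperPolicy itself is not shown in these excerpts; the tests above only call policy.get_params() on it. A minimal stand-in consistent with that usage could look like the sketch below. Note that FiniteDifferenceHvp also relies on the policy's flat get/set parameter-value helpers internally, so this sketch only covers what the excerpts call directly:

import tensorflow as tf

class HelperPolicy:
    """Minimal stand-in: owns `n_vars` single-element TF variables."""

    def __init__(self, n_vars, name='helper_policy'):
        with tf.compat.v1.variable_scope(name):
            self._params = [
                tf.compat.v1.get_variable('var_%d' % i, shape=(1,))
                for i in range(n_vars)
            ]

    def get_params(self):
        return self._params
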
Example #5
        if trial > args.trial_start:
            old_log_dir = args.log_dir + '/' + str(trial - 1)
            logger.pop_prefix()
            logger.remove_text_output(osp.join(old_log_dir, 'text.txt'))
            logger.remove_tabular_output(osp.join(old_log_dir, 'process.csv'))
        logger.add_text_output(text_log_file)
        logger.add_tabular_output(tabular_log_file)
        logger.push_prefix("[" + args.exp_name + '_trial ' + str(trial) + "]")

        np.random.seed(trial)

        params = policy.get_params()
        sess.run(tf.variables_initializer(params))
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        optimizer = ConjugateGradientOptimizer
        optimizer_args = {'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)}

        top_paths = BPQ.BoundedPriorityQueue(top_k)
        algo = TRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=args.batch_size,
            step_size=args.step_size,
            n_itr=args.n_itr,
            store_paths=True,
            optimizer=optimizer,
            optimizer_args=optimizer_args,
            max_path_length=max_path_length,
            top_paths=top_paths,
            plot=False,
        )
Example #6

    def run_task(snapshot_config, *_):

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                with LocalTFRunner(snapshot_config=snapshot_config,
                                   max_cpus=4,
                                   sess=sess) as local_runner:
                    # Instantiate the example classes
                    sim = ExampleAVSimulator(**sim_args)
                    reward_function = ExampleAVReward(**reward_args)
                    spaces = ExampleAVSpaces(**spaces_args)

                    # Create the environment
                    if 'id' in env_args:
                        env_args.pop('id')
                    env = TfEnv(
                        normalize(
                            ASTEnv(simulator=sim,
                                   reward_function=reward_function,
                                   spaces=spaces,
                                   **env_args)))

                    # Instantiate the garage objects
                    policy = GaussianLSTMPolicy(env_spec=env.spec,
                                                **policy_args)

                    baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                     **baseline_args)

                    optimizer = ConjugateGradientOptimizer
                    optimizer_args = {
                        'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                    }

                    algo = PPO(env_spec=env.spec,
                               policy=policy,
                               baseline=baseline,
                               optimizer=optimizer,
                               optimizer_args=optimizer_args,
                               **algo_args)

                    sampler_cls = ASTVectorizedSampler
                    sampler_args['sim'] = sim
                    sampler_args['reward_function'] = reward_function

                    local_runner.setup(algo=algo,
                                       env=env,
                                       sampler_cls=sampler_cls,
                                       sampler_args=sampler_args)

                    # Run the experiment
                    local_runner.train(**runner_args)

                    if save_expert_trajectory:
                        load_convert_and_save_drl_expert_trajectory(
                            last_iter_filename=os.path.join(
                                run_experiment_args['log_dir'], 'itr_' +
                                str(runner_args['n_epochs'] - 1) + '.pkl'),
                            expert_trajectory_filename=os.path.join(
                                run_experiment_args['log_dir'],
                                'expert_trajectory.pkl'))

                    print('done!')
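
runner_args, like the other *_args dictionaries in this example, is defined outside the excerpt. The only thing the code above tells us is that it carries an 'n_epochs' entry (used to locate the last itr_*.pkl snapshot); a purely illustrative stand-in might be:

# Illustrative values only -- the real dict comes from the experiment config.
runner_args = {
    'n_epochs': 101,    # referenced above via runner_args['n_epochs']
    'batch_size': 500,  # assumed: LocalTFRunner.train() also accepts a batch size
}
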
Example #7
    def run_task(snapshot_config, *_):

        config = tf.ConfigProto(device_count={'GPU': 0})
        # config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                # env1 = GoExploreASTEnv(open_loop=False,
                #                              blackbox_sim_state=True,
                #                              fixed_init_state=True,
                #                              s_0=[-0.5, -4.0, 1.0, 11.17, -35.0],
                #                              simulator=sim,
                #                              reward_function=reward_function,
                #                              spaces=spaces
                #                              )
                env1 = gym.make(id=env_args.pop('id'),
                                simulator=sim,
                                reward_function=reward_function,
                                spaces=spaces,
                                **env_args)
                env2 = normalize(env1)
                env = TfEnv(env2)

                sampler_cls = BatchSampler
                # sampler_args = {'n_envs': n_parallel}
                sampler_args = {}
                # expert_trajectory_file = log_dir + '/expert_trajectory.p'
                # with open(expert_trajectory_file, 'rb') as f:
                #     expert_trajectory = pickle.load(f)

                # Run the backward algorithm to robustify the policy
                with LocalTFRunner(snapshot_config=snapshot_config,
                                   sess=sess) as local_runner:

                    policy = GaussianLSTMPolicy(env_spec=env.spec,
                                                **policy_args)
                    # name='lstm_policy',
                    # env_spec=env.spec,
                    # hidden_dim=64,
                    # use_peepholes=True)

                    baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                     **baseline_args)

                    optimizer = ConjugateGradientOptimizer
                    optimizer_args = {
                        'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                    }

                    algo = BackwardAlgorithm(env=env,
                                             env_spec=env.spec,
                                             policy=policy,
                                             baseline=baseline,
                                             optimizer=optimizer,
                                             optimizer_args=optimizer_args,
                                             **algo_args)
                    # expert_trajectory=expert_trajectory[-1],
                    # epochs_per_step = 10,
                    # scope=None,
                    # max_path_length=max_path_length,
                    # discount=discount,
                    # gae_lambda=1,
                    # center_adv=True,
                    # positive_adv=False,
                    # fixed_horizon=False,
                    # pg_loss='surrogate_clip',
                    # lr_clip_range=1.0,
                    # max_kl_step=1.0,

                    # policy_ent_coeff=0.0,
                    # use_softplus_entropy=False,
                    # use_neg_logli_entropy=False,
                    # stop_entropy_gradient=False,
                    # entropy_method='no_entropy',
                    # name='PPO',
                    # )

                    local_runner.setup(algo=algo,
                                       env=env,
                                       sampler_cls=sampler_cls,
                                       sampler_args=sampler_args)

                    results = local_runner.train(**runner_args)
                    # pdb.set_trace()
                    print('done')
                    log_dir = run_experiment_args['log_dir']
                    with open(log_dir + '/paths.gz', 'wb') as f:
                        try:
                            compress_pickle.dump(results,
                                                 f,
                                                 compression="gzip",
                                                 set_default_extension=False)
                        except MemoryError:
                            # The full result set was too large to pickle at
                            # once; fall back to saving each path separately.
                            print('1')
                            # pdb.set_trace()
                            for idx, result in enumerate(results):
                                with open(
                                        log_dir + '/path_' + str(idx) + '.gz',
                                        'wb') as ff:
                                    try:
                                        compress_pickle.dump(
                                            result,
                                            ff,
                                            compression="gzip",
                                            set_default_extension=False)
                                    except MemoryError:
                                        print('2')
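
The compressed results written above can be read back later with compress_pickle. A small sketch, assuming the same log_dir and that the full paths.gz (rather than the per-path fallback files) was written:

import compress_pickle

# Reload the training results saved by the example above.
with open(log_dir + '/paths.gz', 'rb') as f:
    results = compress_pickle.load(f, compression="gzip")
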