def run_task(snapshot_config, *_):

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                with LocalTFRunner(
                        snapshot_config=snapshot_config, max_cpus=4, sess=sess) as local_runner:
                    # Instantiate the example classes
                    sim = ExampleAVSimulator(**sim_args)
                    reward_function = ExampleAVReward(**reward_args)
                    spaces = ExampleAVSpaces(**spaces_args)

                    # Create the environment
                    if 'id' in env_args:
                        env_args.pop('id')
                    env = TfEnv(normalize(ASTEnv(simulator=sim,
                                                 reward_function=reward_function,
                                                 spaces=spaces,
                                                 **env_args
                                                 )))

                    # Instantiate the garage objects
                    policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)

                    baseline = LinearFeatureBaseline(env_spec=env.spec, **baseline_args)

                    optimizer = ConjugateGradientOptimizer
                    optimizer_args = {'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)}

                    algo = PPO(env_spec=env.spec,
                               policy=policy,
                               baseline=baseline,
                               optimizer=optimizer,
                               optimizer_args=optimizer_args,
                               **algo_args)

                    sampler_cls = ASTVectorizedSampler

                    local_runner.setup(
                        algo=algo,
                        env=env,
                        sampler_cls=sampler_cls,
                        sampler_args={"open_loop": False,
                                      "sim": sim,
                                      "reward_function": reward_function,
                                      'n_envs': n_parallel})

                    # Run the experiment
                    local_runner.train(**runner_args)
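The run_task above closes over several argument dictionaries and n_parallel supplied by the surrounding launcher script. A minimal, hypothetical set of those closure variables, with values borrowed from other examples in this listing (illustrative assumptions, not toolbox defaults):

# Hypothetical launcher-side values for the names run_task closes over.
# Everything below is an illustrative assumption.
n_parallel = 4
sim_args = {'blackbox_sim_state': True,
            'open_loop': False,
            'fixed_initial_state': True,
            'max_path_length': 50}
reward_args = {}
spaces_args = {}
env_args = {'id': 'ast_toolbox:GoExploreAST-v1',  # popped before ASTEnv is built
            'blackbox_sim_state': True,
            'fixed_init_state': True,
            's_0': [-0.5, -4.0, 1.0, 11.17, -35.0]}
policy_args = {'name': 'lstm_policy', 'hidden_dim': 64}
baseline_args = {}
algo_args = {'max_path_length': 50, 'discount': 0.99}
runner_args = {'n_epochs': 1, 'batch_size': 4000, 'plot': False}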
Example #2
def run_task(snapshot_config, *_):

    with LocalTFRunner(snapshot_config=snapshot_config, max_cpus=1) as runner:

        # Instantiate the example classes
        sim = ExampleAVSimulator()
        reward_function = ExampleAVReward()
        spaces = ExampleAVSpaces()

        # Create the environment
        env = TfEnv(
            normalize(
                ASTEnv(blackbox_sim_state=True,
                       fixed_init_state=True,
                       s_0=[-0.5, -4.0, 1.0, 11.17, -35.0],
                       simulator=sim,
                       reward_function=reward_function,
                       spaces=spaces)))

        # Instantiate the garage objects
        policy = GaussianLSTMPolicy(name='lstm_policy',
                                    env_spec=env.spec,
                                    hidden_dim=64)

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = TRPO(env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=max_path_length,
                    discount=0.99,
                    kl_constraint='soft',
                    max_kl_step=0.01)

        sampler_cls = ASTVectorizedSampler

        runner.setup(algo=algo,
                     env=env,
                     sampler_cls=sampler_cls,
                     sampler_args={
                         "sim": sim,
                         "reward_function": reward_function
                     })

        runner.train(n_epochs=1, batch_size=4000, plot=False)

        print("Installation successfully validated")
def test_example_av_spaces():
    space = ExampleAVSpaces(num_peds=2)

    assert isinstance(space.action_space, Box)
    assert isinstance(space.observation_space, Box)
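A hedged follow-up check on the same spaces, relying only on standard gym Box behaviour (a sampled point should lie inside its own space):

def test_example_av_spaces_samples_are_contained():
    space = ExampleAVSpaces(num_peds=2)

    assert space.action_space.contains(space.action_space.sample())
    assert space.observation_space.contains(space.observation_space.sample())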
    def run_task(snapshot_config, *_):

        seed = 0
        # top_k = 10
        np.random.seed(seed)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                with LocalTFRunner(snapshot_config=snapshot_config,
                                   max_cpus=4,
                                   sess=sess) as local_runner:

                    # Instantiate the example classes
                    sim = ExampleAVSimulator(**sim_args)
                    reward_function = ExampleAVReward(**reward_args)
                    spaces = ExampleAVSpaces(**spaces_args)

                    # Create the environment
                    if 'id' in env_args:
                        env_args.pop('id')
                    env = ASTEnv(simulator=sim,
                                 reward_function=reward_function,
                                 spaces=spaces,
                                 **env_args)
                    env = TfEnv(env)

                    policy = ContinuousMLPPolicy(name='ast_agent',
                                                 env_spec=env.spec,
                                                 **policy_args)

                    params = policy.get_params()
                    sess.run(tf.variables_initializer(params))

                    # Instantiate the garage objects
                    baseline = ZeroBaseline(env_spec=env.spec)

                    top_paths = BPQ.BoundedPriorityQueue(**bpq_args)

                    sampler_cls = ASTVectorizedSampler
                    sampler_args = {
                        "open_loop": False,
                        "sim": sim,
                        "reward_function": reward_function,
                        "n_envs": n_parallel
                    }

                    if ga_type == 'ga':
                        print('ga')
                        algo = GA(env_spec=env.spec,
                                  policy=policy,
                                  baseline=baseline,
                                  top_paths=top_paths,
                                  **algo_args)
                    elif ga_type == 'gasm':
                        print('gasm')
                        algo = GASM(env_spec=env.spec,
                                    policy=policy,
                                    baseline=baseline,
                                    top_paths=top_paths,
                                    **algo_args)
                    else:
                        raise NotImplementedError

                    local_runner.setup(algo=algo,
                                       env=env,
                                       sampler_cls=sampler_cls,
                                       sampler_args=sampler_args)

                    # Run the experiment
                    local_runner.train(**runner_args)
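The ga_type branch above passes identical keyword arguments to GA and GASM, so it can be collapsed into a lookup table. A sketch (GA and GASM are assumed to be imported as in the example):

def make_ga_algo(ga_type, env, policy, baseline, top_paths, **algo_args):
    # Map the configuration string to the corresponding algorithm class.
    ga_classes = {'ga': GA, 'gasm': GASM}
    if ga_type not in ga_classes:
        raise NotImplementedError('unknown ga_type: {}'.format(ga_type))
    return ga_classes[ga_type](env_spec=env.spec,
                               policy=policy,
                               baseline=baseline,
                               top_paths=top_paths,
                               **algo_args)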
    def run_task(snapshot_config, *_):

        config = tf.ConfigProto(device_count={'GPU': 0})
        # config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                # blackbox_sim_state=True,
                # open_loop=False,
                # fixed_initial_state=True,
                # max_path_length=max_path_length)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                # env1 = GoExploreASTEnv(open_loop=False,
                #                              blackbox_sim_state=True,
                #                              fixed_init_state=True,
                #                              s_0=[-0.5, -4.0, 1.0, 11.17, -35.0],
                #                              simulator=sim,
                #                              reward_function=reward_function,
                #                              spaces=spaces
                #                              )
                # env1 = gym.make('ast_toolbox:GoExploreAST-v1',
                #                 blackbox_sim_state=True,
                #                 open_loop=False,
                #                 fixed_init_state=True,
                #                 s_0=s_0,
                #                 simulator=sim,
                #                 reward_function=reward_function,
                #                 spaces=spaces
                #                 )
                env1 = gym.make(id=env_args.pop('id'),
                                simulator=sim,
                                reward_function=reward_function,
                                spaces=spaces,
                                **env_args)
                env2 = normalize(env1)
                env = TfEnv(env2)

                # Instantiate the garage objects
                policy = GoExplorePolicy(env_spec=env.spec)

                baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                 **baseline_args)

                algo = GoExplore(env_spec=env.spec,
                                 env=env,
                                 policy=policy,
                                 baseline=baseline,
                                 **algo_args)
                #     db_filename=db_filename,
                #     max_db_size=max_db_size,
                #     env=env,
                #
                #     policy=policy,
                #     baseline=baseline,
                #     # robust_policy=robust_policy,
                #     # robust_baseline=robust_baseline,
                #     max_path_length=max_path_length,
                #     discount=discount,
                #     save_paths_gap=1,
                #     save_paths_path=log_dir,
                #     # whole_paths=whole_paths
                # )

                sampler_cls = BatchSampler
                # sampler_args = {'n_envs': n_parallel}
                sampler_args = {}

                with LocalTFRunner(snapshot_config=snapshot_config,
                                   sess=sess) as local_runner:
                    local_runner.setup(algo=algo,
                                       env=env,
                                       sampler_cls=sampler_cls,
                                       sampler_args=sampler_args)

                    # local_runner.setup(
                    #     algo=algo,
                    #     env=env,
                    #     sampler_cls=sampler_cls,
                    #     sampler_args={"sim": sim,
                    #                   "reward_function": reward_function})

                    # Run the experiment
                    best_cell = local_runner.train(
                        **runner_args
                    )  # previously: n_epochs=n_itr, batch_size=batch_size, plot=False

                    log_dir = run_experiment_args['log_dir']
                    db_filename = algo_args['db_filename']
                    s_0 = env_args['s_0']

                    pool_DB = db.DB()
                    pool_DB.open(db_filename + '_pool.dat',
                                 dbname=None,
                                 dbtype=db.DB_HASH,
                                 flags=db.DB_CREATE)
                    d_pool = shelve.Shelf(pool_DB,
                                          protocol=pickle.HIGHEST_PROTOCOL)
                    # pdb.set_trace()
                    print(best_cell)
                    # Walk the cell pool backwards from the best cell to the root,
                    # rebuilding the elite path one step at a time.
                    temp = best_cell
                    paths = []
                    while temp.parent is not None:
                        print(temp.observation)
                        # All but the first element of the stored observation are
                        # interpreted as the action that led to this cell, scaled
                        # down by 1000.
                        action = temp.observation[1:].astype(np.float32) / 1000
                        paths.append({
                            'state': temp.state,
                            'reward': temp.reward,
                            'action': action,
                            'observation': np.array(s_0)
                        })
                        temp = d_pool[temp.parent]
                    # The root cell has no parent; note that its entry reuses the
                    # last action computed inside the loop above.
                    print(temp.observation)
                    paths.append({
                        'state': temp.state,
                        'reward': temp.reward,
                        'action': action,
                        'observation': np.array(s_0)
                    })
                    # pdb.set_trace()
                    d_pool.close()

                    with open(log_dir + '/expert_trajectory.p', 'wb') as f:
                        pickle.dump([paths], f)
                    print('done!')
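A hedged helper for reading the file written above. Per the pickle.dump([paths], f) call, it holds a single trajectory wrapped in an outer list, with steps ordered from the best cell back to the root:

import pickle

def load_expert_trajectory(log_dir):
    # Returns the list of step dicts ({'state', 'reward', 'action', 'observation'}).
    with open(log_dir + '/expert_trajectory.p', 'rb') as f:
        trajectories = pickle.load(f)
    return trajectories[0]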
    def run_task(snapshot_config, *_):

        config = tf.ConfigProto(device_count={'GPU': 0})
        # config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                with LocalTFRunner(snapshot_config=snapshot_config,
                                   sess=sess) as runner:

                    # Instantiate the example classes
                    # sim = ExampleAVSimulator()
                    g = 9.8  # acceleration due to gravity

                    # this is y
                    lat_params = rss.LateralParams(
                        0,  # ρ
                        0.1 * g,  # a_lat_max_acc
                        0.05 * g,  # a_lat_min_brake
                        1.4  # Buffer distance
                    )

                    # this is x
                    long_params = rss.LongitudinalParams(
                        0,  # ρ
                        0.7 * g,  # a_max_brake
                        0.1 * g,  # a_max_acc
                        0.7 * g,  # a_min_brake1
                        0.7 * g,  # a_min_brake2
                        2.5,  # Buffer
                    )
                    sim = AVRSSSimulator(lat_params, long_params)
                    reward_function = HeuristicReward(
                        PedestrianNoiseGaussian(1, 1, 0.2, .01),
                        np.array([-10000, -1000, 0]))
                    # reward_function = ExampleAVReward()
                    spaces = ExampleAVSpaces()

                    # Create the environment
                    # env1 = GoExploreASTEnv(open_loop=False,
                    #                              blackbox_sim_state=True,
                    #                              fixed_init_state=True,
                    #                              s_0=[-0.5, -4.0, 1.0, 11.17, -35.0],
                    #                              simulator=sim,
                    #                              reward_function=reward_function,
                    #                              spaces=spaces
                    #                              )
                    s_0 = [-1.0, -2.0, 1.0, 11.17, -35.0]
                    env1 = gym.make('ast_toolbox:GoExploreAST-v1',
                                    open_loop=False,
                                    action_only=True,
                                    fixed_init_state=True,
                                    s_0=s_0,
                                    simulator=sim,
                                    reward_function=reward_function,
                                    spaces=spaces)
                    env2 = normalize(env1)
                    env = TfEnv(env2)

                    # Instantiate the garage objects
                    policy = GoExplorePolicy(env_spec=env.spec)

                    baseline = LinearFeatureBaseline(env_spec=env.spec)

                    algo = GoExplore(
                        db_filename=db_filename,
                        max_db_size=max_db_size,
                        env=env,
                        env_spec=env.spec,
                        policy=policy,
                        baseline=baseline,
                        max_path_length=max_path_length,
                        discount=discount,
                        # whole_paths=whole_paths
                    )

                    sampler_cls = BatchSampler
                    sampler_args = {'n_envs': n_parallel}

                    runner.setup(algo=algo,
                                 env=env,
                                 sampler_cls=sampler_cls,
                                 sampler_args=sampler_args)

                    # runner.setup(
                    #     algo=algo,
                    #     env=env,
                    #     sampler_cls=sampler_cls,
                    #     sampler_args={"sim": sim,
                    #                   "reward_function": reward_function})

                    # Run the experiment
                    paths = runner.train(n_epochs=n_itr,
                                         batch_size=batch_size,
                                         plot=False)
                    print(paths)
                    best_traj = paths.trajectory * np.array([
                        1, 1 / 1000, 1 / 1000, 1 / 1000, 1 / 1000, 1 / 1000,
                        1 / 1000
                    ])
                    peds = sim._peds
                    car = np.expand_dims(sim._car, axis=0)
                    car_obs = sim._car_obs
                    for step in range(best_traj.shape[0]):
                        sim.step(action=best_traj[step, 1:], open_loop=False)
                        peds = np.concatenate((peds, sim._peds), axis=0)
                        car = np.concatenate(
                            (car, np.expand_dims(sim._car, axis=0)), axis=0)
                        car_obs = np.concatenate((car_obs, sim._car_obs),
                                                 axis=0)

                    import matplotlib.pyplot as plt
                    plt.scatter(car[:, 2], car[:, 3])
                    plt.scatter(peds[:, 2], peds[:, 3])
                    plt.scatter(car_obs[:, 2], car_obs[:, 3])
                    pdb.set_trace()
                    print('done!')
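Rather than stopping in pdb, the rollout scatter plots above could be written to disk. A minimal sketch; the legend labels are assumptions about which array is which, and columns 2 and 3 are taken to be x/y positions exactly as in the plotting code above:

import matplotlib.pyplot as plt

def save_rollout_plot(car, peds, car_obs, filename='rollout.png'):
    fig, ax = plt.subplots()
    ax.scatter(car[:, 2], car[:, 3], label='car')
    ax.scatter(peds[:, 2], peds[:, 3], label='pedestrians')
    ax.scatter(car_obs[:, 2], car_obs[:, 3], label='car observations')
    ax.legend()
    fig.savefig(filename)
    plt.close(fig)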
    def run_task(snapshot_config, *_):

        seed = 0
        # top_k = 10
        np.random.seed(seed)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                with LocalTFRunner(snapshot_config=snapshot_config,
                                   max_cpus=4,
                                   sess=sess) as local_runner:

                    # Instantiate the example classes
                    sim = ExampleAVSimulator(**sim_args)
                    reward_function = ExampleAVReward(**reward_args)
                    spaces = ExampleAVSpaces(**spaces_args)

                    # Create the environment
                    if 'id' in env_args:
                        env_args.pop('id')
                    env = ASTEnv(simulator=sim,
                                 reward_function=reward_function,
                                 spaces=spaces,
                                 **env_args)

                    top_paths = BPQ.BoundedPriorityQueue(**bpq_args)

                    if mcts_type == 'mcts':
                        print('mcts')
                        algo = MCTS(env=env, top_paths=top_paths, **algo_args)
                    elif mcts_type == 'mctsbv':
                        print('mctsbv')
                        algo = MCTSBV(env=env,
                                      top_paths=top_paths,
                                      **algo_args)
                    elif mcts_type == 'mctsrs':
                        print('mctsrs')
                        algo = MCTSRS(env=env,
                                      top_paths=top_paths,
                                      **algo_args)
                    else:
                        raise NotImplementedError

                    sampler_cls = ASTVectorizedSampler

                    local_runner.setup(algo=algo,
                                       env=env,
                                       sampler_cls=sampler_cls,
                                       sampler_args={
                                           "open_loop": False,
                                           "sim": sim,
                                           "reward_function": reward_function,
                                           "n_envs": n_parallel
                                       })

                    # Run the experiment
                    local_runner.train(**runner_args)

                    log_dir = run_experiment_args['log_dir']
                    with open(log_dir + '/best_actions.p', 'rb') as f:
                        best_actions = pickle.load(f)
                    expert_trajectories = []
                    for actions in best_actions:
                        sim.reset(s_0=env_args['s_0'])
                        path = []
                        for action in actions:
                            obs = sim.step(action)
                            state = sim.clone_state()
                            reward = reward_function.give_reward(
                                action=action, info=sim.get_reward_info())
                            path.append({
                                'state': state,
                                'reward': reward,
                                'action': action,
                                'observation': obs
                            })
                        expert_trajectories.append(path)
                    with open(log_dir + '/expert_trajectory.p', 'wb') as f:
                        pickle.dump(expert_trajectories, f)
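The action-replay loop above can be factored into a standalone helper; a later example calls load_convert_and_save_mcts_expert_trajectory for the same purpose. This sketch simply wraps the loop shown here:

def convert_best_actions_to_expert_trajectories(best_actions, sim, reward_function, s_0):
    expert_trajectories = []
    for actions in best_actions:
        sim.reset(s_0=s_0)  # replay each elite action sequence from the initial state
        path = []
        for action in actions:
            obs = sim.step(action)
            path.append({'state': sim.clone_state(),
                         'reward': reward_function.give_reward(
                             action=action, info=sim.get_reward_info()),
                         'action': action,
                         'observation': obs})
        expert_trajectories.append(path)
    return expert_trajectories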
    def __init__(self,
                 open_loop=True,
                 blackbox_sim_state=True,
                 fixed_init_state=False,
                 s_0=None,
                 simulator=None,
                 reward_function=None,
                 spaces=None):

        # gym_env = gym.make('ast_toolbox:GoExploreAST-v0', {'test':'test string'})
        # pdb.set_trace()
        # super().__init__(gym_env)
        # Constant hyper-params -- set by user
        self.open_loop = open_loop
        self.blackbox_sim_state = blackbox_sim_state  # is this redundant?
        self.spaces = spaces
        if spaces is None:
            self.spaces = ExampleAVSpaces()
        # These are set by reset, not the user
        self._done = False
        self._reward = 0.0
        self._info = {}
        self._step = 0
        self._action = None
        self._actions = []
        self._first_step = True
        self.reward_range = (-float('inf'), float('inf'))
        self.metadata = None
        self.spec._entry_point = []
        self._cum_reward = 0.0
        self.root_action = None
        self.sample_limit = 10000

        self.simulator = simulator
        if self.simulator is None:
            self.simulator = ExampleAVSimulator()

        if s_0 is None:
            self._init_state = self.observation_space.sample()
        else:
            self._init_state = s_0
        self._fixed_init_state = fixed_init_state

        self.reward_function = reward_function
        if self.reward_function is None:
            self.reward_function = ExampleAVReward()

        if hasattr(self.simulator, "vec_env_executor") and callable(getattr(self.simulator, "vec_env_executor")):
            self.vectorized = True
        else:
            self.vectorized = False
        # super().__init__(self)
        # Always call Serializable constructor last
        self.params_set = False
        self.db_filename = 'database.dat'
        self.key_list = []
        self.max_value = 0
        self.robustify_state = []
        self.robustify = False

        Parameterized.__init__(self)
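A minimal construction sketch for the environment above, assuming this __init__ belongs to the GoExploreASTEnv referenced elsewhere in this listing. When simulator, reward_function, or spaces are omitted, the example AV classes are created as fallbacks:

# Hypothetical usage; the class name and argument values are assumptions.
env = GoExploreASTEnv(open_loop=False,
                      blackbox_sim_state=True,
                      fixed_init_state=True,
                      s_0=[-0.5, -4.0, 1.0, 11.17, -35.0])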
Example #9
    def run_task(snapshot_config, *_):

        seed = 0
        # top_k = 10
        np.random.seed(seed)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                with LocalTFRunner(
                        snapshot_config=snapshot_config, max_cpus=4, sess=sess) as local_runner:

                    # Instantiate the example classes
                    sim = ExampleAVSimulator(**sim_args)
                    reward_function = ExampleAVReward(**reward_args)
                    spaces = ExampleAVSpaces(**spaces_args)

                    # Create the environment
                    if 'id' in env_args:
                        env_args.pop('id')
                    env = ASTEnv(simulator=sim,
                                 reward_function=reward_function,
                                 spaces=spaces,
                                 **env_args
                                 )

                    top_paths = BPQ.BoundedPriorityQueue(**bpq_args)

                    if mcts_type == 'mcts':
                        print('mcts')
                        algo = MCTS(env=env,
                                    top_paths=top_paths,
                                    **algo_args)
                    elif mcts_type == 'mctsbv':
                        print('mctsbv')
                        algo = MCTSBV(env=env,
                                      top_paths=top_paths,
                                      **algo_args)
                    elif mcts_type == 'mctsrs':
                        print('mctsrs')
                        algo = MCTSRS(env=env,
                                      top_paths=top_paths,
                                      **algo_args)
                    else:
                        raise NotImplementedError

                    sampler_cls = ASTVectorizedSampler
                    sampler_args['sim'] = sim
                    sampler_args['reward_function'] = reward_function

                    local_runner.setup(algo=algo,
                                       env=env,
                                       sampler_cls=sampler_cls,
                                       sampler_args=sampler_args)

                    # Run the experiment
                    local_runner.train(**runner_args)

                    log_dir = run_experiment_args['log_dir']

                    if save_expert_trajectory:
                        load_convert_and_save_mcts_expert_trajectory(
                            best_actions_filename=log_dir + '/best_actions.p',
                            expert_trajectory_filename=log_dir + '/expert_trajectory.p',
                            sim=sim,
                            s_0=env_args['s_0'],
                            reward_function=reward_function)
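In this variant sampler_args arrives from the enclosing scope and is only augmented with the simulator and reward function. Judging from the other examples in this listing, it plausibly starts out as follows (an assumption, not a toolbox default):

n_parallel = 4  # assumed number of vectorized environments
sampler_args = {'open_loop': False,  # feed sampled actions back through the simulator
                'n_envs': n_parallel}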
    def run_task(snapshot_config, *_):

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                with LocalTFRunner(snapshot_config=snapshot_config,
                                   max_cpus=4,
                                   sess=sess) as local_runner:
                    # Instantiate the example classes
                    sim = ExampleAVSimulator(**sim_args)
                    reward_function = ExampleAVReward(**reward_args)
                    spaces = ExampleAVSpaces(**spaces_args)

                    # Create the environment
                    if 'id' in env_args:
                        env_args.pop('id')
                    env = TfEnv(
                        normalize(
                            ASTEnv(simulator=sim,
                                   reward_function=reward_function,
                                   spaces=spaces,
                                   **env_args)))

                    # Instantiate the garage objects
                    policy = GaussianLSTMPolicy(env_spec=env.spec,
                                                **policy_args)

                    baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                     **baseline_args)

                    optimizer = ConjugateGradientOptimizer
                    optimizer_args = {
                        'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                    }

                    algo = PPO(env_spec=env.spec,
                               policy=policy,
                               baseline=baseline,
                               optimizer=optimizer,
                               optimizer_args=optimizer_args,
                               **algo_args)

                    sampler_cls = ASTVectorizedSampler
                    sampler_args['sim'] = sim
                    sampler_args['reward_function'] = reward_function

                    local_runner.setup(algo=algo,
                                       env=env,
                                       sampler_cls=sampler_cls,
                                       sampler_args=sampler_args)

                    # Run the experiment
                    local_runner.train(**runner_args)

                    if save_expert_trajectory:
                        load_convert_and_save_drl_expert_trajectory(
                            last_iter_filename=os.path.join(
                                run_experiment_args['log_dir'], 'itr_' +
                                str(runner_args['n_epochs'] - 1) + '.pkl'),
                            expert_trajectory_filename=os.path.join(
                                run_experiment_args['log_dir'],
                                'expert_trajectory.pkl'))

                    print('done!')
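The last-iteration snapshot filename assembled above follows garage's itr_<N>.pkl naming. A small sketch, assuming a snapshot mode that writes one itr_<N>.pkl file per epoch:

import os

def last_snapshot_path(log_dir, n_epochs):
    # Epochs are zero-indexed, so the final snapshot is itr_(n_epochs - 1).pkl.
    return os.path.join(log_dir, 'itr_' + str(n_epochs - 1) + '.pkl')

# Usage inside run_task:
#   last_iter_filename = last_snapshot_path(run_experiment_args['log_dir'],
#                                           runner_args['n_epochs'])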
Example #11
    def run_task(snapshot_config, *_):

        config = tf.ConfigProto(device_count={'GPU': 0})
        # config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                # env1 = GoExploreASTEnv(open_loop=False,
                #                              blackbox_sim_state=True,
                #                              fixed_init_state=True,
                #                              s_0=[-0.5, -4.0, 1.0, 11.17, -35.0],
                #                              simulator=sim,
                #                              reward_function=reward_function,
                #                              spaces=spaces
                #                              )
                env1 = gym.make(id=env_args.pop('id'),
                                simulator=sim,
                                reward_function=reward_function,
                                spaces=spaces,
                                **env_args)
                env2 = normalize(env1)
                env = TfEnv(env2)

                sampler_cls = BatchSampler
                # sampler_args = {'n_envs': n_parallel}
                sampler_args = {}
                # expert_trajectory_file = log_dir + '/expert_trajectory.p'
                # with open(expert_trajectory_file, 'rb') as f:
                #     expert_trajectory = pickle.load(f)

                #
                # #Run backwards algorithm to robustify
                with LocalTFRunner(snapshot_config=snapshot_config,
                                   sess=sess) as local_runner:

                    policy = GaussianLSTMPolicy(env_spec=env.spec,
                                                **policy_args)
                    # name='lstm_policy',
                    # env_spec=env.spec,
                    # hidden_dim=64,
                    # use_peepholes=True)

                    baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                     **baseline_args)

                    optimizer = ConjugateGradientOptimizer
                    optimizer_args = {
                        'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                    }

                    algo = BackwardAlgorithm(env=env,
                                             env_spec=env.spec,
                                             policy=policy,
                                             baseline=baseline,
                                             optimizer=optimizer,
                                             optimizer_args=optimizer_args,
                                             **algo_args)
                    # expert_trajectory=expert_trajectory[-1],
                    # epochs_per_step = 10,
                    # scope=None,
                    # max_path_length=max_path_length,
                    # discount=discount,
                    # gae_lambda=1,
                    # center_adv=True,
                    # positive_adv=False,
                    # fixed_horizon=False,
                    # pg_loss='surrogate_clip',
                    # lr_clip_range=1.0,
                    # max_kl_step=1.0,

                    # policy_ent_coeff=0.0,
                    # use_softplus_entropy=False,
                    # use_neg_logli_entropy=False,
                    # stop_entropy_gradient=False,
                    # entropy_method='no_entropy',
                    # name='PPO',
                    # )

                    local_runner.setup(algo=algo,
                                       env=env,
                                       sampler_cls=sampler_cls,
                                       sampler_args=sampler_args)

                    results = local_runner.train(**runner_args)
                    # pdb.set_trace()
                    print('done')
                    log_dir = run_experiment_args['log_dir']
                    with open(log_dir + '/paths.gz', 'wb') as f:
                        try:
                            compress_pickle.dump(results,
                                                 f,
                                                 compression="gzip",
                                                 set_default_extension=False)
                        except MemoryError:
                            print('MemoryError while saving all paths at once; falling back to per-path files')
                            # pdb.set_trace()
                            for idx, result in enumerate(results):
                                with open(
                                        log_dir + '/path_' + str(idx) + '.gz',
                                        'wb') as ff:
                                    try:
                                        compress_pickle.dump(
                                            result,
                                            ff,
                                            compression="gzip",
                                            set_default_extension=False)
                                    except MemoryError:
                                        print('MemoryError while saving path {}; skipping it'.format(idx))
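A hedged counterpart to the compressed dump above; compress_pickle.load mirrors the dump call (gzip compression, no default extension appended to the filename):

import compress_pickle

def load_backward_algorithm_paths(log_dir):
    with open(log_dir + '/paths.gz', 'rb') as f:
        return compress_pickle.load(f, compression='gzip',
                                    set_default_extension=False)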