def test_all_gym_envs_pickleable(self, spec):
    env = TfEnv(env_name=spec.id)
    step_env_with_gym_quirks(self,
                             env,
                             spec,
                             n=1,
                             render=True,
                             serialize_env=True)
    env.close()
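# These gym-compatibility tests take `spec` as an argument; a minimal
# sketch of how they might be parametrized over the gym registry (the
# class name and parametrize setup are assumptions, not from the source):
import pytest
from gym import envs


@pytest.mark.parametrize('spec', list(envs.registry.all()))
class TestGymEnvCompatibility:
    """Groups the spec-parametrized tests in this section."""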
def run_task(snapshot_config, *_):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(snapshot_config=snapshot_config,
                               max_cpus=4,
                               sess=sess) as local_runner:
                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = TfEnv(
                    normalize(
                        ASTEnv(simulator=sim,
                               reward_function=reward_function,
                               spaces=spaces,
                               **env_args)))

                # Instantiate the garage objects
                policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)
                baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                 **baseline_args)
                optimizer = ConjugateGradientOptimizer
                optimizer_args = {
                    'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                }
                algo = PPO(env_spec=env.spec,
                           policy=policy,
                           baseline=baseline,
                           optimizer=optimizer,
                           optimizer_args=optimizer_args,
                           **algo_args)

                sampler_cls = ASTVectorizedSampler
                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args={
                                       'open_loop': False,
                                       'sim': sim,
                                       'reward_function': reward_function,
                                       'n_envs': n_parallel
                                   })

                # Run the experiment
                local_runner.train(**runner_args)
def run_task(snapshot_config, *_):
    with LocalTFRunner(snapshot_config=snapshot_config,
                       max_cpus=1) as runner:
        # Instantiate the example classes
        sim = ExampleAVSimulator()
        reward_function = ExampleAVReward()
        spaces = ExampleAVSpaces()

        # Create the environment
        env = TfEnv(
            normalize(
                ASTEnv(blackbox_sim_state=True,
                       fixed_init_state=True,
                       s_0=[-0.5, -4.0, 1.0, 11.17, -35.0],
                       simulator=sim,
                       reward_function=reward_function,
                       spaces=spaces)))

        # Instantiate the garage objects
        policy = GaussianLSTMPolicy(name='lstm_policy',
                                    env_spec=env.spec,
                                    hidden_dim=64)
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = TRPO(env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=max_path_length,
                    discount=0.99,
                    kl_constraint='soft',
                    max_kl_step=0.01)

        sampler_cls = ASTVectorizedSampler
        runner.setup(algo=algo,
                     env=env,
                     sampler_cls=sampler_cls,
                     sampler_args={
                         'sim': sim,
                         'reward_function': reward_function
                     })
        runner.train(n_epochs=1, batch_size=4000, plot=False)
        print('Installation successfully validated')
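# Launching the validation task above -- a minimal sketch, assuming the
# garage `run_experiment` entry point (garage <= 2019.10); the seed and
# snapshot_mode values are illustrative choices, not from the source:
from garage.experiment import run_experiment

run_experiment(
    run_task,
    snapshot_mode='last',  # keep only the final iteration's snapshot
    seed=1,
)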
def test_all_gym_envs(self, spec):
    env = TfEnv(spec.make())
    step_env_with_gym_quirks(env, spec)
def test_is_pickleable(self):
    env = TfEnv(env_name='CartPole-v1')
    round_trip = pickle.loads(pickle.dumps(env))
    assert round_trip.env.spec == env.env.spec
def run_task(snapshot_config, *_):
    config = tf.ConfigProto(device_count={'GPU': 0})
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            # Instantiate the example classes
            sim = ExampleAVSimulator(**sim_args)
            reward_function = ExampleAVReward(**reward_args)
            spaces = ExampleAVSpaces(**spaces_args)

            # Create the environment
            env1 = gym.make(id=env_args.pop('id'),
                            simulator=sim,
                            reward_function=reward_function,
                            spaces=spaces,
                            **env_args)
            env2 = normalize(env1)
            env = TfEnv(env2)

            # Instantiate the garage objects
            policy = GoExplorePolicy(env_spec=env.spec)
            baseline = LinearFeatureBaseline(env_spec=env.spec,
                                             **baseline_args)
            algo = GoExplore(env_spec=env.spec,
                             env=env,
                             policy=policy,
                             baseline=baseline,
                             **algo_args)

            sampler_cls = BatchSampler
            sampler_args = {}

            with LocalTFRunner(snapshot_config=snapshot_config,
                               sess=sess) as local_runner:
                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)

                # Run the experiment
                best_cell = local_runner.train(**runner_args)

            log_dir = run_experiment_args['log_dir']
            db_filename = algo_args['db_filename']
            s_0 = env_args['s_0']

            # Open the Go-Explore cell pool and walk backwards from the
            # best cell to reconstruct the expert trajectory
            pool_DB = db.DB()
            pool_DB.open(db_filename + '_pool.dat',
                         dbname=None,
                         dbtype=db.DB_HASH,
                         flags=db.DB_CREATE)
            d_pool = shelve.Shelf(pool_DB, protocol=pickle.HIGHEST_PROTOCOL)

            print(best_cell)
            temp = best_cell
            paths = []
            while temp.parent is not None:
                print(temp.observation)
                action = temp.observation[1:].astype(np.float32) / 1000
                paths.append({
                    'state': temp.state,
                    'reward': temp.reward,
                    'action': action,
                    'observation': np.array(s_0)
                })
                temp = d_pool[temp.parent]
            # Append the root cell, reusing the last action computed above
            print(temp.observation)
            paths.append({
                'state': temp.state,
                'reward': temp.reward,
                'action': action,
                'observation': np.array(s_0)
            })
            d_pool.close()

            with open(log_dir + '/expert_trajectory.p', 'wb') as f:
                pickle.dump([paths], f)
            print('done!')
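# Reading the expert trajectory back (e.g., for the backward algorithm
# later in this section) -- a minimal sketch; the file layout mirrors the
# pickle.dump above, while the log_dir value is assumed:
import pickle

with open(log_dir + '/expert_trajectory.p', 'rb') as f:
    expert_trajectories = pickle.load(f)
# The dump above wrote a list containing one list of per-step dicts
expert_trajectory = expert_trajectories[-1]
print('%d steps recovered' % len(expert_trajectory))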
def run_task(snapshot_config, *_):
    config = tf.ConfigProto(device_count={'GPU': 0})
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(snapshot_config=snapshot_config,
                               sess=sess) as runner:
                # Instantiate the example classes
                g = 9.8  # acceleration due to gravity

                # Lateral (y) RSS parameters
                lat_params = rss.LateralParams(
                    0,  # ρ
                    0.1 * g,  # a_lat_max_acc
                    0.05 * g,  # a_lat_min_brake
                    1.4  # buffer distance
                )

                # Longitudinal (x) RSS parameters
                long_params = rss.LongitudinalParams(
                    0,  # ρ
                    0.7 * g,  # a_max_brake
                    0.1 * g,  # a_max_acc
                    0.7 * g,  # a_min_brake1
                    0.7 * g,  # a_min_brake2
                    2.5,  # buffer
                )
                sim = AVRSSSimulator(lat_params, long_params)
                reward_function = HeuristicReward(
                    PedestrianNoiseGaussian(1, 1, 0.2, .01),
                    np.array([-10000, -1000, 0]))
                spaces = ExampleAVSpaces()

                # Create the environment
                s_0 = [-1.0, -2.0, 1.0, 11.17, -35.0]
                env1 = gym.make('ast_toolbox:GoExploreAST-v1',
                                open_loop=False,
                                action_only=True,
                                fixed_init_state=True,
                                s_0=s_0,
                                simulator=sim,
                                reward_function=reward_function,
                                spaces=spaces)
                env2 = normalize(env1)
                env = TfEnv(env2)

                # Instantiate the garage objects
                policy = GoExplorePolicy(env_spec=env.spec)
                baseline = LinearFeatureBaseline(env_spec=env.spec)
                algo = GoExplore(db_filename=db_filename,
                                 max_db_size=max_db_size,
                                 env=env,
                                 env_spec=env.spec,
                                 policy=policy,
                                 baseline=baseline,
                                 max_path_length=max_path_length,
                                 discount=discount)

                sampler_cls = BatchSampler
                sampler_args = {'n_envs': n_parallel}
                runner.setup(algo=algo,
                             env=env,
                             sampler_cls=sampler_cls,
                             sampler_args=sampler_args)

                # Run the experiment
                paths = runner.train(n_epochs=n_itr,
                                     batch_size=batch_size,
                                     plot=False)
                print(paths)

                # Replay the best trajectory through the simulator,
                # undoing the x1000 action scaling used by the cell pool
                best_traj = paths.trajectory * np.array(
                    [1, 1 / 1000, 1 / 1000, 1 / 1000, 1 / 1000, 1 / 1000,
                     1 / 1000])
                peds = sim._peds
                car = np.expand_dims(sim._car, axis=0)
                car_obs = sim._car_obs
                for step in range(best_traj.shape[0]):
                    sim.step(action=best_traj[step, 1:], open_loop=False)
                    peds = np.concatenate((peds, sim._peds), axis=0)
                    car = np.concatenate(
                        (car, np.expand_dims(sim._car, axis=0)), axis=0)
                    car_obs = np.concatenate((car_obs, sim._car_obs), axis=0)

                # Plot the car, pedestrian, and observed-car positions
                import matplotlib.pyplot as plt
                plt.scatter(car[:, 2], car[:, 3])
                plt.scatter(peds[:, 2], peds[:, 3])
                plt.scatter(car_obs[:, 2], car_obs[:, 3])
                pdb.set_trace()
                print('done!')
import os

import joblib
import numpy as np
import tensorflow as tf

from garage.tf.envs.base import TfEnv

from .cartpole import CartPoleEnv

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # just use CPU

env = TfEnv(CartPoleEnv(use_seed=False))

with tf.Session() as sess:
    data = joblib.load('../Cartpole/control_policy.pkl')
    agent = data['policy']

    o = env.reset()
    agent.reset()
    path_length = 0
    env.render()
    max_path_length = 100
    total_r = 0
    while path_length < max_path_length:
        a, agent_info = agent.get_action(o)
        a = np.argmax(agent_info['prob'])  # act greedily
        next_o, r, d, env_info = env.step(a)
        path_length += 1
        total_r += r
        o = next_o  # advance the observation for the next step
        env.render()
        if d:
            break
    print('total reward:', total_r)
tabular_log_file = osp.join(log_dir, 'progress.csv')
text_log_file = osp.join(log_dir, 'debug.log')
params_log_file = osp.join(log_dir, 'params.json')
pkl_file = osp.join(log_dir, 'params.pkl')

logger.add_text_output(text_log_file)
logger.add_tabular_output(tabular_log_file)
prev_snapshot_dir = logger.get_snapshot_dir()
prev_mode = logger.get_snapshot_mode()
logger.set_snapshot_dir(log_dir)
logger.set_snapshot_mode('gap')
logger.set_snapshot_gap(1)
logger.set_log_tabular_only(False)
logger.push_prefix('[%s] ' % 'Cartpole-RL')

env = TfEnv(CartPoleEnv(use_seed=False))
# env = TfEnv(GridWorldEnv())
policy = CategoricalMLPPolicy(
    name='protagonist',
    env_spec=env.spec,
    # The neural network policy should have two hidden layers,
    # each with 32 hidden units.
    hidden_sizes=(32, 32))
baseline = LinearFeatureBaseline(env_spec=env.spec)
algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000)
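# Teardown counterpart for the logger setup above -- a sketch restoring
# the state saved in prev_snapshot_dir and prev_mode (the fragment above
# stores them but ends before using them):
logger.set_snapshot_mode(prev_mode)
logger.set_snapshot_dir(prev_snapshot_dir)
logger.remove_tabular_output(tabular_log_file)
logger.remove_text_output(text_log_file)
logger.pop_prefix()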
def run_task(snapshot_config, *_):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(snapshot_config=snapshot_config,
                               max_cpus=4,
                               sess=sess) as local_runner:
                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = TfEnv(
                    normalize(
                        ASTEnv(simulator=sim,
                               reward_function=reward_function,
                               spaces=spaces,
                               **env_args)))

                # Instantiate the garage objects
                policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)
                baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                 **baseline_args)
                optimizer = ConjugateGradientOptimizer
                optimizer_args = {
                    'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                }
                algo = PPO(env_spec=env.spec,
                           policy=policy,
                           baseline=baseline,
                           optimizer=optimizer,
                           optimizer_args=optimizer_args,
                           **algo_args)

                sampler_cls = ASTVectorizedSampler
                sampler_args['sim'] = sim
                sampler_args['reward_function'] = reward_function
                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)

                # Run the experiment
                local_runner.train(**runner_args)

                if save_expert_trajectory:
                    load_convert_and_save_drl_expert_trajectory(
                        last_iter_filename=os.path.join(
                            run_experiment_args['log_dir'],
                            'itr_' + str(runner_args['n_epochs'] - 1) +
                            '.pkl'),
                        expert_trajectory_filename=os.path.join(
                            run_experiment_args['log_dir'],
                            'expert_trajectory.pkl'))
                print('done!')
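# `run_task` above closes over configuration dicts supplied by the
# launching script. A minimal sketch of plausible contents -- every value
# here is an illustrative assumption, not a recorded configuration:
max_path_length = 50
sim_args = {'max_path_length': max_path_length}
reward_args = {}
spaces_args = {}
env_args = {'blackbox_sim_state': True,
            'fixed_init_state': True,
            's_0': [-0.5, -4.0, 1.0, 11.17, -35.0]}
policy_args = {'name': 'lstm_policy', 'hidden_dim': 64}
baseline_args = {}
algo_args = {'max_path_length': max_path_length, 'discount': 0.99}
sampler_args = {'open_loop': False, 'n_envs': 4}
runner_args = {'n_epochs': 100, 'batch_size': 4000}
save_expert_trajectory = True
run_experiment_args = {'log_dir': './data/drl'}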
def run_task(snapshot_config, *_):
    config = tf.ConfigProto(device_count={'GPU': 0})
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            # Instantiate the example classes
            sim = ExampleAVSimulator(**sim_args)
            reward_function = ExampleAVReward(**reward_args)
            spaces = ExampleAVSpaces(**spaces_args)

            # Create the environment
            env1 = gym.make(id=env_args.pop('id'),
                            simulator=sim,
                            reward_function=reward_function,
                            spaces=spaces,
                            **env_args)
            env2 = normalize(env1)
            env = TfEnv(env2)

            sampler_cls = BatchSampler
            sampler_args = {}

            # expert_trajectory_file = log_dir + '/expert_trajectory.p'
            # with open(expert_trajectory_file, 'rb') as f:
            #     expert_trajectory = pickle.load(f)

            # Run the backwards algorithm to robustify
            with LocalTFRunner(snapshot_config=snapshot_config,
                               sess=sess) as local_runner:
                policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)
                baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                 **baseline_args)
                optimizer = ConjugateGradientOptimizer
                optimizer_args = {
                    'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                }
                algo = BackwardAlgorithm(env=env,
                                         env_spec=env.spec,
                                         policy=policy,
                                         baseline=baseline,
                                         optimizer=optimizer,
                                         optimizer_args=optimizer_args,
                                         **algo_args)

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)
                results = local_runner.train(**runner_args)
                print('done')

            # Save the results, falling back to per-path files if the
            # full list does not fit in memory
            log_dir = run_experiment_args['log_dir']
            with open(log_dir + '/paths.gz', 'wb') as f:
                try:
                    compress_pickle.dump(results,
                                         f,
                                         compression='gzip',
                                         set_default_extension=False)
                except MemoryError:
                    print('Saving full results failed -- out of memory')
                    for idx, result in enumerate(results):
                        with open(log_dir + '/path_' + str(idx) + '.gz',
                                  'wb') as ff:
                            try:
                                compress_pickle.dump(
                                    result,
                                    ff,
                                    compression='gzip',
                                    set_default_extension=False)
                            except MemoryError:
                                print('Saving path %d failed -- out of'
                                      ' memory' % idx)
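# Reading the compressed results back -- a minimal sketch mirroring the
# compress_pickle.dump calls above; log_dir is assumed to be the same
# directory the run_task wrote to:
import compress_pickle

with open(log_dir + '/paths.gz', 'rb') as f:
    results = compress_pickle.load(f, compression='gzip')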