def test_all_gym_envs_pickleable(self, spec):
    env = TfEnv(env_name=spec.id)
    step_env_with_gym_quirks(self,
                             env,
                             spec,
                             n=1,
                             render=True,
                             serialize_env=True)
    env.close()
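# These gym-compatibility tests take `spec` as an argument; a minimal
# sketch of how they might be parametrized over the gym registry (the
# class name and parametrize setup are assumptions, not from the source):
import pytest
from gym import envs


@pytest.mark.parametrize('spec', list(envs.registry.all()))
class TestGymEnvCompatibility:
    """Groups the spec-parametrized tests in this section."""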
def run_task(snapshot_config, *_):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(snapshot_config=snapshot_config,
                               max_cpus=4,
                               sess=sess) as local_runner:
                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = TfEnv(
                    normalize(
                        ASTEnv(simulator=sim,
                               reward_function=reward_function,
                               spaces=spaces,
                               **env_args)))

                # Instantiate the garage objects
                policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)
                baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                 **baseline_args)
                optimizer = ConjugateGradientOptimizer
                optimizer_args = {
                    'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                }
                algo = PPO(env_spec=env.spec,
                           policy=policy,
                           baseline=baseline,
                           optimizer=optimizer,
                           optimizer_args=optimizer_args,
                           **algo_args)

                sampler_cls = ASTVectorizedSampler
                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args={
                                       'open_loop': False,
                                       'sim': sim,
                                       'reward_function': reward_function,
                                       'n_envs': n_parallel
                                   })

                # Run the experiment
                local_runner.train(**runner_args)
def run_task(snapshot_config, *_):
    with LocalTFRunner(snapshot_config=snapshot_config,
                       max_cpus=1) as runner:
        # Instantiate the example classes
        sim = ExampleAVSimulator()
        reward_function = ExampleAVReward()
        spaces = ExampleAVSpaces()

        # Create the environment
        env = TfEnv(
            normalize(
                ASTEnv(blackbox_sim_state=True,
                       fixed_init_state=True,
                       s_0=[-0.5, -4.0, 1.0, 11.17, -35.0],
                       simulator=sim,
                       reward_function=reward_function,
                       spaces=spaces)))

        # Instantiate the garage objects
        policy = GaussianLSTMPolicy(name='lstm_policy',
                                    env_spec=env.spec,
                                    hidden_dim=64)
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = TRPO(env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=max_path_length,
                    discount=0.99,
                    kl_constraint='soft',
                    max_kl_step=0.01)

        sampler_cls = ASTVectorizedSampler
        runner.setup(algo=algo,
                     env=env,
                     sampler_cls=sampler_cls,
                     sampler_args={
                         'sim': sim,
                         'reward_function': reward_function
                     })
        runner.train(n_epochs=1, batch_size=4000, plot=False)
        print('Installation successfully validated')
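# Launching the validation task above -- a minimal sketch, assuming the
# garage `run_experiment` entry point (garage <= 2019.10); the seed and
# snapshot_mode values are illustrative choices, not from the source:
from garage.experiment import run_experiment

run_experiment(
    run_task,
    snapshot_mode='last',  # keep only the final iteration's snapshot
    seed=1,
)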
def test_all_gym_envs(self, spec):
    env = TfEnv(spec.make())
    step_env_with_gym_quirks(env, spec)
def test_is_pickleable(self):
    env = TfEnv(env_name='CartPole-v1')
    round_trip = pickle.loads(pickle.dumps(env))
    assert round_trip.env.spec == env.env.spec
def run_task(snapshot_config, *_):
    config = tf.ConfigProto(device_count={'GPU': 0})
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            # Instantiate the example classes
            sim = ExampleAVSimulator(**sim_args)
            reward_function = ExampleAVReward(**reward_args)
            spaces = ExampleAVSpaces(**spaces_args)

            # Create the environment
            env1 = gym.make(id=env_args.pop('id'),
                            simulator=sim,
                            reward_function=reward_function,
                            spaces=spaces,
                            **env_args)
            env2 = normalize(env1)
            env = TfEnv(env2)

            # Instantiate the garage objects
            policy = GoExplorePolicy(env_spec=env.spec)
            baseline = LinearFeatureBaseline(env_spec=env.spec,
                                             **baseline_args)
            algo = GoExplore(env_spec=env.spec,
                             env=env,
                             policy=policy,
                             baseline=baseline,
                             **algo_args)

            sampler_cls = BatchSampler
            sampler_args = {}

            with LocalTFRunner(snapshot_config=snapshot_config,
                               sess=sess) as local_runner:
                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)

                # Run the experiment
                best_cell = local_runner.train(**runner_args)

            log_dir = run_experiment_args['log_dir']
            db_filename = algo_args['db_filename']
            s_0 = env_args['s_0']

            # Open the Go-Explore cell pool and walk backwards from the
            # best cell to reconstruct the expert trajectory
            pool_DB = db.DB()
            pool_DB.open(db_filename + '_pool.dat',
                         dbname=None,
                         dbtype=db.DB_HASH,
                         flags=db.DB_CREATE)
            d_pool = shelve.Shelf(pool_DB, protocol=pickle.HIGHEST_PROTOCOL)

            print(best_cell)
            temp = best_cell
            paths = []
            while temp.parent is not None:
                print(temp.observation)
                action = temp.observation[1:].astype(np.float32) / 1000
                paths.append({
                    'state': temp.state,
                    'reward': temp.reward,
                    'action': action,
                    'observation': np.array(s_0)
                })
                temp = d_pool[temp.parent]
            # Append the root cell, reusing the last action computed above
            print(temp.observation)
            paths.append({
                'state': temp.state,
                'reward': temp.reward,
                'action': action,
                'observation': np.array(s_0)
            })
            d_pool.close()

            with open(log_dir + '/expert_trajectory.p', 'wb') as f:
                pickle.dump([paths], f)
            print('done!')
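# Reading the expert trajectory back (e.g., for the backward algorithm
# later in this section) -- a minimal sketch; the file layout mirrors the
# pickle.dump above, while the log_dir value is assumed:
import pickle

with open(log_dir + '/expert_trajectory.p', 'rb') as f:
    expert_trajectories = pickle.load(f)
# The dump above wrote a list containing one list of per-step dicts
expert_trajectory = expert_trajectories[-1]
print('%d steps recovered' % len(expert_trajectory))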
def run_task(snapshot_config, *_):
    config = tf.ConfigProto(device_count={'GPU': 0})
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(snapshot_config=snapshot_config,
                               sess=sess) as runner:
                # Instantiate the example classes
                g = 9.8  # acceleration due to gravity

                # Lateral (y) RSS parameters
                lat_params = rss.LateralParams(
                    0,  # ρ
                    0.1 * g,  # a_lat_max_acc
                    0.05 * g,  # a_lat_min_brake
                    1.4  # buffer distance
                )

                # Longitudinal (x) RSS parameters
                long_params = rss.LongitudinalParams(
                    0,  # ρ
                    0.7 * g,  # a_max_brake
                    0.1 * g,  # a_max_acc
                    0.7 * g,  # a_min_brake1
                    0.7 * g,  # a_min_brake2
                    2.5,  # buffer
                )
                sim = AVRSSSimulator(lat_params, long_params)
                reward_function = HeuristicReward(
                    PedestrianNoiseGaussian(1, 1, 0.2, .01),
                    np.array([-10000, -1000, 0]))
                spaces = ExampleAVSpaces()

                # Create the environment
                s_0 = [-1.0, -2.0, 1.0, 11.17, -35.0]
                env1 = gym.make('ast_toolbox:GoExploreAST-v1',
                                open_loop=False,
                                action_only=True,
                                fixed_init_state=True,
                                s_0=s_0,
                                simulator=sim,
                                reward_function=reward_function,
                                spaces=spaces)
                env2 = normalize(env1)
                env = TfEnv(env2)

                # Instantiate the garage objects
                policy = GoExplorePolicy(env_spec=env.spec)
                baseline = LinearFeatureBaseline(env_spec=env.spec)
                algo = GoExplore(db_filename=db_filename,
                                 max_db_size=max_db_size,
                                 env=env,
                                 env_spec=env.spec,
                                 policy=policy,
                                 baseline=baseline,
                                 max_path_length=max_path_length,
                                 discount=discount)

                sampler_cls = BatchSampler
                sampler_args = {'n_envs': n_parallel}
                runner.setup(algo=algo,
                             env=env,
                             sampler_cls=sampler_cls,
                             sampler_args=sampler_args)

                # Run the experiment
                paths = runner.train(n_epochs=n_itr,
                                     batch_size=batch_size,
                                     plot=False)
                print(paths)

                # Replay the best trajectory through the simulator,
                # undoing the x1000 action scaling used by the cell pool
                best_traj = paths.trajectory * np.array(
                    [1, 1 / 1000, 1 / 1000, 1 / 1000, 1 / 1000, 1 / 1000,
                     1 / 1000])
                peds = sim._peds
                car = np.expand_dims(sim._car, axis=0)
                car_obs = sim._car_obs
                for step in range(best_traj.shape[0]):
                    sim.step(action=best_traj[step, 1:], open_loop=False)
                    peds = np.concatenate((peds, sim._peds), axis=0)
                    car = np.concatenate(
                        (car, np.expand_dims(sim._car, axis=0)), axis=0)
                    car_obs = np.concatenate((car_obs, sim._car_obs), axis=0)

                # Plot the car, pedestrian, and observed-car positions
                import matplotlib.pyplot as plt
                plt.scatter(car[:, 2], car[:, 3])
                plt.scatter(peds[:, 2], peds[:, 3])
                plt.scatter(car_obs[:, 2], car_obs[:, 3])
                pdb.set_trace()
                print('done!')
import os

import joblib
import numpy as np
import tensorflow as tf

from garage.tf.envs.base import TfEnv

from .cartpole import CartPoleEnv

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # just use CPU

env = TfEnv(CartPoleEnv(use_seed=False))

with tf.Session() as sess:
    data = joblib.load('../Cartpole/control_policy.pkl')
    agent = data['policy']

    o = env.reset()
    agent.reset()
    path_length = 0
    env.render()
    max_path_length = 100
    total_r = 0
    while path_length < max_path_length:
        a, agent_info = agent.get_action(o)
        a = np.argmax(agent_info['prob'])  # act greedily
        next_o, r, d, env_info = env.step(a)
        path_length += 1
        total_r += r
        o = next_o  # advance the observation for the next step
        env.render()
        if d:
            break
    print('total reward:', total_r)
tabular_log_file = osp.join(log_dir, 'progress.csv')
text_log_file = osp.join(log_dir, 'debug.log')
params_log_file = osp.join(log_dir, 'params.json')
pkl_file = osp.join(log_dir, 'params.pkl')

logger.add_text_output(text_log_file)
logger.add_tabular_output(tabular_log_file)
prev_snapshot_dir = logger.get_snapshot_dir()
prev_mode = logger.get_snapshot_mode()
logger.set_snapshot_dir(log_dir)
logger.set_snapshot_mode('gap')
logger.set_snapshot_gap(1)
logger.set_log_tabular_only(False)
logger.push_prefix('[%s] ' % 'Cartpole-RL')

env = TfEnv(CartPoleEnv(use_seed=False))
# env = TfEnv(GridWorldEnv())
policy = CategoricalMLPPolicy(
    name='protagonist',
    env_spec=env.spec,
    # The neural network policy should have two hidden layers,
    # each with 32 hidden units.
    hidden_sizes=(32, 32))
baseline = LinearFeatureBaseline(env_spec=env.spec)
algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000)
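# Teardown counterpart for the logger setup above -- a sketch restoring
# the state saved in prev_snapshot_dir and prev_mode (the fragment above
# stores them but ends before using them):
logger.set_snapshot_mode(prev_mode)
logger.set_snapshot_dir(prev_snapshot_dir)
logger.remove_tabular_output(tabular_log_file)
logger.remove_text_output(text_log_file)
logger.pop_prefix()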
def run_task(snapshot_config, *_):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(snapshot_config=snapshot_config,
                               max_cpus=4,
                               sess=sess) as local_runner:
                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = TfEnv(
                    normalize(
                        ASTEnv(simulator=sim,
                               reward_function=reward_function,
                               spaces=spaces,
                               **env_args)))

                # Instantiate the garage objects
                policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)
                baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                 **baseline_args)
                optimizer = ConjugateGradientOptimizer
                optimizer_args = {
                    'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                }
                algo = PPO(env_spec=env.spec,
                           policy=policy,
                           baseline=baseline,
                           optimizer=optimizer,
                           optimizer_args=optimizer_args,
                           **algo_args)

                sampler_cls = ASTVectorizedSampler
                sampler_args['sim'] = sim
                sampler_args['reward_function'] = reward_function
                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)

                # Run the experiment
                local_runner.train(**runner_args)

                if save_expert_trajectory:
                    load_convert_and_save_drl_expert_trajectory(
                        last_iter_filename=os.path.join(
                            run_experiment_args['log_dir'],
                            'itr_' + str(runner_args['n_epochs'] - 1) +
                            '.pkl'),
                        expert_trajectory_filename=os.path.join(
                            run_experiment_args['log_dir'],
                            'expert_trajectory.pkl'))
                print('done!')
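# `run_task` above closes over configuration dicts supplied by the
# launching script. A minimal sketch of plausible contents -- every value
# here is an illustrative assumption, not a recorded configuration:
max_path_length = 50
sim_args = {'max_path_length': max_path_length}
reward_args = {}
spaces_args = {}
env_args = {'blackbox_sim_state': True,
            'fixed_init_state': True,
            's_0': [-0.5, -4.0, 1.0, 11.17, -35.0]}
policy_args = {'name': 'lstm_policy', 'hidden_dim': 64}
baseline_args = {}
algo_args = {'max_path_length': max_path_length, 'discount': 0.99}
sampler_args = {'open_loop': False, 'n_envs': 4}
runner_args = {'n_epochs': 100, 'batch_size': 4000}
save_expert_trajectory = True
run_experiment_args = {'log_dir': './data/drl'}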
def run_task(snapshot_config, *_):
    config = tf.ConfigProto(device_count={'GPU': 0})
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            # Instantiate the example classes
            sim = ExampleAVSimulator(**sim_args)
            reward_function = ExampleAVReward(**reward_args)
            spaces = ExampleAVSpaces(**spaces_args)

            # Create the environment
            env1 = gym.make(id=env_args.pop('id'),
                            simulator=sim,
                            reward_function=reward_function,
                            spaces=spaces,
                            **env_args)
            env2 = normalize(env1)
            env = TfEnv(env2)

            sampler_cls = BatchSampler
            sampler_args = {}

            # expert_trajectory_file = log_dir + '/expert_trajectory.p'
            # with open(expert_trajectory_file, 'rb') as f:
            #     expert_trajectory = pickle.load(f)

            # Run the backwards algorithm to robustify
            with LocalTFRunner(snapshot_config=snapshot_config,
                               sess=sess) as local_runner:
                policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)
                baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                 **baseline_args)
                optimizer = ConjugateGradientOptimizer
                optimizer_args = {
                    'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                }
                algo = BackwardAlgorithm(env=env,
                                         env_spec=env.spec,
                                         policy=policy,
                                         baseline=baseline,
                                         optimizer=optimizer,
                                         optimizer_args=optimizer_args,
                                         **algo_args)

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)
                results = local_runner.train(**runner_args)
                print('done')

            # Save the results, falling back to per-path files if the
            # full list does not fit in memory
            log_dir = run_experiment_args['log_dir']
            with open(log_dir + '/paths.gz', 'wb') as f:
                try:
                    compress_pickle.dump(results,
                                         f,
                                         compression='gzip',
                                         set_default_extension=False)
                except MemoryError:
                    print('Saving full results failed -- out of memory')
                    for idx, result in enumerate(results):
                        with open(log_dir + '/path_' + str(idx) + '.gz',
                                  'wb') as ff:
                            try:
                                compress_pickle.dump(
                                    result,
                                    ff,
                                    compression='gzip',
                                    set_default_extension=False)
                            except MemoryError:
                                print('Saving path %d failed -- out of'
                                      ' memory' % idx)
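# Reading the compressed results back -- a minimal sketch mirroring the
# compress_pickle.dump calls above; log_dir is assumed to be the same
# directory the run_task wrote to:
import compress_pickle

with open(log_dir + '/paths.gz', 'rb') as f:
    results = compress_pickle.load(f, compression='gzip')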