def run_task(snapshot_config, *_):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(
                    snapshot_config=snapshot_config, max_cpus=4, sess=sess) as local_runner:

                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = TfEnv(normalize(ASTEnv(simulator=sim,
                                             reward_function=reward_function,
                                             spaces=spaces,
                                             **env_args)))

                # Instantiate the garage objects
                policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)

                baseline = LinearFeatureBaseline(env_spec=env.spec, **baseline_args)

                optimizer = ConjugateGradientOptimizer
                optimizer_args = {'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)}

                algo = PPO(env_spec=env.spec,
                           policy=policy,
                           baseline=baseline,
                           optimizer=optimizer,
                           optimizer_args=optimizer_args,
                           **algo_args)

                sampler_cls = ASTVectorizedSampler

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args={"open_loop": False,
                                                 "sim": sim,
                                                 "reward_function": reward_function,
                                                 "n_envs": n_parallel})

                # Run the experiment
                local_runner.train(**runner_args)
def run_task(snapshot_config, *_):
    with LocalTFRunner(snapshot_config=snapshot_config, max_cpus=1) as runner:
        # Instantiate the example classes
        sim = ExampleAVSimulator()
        reward_function = ExampleAVReward()
        spaces = ExampleAVSpaces()

        # Create the environment
        env = TfEnv(
            normalize(
                ASTEnv(blackbox_sim_state=True,
                       fixed_init_state=True,
                       s_0=[-0.5, -4.0, 1.0, 11.17, -35.0],
                       simulator=sim,
                       reward_function=reward_function,
                       spaces=spaces)))

        # Instantiate the garage objects
        policy = GaussianLSTMPolicy(name='lstm_policy',
                                    env_spec=env.spec,
                                    hidden_dim=64)

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = TRPO(env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=max_path_length,
                    discount=0.99,
                    kl_constraint='soft',
                    max_kl_step=0.01)

        sampler_cls = ASTVectorizedSampler

        runner.setup(algo=algo,
                     env=env,
                     sampler_cls=sampler_cls,
                     sampler_args={"sim": sim,
                                   "reward_function": reward_function})

        runner.train(n_epochs=1, batch_size=4000, plot=False)

        print("Installation successfully validated")
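# --- Launch sketch (not part of the validation task above) ---
# The run_task(snapshot_config, *_) functions in this section are normally handed to
# garage's experiment launcher rather than called directly. This is a minimal sketch
# assuming the garage.experiment.run_experiment entry point used by the TF-based
# examples; the snapshot_mode and seed values are illustrative, not required settings.
from garage.experiment import run_experiment

run_experiment(
    run_task,              # the task function defined above
    snapshot_mode='last',  # keep only the final iteration's snapshot
    seed=1,
)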
def test_example_av_spaces():
    space = ExampleAVSpaces(num_peds=2)

    assert isinstance(space.action_space, Box)
    assert isinstance(space.observation_space, Box)
def run_task(snapshot_config, *_):
    seed = 0
    # top_k = 10
    np.random.seed(seed)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(snapshot_config=snapshot_config,
                               max_cpus=4,
                               sess=sess) as local_runner:

                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = ASTEnv(simulator=sim,
                             reward_function=reward_function,
                             spaces=spaces,
                             **env_args)
                env = TfEnv(env)

                policy = ContinuousMLPPolicy(name='ast_agent',
                                             env_spec=env.spec,
                                             **policy_args)
                params = policy.get_params()
                sess.run(tf.variables_initializer(params))

                # Instantiate the garage objects
                baseline = ZeroBaseline(env_spec=env.spec)

                top_paths = BPQ.BoundedPriorityQueue(**bpq_args)

                sampler_cls = ASTVectorizedSampler
                sampler_args = {"open_loop": False,
                                "sim": sim,
                                "reward_function": reward_function,
                                "n_envs": n_parallel}

                if ga_type == 'ga':
                    print('ga')
                    algo = GA(env_spec=env.spec,
                              policy=policy,
                              baseline=baseline,
                              top_paths=top_paths,
                              **algo_args)
                elif ga_type == 'gasm':
                    print('gasm')
                    algo = GASM(env_spec=env.spec,
                                policy=policy,
                                baseline=baseline,
                                top_paths=top_paths,
                                **algo_args)
                else:
                    raise NotImplementedError

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)

                # Run the experiment
                local_runner.train(**runner_args)
def run_task(snapshot_config, *_):
    config = tf.ConfigProto(device_count={'GPU': 0})
    # config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

            # Instantiate the example classes
            sim = ExampleAVSimulator(**sim_args)
            reward_function = ExampleAVReward(**reward_args)
            spaces = ExampleAVSpaces(**spaces_args)

            # Create the environment (the registered gym id is passed through env_args)
            env1 = gym.make(id=env_args.pop('id'),
                            simulator=sim,
                            reward_function=reward_function,
                            spaces=spaces,
                            **env_args)
            env2 = normalize(env1)
            env = TfEnv(env2)

            # Instantiate the garage objects
            policy = GoExplorePolicy(env_spec=env.spec)

            baseline = LinearFeatureBaseline(env_spec=env.spec, **baseline_args)

            algo = GoExplore(env_spec=env.spec,
                             env=env,
                             policy=policy,
                             baseline=baseline,
                             **algo_args)

            sampler_cls = BatchSampler
            # sampler_args = {'n_envs': n_parallel}
            sampler_args = {}

            with LocalTFRunner(snapshot_config=snapshot_config,
                               sess=sess) as local_runner:

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)

                # Run the experiment
                best_cell = local_runner.train(**runner_args)

                log_dir = run_experiment_args['log_dir']
                db_filename = algo_args['db_filename']
                s_0 = env_args['s_0']

                # Walk the Go-Explore cell pool backwards from the best cell to
                # reconstruct the path as an expert trajectory
                pool_DB = db.DB()
                pool_DB.open(db_filename + '_pool.dat',
                             dbname=None,
                             dbtype=db.DB_HASH,
                             flags=db.DB_CREATE)
                d_pool = shelve.Shelf(pool_DB, protocol=pickle.HIGHEST_PROTOCOL)

                print(best_cell)
                temp = best_cell
                paths = []
                while temp.parent is not None:
                    print(temp.observation)
                    action = temp.observation[1:].astype(np.float32) / 1000
                    paths.append({'state': temp.state,
                                  'reward': temp.reward,
                                  'action': action,
                                  'observation': np.array(s_0)})
                    temp = d_pool[temp.parent]
                print(temp.observation)
                paths.append({'state': temp.state,
                              'reward': temp.reward,
                              'action': action,
                              'observation': np.array(s_0)})

                d_pool.close()

                with open(log_dir + '/expert_trajectory.p', 'wb') as f:
                    pickle.dump([paths], f)

    print('done!')
def run_task(snapshot_config, *_):
    config = tf.ConfigProto(device_count={'GPU': 0})
    # config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(snapshot_config=snapshot_config,
                               sess=sess) as runner:

                # Instantiate the example classes
                g = 9.8  # acceleration due to gravity

                # Lateral (y-axis) RSS parameters
                lat_params = rss.LateralParams(
                    0,         # ρ
                    0.1 * g,   # a_lat_max_acc
                    0.05 * g,  # a_lat_min_brake
                    1.4        # Buffer distance
                )

                # Longitudinal (x-axis) RSS parameters
                long_params = rss.LongitudinalParams(
                    0,        # ρ
                    0.7 * g,  # a_max_brake
                    0.1 * g,  # a_max_acc
                    0.7 * g,  # a_min_brake1
                    0.7 * g,  # a_min_brake2
                    2.5,      # Buffer
                )

                sim = AVRSSSimulator(lat_params, long_params)
                reward_function = HeuristicReward(
                    PedestrianNoiseGaussian(1, 1, 0.2, .01),
                    np.array([-10000, -1000, 0]))
                # reward_function = ExampleAVReward()
                spaces = ExampleAVSpaces()

                # Create the environment
                s_0 = [-1.0, -2.0, 1.0, 11.17, -35.0]
                env1 = gym.make('ast_toolbox:GoExploreAST-v1',
                                open_loop=False,
                                action_only=True,
                                fixed_init_state=True,
                                s_0=s_0,
                                simulator=sim,
                                reward_function=reward_function,
                                spaces=spaces)
                env2 = normalize(env1)
                env = TfEnv(env2)

                # Instantiate the garage objects
                policy = GoExplorePolicy(env_spec=env.spec)

                baseline = LinearFeatureBaseline(env_spec=env.spec)

                algo = GoExplore(
                    db_filename=db_filename,
                    max_db_size=max_db_size,
                    env=env,
                    env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=max_path_length,
                    discount=discount,
                    # whole_paths=whole_paths
                )

                sampler_cls = BatchSampler
                sampler_args = {'n_envs': n_parallel}

                runner.setup(algo=algo,
                             env=env,
                             sampler_cls=sampler_cls,
                             sampler_args=sampler_args)

                # Run the experiment
                paths = runner.train(n_epochs=n_itr, batch_size=batch_size, plot=False)
                print(paths)

                # Replay the best trajectory through the simulator and plot the
                # car, pedestrian, and car-observation positions
                best_traj = paths.trajectory * np.array(
                    [1, 1 / 1000, 1 / 1000, 1 / 1000, 1 / 1000, 1 / 1000, 1 / 1000])
                peds = sim._peds
                car = np.expand_dims(sim._car, axis=0)
                car_obs = sim._car_obs
                for step in range(best_traj.shape[0]):
                    sim.step(action=best_traj[step, 1:], open_loop=False)
                    peds = np.concatenate((peds, sim._peds), axis=0)
                    car = np.concatenate((car, np.expand_dims(sim._car, axis=0)), axis=0)
                    car_obs = np.concatenate((car_obs, sim._car_obs), axis=0)

                import matplotlib.pyplot as plt
                plt.scatter(car[:, 2], car[:, 3])
                plt.scatter(peds[:, 2], peds[:, 3])
                plt.scatter(car_obs[:, 2], car_obs[:, 3])

                # Drop into the debugger for interactive inspection of the plot
                pdb.set_trace()

    print('done!')
def run_task(snapshot_config, *_):
    seed = 0
    # top_k = 10
    np.random.seed(seed)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(snapshot_config=snapshot_config,
                               max_cpus=4,
                               sess=sess) as local_runner:

                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = ASTEnv(simulator=sim,
                             reward_function=reward_function,
                             spaces=spaces,
                             **env_args)

                top_paths = BPQ.BoundedPriorityQueue(**bpq_args)

                if mcts_type == 'mcts':
                    print('mcts')
                    algo = MCTS(env=env,
                                top_paths=top_paths,
                                **algo_args)
                elif mcts_type == 'mctsbv':
                    print('mctsbv')
                    algo = MCTSBV(env=env,
                                  top_paths=top_paths,
                                  **algo_args)
                elif mcts_type == 'mctsrs':
                    print('mctsrs')
                    algo = MCTSRS(env=env,
                                  top_paths=top_paths,
                                  **algo_args)
                else:
                    raise NotImplementedError

                sampler_cls = ASTVectorizedSampler

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args={"open_loop": False,
                                                 "sim": sim,
                                                 "reward_function": reward_function,
                                                 "n_envs": n_parallel})

                # Run the experiment
                local_runner.train(**runner_args)

                log_dir = run_experiment_args['log_dir']

                # Replay the best action sequences through the simulator and save
                # them as expert trajectories
                with open(log_dir + '/best_actions.p', 'rb') as f:
                    best_actions = pickle.load(f)
                expert_trajectories = []
                for actions in best_actions:
                    sim.reset(s_0=env_args['s_0'])
                    path = []
                    for action in actions:
                        obs = sim.step(action)
                        state = sim.clone_state()
                        reward = reward_function.give_reward(
                            action=action,
                            info=sim.get_reward_info())
                        path.append({'state': state,
                                     'reward': reward,
                                     'action': action,
                                     'observation': obs})
                    expert_trajectories.append(path)
                with open(log_dir + '/expert_trajectory.p', 'wb') as f:
                    pickle.dump(expert_trajectories, f)
def __init__(self,
             open_loop=True,
             blackbox_sim_state=True,
             fixed_init_state=False,
             s_0=None,
             simulator=None,
             reward_function=None,
             spaces=None):
    # Constant hyper-params -- set by user
    self.open_loop = open_loop
    self.blackbox_sim_state = blackbox_sim_state  # is this redundant?
    self.spaces = spaces
    if spaces is None:
        self.spaces = ExampleAVSpaces()

    # These are set by reset, not the user
    self._done = False
    self._reward = 0.0
    self._info = {}
    self._step = 0
    self._action = None
    self._actions = []
    self._first_step = True
    self.reward_range = (-float('inf'), float('inf'))
    self.metadata = None
    self.spec._entry_point = []
    self._cum_reward = 0.0
    self.root_action = None
    self.sample_limit = 10000

    self.simulator = simulator
    if self.simulator is None:
        self.simulator = ExampleAVSimulator()

    if s_0 is None:
        self._init_state = self.observation_space.sample()
    else:
        self._init_state = s_0
    self._fixed_init_state = fixed_init_state

    self.reward_function = reward_function
    if self.reward_function is None:
        self.reward_function = ExampleAVReward()

    # The environment is vectorized only if the simulator provides a
    # vec_env_executor method
    if hasattr(self.simulator, "vec_env_executor") and callable(
            getattr(self.simulator, "vec_env_executor")):
        self.vectorized = True
    else:
        self.vectorized = False
    # super().__init__(self)  # Always call Serializable constructor last

    # Database and robustification bookkeeping
    self.params_set = False
    self.db_filename = 'database.dat'
    self.key_list = []
    self.max_value = 0
    self.robustify_state = []
    self.robustify = False

    Parameterized.__init__(self)
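# --- Instantiation sketch ---
# A minimal example of constructing this environment directly, assuming this is the
# GoExploreASTEnv constructor used by the Go-Explore runners above (it is also
# registered as 'ast_toolbox:GoExploreAST-v1' there). The initial-state vector s_0
# mirrors the example runners and is illustrative, not a required default; every
# argument is optional, and omitting one falls back to the Example* defaults created
# inside __init__.
env = GoExploreASTEnv(
    open_loop=False,
    blackbox_sim_state=True,
    fixed_init_state=True,
    s_0=[-0.5, -4.0, 1.0, 11.17, -35.0],
    simulator=ExampleAVSimulator(),
    reward_function=ExampleAVReward(),
    spaces=ExampleAVSpaces(),
)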
def run_task(snapshot_config, *_):
    seed = 0
    # top_k = 10
    np.random.seed(seed)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(
                    snapshot_config=snapshot_config, max_cpus=4, sess=sess) as local_runner:

                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = ASTEnv(simulator=sim,
                             reward_function=reward_function,
                             spaces=spaces,
                             **env_args)

                top_paths = BPQ.BoundedPriorityQueue(**bpq_args)

                if mcts_type == 'mcts':
                    print('mcts')
                    algo = MCTS(env=env,
                                top_paths=top_paths,
                                **algo_args)
                elif mcts_type == 'mctsbv':
                    print('mctsbv')
                    algo = MCTSBV(env=env,
                                  top_paths=top_paths,
                                  **algo_args)
                elif mcts_type == 'mctsrs':
                    print('mctsrs')
                    algo = MCTSRS(env=env,
                                  top_paths=top_paths,
                                  **algo_args)
                else:
                    raise NotImplementedError

                sampler_cls = ASTVectorizedSampler
                sampler_args['sim'] = sim
                sampler_args['reward_function'] = reward_function

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)

                # Run the experiment
                local_runner.train(**runner_args)

                log_dir = run_experiment_args['log_dir']

                if save_expert_trajectory:
                    load_convert_and_save_mcts_expert_trajectory(
                        best_actions_filename=log_dir + '/best_actions.p',
                        expert_trajectory_filename=log_dir + '/expert_trajectory.p',
                        sim=sim,
                        s_0=env_args['s_0'],
                        reward_function=reward_function)
def run_task(snapshot_config, *_):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(snapshot_config=snapshot_config,
                               max_cpus=4,
                               sess=sess) as local_runner:

                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = TfEnv(
                    normalize(
                        ASTEnv(simulator=sim,
                               reward_function=reward_function,
                               spaces=spaces,
                               **env_args)))

                # Instantiate the garage objects
                policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)

                baseline = LinearFeatureBaseline(env_spec=env.spec, **baseline_args)

                optimizer = ConjugateGradientOptimizer
                optimizer_args = {'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)}

                algo = PPO(env_spec=env.spec,
                           policy=policy,
                           baseline=baseline,
                           optimizer=optimizer,
                           optimizer_args=optimizer_args,
                           **algo_args)

                sampler_cls = ASTVectorizedSampler
                sampler_args['sim'] = sim
                sampler_args['reward_function'] = reward_function

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)

                # Run the experiment
                local_runner.train(**runner_args)

                if save_expert_trajectory:
                    load_convert_and_save_drl_expert_trajectory(
                        last_iter_filename=os.path.join(
                            run_experiment_args['log_dir'],
                            'itr_' + str(runner_args['n_epochs'] - 1) + '.pkl'),
                        expert_trajectory_filename=os.path.join(
                            run_experiment_args['log_dir'],
                            'expert_trajectory.pkl'))

                print('done!')
def run_task(snapshot_config, *_):
    config = tf.ConfigProto(device_count={'GPU': 0})
    # config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):

            # Instantiate the example classes
            sim = ExampleAVSimulator(**sim_args)
            reward_function = ExampleAVReward(**reward_args)
            spaces = ExampleAVSpaces(**spaces_args)

            # Create the environment (the registered gym id is passed through env_args)
            env1 = gym.make(id=env_args.pop('id'),
                            simulator=sim,
                            reward_function=reward_function,
                            spaces=spaces,
                            **env_args)
            env2 = normalize(env1)
            env = TfEnv(env2)

            sampler_cls = BatchSampler
            # sampler_args = {'n_envs': n_parallel}
            sampler_args = {}

            # Run the backward algorithm to robustify
            with LocalTFRunner(snapshot_config=snapshot_config,
                               sess=sess) as local_runner:

                policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)

                baseline = LinearFeatureBaseline(env_spec=env.spec, **baseline_args)

                optimizer = ConjugateGradientOptimizer
                optimizer_args = {'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)}

                algo = BackwardAlgorithm(env=env,
                                         env_spec=env.spec,
                                         policy=policy,
                                         baseline=baseline,
                                         optimizer=optimizer,
                                         optimizer_args=optimizer_args,
                                         **algo_args)

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)

                results = local_runner.train(**runner_args)
                print('done')

                # Save the results, falling back to per-path files if the full
                # list is too large to pickle at once
                log_dir = run_experiment_args['log_dir']
                with open(log_dir + '/paths.gz', 'wb') as f:
                    try:
                        compress_pickle.dump(results, f,
                                             compression="gzip",
                                             set_default_extension=False)
                    except MemoryError:
                        print('1')
                        for idx, result in enumerate(results):
                            with open(log_dir + '/path_' + str(idx) + '.gz', 'wb') as ff:
                                try:
                                    compress_pickle.dump(result, ff,
                                                         compression="gzip",
                                                         set_default_extension=False)
                                except MemoryError:
                                    print('2')
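# --- Read-back sketch ---
# A hedged sketch of reloading the results saved above, assuming the same
# compress_pickle version used for writing; 'paths.gz' is the filename written by the
# task, and log_dir is assumed to point at the experiment's log directory.
import compress_pickle

results = compress_pickle.load(log_dir + '/paths.gz',
                               compression='gzip',
                               set_default_extension=False)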