def run_task(snapshot_config, *_):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(
                    snapshot_config=snapshot_config, max_cpus=4,
                    sess=sess) as local_runner:

                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = TfEnv(
                    normalize(
                        ASTEnv(simulator=sim,
                               reward_function=reward_function,
                               spaces=spaces,
                               **env_args)))

                # Instantiate the garage objects
                policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)

                baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                 **baseline_args)

                optimizer = ConjugateGradientOptimizer
                optimizer_args = {
                    'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                }

                algo = PPO(env_spec=env.spec,
                           policy=policy,
                           baseline=baseline,
                           optimizer=optimizer,
                           optimizer_args=optimizer_args,
                           **algo_args)

                sampler_cls = ASTVectorizedSampler

                local_runner.setup(
                    algo=algo,
                    env=env,
                    sampler_cls=sampler_cls,
                    sampler_args={"open_loop": False,
                                  "sim": sim,
                                  "reward_function": reward_function,
                                  "n_envs": n_parallel})

                # Run the experiment
                local_runner.train(**runner_args)
def run_task(snapshot_config, *_):
    with LocalTFRunner(snapshot_config=snapshot_config, max_cpus=1) as runner:
        # Instantiate the example classes
        sim = ExampleAVSimulator()
        reward_function = ExampleAVReward()
        spaces = ExampleAVSpaces()

        # Create the environment
        env = TfEnv(
            normalize(
                ASTEnv(blackbox_sim_state=True,
                       fixed_init_state=True,
                       s_0=[-0.5, -4.0, 1.0, 11.17, -35.0],
                       simulator=sim,
                       reward_function=reward_function,
                       spaces=spaces)))

        # Instantiate the garage objects
        policy = GaussianLSTMPolicy(name='lstm_policy',
                                    env_spec=env.spec,
                                    hidden_dim=64)

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = TRPO(env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=max_path_length,
                    discount=0.99,
                    kl_constraint='soft',
                    max_kl_step=0.01)

        sampler_cls = ASTVectorizedSampler

        runner.setup(algo=algo,
                     env=env,
                     sampler_cls=sampler_cls,
                     sampler_args={
                         "sim": sim,
                         "reward_function": reward_function
                     })

        runner.train(n_epochs=1, batch_size=4000, plot=False)

        print("Installation successfully validated")
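These `run_task` callbacks are not called directly; they are handed to garage's experiment launcher, which builds the `snapshot_config` and logging setup before invoking them. A minimal launch sketch, assuming garage's `run_experiment` entry point; the `log_dir` value is an illustrative placeholder:

# A minimal sketch of launching the validation run above. It assumes garage's
# run_experiment API; the log directory is a placeholder, not a toolbox default.
from garage.experiment import run_experiment

run_experiment(
    run_task,
    snapshot_mode='last',              # keep only the final iteration snapshot
    log_dir='./data/validate_install',  # illustrative output location
    seed=1,
)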
def run_task(snapshot_config, *_):
    seed = 0
    # top_k = 10
    np.random.seed(seed)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(snapshot_config=snapshot_config,
                               max_cpus=4,
                               sess=sess) as local_runner:

                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = ASTEnv(simulator=sim,
                             reward_function=reward_function,
                             spaces=spaces,
                             **env_args)
                env = TfEnv(env)

                policy = ContinuousMLPPolicy(name='ast_agent',
                                             env_spec=env.spec,
                                             **policy_args)
                params = policy.get_params()
                sess.run(tf.variables_initializer(params))

                # Instantiate the garage objects
                baseline = ZeroBaseline(env_spec=env.spec)

                top_paths = BPQ.BoundedPriorityQueue(**bpq_args)

                sampler_cls = ASTVectorizedSampler
                sampler_args = {
                    "open_loop": False,
                    "sim": sim,
                    "reward_function": reward_function,
                    "n_envs": n_parallel
                }

                if ga_type == 'ga':
                    print('ga')
                    algo = GA(env_spec=env.spec,
                              policy=policy,
                              baseline=baseline,
                              top_paths=top_paths,
                              **algo_args)
                elif ga_type == 'gasm':
                    print('gasm')
                    algo = GASM(env_spec=env.spec,
                                policy=policy,
                                baseline=baseline,
                                top_paths=top_paths,
                                **algo_args)
                else:
                    raise NotImplementedError

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)

                # Run the experiment
                local_runner.train(**runner_args)
def run_task(snapshot_config, *_):
    seed = 0
    # top_k = 10
    np.random.seed(seed)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(snapshot_config=snapshot_config,
                               max_cpus=4,
                               sess=sess) as local_runner:

                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = ASTEnv(simulator=sim,
                             reward_function=reward_function,
                             spaces=spaces,
                             **env_args)

                top_paths = BPQ.BoundedPriorityQueue(**bpq_args)

                if mcts_type == 'mcts':
                    print('mcts')
                    algo = MCTS(env=env,
                                top_paths=top_paths,
                                **algo_args)
                elif mcts_type == 'mctsbv':
                    print('mctsbv')
                    algo = MCTSBV(env=env,
                                  top_paths=top_paths,
                                  **algo_args)
                elif mcts_type == 'mctsrs':
                    print('mctsrs')
                    algo = MCTSRS(env=env,
                                  top_paths=top_paths,
                                  **algo_args)
                else:
                    raise NotImplementedError

                sampler_cls = ASTVectorizedSampler

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args={
                                       "open_loop": False,
                                       "sim": sim,
                                       "reward_function": reward_function,
                                       "n_envs": n_parallel
                                   })

                # Run the experiment
                local_runner.train(**runner_args)

                log_dir = run_experiment_args['log_dir']

                with open(log_dir + '/best_actions.p', 'rb') as f:
                    best_actions = pickle.load(f)
                expert_trajectories = []
                for actions in best_actions:
                    sim.reset(s_0=env_args['s_0'])
                    path = []
                    for action in actions:
                        obs = sim.step(action)
                        state = sim.clone_state()
                        reward = reward_function.give_reward(
                            action=action,
                            info=sim.get_reward_info())
                        path.append({'state': state,
                                     'reward': reward,
                                     'action': action,
                                     'observation': obs})
                    expert_trajectories.append(path)
                with open(log_dir + '/expert_trajectory.p', 'wb') as f:
                    pickle.dump(expert_trajectories, f)
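The trajectories pickled at the end of this runner can be read back later, for example to inspect the discovered failure paths or to warm-start another learner. A minimal sketch, assuming the same log directory layout and the per-step dictionary format written above (the path is illustrative):

import pickle

# Load the trajectories written by the MCTS runner above.
with open('./data/mcts/expert_trajectory.p', 'rb') as f:
    expert_trajectories = pickle.load(f)

# Each trajectory is a list of per-step dicts with 'state', 'reward',
# 'action', and 'observation' keys, as constructed in the runner.
for i, path in enumerate(expert_trajectories):
    total_reward = sum(step['reward'] for step in path)
    print('trajectory {}: {} steps, return {}'.format(i, len(path), total_reward))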
def run_task(snapshot_config, *_):
    seed = 0
    # top_k = 10
    np.random.seed(seed)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(
                    snapshot_config=snapshot_config, max_cpus=4,
                    sess=sess) as local_runner:

                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = ASTEnv(simulator=sim,
                             reward_function=reward_function,
                             spaces=spaces,
                             **env_args)

                top_paths = BPQ.BoundedPriorityQueue(**bpq_args)

                if mcts_type == 'mcts':
                    print('mcts')
                    algo = MCTS(env=env,
                                top_paths=top_paths,
                                **algo_args)
                elif mcts_type == 'mctsbv':
                    print('mctsbv')
                    algo = MCTSBV(env=env,
                                  top_paths=top_paths,
                                  **algo_args)
                elif mcts_type == 'mctsrs':
                    print('mctsrs')
                    algo = MCTSRS(env=env,
                                  top_paths=top_paths,
                                  **algo_args)
                else:
                    raise NotImplementedError

                sampler_cls = ASTVectorizedSampler
                sampler_args['sim'] = sim
                sampler_args['reward_function'] = reward_function

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)

                # Run the experiment
                local_runner.train(**runner_args)

                log_dir = run_experiment_args['log_dir']

                if save_expert_trajectory:
                    load_convert_and_save_mcts_expert_trajectory(
                        best_actions_filename=log_dir + '/best_actions.p',
                        expert_trajectory_filename=log_dir +
                        '/expert_trajectory.p',
                        sim=sim,
                        s_0=env_args['s_0'],
                        reward_function=reward_function)
def run_task(snapshot_config, *_):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(snapshot_config=snapshot_config,
                               max_cpus=4,
                               sess=sess) as local_runner:

                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = TfEnv(
                    normalize(
                        ASTEnv(simulator=sim,
                               reward_function=reward_function,
                               spaces=spaces,
                               **env_args)))

                # Instantiate the garage objects
                policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)

                baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                 **baseline_args)

                optimizer = ConjugateGradientOptimizer
                optimizer_args = {
                    'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                }

                algo = PPO(env_spec=env.spec,
                           policy=policy,
                           baseline=baseline,
                           optimizer=optimizer,
                           optimizer_args=optimizer_args,
                           **algo_args)

                sampler_cls = ASTVectorizedSampler
                sampler_args['sim'] = sim
                sampler_args['reward_function'] = reward_function

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)

                # Run the experiment
                local_runner.train(**runner_args)

                if save_expert_trajectory:
                    load_convert_and_save_drl_expert_trajectory(
                        last_iter_filename=os.path.join(
                            run_experiment_args['log_dir'],
                            'itr_' + str(runner_args['n_epochs'] - 1) + '.pkl'),
                        expert_trajectory_filename=os.path.join(
                            run_experiment_args['log_dir'],
                            'expert_trajectory.pkl'))

                print('done!')
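All of these `run_task` variants close over configuration defined in the enclosing example script: `sim_args`, `reward_args`, `spaces_args`, `env_args`, `policy_args`, `baseline_args`, `algo_args`, `runner_args`, `sampler_args`, `bpq_args`, `n_parallel`, `save_expert_trajectory`, and `run_experiment_args`. A hypothetical sketch of how an outer script might assemble that configuration for the DRL runner; every value below is an illustrative placeholder (only the keys that the runners above actually read are grounded in the code), not a documented toolbox default:

# Hypothetical configuration; values are placeholders, not toolbox defaults.
sim_args = {}                                            # ExampleAVSimulator settings
reward_args = {}                                         # ExampleAVReward settings
spaces_args = {}                                         # ExampleAVSpaces settings
env_args = {'s_0': [-0.5, -4.0, 1.0, 11.17, -35.0]}      # initial scenario state
policy_args = {'name': 'lstm_policy', 'hidden_dim': 64}  # GaussianLSTMPolicy kwargs
baseline_args = {}
algo_args = {'max_path_length': 50, 'discount': 0.99}    # PPO kwargs
runner_args = {'n_epochs': 101, 'batch_size': 5000}      # LocalTFRunner.train kwargs
sampler_args = {'open_loop': False, 'n_envs': 8}         # 'sim'/'reward_function' added in run_task
n_parallel = 8
save_expert_trajectory = True
run_experiment_args = {'log_dir': './data/drl'}          # read back for expert-trajectory output

# These names are then visible to run_task when it is handed to the launcher,
# as in the run_experiment sketch shown earlier.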