def main():
    """Parse the MuJoCo CLI options, set up logging, and launch training."""
    cli = mujoco_arg_parser().parse_args()
    logger.configure()  # baselines logger
    train(cli.env, num_timesteps=cli.num_timesteps, seed=cli.seed)
def main():
    """Train a Humanoid-v2 policy, or with --play reload and render one."""
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path',
                        default=os.path.join(logger.get_dir(), 'humanoid_policy'))
    parser.set_defaults(num_timesteps=int(2e7))
    args = parser.parse_args()

    if not args.play:
        # Train the model and persist it under --model-path.
        train(num_timesteps=args.num_timesteps, seed=args.seed,
              model_path=args.model_path)
        return

    # Build the model object (1 dummy timestep), restore weights, and render.
    model = train(num_timesteps=1, seed=args.seed)
    tf_util.load_state(args.model_path)

    env = make_mujoco_env('Humanoid-v2', seed=0)
    obs = env.reset()
    while True:
        act = model.policy.act(stochastic=False, obs=obs)[0]
        obs, _, done, _ = env.step(act)
        env.render()
        if done:
            obs = env.reset()
def main():
    """Create a timestamped per-run results directory (agreed on across MPI
    ranks via broadcast from rank 0) and launch training.

    The run directory name encodes the wall-clock start time and the chosen
    algorithm (TRPO or PPO, from --algorithm).
    """
    main_dir = "simulation_results"
    # exist_ok=True replaces the try/except FileExistsError dance and is
    # race-safe when several MPI ranks hit this line at once.
    os.makedirs(main_dir, exist_ok=True)

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    parser = mujoco_arg_parser()
    parser.add_argument('--algorithm',
                        help="The algorithm which shall be used, TRPO or PPO",
                        type=str, default="TRPO")
    args = parser.parse_args()
    algorithm = args.algorithm

    # Rank 0 picks the directory name; all other ranks receive it by bcast
    # so every process writes into the same run directory.
    if rank == 0:
        stamp = time.strftime('%Y_%m_%d-%Hh_%Mm_%Ss', time.localtime(time.time()))
        save_dir = [os.path.join(main_dir, stamp + "-" + algorithm)]
    else:
        save_dir = None
    save_dir = comm.bcast(save_dir, root=0)
    # Unpack list
    save_dir = save_dir[0]

    model_file = os.path.join(save_dir, "model")
    log_dir = os.path.join(save_dir, "log")
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed,
          algorithm=algorithm, model_save_file=model_file, log_dir=log_dir)
def main():
    """Parse the CLI options and launch training.

    NOTE: the seed passed to train() comes from --run, not --seed.
    """
    opts = mujoco_arg_parser().parse_args()
    train(opts.env,
          num_timesteps=opts.num_timesteps,
          seed=opts.run,
          lam=opts.lam,
          sgd_steps=opts.sgd_steps,
          klcoeff=opts.klcoeff,
          log=opts.log)
def main():
    """Train a policy and, if --play was given, render it forever."""
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    model, env = train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)

    if not args.play:
        return
    logger.log("Running trained model")
    # Pre-allocate the observation buffer and refill it in place each step.
    obs = np.zeros((env.num_envs,) + env.observation_space.shape)
    obs[:] = env.reset()
    while True:
        actions = model.step(obs)[0]
        obs[:] = env.step(actions)[0]
        env.render('human')
def main():
    """Train PPO1 on a MuJoCo env, save/reload the model, then replay it."""
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)

    env = make_mujoco_env(args.env, args.seed)
    model = PPO1(
        MlpPolicy, env,
        timesteps_per_actorbatch=2048,
        clip_param=0.2,
        entcoeff=0.0,
        optim_epochs=10,
        optim_stepsize=3e-4,
        optim_batchsize=64,
        gamma=0.99,
        lam=0.95,
        schedule='linear',
    )
    model.learn(total_timesteps=args.num_timesteps)
    model.save("ppo1")
    # env.close()
    del model  # remove to demonstrate saving and loading
    # env = make_mujoco_env(args.env, args.seed)
    model = PPO1.load("ppo1")
    logger.log("~!!!!!!!!")

    episode_rew = 0
    obs = env.reset()
    while True:
        action, _states = model.predict(obs)
        ob, reward, done, info = env.step(action)
        episode_rew += reward
        env.render()
        if done:
            print(f'episode_rew={episode_rew}')
            episode_rew = 0
            obs = env.reset()
def main():
    """Pin BLAS/OMP to one thread, select the GPU, and run training for
    each seed in [seed_offset, seed_offset + num_seeds)."""
    args = mujoco_arg_parser().parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['OPENBLAS_NUM_THREADS'] = '1'

    log = not args.no_log
    # --states means "state-only features", i.e. exclude actions.
    is_action_features = not args.states

    for seed in range(args.num_seeds):
        train(args.env,
              algo=args.algo,
              num_timesteps=args.num_timesteps,
              seed=seed + args.seed_offset,
              expert_model=args.expert_model,
              expert_path=args.expert_path,
              num_trajectories=args.num_trajectories,
              is_action_features=is_action_features,
              sgd_steps=args.sgd_steps,
              mdpo_update_steps=args.mdpo_update_steps,
              lipschitz=args.lipschitz,
              t_pi=args.t_pi,
              t_c=args.t_c,
              lam=args.lam,
              log=log,
              pretrain=args.pretrain,
              pretrain_epochs=args.pretrain_epochs,
              exploration_bonus=args.exploration,
              bonus_coef=args.bonus_coef,
              random_action_len=args.random_action_len,
              dir_name=args.dir_name,
              neural=args.neural,
              args=args)
# NOTE(review): whitespace-mangled span — newlines were lost. It contains (a)
# the TAIL of a parse/eval helper whose `def` starts before this chunk
# (eval(v) with a fallback to the raw string on NameError/SyntaxError, then a
# dict comprehension over parse_unknown_args), and (b) a __main__ section that
# runs two arg parsers and computes a plot directory. Left byte-identical:
# the enclosing definition is not visible here, so any reconstruction of its
# header would be a guess. Restore the original line breaks before editing.
try: return eval(v) except (NameError, SyntaxError): return v return {k: parse(v) for k, v in parse_unknown_args(args).items()} if __name__ == '__main__': import time import os from stable_baselines.common.cmd_util import mujoco_arg_parser from stable_baselines.low_dim_analysis.common_parser import get_common_parser parser = get_common_parser() openai_arg_parser = mujoco_arg_parser() plot_args, plot_unknown_args = parser.parse_known_args() openai_args, openai_unknown_args = openai_arg_parser.parse_known_args() plot_unknown_args = parse_cmdline_kwargs(plot_unknown_args) openai_unknown_args = parse_cmdline_kwargs(openai_unknown_args) both_unknown_args = dict(plot_unknown_args.items() & openai_unknown_args.items()) threads_or_None = 'threads' if plot_args.use_threads else None logger.log(f"THREADS OR NOT: {threads_or_None}") plot_dir_alg = get_plot_dir(plot_args.alg, plot_args.num_timesteps, plot_args.env, plot_args.normalize, plot_args.run_num)
def _recreate_dir(path):
    """Delete *path* (and its contents) if it exists, then create it fresh."""
    import shutil
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)


def train(args):
    """Train PPO2 on args.env, writing logs, the parameter trajectory, and
    the final model (plus VecNormalize statistics) under a fresh run dir.

    `args` is a raw argv list; unknown options are ignored.
    """
    args, argv = mujoco_arg_parser().parse_known_args(args)
    logger.log(f"#######TRAIN: {args}")
    args.alg = "ppo2"

    # Start every run from an empty directory tree. The delete-and-recreate
    # pattern was repeated inline three times; it now lives in _recreate_dir.
    this_run_dir = get_dir_path_for_this_run(args)
    _recreate_dir(this_run_dir)

    log_dir = get_log_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)
    logger.configure(log_dir)

    def make_env():
        env_out = gym.make(args.env)
        env_out.env.visualize = False
        env_out = bench.Monitor(env_out, logger.get_dir(), allow_early_resets=True)
        return env_out

    env = DummyVecEnv([make_env])
    env.envs[0].env.env.disableViewer = True

    set_global_seeds(args.seed)
    env.envs[0].env.env.seed(args.seed)

    if args.normalize:
        env = VecNormalize(env)

    policy = MlpPolicy

    # extra run info I added for my purposes
    full_param_traj_dir_path = get_full_params_dir(this_run_dir)
    _recreate_dir(full_param_traj_dir_path)
    _recreate_dir(save_dir)

    run_info = {
        "run_num": args.run_num,
        "env_id": args.env,
        "full_param_traj_dir_path": full_param_traj_dir_path,
        "state_samples_to_collect": args.state_samples_to_collect,
    }

    model = PPO2(policy=policy, env=env, n_steps=args.n_steps,
                 nminibatches=args.nminibatches, lam=0.95, gamma=0.99,
                 noptepochs=10, ent_coef=0.0, learning_rate=3e-4,
                 cliprange=0.2, optimizer=args.optimizer, seed=args.seed)
    model.tell_run_info(run_info)
    model.learn(total_timesteps=args.num_timesteps)

    model.save(f"{save_dir}/ppo2")
    if args.normalize:
        env.save_running_average(save_dir)
def main():
    """Parse the CLI options and launch training.

    NOTE: train() here takes run/kappa/vf_phi_update_interval rather than a
    seed argument.
    """
    opts = mujoco_arg_parser().parse_args()
    train(opts.env,
          num_timesteps=opts.num_timesteps,
          run=opts.run,
          kappa=opts.kappa,
          vf_phi_update_interval=opts.vf_phi_update_interval,
          log=opts.log)
def main():
    """Train a policy on SawyerLift, or with --play reload a saved model and
    render it in an endless loop."""
    parser = mujoco_arg_parser()
    parser.add_argument(
        '--model-path',
        default="/cvgl2/u/surajn/workspace/saved_models/sawyerlift_ppo2/model")
    parser.add_argument('--images', default=False)
    args = parser.parse_args()
    logger.configure()

    if not args.play:
        model, env = train(args.env,
                           num_timesteps=args.num_timesteps,
                           seed=args.seed,
                           model_path=args.model_path,
                           images=args.images)
    if args.play:
        def make_env():
            env_out = GymWrapper(
                suite.make(
                    "SawyerLift",
                    use_camera_obs=False,  # do not use pixel observations
                    has_offscreen_renderer=False,  # not needed since not using pixel obs
                    has_renderer=True,  # make sure we can render to the screen
                    reward_shaping=True,  # use dense rewards
                    control_freq=10,  # control should happen fast enough so that simulation looks smooth
                ))
            env_out.reward_range = None
            env_out.metadata = None
            env_out.spec = None
            env_out = bench.Monitor(env_out, logger.get_dir(),
                                    allow_early_resets=True)
            return env_out

        env = DummyVecEnv([make_env])
        env = VecNormalize(env)
        model = TRPO(MlpPolicy, env, timesteps_per_batch=1024, max_kl=0.01,
                     cg_iters=10, cg_damping=0.1, entcoeff=0.0, gamma=0.99,
                     lam=0.98, vf_iters=5, vf_stepsize=1e-3)
        # BUG FIX: load() is a classmethod that RETURNS the loaded model; the
        # original called `model.load(...)` and discarded the result, so the
        # replay loop ran with freshly-initialized (untrained) weights.
        model = model.load(args.model_path)
        logger.log("Running trained model")
        obs = np.zeros((env.num_envs,) + env.observation_space.shape)
        obs[:] = env.reset()
        while True:
            env.render()
            actions = model.step(obs)[0]
            obs[:] = env.step(actions)[0]
def main():
    """Parse the MuJoCo CLI options and launch training."""
    cli = mujoco_arg_parser().parse_args()
    train(cli.env, num_timesteps=cli.num_timesteps, seed=cli.seed)