def main():
    """Replay a saved policy (given via --file) in the HumanoidRL-v0 env."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--file",
        type=str,
        default="",
    )
    cli_args = arg_parser.parse_args()

    # load_policy returns (env, get_action); the stored env is discarded
    # because a fresh HumanoidRL-v0 instance is created below.
    _, get_action = load_policy(cli_args.file)
    env = gym.make('HumanoidRL-v0')
    run_policy(env, get_action)
def run(train_fn, env_config, net_config=None):
    """Dispatch between training and test playback based on sys.argv.

    Modes (sys.argv[1], defaulting to '--train' when absent):
      --train          call train_fn()
      --test [path]    load the model at `path` (or the latest model when
                       omitted) and replay it in env_config['env_name'].

    Args:
        train_fn: zero-argument callable that runs training.
        env_config: dict; must contain 'env_name' for --test mode.
        net_config: optional network config forwarded to the policy loader.
    """
    mode = '--train' if not sys.argv[1:] else sys.argv[1]
    if mode == '--train':
        train_fn()
    elif mode == '--test':
        if not sys.argv[2:]:
            model_path = get_latest_model_path()  # TODO
        else:
            model_path = sys.argv[2]
        env = gym.make(env_config['env_name'])
        _, get_action = load_policy_and_env(fpath=model_path,
                                            deterministic=True,
                                            env=env,
                                            net_config=net_config)
        run_policy(env, get_action)
    else:
        # Previously an unrecognized mode fell through silently and did
        # nothing; fail loudly so typos are caught immediately.
        raise ValueError(
            "Unknown mode %r; expected '--train' or '--test'" % mode)
def run(train_fn, env_config, net_config=None):
    """Dispatch between training, interactive play, and test playback.

    Modes (sys.argv[1], defaulting to '--train' when absent):
      --train          call train_fn()
      --play           launch the deepdrive_zero interactive player
      --test [path]    load the model at `path` (or the latest model when
                       omitted) and replay it in env_config['env_name'].

    Args:
        train_fn: zero-argument callable that runs training.
        env_config: dict; must contain 'env_name' for --test mode and is
            passed to env.configure_env() / the player.
        net_config: optional network config forwarded to the policy loader.
    """
    mode = '--train' if not sys.argv[1:] else sys.argv[1]
    if mode == '--train':
        train_fn()
    elif mode == '--play':
        from deepdrive_zero import player
        # One physics step per observation gives smooth interactive control.
        env_config['physics_steps_per_observation'] = 1
        player.start(env_config=env_config)
    elif mode == '--test':
        if not sys.argv[2:]:
            model_path = get_latest_model_path()  # TODO
        else:
            model_path = sys.argv[2]
        env = gym.make(env_config['env_name'])
        env.configure_env(env_config)
        _, get_action = load_policy_and_env(fpath=model_path,
                                            deterministic=True,
                                            env=env,
                                            net_config=net_config)
        run_policy(env, get_action)
    else:
        # Previously an unrecognized mode fell through silently and did
        # nothing; fail loudly so typos are caught immediately.
        raise ValueError(
            "Unknown mode %r; expected '--train', '--play' or '--test'" % mode)
# Replay a saved deepdrive-2d policy. The environment, its env-var driven
# reward/constraint configuration, and the action-space convention are all
# inferred from substrings of the hard-coded checkpoint path `p`.
if TEST_STATIC_OBSTACLE:
    _, get_action = load_policy(
        '/home/c2/src/spinningup/data/dd2d-ppo-intersection/dd2d-ppo-intersection_s0',
        use_model_only=False)
    env = gym.make('deepdrive-2d-static-obstacle-no-g-pen-v0')
else:
    # Checkpoint directory; the substring checks below assume the run name
    # encodes how the policy was trained.
    p = '/home/c2/src/tmp/spinningup/data/deepdrive-2d-intersection-no-constrained-controls-example/deepdrive-2d-intersection-no-constrained-controls-example_s0_2020_03-10_13-14.50/best_HorizonReturn/2020_03-11_11-36.27'
    # Disable g-force / jerk penalties for runs trained without them.
    # NOTE(review): 'no-contraint-g' looks like a typo for 'no-constraint-g'
    # — presumably kept to match historically misnamed runs; confirm.
    if 'no-end-g' in p or 'no-contraint-g' in p or 'no-g' in p or 'no-constrain' in p:
        os.environ['END_ON_HARMFUL_GS'] = '0'
        os.environ['GFORCE_PENALTY_COEFF'] = '0'
        os.environ['JERK_PENALTY_COEFF'] = '0'
        # 'no-constrain' additionally removes control constraints.
        if 'no-constrain' in p:
            os.environ['CONSTRAIN_CONTROLS'] = '0'
    # Match the action convention the policy was trained with:
    # '1' = actions are normalized deltas, '0' = absolute actions.
    if 'delta-controls' in p or 'deepdrive-2d-intersection-no-g-or-jerk2' in p:
        os.environ['EXPECT_NORMALIZED_ACTION_DELTAS'] = '1'
    else:
        os.environ['EXPECT_NORMALIZED_ACTION_DELTAS'] = '0'
    # Pick the env the checkpoint was trained on.
    if 'one-waypoint' in p:
        env_name = 'deepdrive-2d-one-waypoint-v0'
    else:
        env_name = 'deepdrive-2d-intersection-w-gs-allow-decel-v0'
    _, get_action = load_policy_and_env(p, deterministic=True)
    # env = gym.make('deepdrive-2d-intersection-v0')
    env = gym.make(env_name)
    # Env reads its configuration from the os.environ values set above.
    env.configure_env()
    # env.unwrapped.physics_steps_per_observation = 1
run_policy(env, get_action)
def test_ppo(exp_dir, itr='last'):
    """Load the policy saved under `exp_dir` (iteration `itr`) and replay
    it in the hide-and-seek environment."""
    _, get_action, lstm = load_policy_and_env(exp_dir, itr=itr)
    environment = hide_and_seek.make_env()
    run_policy(environment, get_action, lstm=lstm)
import gym
import spinup
import tensorflow as tf
import spinup.utils.logx
from spinup.utils.test_policy import load_policy, run_policy

# Retained reference snippet (not executed): manual policy replay for a
# DDPG BipedalWalker-v2 checkpoint, superseded by the load_policy helper.
'''
# METHOD 1 (See Docs)
# BipedalWalker-v2
modelpath = '/home/watate/[email protected]/Python/spinningup/data/ex5_ddpg_100ep_bipedalwalker-v2/ex5_ddpg_100ep_bipedalwalker-v2_s0'
len = 0
episodes = 100
norender = False
#itr = -1
itr = -100 #Only for soft-actor critic
deterministic = False
# This part is unecessary because load_policy already restores tf_graph
#model = spinup.utils.logx.restore_tf_graph(sess, modelpath)
env, get_action = load_policy(modelpath, itr if itr >=0 else 'last', deterministic)
run_policy(env, get_action, len, episodes, not(norender))
'''
from spinup.utils.test_policy import load_policy, run_policy
import gym
import gym_geofriend2  # noqa: F401 -- registers the geofriend2 envs with gym

if __name__ == "__main__":
    # Guarded so importing this module no longer loads the model and runs
    # the policy as a side effect (it previously executed at import time).
    _, get_action = load_policy('./spinupPpo')
    env = gym.make("geofriend2-v0")  # Pyramid(),
    run_policy(env, get_action, max_ep_len=200)
from spinup.utils.test_policy import load_policy, run_policy
import gym
import argparse

if __name__ == "__main__":
    # (removed a duplicate `import argparse` that shadowed the one above)
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, required=True)
    parser.add_argument('--save_dir', type=str, required=True)
    parser.add_argument("-d", '--deterministic', action="store_true")
    parser.add_argument("-e", "--episodes", type=int, default=100)
    # store_false => defaults to True (render); passing -dr turns it False.
    parser.add_argument("-dr", "--dont_render", action="store_false")
    args = parser.parse_args()

    # args.dont_render already holds the render flag; the original 4-line
    # if/else just copied it through unchanged.
    render_it = args.dont_render

    print(args.save_dir)
    _, get_action = load_policy(args.save_dir, deterministic=args.deterministic)
    env = gym.make(args.env)
    run_policy(env, get_action, num_episodes=args.episodes, render=render_it)