gforce_threshold=1.0, # https://iopscience.iop.org/article/10.1088/0143-0807/37/6/065008/pdf # Importantly they depict the threshold # for admissible acceleration onset or jerk as j = 15g/s or ~150m/s^3. jerk_threshold=150.0, # 15g/s incent_win=True, constrain_controls=False, incent_yield_to_oncoming_traffic=True, physics_steps_per_observation=12, discrete_actions=COMFORTABLE_ACTIONS, ) net_config = dict(hidden_units=(256, 256), activation=torch.nn.Tanh) eg = ExperimentGrid(name=experiment_name) eg.add('env_name', env_config['env_name'], '', False) # eg.add('seed', 0) eg.add( 'resume', '/home/c2/src/tmp/spinningup/data/intersection_from_scratch_discrete_steer_and_accel/intersection_from_scratch_discrete_steer_and_accel_s0_2020_04-22_13-12.42.071188' ) # eg.add('reinitialize_optimizer_on_resume', True) # eg.add('num_inputs_to_add', 0) # eg.add('pi_lr', 3e-6) # eg.add('vf_lr', 1e-5) # eg.add('boost_explore', 5) eg.add('epochs', 20000) eg.add('steps_per_epoch', 4000) eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid') eg.add('ac_kwargs:activation', net_config['activation'], '')
# NOTE(review): collapsed fragment — tail of get_setting(setting_number,
# total, settings, setting_names): a mixed-radix decode of a flat job index
# into one index per hyperparameter dimension, followed by the module-level
# grid construction and launch for bc_ue_learn. Kept byte-identical because
# the original indentation (function body vs. module level) was lost when
# the source was collapsed onto one line.
for setting in settings: division = int(total / len(setting)) index = int(remainder / division) remainder = remainder % division indexes.append(index) total = division actual_setting = {} for j in range(len(indexes)): actual_setting[setting_names[j]] = settings[j][indexes[j]] return indexes, actual_setting indexes, actual_setting = get_setting(args.setting, total, settings, setting_names) #################################################################################################### ## use eg.add to add parameters in the settings or add parameters that apply to all jobs eg = ExperimentGrid(name=exp_name) eg.add('ue_seed', 21, 'ues', True) eg.add('lr', actual_setting['lr'], 'lr', True) eg.add('border', actual_setting['border'], 'border', True) eg.add('wd', 0, 'wd', True) eg.add('buffer_type', 'FinalSigma0.5', 'Buf-', True) eg.add('buffer_size', '500K', '', True) eg.add('eval_freq', 500) eg.add('max_timesteps', 100000) eg.add('env_set', actual_setting['env_set'], '', True) eg.add('seed', actual_setting['seed']) eg.run(bc_ue_learn, num_cpu=args.cpu) print('\n###################################### GRID EXP END ######################################') print('total time for grid experiment:',time.time()-start_time)
# NOTE(review): Spinning Up's command-line grid-search driver, collapsed onto
# three long lines; kept byte-identical because reflowing would require
# guessing the original indentation and the exact layout of the dedent()'d
# error string. Flow: resolve default backend and look up the algorithm,
# handle --help, then four passes over argv (flag/value collection ->
# empty-flag booleans -> [shorthand] extraction -> SUBSTITUTIONS renames),
# split off the ExperimentGrid.run kwargs (RUN_KEYS), determine exp_name,
# enforce MPI compatibility for num_cpu > 1, validate every env_name against
# gym's registry, and finally build and run the ExperimentGrid.
# HACK: `eval` on 'spinup.'+cmd and on raw CLI values (in process()) executes
# arbitrary user input, and process() uses a bare except — acceptable for a
# trusted local CLI, never for untrusted input.
def parse_and_execute_grid_search(cmd, args): """Interprets algorithm name and cmd line args into an ExperimentGrid.""" if cmd in BASE_ALGO_NAMES: backend = DEFAULT_BACKEND[cmd] print('\n\nUsing default backend (%s) for %s.\n'%(backend, cmd)) cmd = cmd + '_' + backend algo = eval('spinup.'+cmd) # Before all else, check to see if any of the flags is 'help'. valid_help = ['--help', '-h', 'help'] if any([arg in valid_help for arg in args]): print('\n\nShowing docstring for spinup.'+cmd+':\n') print(algo.__doc__) sys.exit() def process(arg): # Process an arg by eval-ing it, so users can specify more # than just strings at the command line (eg allows for # users to give functions as args). try: return eval(arg) except: return arg # Make first pass through args to build base arg_dict. Anything # with a '--' in front of it is an argument flag and everything after, # until the next flag, is a possible value. arg_dict = dict() for i, arg in enumerate(args): assert i > 0 or '--' in arg, \ friendly_err("You didn't specify a first flag.") if '--' in arg: arg_key = arg.lstrip('-') arg_dict[arg_key] = [] else: arg_dict[arg_key].append(process(arg)) # Make second pass through, to catch flags that have no vals. # Assume such flags indicate that a boolean parameter should have # value True. for k,v in arg_dict.items(): if len(v) == 0: v.append(True) # Third pass: check for user-supplied shorthands, where a key has # the form --keyname[kn]. The thing in brackets, 'kn', is the # shorthand. NOTE: modifying a dict while looping through its # contents is dangerous, and breaks in 3.6+. We loop over a fixed list # of keys to avoid this issue. given_shorthands = dict() fixed_keys = list(arg_dict.keys()) for k in fixed_keys: p1, p2 = k.find('['), k.find(']') if p1 >= 0 and p2 >= 0: # Both '[' and ']' found, so shorthand has been given k_new = k[:p1] shorthand = k[p1+1:p2] given_shorthands[k_new] = shorthand arg_dict[k_new] = arg_dict[k] del arg_dict[k] # Penultimate pass: sugar. 
Allow some special shortcuts in arg naming, # eg treat "env" the same as "env_name". This is super specific # to Spinning Up implementations, and may be hard to maintain. # These special shortcuts are described by SUBSTITUTIONS. for special_name, true_name in SUBSTITUTIONS.items(): if special_name in arg_dict: # swap it in arg dict arg_dict[true_name] = arg_dict[special_name] del arg_dict[special_name] if special_name in given_shorthands: # point the shortcut to the right name given_shorthands[true_name] = given_shorthands[special_name] del given_shorthands[special_name] # Final pass: check for the special args that go to the 'run' command # for an experiment grid, separate them from the arg dict, and make sure # that they have unique values. The special args are given by RUN_KEYS. run_kwargs = dict() for k in RUN_KEYS: if k in arg_dict: val = arg_dict[k] assert len(val) == 1, \ friendly_err("You can only provide one value for %s."%k) run_kwargs[k] = val[0] del arg_dict[k] # Determine experiment name. If not given by user, will be determined # by the algorithm name. if 'exp_name' in arg_dict: assert len(arg_dict['exp_name']) == 1, \ friendly_err("You can only provide one value for exp_name.") exp_name = arg_dict['exp_name'][0] del arg_dict['exp_name'] else: exp_name = 'cmd_' + cmd # Make sure that if num_cpu > 1, the algorithm being used is compatible # with MPI. if 'num_cpu' in run_kwargs and not(run_kwargs['num_cpu'] == 1): assert cmd in add_with_backends(MPI_COMPATIBLE_ALGOS), \ friendly_err("This algorithm can't be run with num_cpu > 1.") # Special handling for environment: make sure that env_name is a real, # registered gym environment. valid_envs = [e.id for e in list(gym.envs.registry.all())] assert 'env_name' in arg_dict, \ friendly_err("You did not give a value for --env_name! Add one and try again.") for env_name in arg_dict['env_name']: err_msg = dedent(""" %s is not registered with Gym. 
Recommendations: * Check for a typo (did you include the version tag?) * View the complete list of valid Gym environments at https://gym.openai.com/envs/ """%env_name) assert env_name in valid_envs, err_msg # Construct and execute the experiment grid. eg = ExperimentGrid(name=exp_name) for k,v in arg_dict.items(): eg.add(k, v, shorthand=given_shorthands.get(k)) eg.run(algo, **run_kwargs)
import argparse

# CLI configuration for the superposition-SAC MT10 sweep.
parser = argparse.ArgumentParser()
parser.add_argument('--cpu', type=int, default=4)
parser.add_argument('--num_runs', type=int, default=1)
parser.add_argument('--psp_type', type=str, default='Rand',
                    help='Rand, Ones, Binary, Proposed, Sanity')
parser.add_argument('--hidden_sizes', type=int, nargs='+',
                    default=(160,) * 6)
args = parser.parse_args()

# Human-readable tag for the chosen hidden sizes (referenced by the
# commented-out experiment name below).
hidden_sizes_name = '_'.join(map(str, args.hidden_sizes))

#eg = ExperimentGrid(name='superpos_sac-MT10_with_bias_%s_context_q_%s' % (args.psp_type, hidden_sizes_name))
grid = ExperimentGrid(name='TIMETEST')
grid.add('env_name', 'MT10Helper-v0', '', True)
grid.add('num_tasks', 10)
grid.add('batch_size', 128)  # per task, so the effective batch is 128 x 10
grid.add('psp_type', args.psp_type)
grid.add('seed', [10 * run for run in range(args.num_runs)])
grid.add('epochs', 1000)
# One "cycle" = one full pass of paths over every task.
steps_per_cycle = TASK_HORIZON * PATHS_PER_TASK * NUM_TASKS
grid.add('steps_per_epoch', steps_per_cycle)
grid.add('update_after', steps_per_cycle)
grid.add('lr', [3e-4])
grid.add('start_steps', steps_per_cycle)
#eg.add('update_every', NUM_TASKS * )
grid.add('num_test_episodes', 10 * NUM_TASKS)
grid.add('ac_kwargs:hidden_sizes', [tuple(args.hidden_sizes)], 'hid')
grid.add('ac_kwargs:activation', [torch.nn.ReLU], '')
grid.run(psp_sac_pytorch, num_cpu=args.cpu)
# NOTE(review): collapsed fragment — the first two statements are the tail
# of a training function's per-epoch logging epilogue (their indentation was
# lost in collapsing), followed by a module-level __main__ script that
# benchmarks TD3 with an ablation over 'remove_action_clip' across seeds.
# Kept byte-identical because the function/module nesting cannot be
# reconstructed safely.
logger.log_tabular('Time', time.time() - start_time) logger.dump_tabular() if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() parser.add_argument('--env', type=str, default='HalfCheetah-v2') parser.add_argument('--h', type=int, default=300) parser.add_argument('--l', type=int, default=1) parser.add_argument('--num_runs', '-n', type=int, default=3) parser.add_argument('--steps_per_epoch', '-s', type=int, default=5000) parser.add_argument('--total_steps', '-t', type=int, default=int(5e4)) args = parser.parse_args() def td3_with_actor_critic(**kwargs): td3(ac_kwargs=dict(hidden_sizes=[args.h] * args.l), start_steps=5000, max_ep_len=150, batch_size=64, polyak=0.95, **kwargs) eg = ExperimentGrid(name='ex2-3_td3') eg.add('replay_size', int(args.total_steps)) eg.add('env_name', args.env, '', True) eg.add('seed', [10 * i for i in range(args.num_runs)]) eg.add('epochs', int(args.total_steps / args.steps_per_epoch)) eg.add('steps_per_epoch', args.steps_per_epoch) eg.add('remove_action_clip', [False, True]) eg.run(td3_with_actor_critic, datestamp=True)
from spinup.algos.ddpg.ddpg import ddpg
from spinup.utils.run_utils import ExperimentGrid

# Module-level defaults retained for the (commented-out) direct NAF/DDPG
# experiments at the bottom of this file.
env_fn = lambda: gym.make('Pendulum-v0')
network_kwargs = dict(hidden_sizes=[400, 300], activation=tf.nn.relu)
logger_kwargs = dict(output_dir='logging/NAF', exp_name='naf - tests')
steps_per_epoch = 1000
epochs = 100
start_steps = 50
algorithm = 'naf'

if __name__ == '__main__':
    # Benchmark DDPG on Pendulum over four seeds and two network widths.
    bench = ExperimentGrid(name='ddpg-bench-long')
    bench.add('env_name', 'Pendulum-v0', '', True)
    bench.add('seed', [seed * 10 for seed in range(4)])
    bench.add('epochs', 20)
    bench.add('steps_per_epoch', 1000)
    bench.add('ac_kwargs:hidden_sizes', [(100, 100), (400, 300)], 'hid')
    bench.add('ac_kwargs:activation', [tf.nn.relu], '')
    bench.run(ddpg, num_cpu=4, data_dir='logging/DDPG')

# Earlier direct-call experiments, kept for reference:
# agent = naf(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=100, epochs=25, logger_kwargs=logger_kwargs)
# agent = spinup.ddpg(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=500, epochs=250, logger_kwargs=logger_kwargs,
#                     start_steps=start_steps)
# tf.reset_default_graph()
# naf(env_fn=env_fn, ac_kwargs=network_kwargs, steps_per_epoch=steps_per_epoch, epochs=epochs, logger_kwargs=logger_kwargs,
# LunarLanderContinuous-v2 # MontezumaRevenge-ram-v0 # Enduro-ram-v0 # MsPacman-ram-v0 # Ant-v2 # HumanoidStandup-v2 if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() parser.add_argument('--cpu', type=int, default=4) parser.add_argument('--num_runs', type=int, default=10) # parser.add_argument('--clip_ratio', type=int, ) args = parser.parse_args() MontezumaRevenge = ExperimentGrid(name='vpg-nueve-singular') MontezumaRevenge.add('env_name', 'MontezumaRevenge-ram-v0', '', True) # eg.add('clip_ratio', [0.1,0.2]) MontezumaRevenge.add('seed', [10 * i for i in range(args.num_runs)]) MontezumaRevenge.add('epochs', 10) MontezumaRevenge.add('steps_per_epoch', [4000, 100]) MontezumaRevenge.add('optimizer', [ 'GradientDescentOptimizer', 'MomentumOptimizer', 'ProximalAdagradOptimizer', 'ProximalGradientDescentOptimizer', 'RMSPropOptimizer', 'AdaMaxOptimizer', 'AdamGSOptimizer', 'AdamWOptimizer', 'AddSignOptimizer', 'GGTOptimizer', 'LARSOptimizer', 'LazyAdamGSOptimizer', 'LazyAdamOptimizer', 'MomentumWOptimizer', 'NadamOptimizer', 'PowerSignOptimizer', 'RegAdagradOptimizer', 'ShampooOptimizer' ]) MontezumaRevenge.add('ac_kwargs:hidden_sizes', [(32, ), (64, 64)], 'hid')
# LunarLanderContinuous-v2 # MontezumaRevenge-ram-v0 # Enduro-ram-v0 # MsPacman-ram-v0 # Ant-v2 # HumanoidStandup-v2 if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() parser.add_argument('--cpu', type=int, default=4) parser.add_argument('--num_runs', type=int, default=10) # parser.add_argument('--clip_ratio', type=int, ) args = parser.parse_args() LunarLanderContinuous = ExperimentGrid(name='vpg-ocho') LunarLanderContinuous.add('env_name', 'LunarLanderContinuous-v2', '', True) # eg.add('clip_ratio', [0.1,0.2]) LunarLanderContinuous.add('seed', [10 * i for i in range(args.num_runs)]) LunarLanderContinuous.add('epochs', 10) LunarLanderContinuous.add('steps_per_epoch', [4000, 100]) LunarLanderContinuous.add('optimizer', [ 'NadamOptimizer', 'PowerSignOptimizer', 'RegAdagradOptimizer', 'ShampooOptimizer' ]) LunarLanderContinuous.add('ac_kwargs:hidden_sizes', [(32, ), (64, 64)], 'hid') LunarLanderContinuous.add('ac_kwargs:activation', [ tf.nn.relu, tf.nn.relu6, tf.nn.crelu, tf.nn.elu, tf.nn.selu, tf.nn.softplus, tf.nn.softsign, tf.sigmoid, tf.tanh ], '')
from spinup.utils.run_utils import ExperimentGrid
from spinup import ppo
import torch

if __name__ == '__main__':
    import argparse

    # Small PPO benchmark on CartPole: a few seeds, two network sizes,
    # two activations.
    parser = argparse.ArgumentParser()
    parser.add_argument('--cpu', type=int, default=4)
    parser.add_argument('--num_runs', type=int, default=3)
    args = parser.parse_args()

    grid = ExperimentGrid(name='ppo-bench')
    grid.add('env_name', 'CartPole-v0', '', True)
    grid.add('seed', [run * 10 for run in range(args.num_runs)])
    grid.add('epochs', 10)
    grid.add('steps_per_epoch', 4000)
    grid.add('ac_kwargs:hidden_sizes', [(32,), (64, 64)], 'hid')
    grid.add('ac_kwargs:activation', [torch.tanh, torch.relu], '')
    grid.run(ppo, num_cpu=args.cpu)
from spinup.utils.run_utils import ExperimentGrid
from spinup import fgym_trunk_sac_discrete_v2
import torch

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--cpu', type=int, default=1)
    parser.add_argument('--num_runs', type=int, default=2)
    args = parser.parse_args()
    eg = ExperimentGrid(name='sac-pyt')
    from src.Financial_gym.financial_env.fgym import *
    # Factory for the picture-based daily financial-futures environment.
    env = lambda: Assembly_Fin_for_pic(
        data_path='src/Financial_gym/data/pic/',  # directory holding the data files
        game=continus_Daily_Fin_Futures_holding_reward_pic,
        seed=123,
        windows=50,
        init_capital=1000000,
        show_statistics=True,
        drawdown=0.1)
    eg.add('environment', env)
    eg.add('show_kwargs_json', True)
    eg.add('env_name', 'Financial_gym_pic_daily', '', True)
    eg.add('seed', [10 * i for i in range(args.num_runs)])
    eg.add('epochs', 400)
    eg.add('save_freq', 5)  # checkpoint save frequency, in epochs
    # NOTE(review): chunk ends here — the eg.run(...) launch presumably
    # follows in the original file.
from spinup.utils.run_utils import ExperimentGrid
from spinup import ppo_pytorch
import torch
from gym_match_input_continuous.experiments import utils

# Experiment name derived from this file's name without the ".py" suffix.
# NOTE(review): uses `os`, which is not imported in this chunk — presumably
# imported elsewhere in the file.
experiment_name = os.path.basename(__file__)[:-3]
notes = """ Garbage into RL treasure out """
env_config = dict(env_name='match-input-continuous-v0', )
net_config = dict(hidden_units=(32, 32), activation=torch.nn.Tanh)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# eg.add('gamma', 0.999) # Lower gamma so seconds of effective horizon remains at 10s with current physics steps = 12 * 1/60s * 1 / (1-gamma)
eg.add('epochs', 1000)
eg.add('steps_per_epoch', 500)
eg.add('try_rollouts', 2)
eg.add('shift_advs_pct', 90)
eg.add('take_worst_rollout', True)
eg.add('steps_per_try_rollout', 1)
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
eg.add('notes', notes, '')
eg.add('run_filename', os.path.realpath(__file__), '')
eg.add('env_config', env_config, '')
# NOTE(review): chunk ends here — the .run(...) launch presumably follows
# in the original file.
# Packages and environment imports
import numpy as np
from spinup.utils.run_utils import ExperimentGrid
from spinup import soc_pytorch
# from spinup import sac_pytorch
# from spinup import ppo_pytorch
import torch as th

if __name__ == '__main__':
    import argparse

    # Soft option-critic on the PyBullet Hopper task, swept over seeds,
    # option counts, and entropy coefficients.
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_runs', type=int, default=3)
    args = parser.parse_args()

    grid = ExperimentGrid(name='hac-hopper-soc')
    # grid.add('env_name', 'Walker2DBulletEnv-v0', '', True)
    grid.add('env_name', 'HopperBulletEnv-v0', '', True)
    grid.add('seed', [run * 10 for run in range(args.num_runs)])
    grid.add('epochs', 250)
    grid.add('N_options', [2])  # candidates: 2, 3
    grid.add('ac_kwargs:hidden_sizes', [[128, 256, 128]], 'hid')
    grid.add('alpha', [0.1, 0.2])
    grid.add('c', [0.2])  # candidates: 0.1, 0.2, 0.3 and possibly 0.0
    grid.run(soc_pytorch)
# NOTE(review): collapsed fragment mixing (1) the tail of a PPO main_loop —
# periodic pickling of policy/value nets to learned_models/ with a CPU
# round-trip, then a GPU cache flush and agent evaluation, (2) unreachable
# statements after the first `return`, (3) a mock_train helper that only
# prints its config, and (4) the module-level grid launch for `train`.
# Statement nesting was lost in collapsing, so the code is kept
# byte-identical.
if args.save_model_interval > 0 and (i_iter+1) % args.save_model_interval == 0: to_device(torch.device('cpu'), policy_net, value_net) pickle.dump((policy_net, value_net, running_state), open(os.path.join(assets_dir(), 'learned_models/{}_ppo.p'.format(args.env_name)), 'wb')) to_device(device, policy_net, value_net) # """clean up gpu memory""" torch.cuda.empty_cache() return agent.evaluate() print('a') print(config) print(args) return main_loop(config) def mock_train(**kwargs): config = { "lr": kwargs['lr'], "gamma": kwargs['gamma'] } print('a') print(config) print(args) eg = ExperimentGrid('hopper') eg.add('lr', [1e-4]) eg.add('gamma', [0.99, 0.95]) eg.run(train)
# NOTE(review): chunk starts mid-argument-list — presumably the env_config
# dict for this intersection run file. Reflowed from a collapsed single
# line; comments only were added, code tokens are unchanged.
expect_normalized_action_deltas=False,
jerk_penalty_coeff=0.20 / (60*100),
gforce_penalty_coeff=0.06,
collision_penalty_coeff=4,
gforce_threshold=None,
incent_win=True,
constrain_controls=False,
incent_yield_to_oncoming_traffic=True,
)

net_config = dict(
    hidden_units=(256, 256),
    activation=torch.nn.Tanh
)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# eg.add('seed', 0)
# Resume fine-tuning from the add-left-yield checkpoint.
eg.add('resume', '/home/c2/src/tmp/spinningup/data/intersection_2_agents_fine_tune_add_left_yield/intersection_2_agents_fine_tune_add_left_yield_s0_2020_03-23_13-16.15')
eg.add('reinitialize_optimizer_on_resume', False)  # Old optimizer had only 28 inputs despite NN having 29!
eg.add('num_inputs_to_add', 0)
eg.add('pi_lr', 3e-6)
eg.add('vf_lr', 1e-5)
eg.add('boost_explore', 5)
eg.add('epochs', 8000)
eg.add('steps_per_epoch', 32000)
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
eg.add('notes', notes, '')
eg.add('run_filename', os.path.realpath(__file__), '')
eg.add('env_config', env_config, '')
from spinup.utils.run_utils import ExperimentGrid
from spinup import ppo_pytorch
import torch

if __name__ == '__main__':
    import argparse

    # PPO on the custom multi-agent control environment, over a few seeds,
    # two network sizes, and two activations.
    parser = argparse.ArgumentParser()
    parser.add_argument('--cpu', type=int, default=4)
    parser.add_argument('--num_runs', type=int, default=3)
    args = parser.parse_args()

    grid = ExperimentGrid(name='ppo-test-lunar')
    # Environments tried previously:
    #eg.add('env_name', 'MountainCar-v0', '', True)
    #eg.add('env_name', 'CartPole-v0', '', True)
    #eg.add('env_name', 'gym_multiagent_control:foo-v0', '', True)
    grid.add('env_name', 'gym_multiagent_control:foo-v2', '', True)
    grid.add('seed', [run * 10 for run in range(args.num_runs)])
    grid.add('epochs', 10)
    grid.add('steps_per_epoch', 4000)
    grid.add('ac_kwargs:hidden_sizes', [(32,), (64, 64)], 'hid')
    grid.add('ac_kwargs:activation', [torch.nn.Tanh, torch.nn.ReLU], '')
    grid.run(ppo_pytorch, num_cpu=args.cpu)
# NOTE(review): collapsed fragment — the text up to the ''' terminator is
# the tail of a triple-quoted (commented-out) earlier training block whose
# opening quotes lie outside this chunk; the active code that follows sweeps
# TRPO hidden sizes on Acrobot-v1 over seeds. Kept byte-identical since part
# of the text lies inside a string literal whose original layout is unknown.
eg.add('epochs', 10) eg.add('steps_per_epoch', 5000) # Use default hidden sizes in actor_critic function, comment below out eg.add('ac_kwargs:hidden_sizes', [(32,)], 'hid') eg.add('ac_kwargs:activation', [tf.nn.relu], '') eg.run(algo[i], num_cpu=args.cpu) ''' #Training if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() parser.add_argument('--cpu', type=int, default=1) parser.add_argument('--num_runs', type=int, default=1) args = parser.parse_args() eg = ExperimentGrid(name='ex4_trpo_30ep') eg.add('env_name', 'Acrobot-v1', '', True) eg.add('seed', [10*i for i in range(args.num_runs)]) eg.add('epochs', 30) #eg.add('steps_per_epoch', 4000) eg.add('max_ep_len', 1500) eg.add('ac_kwargs:activation', [tf.nn.relu], '') eg.add('ac_kwargs:hidden_sizes', [(16,),(16,16),(8,),(8,8),(4,),(4,4)], 'hid') eg.run(trpo, num_cpu=args.cpu)
# Environment configuration: intersection scenario with the jerk/gforce/
# collision penalty coefficients scaled 1.5x relative to the base values.
env_config = dict(
    env_name='deepdrive-2d-intersection-w-gs-allow-decel-v0',
    is_intersection_map=True,
    expect_normalized_action_deltas=False,
    jerk_penalty_coeff=1.5 * 0.20 / (60 * 100),
    gforce_penalty_coeff=1.5 * 0.06,
    collision_penalty_coeff=1.5,
    end_on_harmful_gs=False,
    incent_win=True,
    constrain_controls=False,
)

net_config = dict(hidden_units=(256, 256), activation=torch.nn.Tanh)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# eg.add('seed', 0)
# Resume from the collision+comfort fine-tune checkpoint.
eg.add(
    'resume',
    '/home/c2/src/tmp/spinningup/data/intersection_2_agents_fine_tune_collision_resume_add_comfort1/intersection_2_agents_fine_tune_collision_resume_add_comfort1_s0_2020_03-13_21-45.39'
)
eg.add('reinitialize_optimizer_on_resume', True)
eg.add(
    'pi_lr', 3e-6
)  # doesn't seem to have an effect, but playing it safe and lowering learning rate since we're not restoring adam rates
eg.add(
    'vf_lr', 1e-5
)  # doesn't seem to have an effect, but playing it safe and lowering learning rate since we're not restoring adam rates
eg.add('epochs', 8000)
eg.add('steps_per_epoch', 16000)
# NOTE(review): chunk ends here — the remaining eg.add calls and the
# eg.run(...) launch presumably follow in the original file.
# NOTE(review): collapsed fragment — argparse setup continues from an
# earlier chunk (`parser` is created outside this view). The four repeated
# loops each build depth-10 layer lists of one width (64/128/256/512) for a
# TD3 hidden-size sweep; whether `layers_itr.append` sat inside or after
# each loop is ambiguous in the collapsed source (40 incremental variants
# vs. 4 fixed-depth variants), so the code is kept byte-identical rather
# than risk changing which hidden-size variants the grid runs.
parser.add_argument('--cpu', type=str, default='auto') parser.add_argument('--num_runs', type=int, default=3) args = parser.parse_args() x = 10 layers = [] layers_itr = [] for i in range(x): layers.append(64) layers_itr.append(list(layers)) layers = [] for i in range(x): layers.append(128) layers_itr.append(list(layers)) layers = [] for i in range(x): layers.append(256) layers_itr.append(list(layers)) layers = [] for i in range(x): layers.append(512) layers_itr.append(list(layers)) eg = ExperimentGrid(name='td3-bench') eg.add('env_name', 'MountainCarContinuous-v0', '', True) eg.add('seed', [10 * i for i in range(args.num_runs)]) eg.add('epochs', 10) eg.add('steps_per_epoch', 4000) eg.add('ac_kwargs:hidden_sizes', layers_itr, 'hid') eg.add('ac_kwargs:activation', [tf.nn.relu], '') eg.run(td3, num_cpu=args.cpu)
def train():
    """Build and launch the PPO experiment grid for this run file."""
    grid = ExperimentGrid(name=experiment_name)
    grid.add('env_name', env_config['env_name'], '', False)
    # grid.add('seed', 0)
    grid.add('epochs', 8000)
    grid.add('gamma', 0.95)
    grid.add('lam', 0.835)
    # grid.add('steps_per_epoch', 4000)
    grid.add('ac_kwargs:hidden_sizes', (256, 256), 'hid')
    grid.add('ac_kwargs:activation', torch.nn.Tanh, '')
    # Record provenance alongside results.
    grid.add('notes', notes, '')
    grid.add('run_filename', os.path.realpath(__file__), '')
    grid.add('env_config', env_config, '')
    grid.run(ppo_pytorch)
from spinup import vpg_pytorch
from spinup.utils.run_utils import ExperimentGrid
import torch

if __name__ == '__main__':
    # Quick CartPole sanity benchmark for torch VPG: tiny run, sweeping
    # gamma and two network sizes on a single seed.
    bench = ExperimentGrid(name='vpg-torch-cart-bench')
    bench.add('env_name', 'CartPole-v0')
    bench.add('seed', [0])
    bench.add('epochs', 2)
    bench.add('steps_per_epoch', 100)
    bench.add('gamma', [0, 0.5, 1])
    bench.add('ac_kwargs:hidden_sizes', [(32,), (64, 64)], 'hid')
    bench.add('ac_kwargs:activation', [torch.nn.Tanh], '')
    bench.run(vpg_pytorch, num_cpu=4)
# Environment configuration: intersection scenario with comfort and
# collision penalties plus the yield-to-oncoming-traffic incentive.
env_config = dict(
    env_name='deepdrive-2d-intersection-w-gs-allow-decel-v0',
    is_intersection_map=True,
    expect_normalized_action_deltas=False,
    jerk_penalty_coeff=0.20 / (60 * 100),
    gforce_penalty_coeff=0.06,
    collision_penalty_coeff=4,
    end_on_harmful_gs=False,
    incent_win=True,
    constrain_controls=False,
    incent_yield_to_oncoming_traffic=True,
)

net_config = dict(hidden_units=(256, 256), activation=torch.nn.Tanh)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# eg.add('seed', 0)
# Resume from the add-left-yield2 fine-tune checkpoint.
eg.add(
    'resume',
    '/home/c2/src/tmp/spinningup/data/intersection_2_agents_fine_tune_add_left_yield2/intersection_2_agents_fine_tune_add_left_yield2_s0_2020_03-23_22-40.11'
)
eg.add('reinitialize_optimizer_on_resume', True)
eg.add('num_inputs_to_add', 0)
eg.add('pi_lr', 3e-6)
eg.add('vf_lr', 1e-5)
# eg.add('boost_explore', 5)
eg.add('epochs', 8000)
eg.add('steps_per_epoch', 32000)
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
# NOTE(review): chunk ends here — the remaining eg.add calls and the
# eg.run(...) launch presumably follow in the original file.
# NOTE(review): collapsed fragment — the text up to the ''' terminator is
# the tail of a triple-quoted (commented-out) earlier training block whose
# opening quotes lie outside this chunk; the active code that follows runs
# DDPG for 100 epochs on `env_name` with a (64,64) network. Kept
# byte-identical since part of the text lies inside a string literal whose
# original layout is unknown.
eg.add('steps_per_epoch', 5000) # Use default hidden sizes in actor_critic function, comment below out eg.add('ac_kwargs:hidden_sizes', [(16,16)], 'hid') eg.add('ac_kwargs:activation', [tf.nn.relu], '') eg.run(algo[i], num_cpu=args.cpu) ''' #Training if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() parser.add_argument('--cpu', type=int, default=1) parser.add_argument('--num_runs', type=int, default=1) args = parser.parse_args() eg = ExperimentGrid(name=ex_number+'_ddpg_100ep') eg.add('env_name', env_name, '', True) eg.add('seed', [10*i for i in range(args.num_runs)]) eg.add('epochs', 100) #eg.add('steps_per_epoch', 4000) eg.add('max_ep_len', 1500) eg.add('ac_kwargs:activation', [tf.nn.relu], '') eg.add('ac_kwargs:hidden_sizes', [(64,64)], 'hid') eg.run(ddpg, num_cpu=args.cpu)
# Environment configuration: intersection scenario with comfort penalties
# disabled (jerk and gforce coefficients set to 0) — collision penalty only.
env_config = dict(
    env_name='deepdrive-2d-intersection-w-gs-allow-decel-v0',
    is_intersection_map=True,
    expect_normalized_action_deltas=False,
    jerk_penalty_coeff=0,
    gforce_penalty_coeff=0,
    collision_penalty_coeff=1,
    end_on_harmful_gs=False,
    incent_win=True,
    constrain_controls=False,
)

net_config = dict(hidden_units=(256, 256), activation=torch.nn.Tanh)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# eg.add('seed', 0)
# Resume from the unconstrained-controls example checkpoint.
eg.add(
    'resume',
    '/home/c2/src/tmp/spinningup/data/deepdrive-2d-intersection-no-constrained-controls-example/deepdrive-2d-intersection-no-constrained-controls-example_s0_2020_03-12_18-09.49'
)
eg.add('reinitialize_optimizer_on_resume', True)
eg.add(
    'pi_lr', 3e-5
)  # doesn't seem to have an effect, but playing it safe and lowering learning rate since we're not restoring adam rates
eg.add(
    'vf_lr', 1e-4
)  # doesn't seem to have an effect, but playing it safe and lowering learning rate since we're not restoring adam rates
eg.add('epochs', 8000)
# eg.add('steps_per_epoch', 4000)
# NOTE(review): chunk ends here — the remaining eg.add calls and the
# eg.run(...) launch presumably follow in the original file.
lane_margin=0.2, # https://iopscience.iop.org/article/10.1088/0143-0807/37/6/065008/pdf # Importantly they depict the threshold # for admissible acceleration onset or jerk as j = 15g/s or ~150m/s^3. jerk_threshold=150.0, # 15g/s incent_win=True, constrain_controls=False, incent_yield_to_oncoming_traffic=True, physics_steps_per_observation=12, discrete_actions=COMFORTABLE_ACTIONS2, ) net_config = dict(hidden_units=(256, 256), activation=torch.nn.Tanh) eg = ExperimentGrid(name=experiment_name) eg.add('env_name', env_config['env_name'], '', False) # eg.add('seed', 0) eg.add( 'resume', '/home/c2/src/tmp/spinningup/data/intersection_discrete_micro_turn_lower_lane_pen2_diag_lane19/intersection_discrete_micro_turn_lower_lane_pen2_diag_lane19_s0_2020_05-15_18-26.32.741210' ) # eg.add('reinitialize_optimizer_on_resume', True) # eg.add('num_inputs_to_add', 0) eg.add('pi_lr', 3e-4) # default pi_lr=3e-4 eg.add('vf_lr', 1e-3) # default vf_lr=1e-3, # eg.add('boost_explore', 5) eg.add('epochs', 20000) eg.add('steps_per_epoch', 8000) eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid') eg.add('ac_kwargs:activation', net_config['activation'], '')
def run_experiment(args):
    """Launch a PPO (tf1) grid on args.env_name across args.num_runs seeds."""
    def make_env():
        # Importing HumanoidRL registers the custom environments with gym.
        import HumanoidRL
        return gym.make(args.env_name)

    grid = ExperimentGrid(name=args.exp_name)
    grid.add('env_fn', make_env)
    grid.add('seed', [run * 10 for run in range(args.num_runs)])
    grid.add('epochs', 500)
    grid.add('steps_per_epoch', 10000)
    grid.add('save_freq', 20)
    grid.add('max_ep_len', 200)
    grid.add('ac_kwargs:activation', tf.tanh, '')
    grid.run(ppo_tf1)
# Importantly they depict the threshold # for admissible acceleration onset or jerk as j = 15g/s or ~150m/s^3. jerk_threshold=150.0, # 15g/s incent_win=True, constrain_controls=False, incent_yield_to_oncoming_traffic=True, physics_steps_per_observation=12, discrete_actions=COMFORTABLE_ACTIONS2, ) net_config = dict( hidden_units=(256, 256), activation=torch.nn.Tanh ) eg = ExperimentGrid(name=experiment_name) eg.add('env_name', env_config['env_name'], '', False) # eg.add('seed', 0) eg.add('resume', '/home/c2/src/tmp/spinningup/data/intersection_discrete_micro_turn_lower_lane_pen2_diag_lane17_2/intersection_discrete_micro_turn_lower_lane_pen2_diag_lane17_2_s0_2020_05-13_14-46.12.823730_snapshot2') # eg.add('reinitialize_optimizer_on_resume', True) # eg.add('num_inputs_to_add', 0) # eg.add('pi_lr', 3e-6) # eg.add('vf_lr', 1e-5) # eg.add('boost_explore', 5) eg.add('epochs', 20000) eg.add('steps_per_epoch', 4000) eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid') eg.add('ac_kwargs:activation', net_config['activation'], '') eg.add('notes', notes, '') eg.add('run_filename', os.path.realpath(__file__), '') eg.add('env_config', env_config, '')
# Importantly they depict the threshold # for admissible acceleration onset or jerk as j = 15g/s or ~150m/s^3. jerk_threshold=150.0, # 15g/s incent_win=True, constrain_controls=False, incent_yield_to_oncoming_traffic=True, physics_steps_per_observation=12, discrete_actions=COMFORTABLE_ACTIONS2, ) net_config = dict( hidden_units=(256, 256), activation=torch.nn.Tanh ) eg = ExperimentGrid(name=experiment_name) eg.add('env_name', env_config['env_name'], '', False) # eg.add('seed', 0) # eg.add('resume', '/home/c2/src/tmp/spinningup/data/intersection_2_agents_fine_tune_add_left_yield2/intersection_2_agents_fine_tune_add_left_yield2_s0_2020_03-23_22-40.11') # eg.add('reinitialize_optimizer_on_resume', True) # eg.add('num_inputs_to_add', 0) # eg.add('pi_lr', 3e-6) # eg.add('vf_lr', 1e-5) # eg.add('boost_explore', 5) eg.add('epochs', 20000) eg.add('steps_per_epoch', 4000) eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid') eg.add('ac_kwargs:activation', net_config['activation'], '') eg.add('notes', notes, '') eg.add('run_filename', os.path.realpath(__file__), '') eg.add('env_config', env_config, '')
def train():
    """Resume PPO fine-tuning from the lower-gamma intersection snapshot."""
    grid = ExperimentGrid(name=experiment_name)
    grid.add('env_name', env_config['env_name'], '', False)
    # grid.add('seed', 0)
    grid.add(
        'resume',
        '/home/c2/src/tmp/spinningup/data/intersection_2_agents_lower_gamma_snapshot/intersection_2_agents_lower_gamma_s0_2020_03-12_12-07.37'
    )
    grid.add('reinitialize_optimizer_on_resume', False)
    # Learning rates lowered as a precaution: Adam moment estimates are not
    # restored on resume (though this didn't appear to change results).
    grid.add('pi_lr', 3e-5)
    grid.add('vf_lr', 1e-4)
    grid.add('epochs', 8000)
    grid.add('gamma', 0.95)
    grid.add('lam', 0.835)
    # grid.add('steps_per_epoch', 4000)
    grid.add('ac_kwargs:hidden_sizes', (256, 256), 'hid')
    grid.add('ac_kwargs:activation', torch.nn.Tanh, '')
    # Record provenance alongside results.
    grid.add('notes', notes, '')
    grid.add('run_filename', os.path.realpath(__file__), '')
    grid.add('env_config', env_config, '')
    grid.run(ppo_pytorch)
# NOTE(review): chunk starts mid-argument-list — presumably the env_config
# dict for this intersection run file. Reflowed from a collapsed single
# line; comments only were added, code tokens are unchanged.
jerk_penalty_coeff=3.3e-5,
gforce_penalty_coeff=0.006 * 5,
collision_penalty_coeff=4,
lane_penalty_coeff=0.02,
speed_reward_coeff=0.50,
gforce_threshold=None,
incent_win=True,
constrain_controls=False,
incent_yield_to_oncoming_traffic=True,
physics_steps_per_observation=12,
discrete_actions=COMFORTABLE_ACTIONS,
)

net_config = dict(hidden_units=(64, 64), activation=torch.nn.Tanh)

eg = ExperimentGrid(name=experiment_name)
eg.add('env_name', env_config['env_name'], '', False)
# Gamma is derived from the physics step so the effective horizon stays at
# ~10 seconds of sim time. NOTE(review): FPS is not defined in this chunk —
# presumably a module-level constant elsewhere in the file.
pso = env_config['physics_steps_per_observation']
effective_horizon_seconds = 10
eg.add(
    'gamma', 1 - pso / (effective_horizon_seconds * FPS)
)  # Lower gamma so seconds of effective horizon remains at 10s with current physics steps = 12 * 1/60s * 1 / (1-gamma)
eg.add('epochs', 10000)
eg.add('steps_per_epoch', 8000)
eg.add('ac_kwargs:hidden_sizes', net_config['hidden_units'], 'hid')
eg.add('ac_kwargs:activation', net_config['activation'], '')
eg.add('notes', notes, '')
eg.add('run_filename', os.path.realpath(__file__), '')
eg.add('env_config', env_config, '')
# NOTE(review): collapsed fragment — another copy of the get_setting
# mixed-radix decoder (flat job index -> one index per hyperparameter
# dimension), followed by automated grid construction that adds every
# decoded setting except env_name/seed with its savename prefix/flag, then
# launches function_to_run. Ends mid-statement at the trailing `print(` —
# truncated at the chunk edge. Kept byte-identical because the original
# indentation (function body vs. module level) was lost in collapsing.
remainder = setting_number for setting in settings: division = int(total / len(setting)) index = int(remainder / division) remainder = remainder % division indexes.append(index) total = division actual_setting = {} for j in range(len(indexes)): actual_setting[setting_names[j]] = settings[j][indexes[j]] return indexes, actual_setting indexes, actual_setting = get_setting(args.setting, total, settings, setting_names) eg = ExperimentGrid(name=EXPERIMENT_NAME) # use eg.add to add parameters in the settings or add parameters tha apply to all jobs # we now automated this part, as long as you added settings correctly into the arrays at the start of this program # they should be added to experiment automatically for i in range(len(actual_setting)): setting_name = setting_names[i] if setting_name != 'env_name' and setting_name != 'seed': eg.add(setting_name, actual_setting[setting_name], setting_savename_prefix[i], whether_add_to_savename[i]) eg.add('env_name', actual_setting['env_name'], '', True) eg.add('seed', actual_setting['seed']) eg.run(function_to_run, num_cpu=args.cpu, data_dir=save_data_dir) print(