def main():
    """Launch the ddpg-exp2 two-stage experiment: fixed walking budget,
    swept balancing budget (0..200k in 25k increments)."""
    args = parse_args()

    # Cap worker count at the physical CPU count; default cap is 16.
    requested = args['cores']
    cores = min(cpu_count(), requested) if requested else min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Experiment parameters
    runs = range(16)
    exp_name = "ddpg-exp2_two_stage"
    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml',
    }
    misc = {'tasks': tasks, 'starting_task': 'balancing_tf', 'runs': runs}

    # Walking steps stay at 250000; balancing steps vary with the multiplier.
    wsteps = 250000
    mp_cfgs = []
    for mul in range(9):
        run_name = "{}-mul{}".format(exp_name, mul)
        if mul == 0:
            # Baseline without any balancing stage.
            options = {'balancing_tf': '', 'balancing': '', 'walking': ''}
            mp_cfgs += do_steps_based(args, cores, name=run_name,
                                      steps=(-1, -1, wsteps),
                                      options=options, **misc)
        else:
            args['rb_max_size'] = wsteps
            options = {'balancing_tf': '', 'balancing': '',
                       'walking': 'nnload_rbload'}
            mp_cfgs += do_steps_based(args, cores, name=run_name,
                                      steps=(-1, 25000 * mul, wsteps),
                                      options=options, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once, in randomized order, over a process pool.
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
def main():
    """Launch the ddpg-exp1 two-stage experiment on Leo: one direct-walking
    baseline plus five balancing-then-walking variants that differ only in
    how the walking stage is seeded (network/replay-buffer loading)."""
    args = parse_args()

    # Cap worker count at the physical CPU count; default cap is 16.
    requested = args['cores']
    cores = min(cpu_count(), requested) if requested else min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Experiment parameters
    runs = range(16)
    keep_samples = False
    exp_name = "ddpg-exp1_two_stage"
    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml',
    }
    misc = {'tasks': tasks, 'starting_task': 'balancing_tf', 'runs': runs}

    mp_cfgs = []

    # Baseline: walking only, full replay buffer.
    steps = 300000
    args['rb_max_size'] = steps
    options = {'balancing_tf': '', 'balancing': '', 'walking': ''}
    mp_cfgs += do_steps_based(args, cores, name=exp_name,
                              steps=(-1, -1, steps),
                              options=options, **misc)

    # Two-stage variants: 50k balancing followed by walking.
    bsteps = 50000
    wsteps = steps - bsteps
    args['rb_max_size'] = steps if keep_samples else wsteps
    walking_variants = (
        'nnload',
        'nnload_rbload',
        'nnload_rbload_re_walking_300_-1.5',
        'rbload',
        'rbload_re_walking_300_-1.5',
    )
    for variant in walking_variants:
        options = {'balancing_tf': '', 'balancing': '', 'walking': variant}
        mp_cfgs += do_steps_based(args, cores, name=exp_name,
                                  steps=(-1, bsteps, wsteps),
                                  options=options, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once, in randomized order, over a process pool.
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
def main():
    """Launch the ddpg-exp1 two-stage experiment on three Roboschool robots
    (Hopper, HalfCheetah, Walker2d), each with its own step budgets."""
    args = parse_args()

    # Cap worker count at the physical CPU count; default cap is 16.
    requested = args['cores']
    cores = min(cpu_count(), requested) if requested else min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Experiment parameters
    runs = range(7, 16)
    exp_name = "ddpg-exp1_two_stage"
    misc = {'starting_task': 'balancing_tf', 'runs': runs}
    mp_cfgs = []
    keep_samples = False
    reassess_for = 'walking_3_-1.5'

    # (name suffix, task map, balancing steps, total steps) per robot.
    robots = (
        ('_hopper',
         {'balancing_tf': 'RoboschoolHopperBalancingGRL-v1',
          'balancing': 'RoboschoolHopperBalancingGRL-v1',
          'walking': 'RoboschoolHopperGRL-v1'},
         100000, 600000),
        ('_halfcheetah',
         {'balancing_tf': 'RoboschoolHalfCheetahBalancingGRL-v1',
          'balancing': 'RoboschoolHalfCheetahBalancingGRL-v1',
          'walking': 'RoboschoolHalfCheetahGRL-v1'},
         100000, 600000),
        ('_walker2d',
         {'balancing_tf': 'RoboschoolWalker2dBalancingGRL-v1',
          'balancing': 'RoboschoolWalker2dBalancingGRL-v1',
          'walking': 'RoboschoolWalker2dGRL-v1'},
         200000, 700000),
    )
    for suffix, tasks, bsteps, steps in robots:
        # Replay buffer either spans all samples or only the walking stage.
        args['rb_max_size'] = steps if keep_samples else steps - bsteps
        mp_cfgs += create_tasks(args, cores, exp_name + suffix, bsteps, steps,
                                reassess_for, tasks, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once, in randomized order, over a process pool.
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
def main():
    """Build curriculum-network configurations for perturbed Leo models and
    execute the first configuration directly (no pool) for debugging."""
    args = parse_args()

    # Cap worker count at the physical CPU count; default cap is 16.
    requested = args['cores']
    cores = min(cpu_count(), requested) if requested else min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Register OrderedDict handlers so yaml round-trips preserve key order.
    _mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    yaml.add_representer(collections.OrderedDict, dict_representer)
    yaml.add_constructor(_mapping_tag, dict_constructor)

    # Parameters
    runs = range(16)

    # Create perturbed models of Leo; first existing path wins downstream.
    model_paths = (
        '/home/ivan/work/Project/Software/grl/src/grl/addons/rbdl/cfg/leo_vc',
        '/grl/src/grl/addons/rbdl/cfg/leo_vc',
    )
    models, names = create_models(model_paths)
    tasks, names = create_tasks(models, names)

    args['cl_depth'] = 2
    options = {
        'balancing_tf': '',
        'balancing': 'nnload_rbload',
        'walking': 'nnload_rbload',
    }
    starting_task = 'balancing_tf'

    # nn_params is loop-invariant (depends only on cl_depth), so build it once.
    export_names = "eq_curriculum_network_depth_" + str(args['cl_depth'])
    nn_params = (export_names, "{}_stat.pkl".format(export_names))

    mp_cfgs = []
    for task, name in zip(tasks, names):
        misc = {'tasks': task, 'starting_task': starting_task, 'runs': runs}
        mp_cfgs += do_network_based_leo(args, cores,
                                        name='ddpg-cl_short_' + name,
                                        nn_params=nn_params,
                                        options=options, **misc)

    # NOTE(review): earlier step-based / reach-timeout / walker2d variants of
    # this experiment were removed here; see version control history.

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Shuffle and prepare, but run only the first configuration in-process
    # instead of dispatching the whole pool.
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    # do_multiprocessing_pool(cores, mp_cfgs)
    config, tasks, starting_task = mp_cfgs[0]
    cl_run(tasks, starting_task, **config)
def main():
    """Launch the ddpg-exp1 two-stage experiment across four platforms
    (Leo, Hopper, HalfCheetah, Walker2d), exporting learning trajectories."""
    args = parse_args()
    # Disabling testing ensures that only the learning trajectory is exported.
    args["test_interval"] = -1

    # Cap worker count at the physical CPU count; default cap is 16.
    requested = args['cores']
    cores = min(cpu_count(), requested) if requested else min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(6)
    exp_name = "ddpg-exp1_two_stage"
    misc = {'starting_task': 'balancing_tf', 'runs': runs}
    mp_cfgs = []
    keep_samples = False

    # Per platform: (name suffix, task map, balancing steps, total steps,
    #                reassess spec, env timestep, walking-stage option).
    platforms = (
        ('_leo',
         {'balancing_tf': 'cfg/leo_balancing_tf.yaml',
          'balancing': 'cfg/leo_balancing.yaml',
          'walking': 'cfg/leo_walking.yaml'},
         50000, 300000, 'walking_300_-1.5', 0.03, 'nnload_rbload'),
        ('_hopper',
         {'balancing_tf': 'RoboschoolHopperBalancingGRL-v1',
          'balancing': 'RoboschoolHopperBalancingGRL-v1',
          'walking': 'RoboschoolHopperGRL-v1'},
         100000, 600000, 'walking_3_-1.5', 0.0165, 'nnload'),
        ('_halfcheetah',
         {'balancing_tf': 'RoboschoolHalfCheetahBalancingGRL-v1',
          'balancing': 'RoboschoolHalfCheetahBalancingGRL-v1',
          'walking': 'RoboschoolHalfCheetahGRL-v1'},
         100000, 600000, 'walking_3_-1.5', 0.0165, 'nnload'),
        ('_walker2d',
         {'balancing_tf': 'RoboschoolWalker2dBalancingGRL-v1',
          'balancing': 'RoboschoolWalker2dBalancingGRL-v1',
          'walking': 'RoboschoolWalker2dGRL-v1'},
         200000, 700000, 'walking_3_-1.5', 0.0165, 'nnload'),
    )
    for suffix, tasks, bsteps, steps, reassess_for, timestep, walk_opt in platforms:
        # Replay buffer either spans all samples or only the walking stage.
        args['rb_max_size'] = steps if keep_samples else steps - bsteps
        args['env_timestep'] = timestep
        cl_options = {'balancing_tf': '', 'balancing': '', 'walking': walk_opt}
        mp_cfgs += create_tasks(args, cores, exp_name + suffix, bsteps, steps,
                                reassess_for, tasks, cl_options, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once, in randomized order, over a process pool.
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
def main():
    """Launch curriculum-network experiments on Leo, a perturbed Leo model,
    and the Roboschool Walker2d, sharing one network configuration."""
    args = parse_args()

    # Cap worker count at the physical CPU count; default cap is 16.
    requested = args['cores']
    cores = min(cpu_count(), requested) if requested else min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Register OrderedDict handlers so yaml round-trips preserve key order.
    _mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    yaml.add_representer(collections.OrderedDict, dict_representer)
    yaml.add_constructor(_mapping_tag, dict_constructor)

    # Parameters
    runs = range(16)
    args['cl_depth'] = 2
    options = {
        'balancing_tf': '',
        'balancing': 'nnload_rbload',
        'walking': 'nnload_rbload',
    }
    export_names = "eq_curriculum_network_depth_" + str(args['cl_depth'])
    nn_params = (export_names, "{}_stat.pkl".format(export_names))
    starting_task = 'balancing_tf'

    # Per experiment: (name, launcher, task map). Leo variants use the Leo
    # launcher; Walker2d uses the mujoco one.
    experiments = (
        ('ddpg-leo', do_network_based_leo,
         {'balancing_tf': 'cfg/leo_balancing_tf.yaml',
          'balancing': 'cfg/leo_balancing.yaml',
          'walking': 'cfg/leo_walking.yaml'}),
        ('ddpg-leo_perturbed', do_network_based_leo,
         {'balancing_tf': 'cfg/leo_perturbed_balancing_tf.yaml',
          'balancing': 'cfg/leo_perturbed_balancing.yaml',
          'walking': 'cfg/leo_perturbed_walking.yaml'}),
        ('ddpg-walker2d', do_network_based_mujoco,
         {'balancing_tf': 'RoboschoolWalker2dBalancingGRL_TF-v1',
          'balancing': 'RoboschoolWalker2dBalancingGRL-v1',
          'walking': 'RoboschoolWalker2dGRL-v1'}),
    )
    mp_cfgs = []
    for exp_name, launcher, tasks in experiments:
        misc = {'tasks': tasks, 'starting_task': starting_task, 'runs': runs}
        mp_cfgs += launcher(args, cores, name=exp_name, nn_params=nn_params,
                            options=options, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once, in randomized order, over a process pool.
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
def main():
    """Build the short-curriculum configuration for one Leo model (idealized
    or perturbed) and run the first configuration directly, without a pool."""
    args = parse_args()

    # Cap worker count at the physical CPU count; default cap is 16.
    requested = args['cores']
    cores = min(cpu_count(), requested) if requested else min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(1)

    # Select the Leo model variant ('idealized' or 'perturbed').
    model = 'idealized'
    task_sets = {
        'idealized': {
            'balancing_tf': 'cfg/leo_balancing_tf.yaml',
            'balancing': 'cfg/leo_balancing.yaml',
            'walking': 'cfg/leo_walking.yaml',
        },
        'perturbed': {
            'balancing_tf': 'cfg/leo_perturbed_balancing_tf.yaml',
            'balancing': 'cfg/leo_perturbed_balancing.yaml',
            'walking': 'cfg/leo_perturbed_walking.yaml',
        },
    }
    tasks = task_sets[model] if model == 'idealized' else task_sets['perturbed']
    starting_task = 'balancing_tf'
    misc = {'tasks': tasks, 'starting_task': starting_task, 'runs': runs}

    mp_cfgs = []
    nn_params = ("short_curriculum_network",
                 "short_curriculum_network_stat.pkl")
    mp_cfgs += do_network_based_leo(args, cores, name='ddpg-cl_short',
                                    nn_params=nn_params, **misc)

    # NOTE(review): earlier long-curriculum / step-based / reach-timeout /
    # walker2d variants were removed here; see version control history.

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Shuffle and prepare, but run only the first configuration in-process
    # instead of dispatching the whole pool.
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    # do_multiprocessing_pool(cores, mp_cfgs)
    config, tasks, starting_task = mp_cfgs[0]
    cl_run(tasks, starting_task, **config)
def main():
    """Launch the ddpg-exp3 reach-timeout experiment on Leo: switching happens
    after balancing is held for 5 s, up to five times."""
    args = parse_args()

    # Cap worker count at the physical CPU count; default cap is 16.
    requested = args['cores']
    cores = min(cpu_count(), requested) if requested else min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(16)
    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml',
    }
    misc = {'tasks': tasks, 'starting_task': 'balancing_tf', 'runs': runs}

    args['cl_keep_samples'] = True
    options = {
        'balancing_tf': '',
        'balancing': 'nnload',
        'walking': 'nnload_rbload',
    }
    mp_cfgs = []

    # NOTE(review): earlier variants with reach_timeout_num of 1 and 2 (5 s and
    # 20 s timeouts) were removed here; see version control history.

    # Switch on a 5 s balancing timeout, allowed to trigger up to five times.
    args['reach_timeout_num'] = 5
    stage_variants = (
        ('ddpg-exp3-three_stage-rb55555', (5.0, 5.0, 0.0)),
        ('ddpg-exp3-two_stage-rb55555', (-1.0, 5.0, 0.0)),
    )
    for run_name, reach_timeout in stage_variants:
        mp_cfgs += do_reach_timeout_based(args, cores, name=run_name,
                                          reach_timeout=reach_timeout,
                                          options=options, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once, in randomized order, over a process pool.
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)