def do_steps_based(base_args, cores, name, steps, runs, options=None, tasks=None, starting_task=''):
    """Generate run configurations for a given number of steps.

    NOTE(review): a second ``do_steps_based`` with a different signature
    appears later in this file; if both live in the same module the later
    definition replaces this one at import time -- confirm which is intended.

    Args:
        base_args: Mapping of default arguments. It is copied; the caller's
            object is not modified.
        cores: Forwarded unchanged to ``Helper``.
        name: Base run name. If ``options`` produces a non-empty suffix, the
            name passed to ``Helper`` is ``name + '-' + suffix``.
        steps: Value stored under ``args['steps']``.
        runs: Non-empty sequence. One ``None`` placeholder is created per
            element and ``runs[0]`` is passed to ``gen_cfg`` as ``begin``.
        options: Optional mapping that must contain the keys
            ``'balancing_tf'``, ``'balancing'`` and ``'walking'``. Truthy
            values are appended to the run name and the mapping is stored
            under ``args['options']``.
        tasks: Forwarded to ``Helper``. Defaults to a new empty dict on every
            call, so no dict object is shared between calls.
        starting_task: Forwarded unchanged to ``Helper``.

    Returns:
        Whatever ``Helper.gen_cfg`` returns.

    Raises:
        KeyError: If ``options`` is truthy but lacks one of the three keys.
        IndexError: If ``runs`` is empty.
    """
    # A `{}` default would be created once and shared by every call that
    # omits `tasks`; use a None sentinel and build a fresh dict instead.
    if tasks is None:
        tasks = {}

    args = base_args.copy()
    args['steps'] = steps

    if options:
        suffix = ''
        # NOTE(review): unlike do_reach_timeout_based in this file, no '_'
        # separator is placed between the parts here. Kept as-is so that
        # previously generated run names do not change.
        if options['balancing_tf']:
            suffix += '1_' + options['balancing_tf']
        if options['balancing']:
            suffix += '2_' + options['balancing']
        if options['walking']:
            suffix += '3_' + options['walking']
        if suffix:
            name += '-' + suffix
        args['options'] = options

    hp = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # generate configurations: one None placeholder per entry in `runs`
    solutions = [None] * len(runs)
    begin = runs[0]
    mp_cfgs = hp.gen_cfg(solutions, 1, begin=begin)
    return mp_cfgs
def do_network_based_leo(base_args, cores, name, nn_params, runs, tasks, starting_task):
    """Generate run configurations that load a curriculum network (Leo setup).

    A copy of ``base_args`` is overridden with the fixed settings below, the
    configurations are generated through ``Helper.gen_cfg``, and every
    generated configuration then receives ``nn_params[0]`` under the
    ``"cl_load"`` key.

    Args:
        base_args: Mapping of default arguments; copied, not modified.
        cores: Forwarded unchanged to ``Helper``.
        name: Run name forwarded to ``Helper``.
        nn_params: Indexable pair; element 0 becomes ``"cl_load"`` of every
            returned configuration, element 1 is stored as ``"cl_pt_load"``.
        runs: Non-empty sequence. One ``None`` placeholder is created per
            element and ``runs[0]`` is passed to ``gen_cfg`` as ``begin``.
        tasks: Forwarded unchanged to ``Helper``.
        starting_task: Forwarded unchanged to ``Helper``.

    Returns:
        A list of ``(config, tasks, starting_task)`` tuples, where ``config``
        is a copy of the generated configuration with ``"cl_load"`` set.
    """
    args = base_args.copy()
    args.update({
        'rb_min_size': 1000,
        'default_damage': 4035.00,
        'perf_td_error': True,
        'perf_l2_reg': True,
        'steps': 300000,
        'cl_batch_norm': False,
        'cl_structure': 'rnnc:gru_tanh_6_dropout;fc_linear_3',
        'cl_stages': 'balancing_tf;balancing;walking:monotonic',
        'cl_depth': 2,
        'cl_pt_shape': (2, 3),
        'cl_pt_load': nn_params[1],
    })
    network_to_load = nn_params[0]

    helper = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # One None placeholder per requested run.
    placeholders = [None] * len(runs)
    first_run = runs[0]
    generated = helper.gen_cfg(placeholders, 1, begin=first_run)

    def _with_cl_load(entry):
        # Copy the config so the object produced by gen_cfg stays untouched.
        cfg, cfg_tasks, cfg_starting_task = entry
        patched = cfg.copy()
        patched["cl_load"] = network_to_load
        return (patched, cfg_tasks, cfg_starting_task)

    return [_with_cl_load(entry) for entry in generated]
def do_reach_timeout_based(base_args, cores, name, reach_timeout, runs, options=None, tasks=None, starting_task=''):
    """Generate run configurations for a given ``reach_timeout`` value.

    The step count is fixed at 300000 and is also used as ``rb_max_size``.

    Args:
        base_args: Mapping of default arguments. It is copied; the caller's
            object is not modified.
        cores: Forwarded unchanged to ``Helper``.
        name: Base run name. If ``options`` produces a non-empty suffix, the
            name passed to ``Helper`` is ``name + '-' + suffix``.
        reach_timeout: Value stored under ``args['reach_timeout']``.
        runs: Non-empty sequence. One ``None`` placeholder is created per
            element and ``runs[0]`` is passed to ``gen_cfg`` as ``begin``.
        options: Optional mapping that must contain the keys
            ``'balancing_tf'``, ``'balancing'`` and ``'walking'``. Truthy
            values are appended to the run name and the mapping is stored
            under ``args['options']``.
        tasks: Forwarded to ``Helper``. Defaults to a new empty dict on every
            call, so no dict object is shared between calls.
        starting_task: Forwarded unchanged to ``Helper``.

    Returns:
        Whatever ``Helper.gen_cfg`` returns.

    Raises:
        KeyError: If ``options`` is truthy but lacks one of the three keys.
        IndexError: If ``runs`` is empty.
    """
    # A `{}` default would be created once and shared by every call that
    # omits `tasks`; use a None sentinel and build a fresh dict instead.
    if tasks is None:
        tasks = {}

    args = base_args.copy()
    args['reach_timeout'] = reach_timeout
    steps = 300000
    args['steps'] = steps
    args['rb_max_size'] = steps

    if options:
        suffix = ''
        if options['balancing_tf']:
            suffix += '1_' + options['balancing_tf'] + '_'
        if options['balancing']:
            suffix += '2_' + options['balancing'] + '_'
        if options['walking']:
            suffix += '3_' + options['walking']
        if suffix:
            name += '-' + suffix
        args['options'] = options

    hp = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # One None placeholder per entry in `runs`.
    solutions = [None] * len(runs)
    begin = runs[0]
    mp_cfgs = hp.gen_cfg(solutions, 1, begin=begin)
    return mp_cfgs
def do_steps_based(base_args, cores, name, steps, runs, tasks, starting_task):
    """Generate run configurations for a given number of steps.

    NOTE(review): an earlier ``do_steps_based`` accepting ``options`` appears
    above in this file; if both live in the same module this definition is
    the one that remains bound -- confirm that is intended.

    Args:
        base_args: Mapping of default arguments; copied, not modified.
        cores: Forwarded unchanged to ``Helper``.
        name: Run name forwarded to ``Helper``.
        steps: Value stored under ``'steps'`` in the copied arguments.
        runs: Non-empty sequence. One ``None`` placeholder is created per
            element and ``runs[0]`` is passed to ``gen_cfg`` as ``begin``.
        tasks: Forwarded unchanged to ``Helper``.
        starting_task: Forwarded unchanged to ``Helper``.

    Returns:
        Whatever ``Helper.gen_cfg`` returns.
    """
    run_args = base_args.copy()
    run_args['steps'] = steps

    helper = Helper(run_args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # One None placeholder per requested run; numbering starts at runs[0].
    placeholders = [None] * len(runs)
    first_run = runs[0]
    return helper.gen_cfg(placeholders, 1, begin=first_run)
def do_network_based_mujoco(base_args, cores, name, nn_params, options, runs, tasks, starting_task):
    """Generate run configurations that load a curriculum network (MuJoCo setup).

    A copy of ``base_args`` is overridden with the fixed settings below, the
    run name is optionally extended from ``options``, the configurations are
    generated through ``Helper.gen_cfg``, and every generated configuration
    then receives ``nn_params[0]`` under the ``"cl_load"`` key.

    Args:
        base_args: Mapping of default arguments; copied, not modified. Must
            already contain ``'cl_depth'``, which is read to build
            ``'cl_pt_shape'``.
        cores: Forwarded unchanged to ``Helper``.
        name: Base run name. If ``options`` produces a non-empty suffix, the
            name passed to ``Helper`` is ``name + '-' + suffix``.
        nn_params: Indexable pair; element 0 becomes ``"cl_load"`` of every
            returned configuration, element 1 is stored as ``"cl_pt_load"``.
        options: Falsy, or a mapping containing the keys ``'balancing_tf'``,
            ``'balancing'`` and ``'walking'``. Truthy values are appended to
            the run name and the mapping is stored under ``args['options']``.
        runs: Non-empty sequence. One ``None`` placeholder is created per
            element and ``runs[0]`` is passed to ``gen_cfg`` as ``begin``.
        tasks: Forwarded unchanged to ``Helper``.
        starting_task: Forwarded unchanged to ``Helper``.

    Returns:
        A list of ``(config, tasks, starting_task)`` tuples, where ``config``
        is a copy of the generated configuration with ``"cl_load"`` set.
    """
    step_budget = 700000

    args = base_args.copy()
    args.update({
        'env_td_error_scale': 600.0,
        'env_timeout': 16.5,
        'steps': step_budget,
        'rb_max_size': step_budget,  # rb_max_size is set equal to steps
        'rb_min_size': 1000,
        'default_damage': 4035.00,
        'perf_td_error': True,
        'perf_l2_reg': True,
        'cl_batch_norm': False,
        'cl_structure': 'rnnc:gru_tanh_6_dropout;fc_linear_3',
        'cl_stages': 'balancing_tf;balancing;walking:monotonic',
        'cl_pt_shape': (args['cl_depth'], 3),  # depth comes from base_args
        'cl_pt_load': nn_params[1],
    })
    network_to_load = nn_params[0]

    if options:
        # (prefix, option key, trailing separator) in the order they are
        # appended to the run name; the last part has no trailing '_'.
        layout = (
            ('1_', 'balancing_tf', '_'),
            ('2_', 'balancing', '_'),
            ('3_', 'walking', ''),
        )
        pieces = []
        for prefix, key, tail in layout:
            choice = options[key]
            if choice:
                pieces.append(prefix + choice + tail)
        suffix = ''.join(pieces)
        if suffix:
            name += '-' + suffix
        args['options'] = options

    helper = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # One None placeholder per requested run.
    placeholders = [None] * len(runs)
    first_run = runs[0]
    generated = helper.gen_cfg(placeholders, 1, begin=first_run)

    def _with_cl_load(entry):
        # Copy the config so the object produced by gen_cfg stays untouched.
        cfg, cfg_tasks, cfg_starting_task = entry
        patched = cfg.copy()
        patched["cl_load"] = network_to_load
        return (patched, cfg_tasks, cfg_starting_task)

    return [_with_cl_load(entry) for entry in generated]
args['perf_l2_reg'] = True args['steps'] = 300000 args["rb_max_size"] = args['steps'] #args["cl_batch_norm"] = True #args['cl_structure'] = 'ffcritic:fc_relu_4;fc_relu_3;fc_relu_3' args["cl_batch_norm"] = False args['cl_structure'] = 'rnnc:gru_tanh_6_dropout;fc_linear_3' args["cl_stages"] = "balancing_tf;balancing;walking:monotonic" args['cl_depth'] = 2 args['cl_pt_shape'] = (args['cl_depth'], 3) args['test_interval'] = 30 #args["cl_target"] = True export_names = "eq_curriculum_network_depth_" + str(args['cl_depth']) nn_params = (export_names, "{}_stat.pkl".format(export_names)) args["cl_pt_load"] = nn_params[1] # Parameters tasks = { 'balancing_tf': 'cfg/leo_balancing_tf.yaml', 'balancing': 'cfg/leo_balancing.yaml', 'walking': 'cfg/leo_walking.yaml' } starting_task = 'balancing_tf' hp = Helper(args, 'cl', 'ddpg', tasks, starting_task, 1, use_mp=False) # Run actual script. config, tasks, starting_task = hp.gen_cfg([None], 1)[0] config["cl_load"] = nn_params[0] cl_run(tasks, starting_task, **config)