Beispiel #1
0
def do_steps_based(base_args,
                   cores,
                   name,
                   steps,
                   runs,
                   options=None,
                   tasks=None,
                   starting_task=''):
    """Build the run configurations for an experiment with a given step count.

    Args:
        base_args: Mapping of base arguments. It is copied, so the caller's
            object is not modified by the assignments made here.
        cores: Forwarded unchanged to ``Helper``.
        name: Base experiment name; extended with a suffix derived from
            ``options`` when that suffix is non-empty.
        steps: Value stored under ``args['steps']``.
        runs: Sequence of runs. Only its length and its first element are
            used; an empty sequence raises ``IndexError``.
        options: Optional mapping. When truthy it is indexed directly with
            the keys ``'balancing_tf'``, ``'balancing'`` and ``'walking'``
            (a missing key raises ``KeyError``) and stored under
            ``args['options']``.
        tasks: Forwarded to ``Helper``. Defaults to a new empty dict.
        starting_task: Forwarded to ``Helper``.

    Returns:
        Whatever ``Helper.gen_cfg`` returns for one ``None`` solution per run.
    """
    # Use a fresh dict per call: a ``{}`` default is a single object shared
    # by every call and by every Helper it was handed to.
    if tasks is None:
        tasks = {}

    args = base_args.copy()
    args['steps'] = steps

    if options:
        # NOTE(review): the parts are concatenated without a separator here
        # (e.g. '1_a2_b3_c'), whereas do_reach_timeout_based puts '_' between
        # them -- confirm which naming is intended before changing it.
        suffix = ''
        if options['balancing_tf']:
            suffix += '1_' + options['balancing_tf']
        if options['balancing']:
            suffix += '2_' + options['balancing']
        if options['walking']:
            suffix += '3_' + options['walking']
        if suffix:
            name += '-' + suffix
        args['options'] = options

    hp = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # One placeholder solution per run; numbering starts at the first run.
    solutions = [None] * len(runs)
    begin = runs[0]

    mp_cfgs = hp.gen_cfg(solutions, 1, begin=begin)
    return mp_cfgs
def do_network_based_leo(base_args, cores, name, nn_params, runs, tasks,
                         starting_task):
    """Build run configurations that load a curriculum network.

    A copy of ``base_args`` is overridden with the fixed settings listed
    below, ``Helper.gen_cfg`` is asked for one configuration per run, and
    every generated configuration is copied and given a ``"cl_load"`` entry.

    Args:
        base_args: Mapping of base arguments; copied before being changed.
        cores: Forwarded unchanged to ``Helper``.
        name: Experiment name forwarded to ``Helper``.
        nn_params: Indexable pair; element 0 becomes ``"cl_load"`` and
            element 1 becomes ``"cl_pt_load"``.
        runs: Sequence of runs; its length and first element are used.
        tasks: Forwarded to ``Helper``.
        starting_task: Forwarded to ``Helper``.

    Returns:
        A list of ``(config, tasks, starting_task)`` tuples, where ``config``
        is a copy of the generated config with ``"cl_load"`` set.
    """
    args = base_args.copy()

    # Applied in this order, mirroring the sequence of plain assignments.
    overrides = (
        ('rb_min_size', 1000),
        ('default_damage', 4035.00),
        ('perf_td_error', True),
        ('perf_l2_reg', True),
        ('steps', 300000),
        ("cl_batch_norm", False),
        ('cl_structure', 'rnnc:gru_tanh_6_dropout;fc_linear_3'),
        ('cl_stages', 'balancing_tf;balancing;walking:monotonic'),
        ('cl_depth', 2),
        ('cl_pt_shape', (2, 3)),
        ("cl_pt_load", nn_params[1]),
    )
    for key, value in overrides:
        args[key] = value
    network_to_load = nn_params[0]

    helper = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # One placeholder solution per run; numbering starts at the first run.
    placeholders = [None] * len(runs)
    generated = helper.gen_cfg(placeholders, 1, begin=runs[0])

    # "cl_load" is attached to a copy of each generated config.
    patched = []
    for config, cfg_tasks, cfg_starting_task in generated:
        config_with_load = config.copy()
        config_with_load["cl_load"] = network_to_load
        patched.append((config_with_load, cfg_tasks, cfg_starting_task))
    return patched
Beispiel #3
0
def do_reach_timeout_based(base_args,
                           cores,
                           name,
                           reach_timeout,
                           runs,
                           options=None,
                           tasks=None,
                           starting_task=''):
    """Build the run configurations for a given ``reach_timeout`` value.

    The step count is fixed at 300000 and ``rb_max_size`` is set to the same
    number.

    Args:
        base_args: Mapping of base arguments. It is copied, so the caller's
            object is not modified by the assignments made here.
        cores: Forwarded unchanged to ``Helper``.
        name: Base experiment name; extended with a suffix derived from
            ``options`` when that suffix is non-empty.
        reach_timeout: Value stored under ``args['reach_timeout']``.
        runs: Sequence of runs. Only its length and its first element are
            used; an empty sequence raises ``IndexError``.
        options: Optional mapping. When truthy it is indexed directly with
            the keys ``'balancing_tf'``, ``'balancing'`` and ``'walking'``
            (a missing key raises ``KeyError``) and stored under
            ``args['options']``.
        tasks: Forwarded to ``Helper``. Defaults to a new empty dict.
        starting_task: Forwarded to ``Helper``.

    Returns:
        Whatever ``Helper.gen_cfg`` returns for one ``None`` solution per run.
    """
    # Use a fresh dict per call: a ``{}`` default is a single object shared
    # by every call and by every Helper it was handed to.
    if tasks is None:
        tasks = {}

    args = base_args.copy()
    args['reach_timeout'] = reach_timeout
    steps = 300000
    args['steps'] = steps
    args['rb_max_size'] = steps

    if options:
        # Each non-empty option contributes a numbered part. The first two
        # parts carry a trailing '_' whether or not a later part follows.
        suffix = ''
        if options['balancing_tf']:
            suffix += '1_' + options['balancing_tf'] + '_'
        if options['balancing']:
            suffix += '2_' + options['balancing'] + '_'
        if options['walking']:
            suffix += '3_' + options['walking']
        if suffix:
            name += '-' + suffix
        args['options'] = options

    hp = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # One placeholder solution per run; numbering starts at the first run.
    solutions = [None] * len(runs)
    begin = runs[0]

    mp_cfgs = hp.gen_cfg(solutions, 1, begin=begin)
    return mp_cfgs
Beispiel #4
0
def do_steps_based(base_args, cores, name, steps, runs, tasks, starting_task):
    """Build the run configurations for an experiment with a given step count.

    ``base_args`` is copied, ``'steps'`` is set on the copy, and the result of
    ``Helper.gen_cfg`` for one ``None`` solution per run is returned. Only the
    length and the first element of ``runs`` are used.
    """
    run_args = base_args.copy()
    run_args['steps'] = steps

    helper = Helper(run_args, 'cl', name, tasks, starting_task, cores,
                    use_mp=True)

    # One placeholder solution per run; numbering starts at the first run.
    placeholders = [None] * len(runs)
    return helper.gen_cfg(placeholders, 1, begin=runs[0])
def do_network_based_mujoco(base_args, cores, name, nn_params, options, runs,
                            tasks, starting_task):
    """Build run configurations that load a curriculum network.

    A copy of ``base_args`` is overridden with the fixed settings listed
    below, the experiment name is optionally extended from ``options``,
    ``Helper.gen_cfg`` is asked for one configuration per run, and every
    generated configuration is copied and given a ``"cl_load"`` entry.

    Args:
        base_args: Mapping of base arguments; copied before being changed.
            It must already contain ``'cl_depth'``, which is read to build
            ``'cl_pt_shape'``.
        cores: Forwarded unchanged to ``Helper``.
        name: Base experiment name.
        nn_params: Indexable pair; element 0 becomes ``"cl_load"`` and
            element 1 becomes ``"cl_pt_load"``.
        options: Mapping or falsy value. When truthy it is indexed with
            ``'balancing_tf'``, ``'balancing'`` and ``'walking'`` and stored
            under ``args['options']``.
        runs: Sequence of runs; its length and first element are used.
        tasks: Forwarded to ``Helper``.
        starting_task: Forwarded to ``Helper``.

    Returns:
        A list of ``(config, tasks, starting_task)`` tuples, where ``config``
        is a copy of the generated config with ``"cl_load"`` set.
    """
    args = base_args.copy()
    step_budget = 700000

    # Applied in this order, mirroring the sequence of plain assignments.
    overrides = (
        ('env_td_error_scale', 600.0),
        ('env_timeout', 16.5),
        ('steps', step_budget),
        ("rb_max_size", step_budget),
        ('rb_min_size', 1000),
        ('default_damage', 4035.00),
        ('perf_td_error', True),
        ('perf_l2_reg', True),
        ("cl_batch_norm", False),
        ('cl_structure', 'rnnc:gru_tanh_6_dropout;fc_linear_3'),
        ('cl_stages', 'balancing_tf;balancing;walking:monotonic'),
        ('cl_pt_shape', (args['cl_depth'], 3)),
        ("cl_pt_load", nn_params[1]),
    )
    for key, value in overrides:
        args[key] = value
    network_to_load = nn_params[0]

    if options:
        # (prefix, option key, trailing separator) for each stage, in order.
        stage_parts = (
            ('1_', 'balancing_tf', '_'),
            ('2_', 'balancing', '_'),
            ('3_', 'walking', ''),
        )
        suffix = ''.join(prefix + options[key] + tail
                         for prefix, key, tail in stage_parts
                         if options[key])
        if suffix:
            name = name + '-' + suffix
        args['options'] = options

    helper = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # One placeholder solution per run; numbering starts at the first run.
    placeholders = [None] * len(runs)
    generated = helper.gen_cfg(placeholders, 1, begin=runs[0])

    # "cl_load" is attached to a copy of each generated config.
    patched = []
    for config, cfg_tasks, cfg_starting_task in generated:
        config_with_load = config.copy()
        config_with_load["cl_load"] = network_to_load
        patched.append((config_with_load, cfg_tasks, cfg_starting_task))
    return patched
# Script fragment: finishes configuring `args`, generates a single
# configuration through Helper and runs it with cl_run.
# NOTE(review): `args` must already exist at this point; where it is created
# is not visible in this fragment.
args['perf_l2_reg'] = True
args['steps'] = 300000
# rb_max_size is kept equal to the number of steps set just above.
args["rb_max_size"] = args['steps']
# Alternative network setup, currently disabled:
#args["cl_batch_norm"] = True
#args['cl_structure'] = 'ffcritic:fc_relu_4;fc_relu_3;fc_relu_3'
args["cl_batch_norm"] = False
args['cl_structure'] = 'rnnc:gru_tanh_6_dropout;fc_linear_3'
args["cl_stages"] = "balancing_tf;balancing;walking:monotonic"
args['cl_depth'] = 2
# cl_pt_shape is derived from cl_depth, so cl_depth has to be set first.
args['cl_pt_shape'] = (args['cl_depth'], 3)
args['test_interval'] = 30

#args["cl_target"] = True
# Both names are derived from cl_depth: nn_params[0] is the base name and
# nn_params[1] is the same name with "_stat.pkl" appended.
export_names = "eq_curriculum_network_depth_" + str(args['cl_depth'])
nn_params = (export_names, "{}_stat.pkl".format(export_names))
args["cl_pt_load"] = nn_params[1]

# Parameters
# Maps each task name to its YAML configuration path.
tasks = {
    'balancing_tf': 'cfg/leo_balancing_tf.yaml',
    'balancing': 'cfg/leo_balancing.yaml',
    'walking': 'cfg/leo_walking.yaml'
}
starting_task = 'balancing_tf'
# A single core is passed and use_mp is disabled for this run.
hp = Helper(args, 'cl', 'ddpg', tasks, starting_task, 1, use_mp=False)

# Run actual script.
# gen_cfg is given one placeholder solution; its first result is unpacked
# into (config, tasks, starting_task), rebinding the two names set above.
config, tasks, starting_task = hp.gen_cfg([None], 1)[0]
# "cl_load" is added to the generated config, not to `args`.
config["cl_load"] = nn_params[0]
cl_run(tasks, starting_task, **config)