def do_network_based_leo(base_args, cores, name, nn_params, runs, tasks,
                         starting_task):
    """Build multiprocessing configs for a network-based curriculum on Leo.

    Args:
        base_args: Base argument dict; copied, never mutated.
        cores: Number of worker cores handed to ``Helper``.
        name: Experiment name.
        nn_params: Pair ``(cl_load, cl_pt_load)`` — curriculum network
            weights file and its statistics file.
        runs: Run indices; ``runs[0]`` is the starting run number.
        tasks: Mapping of task name to task config file.
        starting_task: Name of the first task in the curriculum.

    Returns:
        List of ``(config, tasks, starting_task)`` tuples, one per run,
        each config carrying the curriculum network under ``cl_load``.
    """
    args = base_args.copy()
    args['rb_min_size'] = 1000
    args['default_damage'] = 4035.00
    args['perf_td_error'] = True
    args['perf_l2_reg'] = True
    args['steps'] = 300000
    args["cl_batch_norm"] = False
    args['cl_structure'] = 'rnnc:gru_tanh_6_dropout;fc_linear_3'
    args['cl_stages'] = 'balancing_tf;balancing;walking:monotonic'
    args['cl_depth'] = 2
    # Derive pt_shape from cl_depth so the two values cannot drift apart
    # (same pattern as do_network_based_mujoco).
    args['cl_pt_shape'] = (args['cl_depth'], 3)
    args["cl_pt_load"] = nn_params[1]
    cl_load = nn_params[0]

    hp = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # Weights of the NN: one placeholder solution per run.
    solutions = [None] * len(runs)
    begin = runs[0]

    mp_cfgs = hp.gen_cfg(solutions, 1, begin=begin)
    # Inject the curriculum network weights into a copy of each generated
    # config.  Loop variables are named so they do not shadow the `tasks`
    # and `starting_task` parameters.
    mp_cfgs_new = []
    for config, cfg_tasks, cfg_start in mp_cfgs:
        copy_config = config.copy()
        copy_config["cl_load"] = cl_load
        mp_cfgs_new.append((copy_config, cfg_tasks, cfg_start))
    return mp_cfgs_new
# Ejemplo n.º 2
# 0
def do_steps_based(base_args,
                   cores,
                   name,
                   steps,
                   runs,
                   options=None,
                   tasks=None,
                   starting_task=''):
    """Build multiprocessing configs for a fixed-step-budget experiment.

    Args:
        base_args: Base argument dict; copied, never mutated.
        cores: Number of worker cores handed to ``Helper``.
        name: Experiment name; per-stage option codes are appended to it.
        steps: Total number of training steps.
        runs: Run indices; ``runs[0]`` is the starting run number.
        options: Optional per-stage option strings keyed by
            ``'balancing_tf'``, ``'balancing'`` and ``'walking'``.
        tasks: Mapping of task name to task config file (default: empty).
        starting_task: Name of the first task in the curriculum.

    Returns:
        List of ``(config, tasks, starting_task)`` tuples, one per run.
    """
    # Avoid the shared-mutable-default pitfall: the old `tasks={}` default
    # was a single dict object shared across all calls.
    if tasks is None:
        tasks = {}
    args = base_args.copy()
    args['steps'] = steps

    if options:
        # Encode the non-empty stage options into the experiment name.
        suffix = ''
        if options['balancing_tf']:
            suffix += '1_' + options['balancing_tf']
        if options['balancing']:
            suffix += '2_' + options['balancing']
        if options['walking']:
            suffix += '3_' + options['walking']
        if suffix:
            name += '-' + suffix
        args['options'] = options

    hp = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # generate configurations: one placeholder solution per run
    solutions = [None] * len(runs)
    begin = runs[0]

    mp_cfgs = hp.gen_cfg(solutions, 1, begin=begin)
    return mp_cfgs
# Ejemplo n.º 3
# 0
def do_reach_timeout_based(base_args,
                           cores,
                           name,
                           reach_timeout,
                           runs,
                           options=None,
                           tasks=None,
                           starting_task=''):
    """Build multiprocessing configs for a reach-timeout-based experiment.

    Args:
        base_args: Base argument dict; copied, never mutated.
        cores: Number of worker cores handed to ``Helper``.
        name: Experiment name; per-stage option codes are appended to it.
        reach_timeout: Timeout value stored under ``args['reach_timeout']``.
        runs: Run indices; ``runs[0]`` is the starting run number.
        options: Optional per-stage option strings keyed by
            ``'balancing_tf'``, ``'balancing'`` and ``'walking'``.
        tasks: Mapping of task name to task config file (default: empty).
        starting_task: Name of the first task in the curriculum.

    Returns:
        List of ``(config, tasks, starting_task)`` tuples, one per run.
    """
    # Avoid the shared-mutable-default pitfall: the old `tasks={}` default
    # was a single dict object shared across all calls.
    if tasks is None:
        tasks = {}
    args = base_args.copy()
    args['reach_timeout'] = reach_timeout
    steps = 300000
    args['steps'] = steps
    # Replay buffer capacity matches the total step budget.
    args['rb_max_size'] = steps

    if options:
        # Encode the non-empty stage options into the experiment name.
        suffix = ''
        if options['balancing_tf']:
            suffix += '1_' + options['balancing_tf'] + '_'
        if options['balancing']:
            suffix += '2_' + options['balancing'] + '_'
        if options['walking']:
            suffix += '3_' + options['walking']
        if suffix:
            name += '-' + suffix
        args['options'] = options

    hp = Helper(args, 'cl', name, tasks, starting_task, cores, use_mp=True)

    # Weights of the NN: one placeholder solution per run.
    solutions = [None] * len(runs)
    begin = runs[0]

    mp_cfgs = hp.gen_cfg(solutions, 1, begin=begin)
    return mp_cfgs
# Ejemplo n.º 4
# 0
def do_steps_based(base_args, cores, name, steps, runs, tasks, starting_task):
    """Generate per-run multiprocessing configs for a fixed step budget.

    Copies ``base_args``, sets the total training ``steps``, and asks the
    ``Helper`` to produce one configuration per entry in ``runs``, with run
    numbering starting at ``runs[0]``.
    """
    run_args = base_args.copy()
    run_args['steps'] = steps

    helper = Helper(run_args, 'cl', name, tasks, starting_task, cores,
                    use_mp=True)

    # Weights of the NN: one placeholder (None) solution per requested run.
    placeholder_solutions = [None for _ in runs]
    return helper.gen_cfg(placeholder_solutions, 1, begin=runs[0])
def do_network_based_mujoco(base_args, cores, name, nn_params, options, runs,
                            tasks, starting_task):
    """Build multiprocessing configs for a network-based curriculum (MuJoCo).

    Copies ``base_args``, fixes the MuJoCo experiment hyperparameters,
    optionally encodes per-stage ``options`` into the experiment name, and
    returns one ``(config, tasks, starting_task)`` tuple per run with the
    curriculum network weights injected under ``cl_load``.

    NOTE(review): reads ``args['cl_depth']`` without setting it here, so
    ``base_args`` is expected to provide it — confirm against callers.
    """
    args = base_args.copy()
    args['env_td_error_scale'] = 600.0
    args['env_timeout'] = 16.5
    args['steps'] = 700000
    # Replay buffer capacity matches the total step budget.
    args["rb_max_size"] = args['steps']
    args['rb_min_size'] = 1000
    args['default_damage'] = 4035.00
    args['perf_td_error'] = True
    args['perf_l2_reg'] = True
    args["cl_batch_norm"] = False
    args['cl_structure'] = 'rnnc:gru_tanh_6_dropout;fc_linear_3'
    args['cl_stages'] = 'balancing_tf;balancing;walking:monotonic'
    args['cl_pt_shape'] = (args['cl_depth'], 3)
    args["cl_pt_load"] = nn_params[1]
    network_weights = nn_params[0]

    if options:
        # Encode the non-empty stage options into the experiment name.
        tag = ''
        if options['balancing_tf']:
            tag += '1_' + options['balancing_tf'] + '_'
        if options['balancing']:
            tag += '2_' + options['balancing'] + '_'
        if options['walking']:
            tag += '3_' + options['walking']
        if tag:
            name += '-' + tag
        args['options'] = options

    helper = Helper(args, 'cl', name, tasks, starting_task, cores,
                    use_mp=True)

    # Weights of the NN: one placeholder solution per requested run.
    placeholder_solutions = [None for _ in runs]
    raw_cfgs = helper.gen_cfg(placeholder_solutions, 1, begin=runs[0])

    # Attach the curriculum network weights to a copy of every config.
    result = []
    for cfg_dict, cfg_tasks, cfg_start in raw_cfgs:
        patched = cfg_dict.copy()
        patched["cl_load"] = network_weights
        result.append((patched, cfg_tasks, cfg_start))
    return result
# NOTE(review): this top-level fragment continues a script whose beginning is
# outside this chunk -- `args`, `Helper` and `cl_run` are defined elsewhere.
args['perf_l2_reg'] = True
args['steps'] = 300000
# Replay buffer capacity matches the total step budget.
args["rb_max_size"] = args['steps']
#args["cl_batch_norm"] = True
#args['cl_structure'] = 'ffcritic:fc_relu_4;fc_relu_3;fc_relu_3'
args["cl_batch_norm"] = False
args['cl_structure'] = 'rnnc:gru_tanh_6_dropout;fc_linear_3'
args["cl_stages"] = "balancing_tf;balancing;walking:monotonic"
args['cl_depth'] = 2
# pt_shape is derived from the configured curriculum depth.
args['cl_pt_shape'] = (args['cl_depth'], 3)
args['test_interval'] = 30

#args["cl_target"] = True
# File-name stem for the curriculum network weights and its "_stat.pkl"
# statistics file; both are handed to the config below.
export_names = "eq_curriculum_network_depth_" + str(args['cl_depth'])
nn_params = (export_names, "{}_stat.pkl".format(export_names))
args["cl_pt_load"] = nn_params[1]

# Parameters: task name -> task config file, plus the curriculum entry point.
tasks = {
    'balancing_tf': 'cfg/leo_balancing_tf.yaml',
    'balancing': 'cfg/leo_balancing.yaml',
    'walking': 'cfg/leo_walking.yaml'
}
starting_task = 'balancing_tf'
# Single-core, no multiprocessing: this fragment runs one config directly.
hp = Helper(args, 'cl', 'ddpg', tasks, starting_task, 1, use_mp=False)

# Run actual script.
config, tasks, starting_task = hp.gen_cfg([None], 1)[0]
config["cl_load"] = nn_params[0]
cl_run(tasks, starting_task, **config)
# Ejemplo n.º 7
# 0
def main():
    """Run a cross-entropy search over curriculum step splits for DDPG.

    Each generation samples step-split combinations, evaluates them via the
    multiprocessing ``Helper``, and updates the cross-entropy optimizer with
    the observed damage values.

    NOTE(review): SOURCE may be truncated here -- the last visible line is
    inside the optimization loop; any post-loop finalization is out of view.
    """
    prepare_multiprocessing()
    alg = 'ddpg'
    args = parse_args()

    # Cap the worker count at the machine's core count (32 when no explicit
    # --cores argument was given).
    if args['cores']:
        arg_cores = min(multiprocessing.cpu_count(), args['cores'])
    else:
        arg_cores = min(multiprocessing.cpu_count(), 32)
    print('Using {} cores.'.format(arg_cores))

    args['mp_debug'] = True
    #args['reach_return'] = 1422.66
    #args['default_damage'] = 4035.00
    args['reach_return'] = 526.0
    args['default_damage'] = 4132.00
    args['perf_td_error'] = True
    args['perf_l2_reg'] = True
    args['rb_min_size'] = 1000
    args['cl_l2_reg'] = 0
    steps = 400000
    # Replay buffer capacity matches the total step budget.
    args['rb_max_size'] = steps
    # Step-granularity of the two search dimensions (see comb_to_sol).
    steps_delta_a = 1000
    steps_delta_b = 4000
    popsize = 16 * 6
    G = 100  # maximum number of generations
    use_mp = True

    #    ### For debugging
    #    args['mp_debug'] = False
    #    steps       = 3000
    #    steps_delta_a = 50
    #    steps_delta_b = 50
    #    G = 100
    #    popsize = 3
    #    use_mp = False
    #    ###

    # Tasks: task name -> task config file.
    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml'
    }
    starting_task = 'balancing_tf'

    # Per-stage load options for later curriculum stages.
    options = {
        'balancing_tf': '',
        'balancing': 'nnload_rbload',
        'walking': 'nnload_rbload'
    }

    # Output directory for logs, damage pickles, and optimizer snapshots.
    root = "cl"
    if not os.path.exists(root):
        os.makedirs(root)

    # 26 discrete categories per search dimension.
    categories = range(26)
    #balancing_tf = np.array(categories)/max(categories)
    #balancing_tf = [int(steps_ub*(math.exp(3*x)-1)/(math.exp(3)-1)) for x in balancing_tf]

    # To ensure fair sampling, enumberate all step_options and select unique ones!
    # (Different (a, b) pairs can map to the same step solution; keep only
    # combinations producing a solution not seen before.)
    step_combinations, step_solutions = [], []
    for a in categories:
        for b in categories:
            sol = comb_to_sol((a, b), steps, steps_delta_a, steps_delta_b)
            if sol not in step_solutions:
                step_combinations.append((a, b))
                step_solutions.append(sol)

    opt = opt_ce(popsize, step_combinations, categories)
    g = 1
    # Uncomment to resume from a saved optimizer state:
    #opt = opt_ce.load(root, 'opt.pkl')
    #g = 2

    hp = Helper(args,
                root,
                alg,
                tasks,
                starting_task,
                arg_cores,
                use_mp=use_mp)

    # Cross-entropy optimization loop: one iteration per generation.
    while not opt.stop() and g <= G:
        if args['mp_debug']:
            # Redirect stdout to a per-generation log file.
            sys.stdout = Logger(root + "/stdout-g{:04}.log".format(g))
            print("Should work")

        combinations = opt.ask()

        # convert sampled options to solutions
        solutions = []
        for comb in combinations:
            solutions.append(
                comb_to_sol(comb, steps, steps_delta_a, steps_delta_b))

        # preparation; randomize the test interval per config (1..30).
        mp_cfgs = hp.gen_cfg_steps(solutions, g, options=options)
        for cfg in mp_cfgs:
            cfg[0]['test_interval'] = 1 + randint(0, 29)

        # evaluate and backup immediately
        damage = hp.run(mp_cfgs)
        with open(root + '/damage.pkl', 'wb') as f:
            pickle.dump(damage, f, 2)

        # remove None elements
        # (elementwise `!= None` over an object array is deliberate here:
        # it yields a boolean mask, which `is not None` cannot.)
        notnonel = np.where(np.array(damage) != None)[0]
        damage = [d for i, d in enumerate(damage) if i in notnonel]
        combinations = [d for i, d in enumerate(combinations) if i in notnonel]

        # update using *original* solutions == combinations
        best = opt.tell(combinations, damage)

        # back-project to array incluing None elements
        best = [notnonel[i] for i in best]

        # logging
        opt.log(root, alg, g, hp.damage_info, hp.reeval_damage_info, best)
        opt.save(root, 'opt.pkl')

        # new iteration
        g += 1