Example 1
def main():
    args = parse_args()

    if args['cores']:
        cores = min(cpu_count(), args['cores'])
    else:
        cores = min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(16)
    exp_name = "ddpg-exp2_two_stage"

    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml'
    }

    starting_task = 'balancing_tf'
    misc = {'tasks': tasks, 'starting_task': starting_task, 'runs': runs}

    mp_cfgs = []

    # In this experiment the walking stage is kept at 250000 steps, while the preceding balancing stage is run for various durations.
    wsteps = 250000
    for bsteps_mul in range(9):
        name = "{}-mul{}".format(exp_name, bsteps_mul)
        if bsteps_mul > 0:
            bsteps = 25000 * bsteps_mul
            args['rb_max_size'] = wsteps
            options = {
                'balancing_tf': '',
                'balancing': '',
                'walking': 'nnload_rbload'
            }
            mp_cfgs += do_steps_based(args,
                                      cores,
                                      name=name,
                                      steps=(-1, bsteps, wsteps),
                                      options=options,
                                      **misc)
        else:
            options = {'balancing_tf': '', 'balancing': '', 'walking': ''}
            mp_cfgs += do_steps_based(args,
                                      cores,
                                      name=name,
                                      steps=(-1, -1, wsteps),
                                      options=options,
                                      **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
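The helper functions used throughout these examples (parse_args, do_steps_based, export_cfg, prepare_multiprocessing, do_multiprocessing_pool, cl_run, ...) are defined elsewhere in the repository and are not shown in the excerpts. As a purely hypothetical sketch, do_multiprocessing_pool might look roughly like the following, assuming each entry of mp_cfgs is a (config, tasks, starting_task) tuple executed by cl_run, which is how the single-run fallback in Example 4 below unpacks it:

from multiprocessing import Pool

def _run_one(cfg):
    # Unpack one configuration tuple and run a single curriculum-learning job.
    # cl_run is the per-run entry point defined elsewhere in the repository.
    config, tasks, starting_task = cfg
    return cl_run(tasks, starting_task, **config)

def do_multiprocessing_pool(cores, mp_cfgs):
    # Spread the shuffled configurations over the available worker processes.
    with Pool(processes=cores) as pool:
        pool.map(_run_one, mp_cfgs)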
Example 2
def main():
    args = parse_args()

    if args['cores']:
        cores = min(cpu_count(), args['cores'])
    else:
        cores = min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(16)
    keep_samples = False
    exp_name = "ddpg-exp1_two_stage"

    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml'
    }

    starting_task = 'balancing_tf'
    misc = {'tasks': tasks, 'starting_task': starting_task, 'runs': runs}

    mp_cfgs = []

    steps = 300000
    args['rb_max_size'] = steps
    options = {'balancing_tf': '', 'balancing': '', 'walking': ''}
    mp_cfgs += do_steps_based(args, cores, name=exp_name, steps=(-1, -1, steps), options=options, **misc)

    bsteps = 50000
    wsteps = steps - bsteps
    args['rb_max_size'] = steps if keep_samples else wsteps
    options = {'balancing_tf': '', 'balancing': '', 'walking': 'nnload'}
    mp_cfgs += do_steps_based(args, cores, name=exp_name, steps=(-1, bsteps, wsteps), options=options, **misc)

    options = {'balancing_tf': '', 'balancing': '', 'walking': 'nnload_rbload'}
    mp_cfgs += do_steps_based(args, cores, name=exp_name, steps=(-1, bsteps, wsteps), options=options, **misc)

    options = {'balancing_tf': '', 'balancing': '', 'walking': 'nnload_rbload_re_walking_300_-1.5'}
    mp_cfgs += do_steps_based(args, cores, name=exp_name, steps=(-1, bsteps, wsteps), options=options, **misc)

    options = {'balancing_tf': '', 'balancing': '', 'walking': 'rbload'}
    mp_cfgs += do_steps_based(args, cores, name=exp_name, steps=(-1, bsteps, wsteps), options=options, **misc)

    options = {'balancing_tf': '', 'balancing': '', 'walking': 'rbload_re_walking_300_-1.5'}
    mp_cfgs += do_steps_based(args, cores, name=exp_name, steps=(-1, bsteps, wsteps), options=options, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
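Every example indexes args with string keys (args['cores'], args['rb_max_size'], ...), so parse_args presumably returns a plain dict. A minimal sketch under that assumption, declaring only keys that appear in these excerpts; the real script accepts more options, and the defaults below are invented for illustration:

import argparse

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cores', type=int, default=0,
                        help='worker processes to use; 0 means "up to 16"')
    parser.add_argument('--rb_max_size', type=int, default=300000,
                        help='replay-buffer capacity; overridden per experiment')
    parser.add_argument('--test_interval', type=int, default=30,
                        help='-1 exports only the learning trajectory')
    # vars() turns the Namespace into the dict that the examples index by key.
    return vars(parser.parse_args())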
Example 3
def main():
    args = parse_args()

    if args['cores']:
        cores = min(cpu_count(), args['cores'])
    else:
        cores = min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(7, 16)
    exp_name = "ddpg-exp1_two_stage"

    starting_task = 'balancing_tf'
    misc = {'starting_task': starting_task, 'runs': runs}
    mp_cfgs = []

    keep_samples = False

    # Hopper
    tasks = {
        'balancing_tf': 'RoboschoolHopperBalancingGRL-v1',
        'balancing': 'RoboschoolHopperBalancingGRL-v1',
        'walking': 'RoboschoolHopperGRL-v1'
    }
    bsteps = 100000
    steps = 600000
    reassess_for = 'walking_3_-1.5'
    args['rb_max_size'] = steps if keep_samples else steps - bsteps
    mp_cfgs += create_tasks(args, cores, exp_name + '_hopper', bsteps, steps,
                            reassess_for, tasks, **misc)

    # HalfCheetah
    tasks = {
        'balancing_tf': 'RoboschoolHalfCheetahBalancingGRL-v1',
        'balancing': 'RoboschoolHalfCheetahBalancingGRL-v1',
        'walking': 'RoboschoolHalfCheetahGRL-v1'
    }
    bsteps = 100000
    steps = 600000
    reassess_for = 'walking_3_-1.5'
    args['rb_max_size'] = steps if keep_samples else steps - bsteps
    mp_cfgs += create_tasks(args, cores, exp_name + '_halfcheetah', bsteps,
                            steps, reassess_for, tasks, **misc)

    # Walker2d
    tasks = {
        'balancing_tf': 'RoboschoolWalker2dBalancingGRL-v1',
        'balancing': 'RoboschoolWalker2dBalancingGRL-v1',
        'walking': 'RoboschoolWalker2dGRL-v1'
    }

    bsteps = 200000
    steps = 700000
    reassess_for = 'walking_3_-1.5'
    args['rb_max_size'] = steps if keep_samples else steps - bsteps
    mp_cfgs += create_tasks(args, cores, exp_name + '_walker2d', bsteps, steps,
                            reassess_for, tasks, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
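Examples 3 and 5 size the replay buffer with the same one-line rule: when balancing-stage samples are not kept, the buffer only needs room for the samples gathered after the switch. Written out as a small standalone helper (not part of the original scripts, shown only to make the arithmetic explicit):

def replay_buffer_size(steps, bsteps, keep_samples):
    # Room for every sample, or only for the samples collected after balancing.
    return steps if keep_samples else steps - bsteps

# Hopper and HalfCheetah settings from Example 3:
assert replay_buffer_size(600000, 100000, keep_samples=False) == 500000
# Walker2d settings from Example 3:
assert replay_buffer_size(700000, 200000, keep_samples=False) == 500000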
Example 4
def main():
    args = parse_args()

    if args['cores']:
        cores = min(cpu_count(), args['cores'])
    else:
        cores = min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # for working with yaml files
    _mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    yaml.add_representer(collections.OrderedDict, dict_representer)
    yaml.add_constructor(_mapping_tag, dict_constructor)

    # Parameters
    runs = range(16)

    # create perturbed models of leo
    model_paths = (
        '/home/ivan/work/Project/Software/grl/src/grl/addons/rbdl/cfg/leo_vc',
        '/grl/src/grl/addons/rbdl/cfg/leo_vc',
    )

    models, names = create_models(model_paths)
    tasks, names = create_tasks(models, names)

    args['cl_depth'] = 2

    options = {
        'balancing_tf': '',
        'balancing': 'nnload_rbload',
        'walking': 'nnload_rbload'
    }
    starting_task = 'balancing_tf'
    mp_cfgs = []
    for task, name in zip(tasks, names):
        misc = {'tasks': task, 'starting_task': starting_task, 'runs': runs}

        export_names = "eq_curriculum_network_depth_" + str(args['cl_depth'])
        nn_params = (export_names, "{}_stat.pkl".format(export_names))
        mp_cfgs += do_network_based_leo(args,
                                        cores,
                                        name='ddpg-cl_short_' + name,
                                        nn_params=nn_params,
                                        options=options,
                                        **misc)

    #    mp_cfgs += do_steps_based(args, cores, name='ddpg-bbw', steps=(20000, 30000, 250000), **misc)
    #    mp_cfgs += do_steps_based(args, cores, name='ddpg-bw',  steps=(   -1, 50000, 250000), **misc)
    #    mp_cfgs += do_steps_based(args, cores, name='ddpg-w',   steps=(   -1,    -1, 300000), **misc)
    #
    #    # Naive switching after balancing has been achieved for n seconds, happening twice; 0 means not used.
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb55', reach_timeout=(5.0, 5.0, 0.0), **misc)
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb5', reach_timeout=(-1.0, 5.0, 0.0), **misc)
    #
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb2020', reach_timeout=(20.0, 20.0, 0.0), **misc)
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb20', reach_timeout=(-1.0, 20.0, 0.0), **misc)

    #    # walker2d
    #    tasks = {
    #        'balancing_tf': 'RoboschoolWalker2dBalancingGRL_TF-v1',
    #        'balancing':    'RoboschoolWalker2dBalancingGRL-v1',
    #        'walking':      'RoboschoolWalker2dGRL-v1'
    #        }
    #    misc = {'tasks':tasks, 'starting_task':starting_task, 'runs':runs}
    #    mp_cfgs += do_network_based_mujoco(args, cores, name='ddpg-cl_short_walker2d', nn_params=nn_params, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    #    do_multiprocessing_pool(cores, mp_cfgs)
    config, tasks, starting_task = mp_cfgs[0]
    cl_run(tasks, starting_task, **config)
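Examples 4 and 6 register dict_representer and dict_constructor so that PyYAML reads and writes collections.OrderedDict without losing key order. Their bodies are not shown in the excerpts; the conventional implementation of this pattern looks roughly like the following (an assumption based on the standard PyYAML recipe, not necessarily the authors' exact code):

import collections
import yaml

_mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG

def dict_representer(dumper, data):
    # Emit an OrderedDict as a regular YAML mapping, preserving key order.
    return dumper.represent_mapping(_mapping_tag, data.items())

def dict_constructor(loader, node):
    # Build every YAML mapping as an OrderedDict instead of a plain dict.
    return collections.OrderedDict(loader.construct_pairs(node))

yaml.add_representer(collections.OrderedDict, dict_representer)
yaml.add_constructor(_mapping_tag, dict_constructor)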
Example 5
def main():
    args = parse_args()
    # A test_interval of -1 ensures that only the learning trajectory is exported.
    args['test_interval'] = -1

    if args['cores']:
        cores = min(cpu_count(), args['cores'])
    else:
        cores = min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(6)
    exp_name = "ddpg-exp1_two_stage"

    starting_task = 'balancing_tf'
    misc = {'starting_task': starting_task, 'runs': runs}
    mp_cfgs = []

    keep_samples = False

    # Leo
    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml'
    }
    bsteps = 50000
    steps = 300000
    reassess_for = 'walking_300_-1.5'
    args['rb_max_size'] = steps if keep_samples else steps - bsteps
    args['env_timestep'] = 0.03
    cl_options = {
        'balancing_tf': '',
        'balancing': '',
        'walking': 'nnload_rbload'
    }
    mp_cfgs += create_tasks(args, cores, exp_name + '_leo', bsteps, steps,
                            reassess_for, tasks, cl_options, **misc)

    # Hopper
    tasks = {
        'balancing_tf': 'RoboschoolHopperBalancingGRL-v1',
        'balancing': 'RoboschoolHopperBalancingGRL-v1',
        'walking': 'RoboschoolHopperGRL-v1'
    }
    bsteps = 100000
    steps = 600000
    reassess_for = 'walking_3_-1.5'
    args['rb_max_size'] = steps if keep_samples else steps - bsteps
    args['env_timestep'] = 0.0165
    cl_options = {'balancing_tf': '', 'balancing': '', 'walking': 'nnload'}
    mp_cfgs += create_tasks(args, cores, exp_name + '_hopper', bsteps, steps,
                            reassess_for, tasks, cl_options, **misc)

    # HalfCheetah
    tasks = {
        'balancing_tf': 'RoboschoolHalfCheetahBalancingGRL-v1',
        'balancing': 'RoboschoolHalfCheetahBalancingGRL-v1',
        'walking': 'RoboschoolHalfCheetahGRL-v1'
    }
    bsteps = 100000
    steps = 600000
    reassess_for = 'walking_3_-1.5'
    args['rb_max_size'] = steps if keep_samples else steps - bsteps
    args['env_timestep'] = 0.0165
    cl_options = {'balancing_tf': '', 'balancing': '', 'walking': 'nnload'}
    mp_cfgs += create_tasks(args, cores, exp_name + '_halfcheetah', bsteps,
                            steps, reassess_for, tasks, cl_options, **misc)

    # Walker2d
    tasks = {
        'balancing_tf': 'RoboschoolWalker2dBalancingGRL-v1',
        'balancing': 'RoboschoolWalker2dBalancingGRL-v1',
        'walking': 'RoboschoolWalker2dGRL-v1'
    }
    bsteps = 200000
    steps = 700000
    reassess_for = 'walking_3_-1.5'
    args['rb_max_size'] = steps if keep_samples else steps - bsteps
    args['env_timestep'] = 0.0165
    cl_options = {'balancing_tf': '', 'balancing': '', 'walking': 'nnload'}
    mp_cfgs += create_tasks(args, cores, exp_name + '_walker2d', bsteps, steps,
                            reassess_for, tasks, cl_options, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
Example 6
def main():
    args = parse_args()

    if args['cores']:
        cores = min(cpu_count(), args['cores'])
    else:
        cores = min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # for working with yaml files
    _mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    yaml.add_representer(collections.OrderedDict, dict_representer)
    yaml.add_constructor(_mapping_tag, dict_constructor)

    # Parameters
    runs = range(16)
    args['cl_depth'] = 2

    options = {
        'balancing_tf': '',
        'balancing': 'nnload_rbload',
        'walking': 'nnload_rbload'
    }
    export_names = "eq_curriculum_network_depth_" + str(args['cl_depth'])
    nn_params = (export_names, "{}_stat.pkl".format(export_names))

    starting_task = 'balancing_tf'
    mp_cfgs = []

    # leo
    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml'
    }
    misc = {'tasks': tasks, 'starting_task': starting_task, 'runs': runs}
    mp_cfgs += do_network_based_leo(args,
                                    cores,
                                    name='ddpg-leo',
                                    nn_params=nn_params,
                                    options=options,
                                    **misc)

    # leo_perturbed
    tasks = {
        'balancing_tf': 'cfg/leo_perturbed_balancing_tf.yaml',
        'balancing': 'cfg/leo_perturbed_balancing.yaml',
        'walking': 'cfg/leo_perturbed_walking.yaml'
    }
    misc = {'tasks': tasks, 'starting_task': starting_task, 'runs': runs}
    mp_cfgs += do_network_based_leo(args,
                                    cores,
                                    name='ddpg-leo_perturbed',
                                    nn_params=nn_params,
                                    options=options,
                                    **misc)

    # walker2d
    tasks = {
        'balancing_tf': 'RoboschoolWalker2dBalancingGRL_TF-v1',
        'balancing': 'RoboschoolWalker2dBalancingGRL-v1',
        'walking': 'RoboschoolWalker2dGRL-v1'
    }
    misc = {'tasks': tasks, 'starting_task': starting_task, 'runs': runs}
    mp_cfgs += do_network_based_mujoco(args,
                                       cores,
                                       name='ddpg-walker2d',
                                       nn_params=nn_params,
                                       options=options,
                                       **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
Example 7
def main():
    args = parse_args()

    if args['cores']:
        cores = min(cpu_count(), args['cores'])
    else:
        cores = min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(1)
    #model = 'perturbed'
    model = 'idealized'

    if model == 'idealized':
        tasks = {
            'balancing_tf': 'cfg/leo_balancing_tf.yaml',
            'balancing': 'cfg/leo_balancing.yaml',
            'walking': 'cfg/leo_walking.yaml'
        }
    else:
        tasks = {
            'balancing_tf': 'cfg/leo_perturbed_balancing_tf.yaml',
            'balancing': 'cfg/leo_perturbed_balancing.yaml',
            'walking': 'cfg/leo_perturbed_walking.yaml'
        }

    starting_task = 'balancing_tf'
    misc = {'tasks': tasks, 'starting_task': starting_task, 'runs': runs}

    mp_cfgs = []
    #    nn_params=("long_curriculum_network", "long_curriculum_network_stat.pkl")
    #    mp_cfgs += do_network_based(args, cores, name='ddpg-cl_long', nn_params=nn_params, **misc)

    nn_params = ("short_curriculum_network",
                 "short_curriculum_network_stat.pkl")
    mp_cfgs += do_network_based_leo(args,
                                    cores,
                                    name='ddpg-cl_short',
                                    nn_params=nn_params,
                                    **misc)

    #    mp_cfgs += do_steps_based(args, cores, name='ddpg-bbw', steps=(20000, 30000, 250000), **misc)
    #    mp_cfgs += do_steps_based(args, cores, name='ddpg-bw',  steps=(   -1, 50000, 250000), **misc)
    #    mp_cfgs += do_steps_based(args, cores, name='ddpg-w',   steps=(   -1,    -1, 300000), **misc)
    #
    #    # Naive switching after balancing has been achieved for n seconds, happening twice; 0 means not used.
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb55', reach_timeout=(5.0, 5.0, 0.0), **misc)
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb5', reach_timeout=(-1.0, 5.0, 0.0), **misc)
    #
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb2020', reach_timeout=(20.0, 20.0, 0.0), **misc)
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-rb20', reach_timeout=(-1.0, 20.0, 0.0), **misc)

    #    # walker2d
    #    tasks = {
    #        'balancing_tf': 'RoboschoolWalker2dBalancingGRL_TF-v1',
    #        'balancing':    'RoboschoolWalker2dBalancingGRL-v1',
    #        'walking':      'RoboschoolWalker2dGRL-v1'
    #        }
    #    misc = {'tasks':tasks, 'starting_task':starting_task, 'runs':runs}
    #    mp_cfgs += do_network_based_mujoco(args, cores, name='ddpg-cl_short_walker2d', nn_params=nn_params, **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    #do_multiprocessing_pool(cores, mp_cfgs)
    config, tasks, starting_task = mp_cfgs[0]
    cl_run(tasks, starting_task, **config)
Example 8
def main():
    args = parse_args()

    if args['cores']:
        cores = min(cpu_count(), args['cores'])
    else:
        cores = min(cpu_count(), 16)
    print('Using {} cores.'.format(cores))

    # Parameters
    runs = range(16)

    tasks = {
        'balancing_tf': 'cfg/leo_balancing_tf.yaml',
        'balancing': 'cfg/leo_balancing.yaml',
        'walking': 'cfg/leo_walking.yaml'
    }

    starting_task = 'balancing_tf'
    misc = {'tasks': tasks, 'starting_task': starting_task, 'runs': runs}

    args['cl_keep_samples'] = True
    options = {
        'balancing_tf': '',
        'balancing': 'nnload',
        'walking': 'nnload_rbload'
    }

    mp_cfgs = []

    #    # Naive switching after balancing has been achieved for n seconds, happening once; 0 means not used.
    #    args['reach_timeout_num'] = 1
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-exp3-three_stage-rb5', reach_timeout=(5.0, 5.0, 0.0), options=options, **misc)
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-exp3-two_stage-rb5', reach_timeout=(-1.0, 5.0, 0.0), options=options, **misc)
    #
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-exp3-three_stage-rb20', reach_timeout=(20.0, 20.0, 0.0), options=options, **misc)
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-exp3-two_stage-rb20', reach_timeout=(-1.0, 20.0, 0.0), options=options, **misc)
    #
    #
    #    # Naive switching after balancing has been achieved for n seconds, happening twice; 0 means not used.
    #    args['reach_timeout_num'] = 2
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-exp3-three_stage-rb55', reach_timeout=(5.0, 5.0, 0.0), options=options, **misc)
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-exp3-two_stage-rb55', reach_timeout=(-1.0, 5.0, 0.0), options=options, **misc)
    #
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-exp3-three_stage-rb2020', reach_timeout=(20.0, 20.0, 0.0), options=options, **misc)
    #    mp_cfgs += do_reach_timeout_based(args, cores, name='ddpg-exp3-two_stage-rb2020', reach_timeout=(-1.0, 20.0, 0.0), options=options, **misc)

    args['reach_timeout_num'] = 5
    mp_cfgs += do_reach_timeout_based(args,
                                      cores,
                                      name='ddpg-exp3-three_stage-rb55555',
                                      reach_timeout=(5.0, 5.0, 0.0),
                                      options=options,
                                      **misc)
    mp_cfgs += do_reach_timeout_based(args,
                                      cores,
                                      name='ddpg-exp3-two_stage-rb55555',
                                      reach_timeout=(-1.0, 5.0, 0.0),
                                      options=options,
                                      **misc)

    # DBG: export configuration
    export_cfg(mp_cfgs)

    # Run all scripts at once
    random.shuffle(mp_cfgs)
    prepare_multiprocessing()
    do_multiprocessing_pool(cores, mp_cfgs)
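When run as standalone scripts, each of these files would end with the usual Python entry-point guard, omitted from the excerpts above:

if __name__ == '__main__':
    main()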