Ejemplo n.º 1
0
def launch_experiments(variant_generator):
    """Hand the generator's variants to ``run_sac_experiment``.

    The run mode, experiment name and log directory are read from the
    module-level ``args`` object. A variant whose ``'seed'`` entry is the
    string ``'random'`` has it replaced, in place, by the result of
    ``np.random.randint(1, 100)``.

    NOTE(review): ``sys.exit()`` is inside the loop body, so the process
    exits as soon as the first ``run_sac_experiment`` call returns; confirm
    that this is intended.

    Args:
        variant_generator: Object whose ``variants()`` method returns a
            sized iterable of variant mappings.
    """
    all_variants = variant_generator.variants()

    total = len(all_variants)
    print('Launching {} experiments.'.format(total))

    for index, variant in enumerate(all_variants):
        if variant['seed'] == 'random':
            variant['seed'] = np.random.randint(1, 100)
        print("Experiment: {}/{}".format(index, total))

        # exp_prefix is "<prefix>/<exp_name>"; exp_name is
        # "<prefix>-<exp_name>-<index zero-padded to two digits>".
        group = variant['prefix'] + '/' + args.exp_name
        label = (variant['prefix'] + '-' + args.exp_name + '-' +
                 '{:02}'.format(index))

        launch_kwargs = dict(
            mode=args.mode,
            variant=variant,
            exp_prefix=group,
            exp_name=label,
            n_parallel=1,
            seed=variant['seed'],
            terminate_machine=True,
            log_dir=args.log_dir,
            snapshot_mode=variant['snapshot_mode'],
            snapshot_gap=variant['snapshot_gap'],
            sync_s3_pkl=variant['sync_pkl'],
        )
        run_sac_experiment(run_experiment, **launch_kwargs)

        sys.exit()
Ejemplo n.º 2
0
def launch_experiments(variant_generator):
    """Run ``run_acer`` in 'local' mode for every variant of the generator.

    Each variant is passed to ``run_sac_experiment`` with a prefix built
    from its ``'env'`` entry and a name of the form ``acer_test1-<NN>``,
    where ``<NN>`` is the variant index zero-padded to two digits.

    Args:
        variant_generator: Object whose ``variants()`` method returns a
            sized iterable of variant mappings with ``'env'`` and ``'seed'``
            entries.
    """
    variant_list = variant_generator.variants()

    total = len(variant_list)
    print('Launching {} experiments.'.format(total))

    base_name = 'acer_test1'
    for index, variant in enumerate(variant_list):
        print("Experiment: {}/{}".format(index, total))

        run_sac_experiment(
            run_acer,
            mode='local',
            variant=variant,
            exp_prefix='acer_baselines_final/' + variant['env'] + '/',
            exp_name=base_name + '-' + '{:02}'.format(index),
            n_parallel=1,
            seed=variant['seed'],
            terminate_machine=True,
            snapshot_mode='last',
            snapshot_gap=1000,
            sync_s3_pkl=False,
        )
Ejemplo n.º 3
0
def launch_experiments(variant_generator, args):
    """Launch one ``run_sac_experiment`` call per unflattened variant.

    Every variant is passed through ``unflatten(..., separator='.')``.
    Before launching, ``args.scale`` and ``args.cost_type`` are written into
    the variant's ``'algorithm_params'`` mapping.

    Args:
        variant_generator: Object whose ``variants()`` method returns an
            iterable of flat variants.
        args: Object providing ``scale``, ``cost_type``, ``exp_name``,
            ``mode`` and ``log_dir`` attributes.
    """
    # TODO: Remove unflatten. Our variant generator should support nested params
    nested_variants = [
        unflatten(flat, separator='.')
        for flat in variant_generator.variants()
    ]

    total = len(nested_variants)
    print('Launching {} experiments.'.format(total))

    for index, variant in enumerate(nested_variants):
        print("Experiment: {}/{}".format(index, total))
        run_params = variant['run_params']
        algo_params = variant['algorithm_params']
        algo_params['scale'] = args.scale
        algo_params['cost_type'] = args.cost_type

        prefix = variant['prefix']
        group = prefix + '/' + args.exp_name
        label = '{prefix}-{exp_name}-{i:02}'.format(
            prefix=variant['prefix'], exp_name=args.exp_name, i=index)

        launch_kwargs = dict(
            mode=args.mode,
            variant=variant,
            exp_prefix=group,
            exp_name=label,
            n_parallel=1,
            seed=run_params['seed'],
            terminate_machine=True,
            log_dir=args.log_dir,
            snapshot_mode=run_params['snapshot_mode'],
            snapshot_gap=run_params['snapshot_gap'],
            sync_s3_pkl=run_params['sync_pkl'],
        )
        run_sac_experiment(run_experiment, **launch_kwargs)
Ejemplo n.º 4
0
def launch_experiments(variant_generator):
    """Launch a fine-tuning run for every variant, each in its own log dir.

    For each variant a tag is assembled from the second-to-last ``/``
    component of ``variant['snapshot_filename']`` and the ``key_value``
    pairs for the keys in the module-level ``TAG_KEYS``. The log directory
    is ``args.log_dir`` joined with that tag, and ``variant['video_dir']``
    is set to its ``videos`` subdirectory before the variant is passed to
    ``run_sac_experiment``.

    Args:
        variant_generator: Object whose ``variants()`` method returns a
            sized iterable of variant mappings.
    """
    variants = variant_generator.variants()

    for index, variant in enumerate(variants):
        snapshot_path = variant['snapshot_filename']
        print(snapshot_path)

        parent_dir = snapshot_path.split('/')[-2]
        key_part = '__'.join(
            '%s_%s' % (key, variant[key]) for key in TAG_KEYS)
        tag = 'finetune__' + parent_dir + '____' + key_part

        log_dir = os.path.join(args.log_dir, tag)
        variant['video_dir'] = os.path.join(log_dir, 'videos')

        # This banner is emitted once per variant, not once overall.
        print('Launching {} experiments.'.format(len(variants)))

        prefix = variant['prefix']
        run_sac_experiment(
            run_experiment,
            mode=args.mode,
            variant=variant,
            exp_prefix=prefix + '/' + args.exp_name,
            exp_name=prefix + '-' + args.exp_name + '-' +
            '{:02}'.format(index),
            # Increasing n_parallel barely affects performance, but breaks
            # learning of the hierarchical policy.
            n_parallel=1,
            seed=variant['seed'],
            terminate_machine=True,
            log_dir=log_dir,
            snapshot_mode=variant['snapshot_mode'],
            snapshot_gap=variant['snapshot_gap'],
            sync_s3_pkl=variant['sync_pkl'],
        )
Ejemplo n.º 5
0
def launch_experiments(variant_generator, args):
    """Launch one ``run_sac_experiment`` call per unflattened variant.

    Command-line values override parts of each variant: ``n_train_repeat``,
    ``n_epochs`` and ``gpu_fraction`` are written into
    ``variant['algorithm_params']['base_kwargs']``. The experiment name and
    the per-run log directory are derived from ``args.seed`` rather than
    from the loop index, and ``args.seed`` is also the seed passed on.

    Args:
        variant_generator: Object whose ``variants()`` method returns an
            iterable of flat variants.
        args: Object providing ``exp_name``, ``seed``, ``n_train_repeat``,
            ``n_epochs``, ``gpu_fraction``, ``log_dir``, ``mode`` and
            ``n_parallel`` attributes.
    """
    # TODO: Remove unflatten. Our variant generator should support nested params
    nested_variants = [
        unflatten(flat, separator='.')
        for flat in variant_generator.variants()
    ]

    total = len(nested_variants)
    print('Launching {} experiments.'.format(total))

    # (message template, variant key) pairs echoed before every launch.
    report_lines = (
        ('run params: {}', 'run_params'),
        ('algorithm_params: {}', 'algorithm_params'),
        ('env_params: {}', 'env_params'),
        ('value_fn_params: {}', 'value_fn_params'),
        ('replay_buffer_params: {}', 'replay_buffer_params'),
        ('policy_params: {}', 'policy_params'),
        ('sampler_params: {}', 'sampler_params'),
        ('task: {}', 'task'),
    )

    for index, variant in enumerate(nested_variants):
        print("Experiment: {}/{}".format(index, total))
        run_params = variant['run_params']
        algo_params = variant['algorithm_params']

        group = variant['prefix'] + '/' + args.exp_name
        label = '{prefix}-{exp_name}-{i:02}'.format(
            prefix=variant['prefix'], exp_name=args.exp_name, i=args.seed)

        # HACK: command-line overrides applied while the code is sorted out.
        for option in ('n_train_repeat', 'n_epochs', 'gpu_fraction'):
            algo_params['base_kwargs'][option] = getattr(args, option)

        # RC: TODO change back to args.logdir
        run_log_dir = os.path.join(args.log_dir, label)

        for template, key in report_lines:
            print(template.format(variant[key]))

        launch_kwargs = dict(
            mode=args.mode,
            variant=variant,
            exp_prefix=group,
            exp_name=label,
            n_parallel=args.n_parallel,
            # run_params['seed'] is deliberately not used here.
            seed=args.seed,
            terminate_machine=True,
            log_dir=run_log_dir,
            snapshot_mode=run_params['snapshot_mode'],
            snapshot_gap=run_params['snapshot_gap'],
            sync_s3_pkl=run_params['sync_pkl'],
        )
        run_sac_experiment(run_experiment, **launch_kwargs)
Ejemplo n.º 6
0
def launch_experiments(variant_generator, args):
    """Launch the single variant selected by ``args.seed``.

    The variant at position ``args.seed - 1`` of the unflattened variant
    list is chosen, a fixed set of hyperparameters is copied from ``args``
    into it, and it is passed to ``run_sac_experiment``.

    NOTE(review): with ``args.seed == 0`` the index is ``-1``, which selects
    the last variant; confirm seeds are expected to start at 1.

    Args:
        variant_generator: Object whose ``variants()`` method returns an
            iterable of flat variants.
        args: Object providing ``seed``, ``exp_name``, ``mode``, ``log_dir``
            and one attribute per name in ``copied_options`` below.

    Raises:
        IndexError: If ``args.seed - 1`` is outside the variant list.
    """
    # TODO: Remove unflatten. Our variant generator should support nested params
    nested_variants = [
        unflatten(flat, separator='.')
        for flat in variant_generator.variants()
    ]

    print('Launching seed={} experiment.'.format(args.seed))
    variant = nested_variants[args.seed - 1]

    # Options copied verbatim from the command line into the variant.
    copied_options = (
        'lr', 'tau',
        'l1regpi', 'l2regpi', 'l1regvf', 'l2regvf',
        'wclippi', 'wclipvf',
        'dropoutpi', 'dropoutvf',
        'ent_coef',
        'batchnormpi', 'batchnormvf',
        'reward_scale', 'num_hidden',
        'policypath', 'valuepath',
    )
    for option in copied_options:
        variant[option] = getattr(args, option)

    print("Variant for this experiment:", variant)
    run_params = variant['run_params']
    # Not used below; the lookup is kept so a variant without
    # 'algorithm_params' still fails here.
    algo_params = variant['algorithm_params']

    group = variant['prefix'] + '/' + args.exp_name
    label = '{prefix}-{exp_name}-{i:02}'.format(
        prefix=variant['prefix'], exp_name=args.exp_name, i=args.seed)

    launch_kwargs = dict(
        mode=args.mode,
        variant=variant,
        exp_prefix=group,
        exp_name=label,
        n_parallel=1,
        seed=run_params['seed'],
        terminate_machine=True,
        log_dir=args.log_dir,
        snapshot_mode=run_params['snapshot_mode'],
        snapshot_gap=run_params['snapshot_gap'],
        sync_s3_pkl=run_params['sync_pkl'],
    )
    run_sac_experiment(run_experiment, **launch_kwargs)
Ejemplo n.º 7
0
def launch_experiments(args):
    """Launch a single local run through ``run_sac_experiment``.

    The log directory is ``/root/code/log/<args.domain>/<timestamp()>``;
    snapshot mode 'gap' with a gap of 100 is requested.

    Args:
        args: Object providing a ``domain`` attribute.
    """
    num_experiments = 1
    print('Launching {} experiments.'.format(num_experiments))

    for run_number in range(1, num_experiments + 1):
        print("Experiment: {}/{}".format(run_number, num_experiments))

        launch_kwargs = dict(
            mode='local',
            n_parallel=1,
            terminate_machine=True,
            log_dir='/root/code/log/{0}/{1}'.format(args.domain, timestamp()),
            snapshot_mode='gap',
            snapshot_gap=100,
            sync_s3_pkl=True,
        )
        run_sac_experiment(run_experiment, **launch_kwargs)
Ejemplo n.º 8
0
def launch_experiments(variant_generator):
    """Pass every variant from the generator to ``run_sac_experiment``.

    The run mode, experiment name and log directory come from the
    module-level ``args`` object; the seed and snapshot settings come from
    the variant itself.

    Args:
        variant_generator: Object whose ``variants()`` method returns a
            sized iterable of variant mappings.
    """
    variants = variant_generator.variants()

    for index, variant in enumerate(variants):
        # This banner is emitted once per variant, not once overall.
        print('Launching {} experiments.'.format(len(variants)))

        prefix = variant['prefix']
        launch_kwargs = dict(
            mode=args.mode,
            variant=variant,
            exp_prefix=prefix + '/' + args.exp_name,
            exp_name=prefix + '-' + args.exp_name + '-' + '{:02}'.format(index),
            n_parallel=1,
            seed=variant['seed'],
            terminate_machine=True,
            log_dir=args.log_dir,
            snapshot_mode=variant['snapshot_mode'],
            snapshot_gap=variant['snapshot_gap'],
            sync_s3_pkl=variant['sync_pkl'],
        )
        run_sac_experiment(run_experiment, **launch_kwargs)
Ejemplo n.º 9
0
def launch_experiments(variant_generator):
    """Pass every variant to ``run_sac_experiment`` with its own log dir.

    The log directory for each variant is obtained from
    ``get_logdir(args, variant)``, where ``args`` is the module-level
    argument object; the seed and snapshot settings come from the variant.

    Args:
        variant_generator: Object whose ``variants()`` method returns a
            sized iterable of variant mappings.
    """
    variants = variant_generator.variants()

    for index, variant in enumerate(variants):
        variant_log_dir = get_logdir(args, variant)
        # This banner is emitted once per variant, not once overall.
        print('Launching {} experiments.'.format(len(variants)))

        prefix = variant['prefix']
        launch_kwargs = dict(
            mode=args.mode,
            variant=variant,
            exp_prefix=prefix + '/' + args.exp_name,
            exp_name=prefix + '-' + args.exp_name + '-' + '{:02}'.format(index),
            # Increasing n_parallel barely affects performance, but breaks
            # learning of the hierarchical policy.
            n_parallel=1,
            seed=variant['seed'],
            terminate_machine=True,
            log_dir=variant_log_dir,
            snapshot_mode=variant['snapshot_mode'],
            snapshot_gap=variant['snapshot_gap'],
            sync_s3_pkl=variant['sync_pkl'],
        )
        run_sac_experiment(run_experiment, **launch_kwargs)
Ejemplo n.º 10
0
def launch_experiments():
    """Launch a single local run configured by the result of ``arg()``.

    The experiment prefix is ``<domain>/<exp_name>`` and the experiment name
    is ``<domain>-<exp_name>-00``; the trailing index is always 0. Logs go
    to ``/root/code/log/<domain>/<timestamp()>``.
    """
    cli = arg()
    num_experiments = 1
    print('Launching {} experiments.'.format(num_experiments))

    for run_number in range(1, num_experiments + 1):
        print("Experiment: {}/{}".format(run_number, num_experiments))
        group = args_domain_prefix = cli.domain + '/' + cli.exp_name
        label = '{prefix}-{exp_name}-{i:02}'.format(
            prefix=cli.domain, exp_name=cli.exp_name, i=0)

        launch_kwargs = dict(
            mode='local',
            exp_prefix=group,
            exp_name=label,
            n_parallel=1,
            terminate_machine=True,
            log_dir='/root/code/log/{0}/{1}'.format(cli.domain, timestamp()),
            snapshot_mode='gap',
            snapshot_gap=100,
            sync_s3_pkl=True,
        )
        run_sac_experiment(run_experiment, **launch_kwargs)
def launch_experiments(args):
    """Launch the 'ant/cross-maze' run with index 0.

    The banner reports five experiments, but exactly one
    ``run_sac_experiment`` call is made, for index 0.

    Args:
        args: Object providing ``exp_name``, ``mode`` and ``log_dir``
            attributes.
    """
    num_experiments = 5
    print('Launching {} experiments.'.format(num_experiments))

    # Only index 0 is launched.
    index = 0
    prefix = 'ant/cross-maze'
    print("Experiment: {}/{}".format(index, num_experiments))

    group = prefix + '/' + args.exp_name
    label = '{prefix}-{exp_name}-{i:02}'.format(
        prefix=prefix, exp_name=args.exp_name, i=index)

    launch_kwargs = dict(
        mode=args.mode,
        exp_prefix=group,
        exp_name=label,
        n_parallel=1,
        terminate_machine=True,
        log_dir=args.log_dir,
        snapshot_mode='gap',
        snapshot_gap=1000,
        sync_s3_pkl=True,
    )
    run_sac_experiment(run_experiment, **launch_kwargs)
Ejemplo n.º 12
0
        env=env,
        policy=policy,
        initial_exploration_policy=initial_exploration_policy,
        pool=pool,
        qf1=qf1,
        qf2=qf2,
        vf=vf,
        lr=3e-4,
        scale_reward=20,
        discount=0.99,
        tau=0.005,
        reparameterize=True,
        target_update_interval=1,
        action_prior='uniform',
        save_full_state=False,
    )

    algorithm._sess.run(tf.global_variables_initializer())

    algorithm.train()


if __name__ == "__main__":
    # Script entry point: run the experiment in 'local' mode, logging under
    # a timestamped directory and requesting 'gap' snapshots with gap 100.
    reach_log_dir = '/root/code/log/prim/reach/{0}'.format(timestamp())
    run_sac_experiment(
        run_experiment,
        mode='local',
        log_dir=reach_log_dir,
        snapshot_mode='gap',
        snapshot_gap=100,
    )