Ejemplo n.º 1
0
def run_sac_experiment(main,
                       mode,
                       include_folders=None,
                       log_dir=None,
                       exp_prefix="experiment",
                       exp_name=None,
                       **kwargs):
    """Launch ``main`` through ``run_experiment_lite`` with filled-in defaults.

    Args:
        main: Forwarded to ``run_experiment_lite`` as ``stub_method_call``.
        mode: Launch mode, forwarded unchanged. When it equals ``'ec2'``,
            project folders are symlinked and passed along as
            ``added_project_directories``.
        include_folders: Optional iterable of extra folders to symlink in
            ``'ec2'`` mode. The caller's object is never modified.
        log_dir: Log directory. Defaults to
            ``DEFAULT_LOG_DIR/local/<exp_prefix with '_' -> '-'>/<exp_name>``.
        exp_prefix: Experiment prefix, forwarded unchanged.
        exp_name: Experiment name. Defaults to ``timestamp()``.
        **kwargs: Any further keyword arguments for ``run_experiment_lite``.
    """
    if exp_name is None:
        exp_name = timestamp()

    if log_dir is None:
        log_dir = os.path.join(DEFAULT_LOG_DIR, "local",
                               exp_prefix.replace("_", "-"), exp_name)

    if mode == 'ec2':
        # Copy before appending: the original appended 'sac' to the caller's
        # list, so a list reused across launches kept growing.
        folders = [] if include_folders is None else list(include_folders)
        folders.append('sac')

        kwargs.update(added_project_directories=[
            _create_symlink(folder) for folder in folders
        ])

    run_experiment_lite(stub_method_call=main,
                        mode=mode,
                        exp_prefix=exp_prefix,
                        exp_name=exp_name,
                        log_dir=log_dir,
                        **kwargs)
Ejemplo n.º 2
0
def parse_args():
    """Parse the command line and derive the environment name and log path.

    Besides the declared options, two attributes are attached to the result:
    ``env`` (looked up in ``ENVS`` by ``--envn``) and ``log_dir`` (a path
    assembled from the environment, cost type, task and reward scale).

    Returns:
        The populated ``argparse.Namespace``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--cost_type', type=float, default=10)
    cli.add_argument(
        '--domain', type=str, choices=AVAILABLE_DOMAINS, default=None)
    cli.add_argument(
        '--task', type=str, choices=AVAILABLE_TASKS, default='default')
    cli.add_argument(
        '--policy',
        type=str,
        choices=('gaussian', 'gmm', 'lsp'),
        default='gaussian')
    cli.add_argument('--envn', type=int, default=10)
    cli.add_argument('--scale', type=float, default=1)
    cli.add_argument('--exp_name', type=str, default=timestamp())
    cli.add_argument('--mode', type=str, default='local')

    parsed = cli.parse_args()
    parsed.env = ENVS[parsed.envn]

    # Collect the path pieces in order and join them once at the end.
    pieces = [
        '/home/wisrl/Downloads/log_sac/Mujoco/' + parsed.env,
        '_c%s' % parsed.cost_type,
    ]
    if parsed.task == 'delayed':
        pieces.append('_delayed%s' % str(DELAY_CONST))
    if 'cross' in parsed.task:
        pieces.append('_cross')
    pieces.append('/SAC')
    # The scale suffix is only added for a non-default scale.
    if parsed.scale != 1.0:
        pieces.append('_s%s' % str(parsed.scale))
    parsed.log_dir = ''.join(pieces)

    return parsed
Ejemplo n.º 3
0
def parse_args():
    """Parse the command-line options for an experiment launch.

    Returns:
        The ``argparse.Namespace`` produced from the process arguments.
    """
    cli = argparse.ArgumentParser()

    # (flag, add_argument keyword arguments), registered in this order.
    option_table = (
        ('--domain', dict(type=str, choices=AVAILABLE_DOMAINS, default=None)),
        ('--task', dict(type=str, choices=AVAILABLE_TASKS,
                        default='default')),
        ('--policy', dict(type=str, choices=('gaussian', 'gmm', 'lsp'),
                          default='gaussian')),
        ('--env', dict(type=str, default=DEFAULT_ENV)),
        ('--exp_name', dict(type=str, default=timestamp())),
        ('--mode', dict(type=str, default='local')),
        ('--log_dir', dict(type=str, default=None)),
        # RC: added to run time-complexity experiments.
        ('--n_train_repeat', dict(type=int, default=1)),
        ('--n_parallel', dict(type=int, default=1)),
        ('--n_epochs', dict(type=int, default=1000)),
        ('--gpu_fraction', dict(type=float, default=1.0)),
    )
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    return cli.parse_args()
Ejemplo n.º 4
0
def parse_args():
    """Parse the environment, experiment name, mode and log directory options.

    Returns:
        The ``argparse.Namespace`` produced from the process arguments.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--env', type=str, choices=AVAILABLE_ENVS, default='swimmer')
    # Remaining options are plain strings that differ only in their default.
    for flag, fallback in (('--exp_name', timestamp()),
                           ('--mode', 'local'),
                           ('--log_dir', None)):
        cli.add_argument(flag, type=str, default=fallback)
    return cli.parse_args()
Ejemplo n.º 5
0
def parse_args():
    """Parse the command-line options for a regularization experiment.

    The two batch-norm switches accept an explicit boolean word
    (``true/false``, ``t/f``, ``yes/no``, ``y/n``, ``1/0``, case-insensitive).
    They previously used ``type=bool``, which turns every non-empty string --
    including ``"False"`` -- into ``True``.

    Returns:
        The ``argparse.Namespace`` produced from the process arguments.
    """

    def _str2bool(text):
        """Convert a command-line word to a bool, rejecting unknown words."""
        lowered = text.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string stays False, as it was under ``bool('')``.
        if lowered in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value (e.g. T/F), got %r' % (text,))

    parser = argparse.ArgumentParser()
    parser.add_argument('--domain',
                        type=str,
                        choices=AVAILABLE_DOMAINS,
                        default=None)
    parser.add_argument('--task',
                        type=str,
                        choices=AVAILABLE_TASKS,
                        default='default')
    parser.add_argument('--policy',
                        type=str,
                        choices=('gaussian', 'gmm', 'lsp'),
                        default='gaussian')
    parser.add_argument('--env', type=str, default=DEFAULT_ENV)
    parser.add_argument('--exp_name', type=str, default=timestamp())
    parser.add_argument('--mode', type=str, default='local')
    parser.add_argument('--tau', type=float, default=0.005)
    parser.add_argument('--log_dir', type=str, default=None)
    parser.add_argument('--lr', type=float, default=-1.0)
    # L1 / L2 regularization of the policy.
    parser.add_argument('--l1regpi', type=float, default=0.0)
    parser.add_argument('--l2regpi', type=float, default=0.0)
    # L1 / L2 regularization of the value function (V only; the two Q
    # networks are not regularized, for simplicity).
    parser.add_argument('--l1regvf', type=float, default=0.0)
    parser.add_argument('--l2regvf', type=float, default=0.0)
    # Weight clipping for policy / value.
    parser.add_argument('--wclippi', type=float, default=0.0)
    parser.add_argument('--wclipvf', type=float, default=0.0)
    # Dropout keep probability for policy / value.
    parser.add_argument('--dropoutpi', type=float, default=1.0)
    parser.add_argument('--dropoutvf', type=float, default=1.0)
    # Entropy regularization coefficient, not the temperature of the maximum
    # entropy formulation (in this SAC implementation that temperature term
    # is fixed to 1).
    parser.add_argument('--ent_coef', type=float, default=0.0)
    # Batch norm for policy / value (T/F).
    parser.add_argument('--batchnormpi', type=_str2bool, default=False)
    parser.add_argument('--batchnormvf', type=_str2bool, default=False)
    # NOTE: '--gaussianreg' (float, default 1e-3) is intentionally left out.
    # It appears in the original code release and regularizes the mu and
    # logsigma of the policy output, but it is not applied with a gaussian
    # policy. All of our experiments use a gaussian policy, so the term does
    # not affect our results in any way.
    parser.add_argument('--reward_scale', type=float, default=-1.0)
    parser.add_argument('--num_hidden', type=int, default=256)
    parser.add_argument('--seed', type=int, default=1)
    # Save paths for the policy / value networks.
    parser.add_argument('--policypath', type=str, default='')
    parser.add_argument('--valuepath', type=str, default='')
    args = parser.parse_args()

    return args
Ejemplo n.º 6
0
def main():
    """Parse the command line and launch the 'multigoal' experiment locally."""
    cli_args = parse_args()
    variant = dict(policy_type=cli_args.policy_type)

    # Gather the launcher options first, then hand them over in one call.
    launch_options = dict(
        exp_prefix='multigoal',
        exp_name=timestamp(),
        variant=variant,
        snapshot_mode='last',
        n_parallel=1,
        seed=1,
        mode='local',
    )
    run_experiment_lite(run, **launch_options)
Ejemplo n.º 7
0
def parse_args():
    """Parse the command-line options for a Unity-environment run.

    ``--no_graphics`` accepts an explicit boolean word (``true/false``,
    ``t/f``, ``yes/no``, ``y/n``, ``1/0``, case-insensitive). It previously
    used ``type=bool``, which turns every non-empty string -- including
    ``"False"`` -- into ``True``.

    Returns:
        The ``argparse.Namespace`` produced from the process arguments.
    """

    def _str2bool(text):
        """Convert a command-line word to a bool, rejecting unknown words."""
        lowered = text.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string stays False, as it was under ``bool('')``.
        if lowered in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value (e.g. true/false), got %r' % (text,))

    parser = argparse.ArgumentParser()
    parser.add_argument('--env',
                        type=str,
                        choices=AVAILABLE_ENVS,
                        default='swimmer')
    parser.add_argument('--exp_name', type=str, default=timestamp())
    parser.add_argument('--mode', type=str, default='local')
    parser.add_argument('--log_dir', type=str, default='./logs/unity')
    parser.add_argument('--idx', type=int, default=0)
    parser.add_argument('--no_graphics', type=_str2bool, default=False)
    args = parser.parse_args()

    return args
Ejemplo n.º 8
0
def parse_args():
    """Parse the options for a run that takes a low-level policy path.

    Returns:
        The ``argparse.Namespace`` produced from the process arguments.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--env', type=str, choices=AVAILABLE_ENVS, default=DEFAULT_ENV)
    cli.add_argument('--exp_name', type=str, default=timestamp())
    cli.add_argument('--mode', type=str, default='local')
    cli.add_argument('--log_dir', type=str, default=None)
    # '-p' is the short alias for the low-level policy path.
    cli.add_argument('--low_level_policy_path', '-p', type=str, default=None)
    return cli.parse_args()
Ejemplo n.º 9
0
def launch_experiments(args):
    """Launch the configured number of local SAC experiments.

    Args:
        args: Object whose ``domain`` attribute names the log sub-directory.
    """
    num_experiments = 1
    print('Launching {} experiments.'.format(num_experiments))

    for ordinal in range(1, num_experiments + 1):
        print("Experiment: {}/{}".format(ordinal, num_experiments))

        # Each launch gets its own timestamped directory under the domain.
        run_log_dir = '/root/code/log/{0}/{1}'.format(args.domain, timestamp())
        run_sac_experiment(
            run_experiment,
            mode='local',
            n_parallel=1,
            terminate_machine=True,
            log_dir=run_log_dir,
            snapshot_mode='gap',
            snapshot_gap=100,
            sync_s3_pkl=True,
        )
def parse_args():
    """Parse the domain, policy, environment and logging options.

    Returns:
        The ``argparse.Namespace`` produced from the process arguments.
    """
    cli = argparse.ArgumentParser()

    # (flag, add_argument keyword arguments), registered in this order.
    option_table = (
        ('--domain', dict(type=str, choices=AVAILABLE_DOMAINS,
                          default='ant-cross-maze')),
        ('--policy', dict(type=str, choices=('gaussian', 'gaussian_ptr'),
                          default='gaussian_ptr')),
        ('--env', dict(type=str, default=DEFAULT_ENV)),
        ('--exp_name', dict(type=str, default=timestamp())),
        ('--mode', dict(type=str, default='local')),
        ('--log_dir', dict(type=str, default=None)),
    )
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    return cli.parse_args()
Ejemplo n.º 11
0
def parse_args():
    """Parse the environment, logging, seed, skill and 'x' options.

    Returns:
        The ``argparse.Namespace`` produced from the process arguments.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--env', type=str, choices=AVAILABLE_ENVS, default='swimmer')
    cli.add_argument('--exp_name', type=str, default=timestamp())
    cli.add_argument('--mode', type=str, default='local')
    cli.add_argument('--log_dir', type=str, default=None)

    # Integer-valued options.
    for flag, fallback in (('--seed', 1),
                           ('--num_skills', None),
                           ('--eval_freq', None)):
        cli.add_argument(flag, type=int, default=fallback)

    # Options declared without an explicit type.
    for flag in ('--xdir', '--xname'):
        cli.add_argument(flag, default=None)

    return cli.parse_args()
Ejemplo n.º 12
0
def parse_args():
    """Parse the options for a Baxter experiment.

    Returns:
        The ``argparse.Namespace`` produced from the process arguments.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--domain', type=str, default='Baxter')
    cli.add_argument('--task', type=str, default='default')
    cli.add_argument(
        '--policy',
        type=str,
        choices=('gaussian', 'gmm', 'lsp'),
        default='gaussian')
    cli.add_argument('--env', type=str, default='BaxterReachNG')
    cli.add_argument('--exp_name', type=str, default=timestamp())
    cli.add_argument('--mode', type=str, default='local')
    cli.add_argument('--log_dir', type=str, default=None)
    return cli.parse_args()
Ejemplo n.º 13
0
def parse_args():
    """Parse the options for an 'lsp' / 'gmm' policy experiment.

    Returns:
        The ``argparse.Namespace`` produced from the process arguments.
    """
    cli = argparse.ArgumentParser()

    # (flag, add_argument keyword arguments), registered in this order.
    option_table = (
        ('--domain', dict(type=str, choices=AVAILABLE_DOMAINS, default=None)),
        ('--task', dict(type=str, choices=AVAILABLE_TASKS,
                        default='default')),
        ('--policy', dict(type=str, choices=('lsp', 'gmm'), default='lsp')),
        ('--env', dict(type=str, default=DEFAULT_ENV)),
        ('--exp_name', dict(type=str, default=timestamp())),
        ('--mode', dict(type=str, default='local')),
        ('--log_dir', dict(type=str, default=None)),
    )
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    return cli.parse_args()
Ejemplo n.º 14
0
def launch_experiments():
    """Launch the configured number of local SAC experiments.

    Arguments come from ``arg()``. Each launch is named
    ``<domain>-<exp_name>-<two-digit index>`` and logs to a timestamped
    directory under ``/root/code/log/<domain>/``.
    """
    args = arg()
    num_experiments = 1
    print('Launching {} experiments.'.format(num_experiments))
    for i in range(num_experiments):
        print("Experiment: {}/{}".format(i + 1, num_experiments))
        experiment_prefix = args.domain + '/' + args.exp_name
        # Use the loop index in the name. It was hard-coded to 0, which would
        # give every experiment the same name once num_experiments exceeds 1.
        experiment_name = '{prefix}-{exp_name}-{i:02}'.format(
            prefix=args.domain, exp_name=args.exp_name, i=i)

        run_sac_experiment(
            run_experiment,
            mode='local',
            exp_prefix=experiment_prefix,
            exp_name=experiment_name,
            n_parallel=1,
            terminate_machine=True,
            log_dir='/root/code/log/{0}/{1}'.format(args.domain, timestamp()),
            snapshot_mode='gap',
            snapshot_gap=100,
            sync_s3_pkl=True,
        )
Ejemplo n.º 15
0
        env=env,
        policy=policy,
        initial_exploration_policy=initial_exploration_policy,
        pool=pool,
        qf1=qf1,
        qf2=qf2,
        vf=vf,
        lr=3e-4,
        scale_reward=20,
        discount=0.99,
        tau=0.005,
        reparameterize=True,
        target_update_interval=1,
        action_prior='uniform',
        save_full_state=False,
    )

    algorithm._sess.run(tf.global_variables_initializer())

    algorithm.train()


if __name__ == "__main__":
    # Script entry point: one local run, logging to a timestamped directory
    # under the 'reach' primitive folder.
    launch_log_dir = '/root/code/log/prim/reach/{0}'.format(timestamp())
    run_sac_experiment(
        run_experiment,
        mode='local',
        log_dir=launch_log_dir,
        snapshot_mode='gap',
        snapshot_gap=100,
    )
Ejemplo n.º 16
0
        env=env,
        policy=policy,
        initial_exploration_policy=initial_exploration_policy,
        pool=pool,
        qf1=qf1,
        qf2=qf2,
        vf=vf,
        lr=3e-4,
        scale_reward=20,
        discount=0.99,
        tau=0.005,
        reparameterize=True,
        target_update_interval=1,
        action_prior='uniform',
        save_full_state=False,
    )

    algorithm._sess.run(tf.global_variables_initializer())

    algorithm.train()


if __name__ == "__main__":
    # Script entry point: one local run, logging to a timestamped directory
    # under the 'pick' primitive folder.
    launch_log_dir = '/root/code/log/prim/pick/{0}'.format(timestamp())
    run_sac_experiment(
        run_experiment,
        mode='local',
        log_dir=launch_log_dir,
        snapshot_mode='gap',
        snapshot_gap=100,
    )
        # plotter=plotter,
        lr=1e-3,
        discount=0.99,
        #tau=1e-4,
        target_update_interval=20,
        # reparameterize=False,
        save_full_state=False
    )

    algo.train()


# Script entry point: set up the log directory and logger outputs by hand,
# then call run_experiment() directly.
if __name__ == "__main__":

    exp_prefix = 'DQN-VoltVar34-exp2'
    exp_name = timestamp()
    # Layout: DEFAULT_LOG_DIR/local/<exp_prefix with '_' -> '-'>/<exp_name>.
    log_dir = os.path.join(
        DEFAULT_LOG_DIR,
        "local",
        exp_prefix.replace("_", "-"),
        exp_name)
    # exist_ok=True: do not fail if the directory is already there.
    os.makedirs(log_dir,exist_ok=True)
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode('last')
    # Tabular and text logs both go inside the run's log directory.
    tabular_log_file = os.path.join(log_dir,'progress.csv')
    text_log_file = os.path.join(log_dir,'debug.log')
    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)

    run_experiment()
    # run_sac_experiment(