Beispiel #1
0
def train_w_parameters(cmd):
    args = parse_command(cmd)

    # convert to dictionary
    params = vars(args)
    params['double_q'] = True
    params['num_agent_train_steps_per_iter'] = 1
    params['num_critic_updates_per_agent_update'] = 1
    params['exploit_weight_schedule'] = ConstantSchedule(1.0)
    params['video_log_freq'] = -1  # This param is not used for DQN
    params['num_timesteps'] = 50000
    params['learning_starts'] = 2000
    params['eps'] = 0.2
    ##################################
    ### CREATE DIRECTORY FOR LOGGING
    ##################################

    if params['env_name'] == 'PointmassEasy-v0':
        params['ep_len'] = 50
    if params['env_name'] == 'PointmassMedium-v0':
        params['ep_len'] = 150
    if params['env_name'] == 'PointmassHard-v0':
        params['ep_len'] = 100
    if params['env_name'] == 'PointmassVeryHard-v0':
        params['ep_len'] = 200

    if params['use_rnd']:
        params['explore_weight_schedule'] = PiecewiseSchedule(
            [(0, 1), (params['num_exploration_steps'], 0)], outside_value=0.0)
    else:
        params['explore_weight_schedule'] = ConstantSchedule(0.0)

    if params['unsupervised_exploration']:
        params['explore_weight_schedule'] = ConstantSchedule(1.0)
        params['exploit_weight_schedule'] = ConstantSchedule(0.0)

        if not params['use_rnd']:
            params['learning_starts'] = params['num_exploration_steps']

    logdir_prefix = 'hw5_expl_'  # keep for autograder
    data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             '../../data')

    if not (os.path.exists(data_path)):
        os.makedirs(data_path)

    logdir = logdir_prefix + args.exp_name + '_' + args.env_name + '_' + time.strftime(
        "%d-%m-%Y_%H-%M-%S")
    logdir = os.path.join(data_path, logdir)
    params['logdir'] = logdir
    if not (os.path.exists(logdir)):
        os.makedirs(logdir)

    print("\n\n\nLOGGING TO: ", logdir, "\n\n\n")

    trainer = Q_Trainer(params)
    trainer.run_training_loop()
Beispiel #2
0
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--env_name',
                        default='PointmassHard-v0',
                        choices=('PointmassEasy-v0', 'PointmassMedium-v0',
                                 'PointmassHard-v0', 'PointmassVeryHard-v0'))
    parser.add_argument('--use_modified_rew', action='store_true')
    parser.add_argument('--exp_name', type=str, default='todo')

    parser.add_argument('--eval_batch_size', type=int, default=1000)
    parser.add_argument('--batch_size', type=int, default=256)

    parser.add_argument('--use_rnd', action='store_true')
    parser.add_argument('--num_exploration_steps', type=int, default=10000)
    parser.add_argument('--unsupervised_exploration', action='store_true')

    parser.add_argument('--offline_exploitation', action='store_true')
    parser.add_argument('--cql_alpha', type=float, default=0.0)

    parser.add_argument('--exploit_rew_shift', type=float, default=0.0)
    parser.add_argument('--exploit_rew_scale', type=float, default=1.0)

    parser.add_argument('--rnd_output_size', type=int, default=5)
    parser.add_argument('--rnd_n_layers', type=int, default=2)
    parser.add_argument('--rnd_size', type=int, default=400)

    parser.add_argument('--seed', type=int, default=2)
    parser.add_argument('--no_gpu', '-ngpu', action='store_true')
    parser.add_argument('--which_gpu', '-gpu_id', default=0)
    parser.add_argument('--scalar_log_freq', type=int, default=int(1e3))
    parser.add_argument('--save_params', action='store_true')

    args = parser.parse_args()

    # convert to dictionary
    params = vars(args)
    params['double_q'] = True
    params['num_agent_train_steps_per_iter'] = 1
    params['num_critic_updates_per_agent_update'] = 1
    params['exploit_weight_schedule'] = ConstantSchedule(1.0)
    params['video_log_freq'] = -1  # This param is not used for DQN
    params['num_timesteps'] = 50000
    params['learning_starts'] = 2000
    params['eps'] = 0.2
    ##################################
    ### CREATE DIRECTORY FOR LOGGING
    ##################################

    if params['env_name'] == 'PointmassEasy-v0':
        params['ep_len'] = 50
    if params['env_name'] == 'PointmassMedium-v0':
        params['ep_len'] = 150
    if params['env_name'] == 'PointmassHard-v0':
        params['ep_len'] = 100
    if params['env_name'] == 'PointmassVeryHard-v0':
        params['ep_len'] = 200

    if params['use_rnd']:
        params['explore_weight_schedule'] = PiecewiseSchedule(
            [(0, 1), (params['num_exploration_steps'], 0)], outside_value=0.0)
    else:
        params['explore_weight_schedule'] = ConstantSchedule(0.0)

    if params['unsupervised_exploration']:
        params['explore_weight_schedule'] = ConstantSchedule(1.0)
        params['exploit_weight_schedule'] = ConstantSchedule(0.0)

        if not params['use_rnd']:
            params['learning_starts'] = params['num_exploration_steps']

    logdir_prefix = 'hw5_expl_'  # keep for autograder
    data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             '../../data')

    if not (os.path.exists(data_path)):
        os.makedirs(data_path)

    logdir = logdir_prefix + args.exp_name + '_' + args.env_name + '_' + time.strftime(
        "%d-%m-%Y_%H-%M-%S")
    logdir = os.path.join(data_path, logdir)
    params['logdir'] = logdir
    if not (os.path.exists(logdir)):
        os.makedirs(logdir)

    print("\n\n\nLOGGING TO: ", logdir, "\n\n\n")

    trainer = Q_Trainer(params)
    trainer.run_training_loop()
def main():

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--env_name",
        default="PointmassHard-v0",
        choices=(
            "PointmassEasy-v0",
            "PointmassMedium-v0",
            "PointmassHard-v0",
            "PointmassVeryHard-v0",
        ),
    )

    parser.add_argument("--exp_name", type=str, default="todo")

    parser.add_argument("--eval_batch_size", type=int, default=1000)
    parser.add_argument("--batch_size", type=int, default=256)

    parser.add_argument("--use_rnd", action="store_true")
    parser.add_argument("--num_exploration_steps", type=int, default=10000)
    parser.add_argument("--unsupervised_exploration", action="store_true")

    parser.add_argument("--offline_exploitation", action="store_true")
    parser.add_argument("--cql_alpha", type=float, default=0.0)

    parser.add_argument("--exploit_rew_shift", type=float, default=0.0)
    parser.add_argument("--exploit_rew_scale", type=float, default=1.0)

    parser.add_argument("--rnd_output_size", type=int, default=5)
    parser.add_argument("--rnd_n_layers", type=int, default=2)
    parser.add_argument("--rnd_size", type=int, default=400)

    parser.add_argument("--seed", type=int, default=2)
    parser.add_argument("--no_gpu", "-ngpu", action="store_true")
    parser.add_argument("--which_gpu", "-gpu_id", default=0)
    parser.add_argument("--scalar_log_freq", type=int, default=int(1e3))
    parser.add_argument("--save_params", action="store_true")

    # HW5: Custom exploration strategy
    parser.add_argument("--modified_eps_greedy", action="store_true")

    args = parser.parse_args()

    # convert to dictionary
    params = vars(args)
    params["double_q"] = True
    params["num_agent_train_steps_per_iter"] = 1
    params["num_critic_updates_per_agent_update"] = 1
    params["exploit_weight_schedule"] = ConstantSchedule(1.0)
    params["video_log_freq"] = -1  # This param is not used for DQN
    params["num_timesteps"] = 50000
    params["learning_starts"] = 2000
    params["eps"] = 0.2
    ##################################
    ### CREATE DIRECTORY FOR LOGGING
    ##################################

    if params["env_name"] == "PointmassEasy-v0":
        params["ep_len"] = 50
    if params["env_name"] == "PointmassMedium-v0":
        params["ep_len"] = 150
    if params["env_name"] == "PointmassHard-v0":
        params["ep_len"] = 100
    if params["env_name"] == "PointmassVeryHard-v0":
        params["ep_len"] = 200

    if params["use_rnd"]:
        params["explore_weight_schedule"] = PiecewiseSchedule(
            [(0, 1), (params["num_exploration_steps"], 0)], outside_value=0.0
        )
    else:
        params["explore_weight_schedule"] = ConstantSchedule(0.0)

    if params["unsupervised_exploration"]:
        params["explore_weight_schedule"] = ConstantSchedule(1.0)
        params["exploit_weight_schedule"] = ConstantSchedule(0.0)

        if not params["use_rnd"]:
            params["learning_starts"] = params["num_exploration_steps"]

    logdir_prefix = "hw5_expl_"  # keep for autograder
    data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../data")

    if not (os.path.exists(data_path)):
        os.makedirs(data_path)

    logdir = (
        logdir_prefix
        + args.exp_name
        + "_"
        + args.env_name
        + "_"
        + time.strftime("%d-%m-%Y_%H-%M-%S")
    )
    logdir = os.path.join(data_path, logdir)
    params["logdir"] = logdir
    if not (os.path.exists(logdir)):
        os.makedirs(logdir)

    print("\n\n\nLOGGING TO: ", logdir, "\n\n\n")

    trainer = Q_Trainer(params)
    trainer.run_training_loop()