Example #1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='breakout-medium-v0')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--gpu', type=int)
    args = parser.parse_args()

    dataset, env = d3rlpy.datasets.get_atari(args.dataset)

    # fix seed
    d3rlpy.seed(args.seed)
    env.seed(args.seed)

    _, test_episodes = train_test_split(dataset, test_size=0.2)

    cql = d3rlpy.algos.DiscreteCQL(
        optim_factory=d3rlpy.models.optimizers.AdamFactory(eps=1e-2 / 32),
        scaler='pixel',
        n_frames=4,
        q_func_factory='qr',
        use_gpu=args.gpu)

    env_scorer = d3rlpy.metrics.evaluate_on_environment(env, epsilon=0.001)

    cql.fit(dataset.episodes,
            eval_episodes=test_episodes,
            n_steps=50000000,
            n_steps_per_epoch=10000,
            scorers={
                'environment': env_scorer,
                'value_scale': d3rlpy.metrics.average_value_estimation_scorer,
            },
            experiment_name=f"DiscreteCQL_{args.dataset}_{args.seed}")
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='breakout-medium-v0')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--gpu', action='store_true')
    args = parser.parse_args()

    d3rlpy.seed(args.seed)

    dataset, env = d3rlpy.datasets.get_atari(args.dataset)

    _, test_episodes = train_test_split(dataset, test_size=0.2)

    cql = d3rlpy.algos.DiscreteCQL(
        optim_factory=d3rlpy.models.optimizers.AdamFactory(eps=1e-2 / 32),
        scaler='pixel',
        n_frames=4,
        q_func_factory='qr',
        use_gpu=args.gpu)

    scorers = {
        'env': d3rlpy.metrics.scorer.evaluate_on_environment(env,
                                                             epsilon=0.001),
        'value_scale': d3rlpy.metrics.scorer.average_value_estimation_scorer
    }

    cql.fit(dataset.episodes,
            eval_episodes=test_episodes,
            n_epochs=2000,
            scorers=scorers)
Example #3
def main(args):
    dataset, env = get_pybullet(args.dataset)

    d3rlpy.seed(args.seed)

    train_episodes, test_episodes = train_test_split(dataset, test_size=0.2)

    device = None if args.gpu is None else Device(args.gpu)

    dynamics = ProbabilisticEnsembleDynamics(use_gpu=device)
    dynamics.fit(train_episodes,
                 eval_episodes=test_episodes,
                 n_steps=100000,
                 scorers={
                     "obs_error": dynamics_observation_prediction_error_scorer,
                     "reward_error": dynamics_reward_prediction_error_scorer,
                 })

    combo = COMBO(q_func_factory=args.q_func,
                  dynamics=dynamics,
                  use_gpu=device)

    combo.fit(train_episodes,
              eval_episodes=test_episodes,
              n_steps=1000000,
              scorers={
                  'environment': evaluate_on_environment(env),
                  'td_error': td_error_scorer,
                  'discounted_advantage': discounted_sum_of_advantage_scorer,
                  'value_scale': average_value_estimation_scorer,
                  'value_std': value_estimation_std_scorer,
                  'action_diff': continuous_action_diff_scorer
              })
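
Examples written as main(args), such as Example #3 above, assume the caller parses the command line and imports the referenced names at module level. A hedged sketch of that wrapper for Example #3: the flag names are inferred from the args.dataset, args.seed, args.q_func, and args.gpu accesses, the module paths follow the d3rlpy layout used elsewhere in this listing, and the defaults are illustrative only.

import argparse

import d3rlpy
from d3rlpy.algos import COMBO
from d3rlpy.datasets import get_pybullet
from d3rlpy.dynamics import ProbabilisticEnsembleDynamics
from d3rlpy.gpu import Device
from d3rlpy.metrics.scorer import (
    average_value_estimation_scorer,
    continuous_action_diff_scorer,
    discounted_sum_of_advantage_scorer,
    dynamics_observation_prediction_error_scorer,
    dynamics_reward_prediction_error_scorer,
    evaluate_on_environment,
    td_error_scorer,
    value_estimation_std_scorer,
)
from sklearn.model_selection import train_test_split

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='hopper-bullet-mixed-v0')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--q_func', type=str, default='mean')
    parser.add_argument('--gpu', type=int)
    main(parser.parse_args())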
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='hopper-medium-v0')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--gpu', action='store_true')
    args = parser.parse_args()

    d3rlpy.seed(args.seed)

    dataset, env = d3rlpy.datasets.get_d4rl(args.dataset)

    _, test_episodes = train_test_split(dataset, test_size=0.2)

    vae_encoder = d3rlpy.models.encoders.VectorEncoderFactory([750, 750])

    if 'halfcheetah' in env.unwrapped.spec.id.lower():
        kernel = 'gaussian'
    else:
        kernel = 'laplacian'

    bear = d3rlpy.algos.BEAR(imitator_encoder_factory=vae_encoder,
                             temp_learning_rate=0.0,
                             initial_temperature=1e-20,
                             kernel_type=kernel,
                             use_gpu=args.gpu)

    scorers = {
        'environment': d3rlpy.metrics.scorer.evaluate_on_environment(env),
        'value_scale': d3rlpy.metrics.scorer.average_value_estimation_scorer
    }

    bear.fit(dataset.episodes,
             eval_episodes=test_episodes,
             n_epochs=2000,
             scorers=scorers)
Example #5
def main(args):
    dataset, env = get_pybullet(args.dataset)

    d3rlpy.seed(args.seed)

    train_episodes, test_episodes = train_test_split(dataset, test_size=0.2)

    device = None if args.gpu is None else Device(args.gpu)

    encoder_factory = VectorEncoderFactory(hidden_units=[256, 256, 256, 256])

    awac = AWAC(actor_encoder_factory=encoder_factory,
                critic_encoder_factory=encoder_factory,
                q_func_factory=args.q_func,
                use_gpu=device)

    awac.fit(train_episodes,
             eval_episodes=test_episodes,
             n_epochs=1000,
             scorers={
                 'environment': evaluate_on_environment(env),
                 'td_error': td_error_scorer,
                 'discounted_advantage': discounted_sum_of_advantage_scorer,
                 'value_scale': average_value_estimation_scorer,
                 'value_std': value_estimation_std_scorer,
                 'action_diff': continuous_action_diff_scorer
             })
Example #6
File: cql.py Project: wx-b/d3rlpy
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='hopper-medium-v0')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--gpu', type=int)
    args = parser.parse_args()

    dataset, env = d3rlpy.datasets.get_dataset(args.dataset)

    # fix seed
    d3rlpy.seed(args.seed)
    env.seed(args.seed)

    _, test_episodes = train_test_split(dataset, test_size=0.2)

    encoder = d3rlpy.models.encoders.VectorEncoderFactory([256, 256, 256])

    cql = d3rlpy.algos.CQL(actor_encoder_factory=encoder,
                           critic_encoder_factory=encoder,
                           alpha_learning_rate=0.0,
                           use_gpu=args.gpu)

    cql.fit(dataset.episodes,
            eval_episodes=test_episodes,
            n_steps=500000,
            n_steps_per_epoch=1000,
            save_interval=10,
            scorers={
                'environment': d3rlpy.metrics.evaluate_on_environment(env),
                'value_scale': d3rlpy.metrics.average_value_estimation_scorer,
            },
            experiment_name=f"CQL_{args.dataset}_{args.seed}")
Example #7
File: bcq.py Project: vmbbc/d3rlpy
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='hopper-medium-v0')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--gpu', type=int)
    args = parser.parse_args()

    d3rlpy.seed(args.seed)

    dataset, env = d3rlpy.datasets.get_dataset(args.dataset)

    _, test_episodes = train_test_split(dataset, test_size=0.2)

    vae_encoder = d3rlpy.models.encoders.VectorEncoderFactory([750, 750])

    rl_encoder = d3rlpy.models.encoders.VectorEncoderFactory([400, 300])

    bcq = d3rlpy.algos.BCQ(actor_encoder_factory=rl_encoder,
                           critic_encoder_factory=rl_encoder,
                           imitator_encoder_factory=vae_encoder,
                           use_gpu=args.gpu)

    scorers = {
        'environment': d3rlpy.metrics.scorer.evaluate_on_environment(env),
        'value_scale': d3rlpy.metrics.scorer.average_value_estimation_scorer
    }

    bcq.fit(dataset.episodes,
            eval_episodes=test_episodes,
            n_steps=500000,
            n_steps_per_epoch=1000,
            save_interval=10,
            scorers=scorers,
            experiment_name=f"BCQ_{args.dataset}_{args.seed}")
Example #8
File: sac.py Project: vmbbc/d3rlpy
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='Hopper-v2')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--gpu', action='store_true')
    args = parser.parse_args()

    d3rlpy.seed(args.seed)

    env = gym.make(args.env)
    eval_env = gym.make(args.env)

    # setup algorithm
    sac = d3rlpy.algos.SAC(batch_size=256,
                           actor_learning_rate=3e-4,
                           critic_learning_rate=3e-4,
                           temp_learning_rate=3e-4,
                           use_gpu=args.gpu)

    # replay buffer for experience replay
    buffer = d3rlpy.online.buffers.ReplayBuffer(maxlen=1000000, env=env)

    # start training
    sac.fit_online(env,
                   buffer,
                   eval_env=eval_env,
                   n_steps=1000000,
                   n_steps_per_epoch=10000,
                   update_interval=1,
                   update_start_step=1000)
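
Example #8 stops as soon as fit_online returns; the snippet never persists or queries the learned policy. A minimal follow-on sketch that could be appended inside main(), assuming d3rlpy's save_model/predict methods and the single-return env.reset() of the Gym version used above (the file name is illustrative, and numpy is assumed imported as np):

    # persist the trained parameters (illustrative path)
    sac.save_model('sac_hopper.pt')

    # greedy action for a single observation
    observation = eval_env.reset()
    action = sac.predict(np.expand_dims(observation, axis=0))[0]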
Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='hopper-medium-v0')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--gpu', action='store_true')
    args = parser.parse_args()

    d3rlpy.seed(args.seed)

    dataset, env = d3rlpy.datasets.get_d4rl(args.dataset)

    _, test_episodes = train_test_split(dataset, test_size=0.2)

    encoder = d3rlpy.models.encoders.VectorEncoderFactory([256, 256, 256])

    cql = d3rlpy.algos.CQL(actor_encoder_factory=encoder,
                           critic_encoder_factory=encoder,
                           alpha_learning_rate=0.0,
                           use_gpu=args.gpu)

    scorers = {
        'environment': d3rlpy.metrics.scorer.evaluate_on_environment(env),
        'value_scale': d3rlpy.metrics.scorer.average_value_estimation_scorer
    }

    cql.fit(dataset.episodes,
            eval_episodes=test_episodes,
            n_epochs=2000,
            scorers=scorers)
Example #10
def main(args):
    dataset, env = get_atari(args.dataset)

    d3rlpy.seed(args.seed)

    train_episodes, test_episodes = train_test_split(dataset, test_size=0.2)

    bc = DiscreteBC(
        n_frames=4,  # frame stacking
        scaler='pixel',
        use_gpu=args.gpu)

    bc.fit(train_episodes,
           eval_episodes=test_episodes,
           n_epochs=100,
           scorers={'environment': evaluate_on_environment(env, epsilon=0.05)})
Example #11
def main(args):
    dataset, env = get_atari(args.dataset)

    d3rlpy.seed(args.seed)

    train_episodes, test_episodes = train_test_split(dataset, test_size=0.2)

    device = None if args.gpu is None else Device(args.gpu)

    bc = DiscreteBC(n_epochs=100,
                    scaler='pixel',
                    use_batch_norm=False,
                    use_gpu=device)

    bc.fit(train_episodes,
           eval_episodes=test_episodes,
           scorers={'environment': evaluate_on_environment(env, epsilon=0.05)})
Example #12
def main(args):
    dataset, env = get_pybullet(args.dataset)

    d3rlpy.seed(args.seed)

    train_episodes, test_episodes = train_test_split(dataset, test_size=0.2)

    device = None if args.gpu is None else Device(args.gpu)

    bc = BC(n_epochs=100, use_gpu=device)

    bc.fit(train_episodes,
           eval_episodes=test_episodes,
           scorers={
               'environment': evaluate_on_environment(env),
               'action_diff': continuous_action_diff_scorer
           })
Example #13
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--gpu', action='store_true')
    args = parser.parse_args()

    # get wrapped atari environment
    env = d3rlpy.envs.Atari(gym.make(args.env))
    eval_env = d3rlpy.envs.Atari(gym.make(args.env), is_eval=True)

    # fix seed
    d3rlpy.seed(args.seed)
    env.seed(args.seed)
    eval_env.seed(args.seed)

    # setup algorithm
    dqn = d3rlpy.algos.DQN(
        batch_size=32,
        learning_rate=5e-5,
        # Adam epsilon scaled as 0.01 / batch_size (batch_size=32 above)
        optim_factory=d3rlpy.models.optimizers.AdamFactory(eps=1e-2 / 32),
        # sync target every 2500 gradient steps (~10k env steps with update_interval=4)
        target_update_interval=10000 // 4,
        q_func_factory=d3rlpy.models.q_functions.QRQFunctionFactory(
            n_quantiles=200),
        scaler='pixel',
        n_frames=4,
        use_gpu=args.gpu)

    # replay buffer for experience replay
    buffer = d3rlpy.online.buffers.ReplayBuffer(maxlen=1000000, env=env)

    # epsilon-greedy explorer
    explorer = d3rlpy.online.explorers.LinearDecayEpsilonGreedy(
        start_epsilon=1.0, end_epsilon=0.01, duration=1000000)

    # start training
    dqn.fit_online(env,
                   buffer,
                   explorer,
                   eval_env=eval_env,
                   eval_epsilon=0.001,
                   n_steps=50000000,
                   n_steps_per_epoch=100000,
                   update_interval=4,
                   update_start_step=50000)
Example #14
def main(args):
    dataset, env = get_pybullet(args.dataset)

    d3rlpy.seed(args.seed)

    train_episodes, test_episodes = train_test_split(dataset, test_size=0.2)

    device = None if args.gpu is None else Device(args.gpu)

    awr = AWR(n_epochs=100, use_gpu=device)

    awr.fit(train_episodes,
            eval_episodes=test_episodes,
            scorers={
                'environment': evaluate_on_environment(env),
                'td_error': td_error_scorer,
                'value_scale': average_value_estimation_scorer,
                'action_diff': continuous_action_diff_scorer
            })
Example #15
def main(args):
    dataset, env = get_pybullet(args.dataset)

    d3rlpy.seed(args.seed)

    train_episodes, test_episodes = train_test_split(dataset, test_size=0.2)

    device = None if args.gpu is None else Device(args.gpu)

    sac = SAC(n_epochs=100, q_func_type=args.q_func_type, use_gpu=device)

    sac.fit(train_episodes,
            eval_episodes=test_episodes,
            scorers={
                'environment': evaluate_on_environment(env),
                'td_error': td_error_scorer,
                'discounted_advantage': discounted_sum_of_advantage_scorer,
                'value_scale': average_value_estimation_scorer,
                'value_std': value_estimation_std_scorer,
                'action_diff': continuous_action_diff_scorer
            })
Example #16
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='hopper-medium-v0')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--gpu', type=int)
    args = parser.parse_args()

    dataset, env = d3rlpy.datasets.get_dataset(args.dataset)

    # fix seed
    d3rlpy.seed(args.seed)
    env.seed(args.seed)

    _, test_episodes = train_test_split(dataset, test_size=0.2)

    if 'medium-replay' in env.unwrapped.spec.id.lower():
        vae_encoder = d3rlpy.models.encoders.VectorEncoderFactory([128, 128])
    else:
        vae_encoder = d3rlpy.models.encoders.VectorEncoderFactory([750, 750])

    encoder = d3rlpy.models.encoders.VectorEncoderFactory([400, 300])

    plas = d3rlpy.algos.PLASWithPerturbation(
        actor_encoder_factory=encoder,
        critic_encoder_factory=encoder,
        imitator_encoder_factory=vae_encoder,
        use_gpu=args.gpu)

    plas.fit(
        dataset.episodes,
        eval_episodes=test_episodes,
        n_steps=500000,
        n_steps_per_epoch=1000,
        save_interval=10,
        scorers={
            'environment': d3rlpy.metrics.evaluate_on_environment(env),
            'value_scale': d3rlpy.metrics.average_value_estimation_scorer,
        },
        experiment_name=f"PLASWithPerturbation_{args.dataset}_{args.seed}")
Example #17
def main(args):
    dataset, env = get_atari(args.dataset)

    d3rlpy.seed(args.seed)

    train_episodes, test_episodes = train_test_split(dataset, test_size=0.2)

    dqn = DQN(
        n_frames=4,  # frame stacking
        q_func_type=args.q_func_type,
        scaler='pixel',
        use_gpu=args.gpu)

    dqn.fit(train_episodes,
            eval_episodes=test_episodes,
            n_epochs=100,
            scorers={
                'environment': evaluate_on_environment(env, epsilon=0.05),
                'td_error': td_error_scorer,
                'discounted_advantage': discounted_sum_of_advantage_scorer,
                'value_scale': average_value_estimation_scorer
            })
Example #18
File: bear.py Project: wx-b/d3rlpy
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='hopper-medium-v0')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--gpu', type=int)
    args = parser.parse_args()

    dataset, env = d3rlpy.datasets.get_dataset(args.dataset)

    # fix seed
    d3rlpy.seed(args.seed)
    env.seed(args.seed)

    _, test_episodes = train_test_split(dataset, test_size=0.2)

    vae_encoder = d3rlpy.models.encoders.VectorEncoderFactory([750, 750])

    if 'halfcheetah' in env.unwrapped.spec.id.lower():
        kernel = 'gaussian'
    else:
        kernel = 'laplacian'

    bear = d3rlpy.algos.BEAR(imitator_encoder_factory=vae_encoder,
                             temp_learning_rate=0.0,
                             initial_temperature=1e-20,
                             mmd_kernel=kernel,
                             use_gpu=args.gpu)

    bear.fit(dataset.episodes,
             eval_episodes=test_episodes,
             n_steps=500000,
             n_steps_per_epoch=1000,
             save_interval=10,
             scorers={
                 'environment': d3rlpy.metrics.evaluate_on_environment(env),
                 'value_scale': d3rlpy.metrics.average_value_estimation_scorer,
             },
             experiment_name=f"BEAR_{args.dataset}_{args.seed}")
Example #19
def main(args):
    dataset, env = get_atari(args.dataset)

    d3rlpy.seed(args.seed)

    train_episodes, test_episodes = train_test_split(dataset, test_size=0.2)

    device = None if args.gpu is None else Device(args.gpu)

    dqn = DQN(n_epochs=100,
              q_func_type=args.q_func_type,
              scaler='pixel',
              use_batch_norm=False,
              use_gpu=device)

    dqn.fit(train_episodes,
            eval_episodes=test_episodes,
            scorers={
                'environment': evaluate_on_environment(env, epsilon=0.05),
                'td_error': td_error_scorer,
                'discounted_advantage': discounted_sum_of_advantage_scorer,
                'value_scale': average_value_estimation_scorer
            })
Example #20
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='hopper-medium-v0')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--gpu', type=int)
    args = parser.parse_args()

    # create dataset without masks
    dataset, env = d3rlpy.datasets.get_dataset(args.dataset)

    # fix seed
    d3rlpy.seed(args.seed)
    env.seed(args.seed)

    _, test_episodes = train_test_split(dataset, test_size=0.2)

    # prepare dynamics model
    dynamics_encoder = d3rlpy.models.encoders.VectorEncoderFactory(
        hidden_units=[200, 200, 200, 200],
        activation='swish',
    )
    dynamics_optim = d3rlpy.models.optimizers.AdamFactory(weight_decay=2.5e-5)
    dynamics = d3rlpy.dynamics.ProbabilisticEnsembleDynamics(
        encoder_factory=dynamics_encoder,
        optim_factory=dynamics_optim,
        learning_rate=1e-3,
        n_ensembles=5,
        use_gpu=args.gpu,
    )

    # train dynamics model
    dynamics.fit(dataset.episodes,
                 eval_episodes=test_episodes,
                 n_steps=100000,
                 scorers={
                     "obs_error": dynamics_observation_prediction_error_scorer,
                     "rew_error": dynamics_reward_prediction_error_scorer,
                 })

    if args.dataset in PARAMETER_TABLE:
        rollout_horizon, lam = PARAMETER_TABLE[args.dataset]
    else:
        rollout_horizon, lam = 5, 1

    # prepare MOPO
    mopo = d3rlpy.algos.MOPO(dynamics=dynamics,
                             rollout_horizon=rollout_horizon,
                             lam=lam,
                             use_gpu=args.gpu)

    # train MOPO
    mopo.fit(dataset.episodes,
             eval_episodes=test_episodes,
             n_steps=500000,
             n_steps_per_epoch=1000,
             save_interval=10,
             scorers={
                 "environment": d3rlpy.metrics.evaluate_on_environment(env),
                 'value_scale': d3rlpy.metrics.average_value_estimation_scorer
             },
             experiment_name=f"MOPO_{args.dataset}_{args.seed}")
Example #21
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='hopper-medium-v0')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--gpu', type=int)
    args = parser.parse_args()

    # create dataset without masks
    dataset, env = d3rlpy.datasets.get_dataset(args.dataset)

    # fix seed
    d3rlpy.seed(args.seed)
    env.seed(args.seed)

    _, test_episodes = train_test_split(dataset, test_size=0.2)

    # prepare dynamics model
    dynamics_encoder = d3rlpy.models.encoders.VectorEncoderFactory(
        hidden_units=[200, 200, 200, 200],
        activation='swish',
    )
    dynamics_optim = d3rlpy.models.optimizers.AdamFactory(weight_decay=2.5e-5)
    dynamics = d3rlpy.dynamics.ProbabilisticEnsembleDynamics(
        encoder_factory=dynamics_encoder,
        optim_factory=dynamics_optim,
        learning_rate=1e-3,
        n_ensembles=5,
        use_gpu=args.gpu,
    )

    # train dynamics model
    dynamics.fit(dataset.episodes,
                 eval_episodes=test_episodes,
                 n_steps=100000,
                 scorers={
                     "obs_error": dynamics_observation_prediction_error_scorer,
                     "rew_error": dynamics_reward_prediction_error_scorer,
                 })

    if 'halfcheetah' in args.dataset:
        conservative_weight = 0.5
    elif 'medium-expert' in args.dataset:
        conservative_weight = 5.0
    elif 'random' in args.dataset or 'medium-replay' in args.dataset:
        if 'hopper' in args.dataset:
            conservative_weight = 1.0
        else:
            conservative_weight = 0.5
    elif 'medium' in args.dataset:
        conservative_weight = 5.0
    else:
        conservative_weight = 1.0

    if 'walker2d' in args.dataset:
        critic_learning_rate = 1e-4
        actor_learning_rate = 1e-5
    else:
        critic_learning_rate = 3e-4
        actor_learning_rate = 1e-4

    # prepare combo
    encoder = d3rlpy.models.encoders.VectorEncoderFactory([256, 256, 256])
    combo = d3rlpy.algos.COMBO(dynamics=dynamics,
                               actor_encoder_factory=encoder,
                               critic_encoder_factory=encoder,
                               actor_learning_rate=actor_learning_rate,
                               critic_learning_rate=critic_learning_rate,
                               temp_learning_rate=actor_learning_rate,
                               conservative_weight=conservative_weight,
                               use_gpu=args.gpu)

    # train combo
    combo.fit(dataset.episodes,
              eval_episodes=test_episodes,
              n_steps=500000,
              n_steps_per_epoch=1000,
              save_interval=10,
              scorers={
                  "environment": d3rlpy.metrics.evaluate_on_environment(env),
                  'value_scale': d3rlpy.metrics.average_value_estimation_scorer
              },
              experiment_name=f"COMBO_{args.dataset}_{args.seed}")