Example #1
def pivoting_rl(args):
    # Helper names below (set_seed, DummyVecEnvWrapper, MujocoEnv, AdamFactory, TrainingInfo,
    # EpochInfo, EpisodeRewardMetric, StdoutStreaming) match the vel RL library's API; their
    # imports and the get_ddpg/get_ppo factory helpers are assumed to be defined elsewhere
    # in the same script.
    device = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')
    seed = 1002

    # Set random seed in python std lib, numpy and pytorch
    set_seed(seed)

    vec_env = DummyVecEnvWrapper(
        MujocoEnv('HalfCheetah-v2')
    ).instantiate(parallel_envs=1, seed=seed)

    if args.algo == 'ddpg':
        model, reinforcer = get_ddpg(vec_env, device)
    elif args.algo == 'ppo':
        model, reinforcer = get_ppo(vec_env, device)
    else:
        raise ValueError('Unknown algo: {}'.format(args.algo))

    # Optimizer helper - slightly unusual per-layer-group regularization settings copied from OpenAI code
    adam_optimizer = AdamFactory(
        lr=[1.0e-4, 1.0e-3, 1.0e-3],
        weight_decay=[0.0, 0.0, 0.001],
        eps=1.0e-4,
        layer_groups=True
    ).instantiate(model)

    # Overall information store for training information
    training_info = TrainingInfo(
        metrics=[
            EpisodeRewardMetric('episode_rewards'),  # Calculate average reward from episode
        ],
        callbacks=[StdoutStreaming()]  # Print live metrics every epoch to standard output
    )

    # A bit of training initialization bookkeeping...
    training_info.initialize()
    reinforcer.initialize_training(training_info)
    training_info.on_train_begin()

    # Train for 500 epochs of 1000 batches each so metrics average nicely
    num_epochs = int(1.0e6 / 2 / 1000)

    # Normal handrolled training loop
    for i in range(1, num_epochs+1):
        epoch_info = EpochInfo(
            training_info=training_info,
            global_epoch_idx=i,
            batches_per_epoch=1000,
            optimizer=adam_optimizer
        )

        reinforcer.train_epoch(epoch_info)

    training_info.on_train_end()
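
For reference, the set_seed helper called near the top of this example seeds, per its accompanying comment, the Python standard library, NumPy and PyTorch generators. A minimal self-contained sketch of such a helper (an illustration, not necessarily vel's actual implementation) could look like this:

import random

import numpy as np
import torch


def set_seed(seed: int) -> None:
    # Seed the Python stdlib, NumPy and PyTorch (CPU and, if present, CUDA) RNGs
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)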
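The function above also expects an args object carrying gpu and algo attributes. A minimal command-line entry point that supplies them via argparse (the flag names and defaults here are illustrative, not part of the original script) might be:

import argparse


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train DDPG or PPO on HalfCheetah-v2')
    parser.add_argument('--gpu', type=int, default=0, help='CUDA device index to use')
    parser.add_argument('--algo', default='ddpg', choices=['ddpg', 'ppo'], help='Algorithm to train')
    args = parser.parse_args()

    pivoting_rl(args)
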
Example #2
def half_cheetah_ddpg():
    # ... environment, model/reinforcer and optimizer setup (mirroring Example #1) is truncated
    # in this snippet ...
    training_info = TrainingInfo(
        metrics=[
            EpisodeRewardMetric('episode_rewards'),  # Calculate average reward from episode
        ],
        callbacks=[StdoutStreaming()]  # Print live metrics every epoch to standard output
    )

    # A bit of training initialization bookkeeping...
    training_info.initialize()
    reinforcer.initialize_training(training_info)
    training_info.on_train_begin()

    # Train for 500 epochs of 1000 batches each so metrics average nicely
    num_epochs = int(1.0e6 / 2 / 1000)

    # Normal handrolled training loop
    for i in range(1, num_epochs + 1):
        epoch_info = EpochInfo(training_info=training_info,
                               global_epoch_idx=i,
                               batches_per_epoch=1000,
                               optimizer=adam_optimizer)

        reinforcer.train_epoch(epoch_info)

    training_info.on_train_end()


if __name__ == '__main__':
    half_cheetah_ddpg()
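
Both examples pass per-layer-group learning rates and weight decays to AdamFactory (lr=[1.0e-4, 1.0e-3, 1.0e-3], weight_decay=[0.0, 0.0, 0.001]). Plain PyTorch expresses the same idea with optimizer parameter groups; the sketch below only illustrates that mechanism on a made-up three-part model and is not vel's implementation:

import torch
import torch.nn as nn

# Made-up model with three named parts, standing in for the real policy/value network
model = nn.ModuleDict({
    'input_block': nn.Linear(17, 64),
    'policy_head': nn.Linear(64, 6),
    'value_head': nn.Linear(64, 1),
})

# One Adam optimizer, three parameter groups, each with its own lr and weight decay
optimizer = torch.optim.Adam(
    [
        {'params': model['input_block'].parameters(), 'lr': 1.0e-4, 'weight_decay': 0.0},
        {'params': model['policy_head'].parameters(), 'lr': 1.0e-3, 'weight_decay': 0.0},
        {'params': model['value_head'].parameters(), 'lr': 1.0e-3, 'weight_decay': 0.001},
    ],
    eps=1.0e-4,
)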