Example #1
def pivoting_rl(args):
    device = torch.device('cuda:'+str(args.gpu) if torch.cuda.is_available() else 'cpu')
    seed = 1002

    # Set random seed in python std lib, numpy and pytorch
    set_seed(seed)

    vec_env = DummyVecEnvWrapper(
        MujocoEnv('HalfCheetah-v2')
    ).instantiate(parallel_envs=1, seed=seed)

    if args.algo == 'ddpg':
        model, reinforcer = get_ddpg(vec_env, device)
    elif args.algo == 'ppo':
        model, reinforcer = get_ppo(vec_env, device)
    else:
        raise ValueError(f'Unknown algo: {args.algo}')

    # Optimizer helper - weird regularization settings copied from the OpenAI code
    adam_optimizer = AdamFactory(
        lr=[1.0e-4, 1.0e-3, 1.0e-3],
        weight_decay=[0.0, 0.0, 0.001],
        eps=1.0e-4,
        layer_groups=True
    ).instantiate(model)

    # Overall information store for training information
    training_info = TrainingInfo(
        metrics=[
            EpisodeRewardMetric('episode_rewards'),  # Calculate average reward from episode
        ],
        callbacks=[StdoutStreaming()]  # Print live metrics every epoch to standard output
    )

    # A bit of training initialization bookkeeping...
    training_info.initialize()
    reinforcer.initialize_training(training_info)
    training_info.on_train_begin()

    # Split training into epochs of 1000 batches each to average metrics nicely
    num_epochs = int(1.0e6 / 2 / 1000)

    # Normal handrolled training loop
    for i in range(1, num_epochs+1):
        epoch_info = EpochInfo(
            training_info=training_info,
            global_epoch_idx=i,
            batches_per_epoch=1000,
            optimizer=adam_optimizer
        )

        reinforcer.train_epoch(epoch_info)

    training_info.on_train_end()
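
The snippet above relies on a set_seed helper and on an args object carrying gpu and algo, neither of which is shown. A minimal sketch of how those pieces might be wired up, assuming the helper simply seeds Python's random module, NumPy and PyTorch, and that the entry point is a plain argparse script (both are assumptions, not part of the example):

import argparse
import random

import numpy as np
import torch


def set_seed(seed: int):
    """Hypothetical helper: seed Python's std lib, numpy and pytorch RNGs."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='HalfCheetah training example')
    parser.add_argument('--gpu', type=int, default=0, help='CUDA device index')
    parser.add_argument('--algo', type=str, default='ddpg', choices=['ddpg', 'ppo'])
    args = parser.parse_args()

    pivoting_rl(args)  # the function defined in the example above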
Example #2
    def resume_training(self, reinforcer, callbacks, metrics) -> TrainingInfo:
        """ Possibly resume training from a saved state from the storage """
        if self.model_config.reset:
            start_epoch = 0
        else:
            start_epoch = self.storage.last_epoch_idx()

        training_info = TrainingInfo(start_epoch_idx=start_epoch,
                                     run_name=self.model_config.run_name,
                                     metrics=metrics,
                                     callbacks=callbacks)

        if start_epoch == 0:
            self.storage.reset(self.model_config.render_configuration())
            training_info.initialize()
            reinforcer.initialize_training(training_info)
        else:
            self.storage.resume(training_info, reinforcer.model)

        return training_info
Example #3
    def resume_training(self, learner, callbacks, metrics) -> (TrainingInfo, dict):
        """ Possibly resume training from a saved state from the storage """
        if self.model_config.continue_training:
            start_epoch = self.storage.last_epoch_idx()
        else:
            start_epoch = 0

        training_info = TrainingInfo(
            start_epoch_idx=start_epoch,
            run_name=self.model_config.run_name,
            metrics=metrics,
            callbacks=callbacks
        )

        if start_epoch == 0:
            self.storage.reset(self.model_config.render_configuration())
            training_info.initialize()
            learner.initialize_training(training_info)
            hidden_state = None
        else:
            hidden_state = self.storage.resume(training_info, learner.model)

        return training_info, hidden_state
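
The two variants differ in how the restart decision is configured (model_config.reset in the first, model_config.continue_training in the second) and in that the second also hands back the hidden state recovered from storage. A hypothetical caller, with command, reinforcer, learner, callbacks and metrics assumed to be in scope for illustration, would consume them like this:

# First variant: only the training bookkeeping object comes back
training_info = command.resume_training(reinforcer, callbacks, metrics)

# Second variant: the hidden state restored from storage is returned as well
training_info, hidden_state = command.resume_training(learner, callbacks, metrics)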
Example #4
                                 weight_decay=[0.0, 0.0, 0.001],
                                 eps=1.0e-4,
                                 layer_groups=True).instantiate(model)

    # Overall information store for training information
    training_info = TrainingInfo(
        metrics=[
            EpisodeRewardMetric('episode_rewards'),  # Calculate average reward from episode
        ],
        callbacks=[StdoutStreaming()]  # Print live metrics every epoch to standard output
    )

    # A bit of training initialization bookkeeping...
    training_info.initialize()
    reinforcer.initialize_training(training_info)
    training_info.on_train_begin()

    # Split training into epochs of 1000 batches each to average metrics nicely
    num_epochs = int(1.0e6 / 2 / 1000)

    # Normal handrolled training loop
    for i in range(1, num_epochs + 1):
        epoch_info = EpochInfo(training_info=training_info,
                               global_epoch_idx=i,
                               batches_per_epoch=1000,
                               optimizer=adam_optimizer)

        reinforcer.train_epoch(epoch_info)