Example 1
 def metrics(self) -> list:
     """ Metrics gathered and averaged while this learning process runs """
     tracked = (
         "loss",
         "average_q_selected",
         "average_q_target",
         "grad_norm",
     )
     return [AveragingNamedMetric(name) for name in tracked]
Example 2
 def metrics(self) -> list:
     """ Metrics gathered and averaged while this learning process runs """
     tracked = (
         "new_policy_loss",
         "policy_entropy",
         "value_loss",
         "policy_optimization_success",
         "policy_improvement_ratio",
         "kl_divergence_step",
         "policy_loss_improvement",
         "grad_norm",
         "advantage_norm",
         "explained_variance",
     )
     return [AveragingNamedMetric(name) for name in tracked]
Example 3
 def metrics(self) -> list:
     """ Metrics gathered and averaged while this learning process runs """
     tracked = (
         "policy_loss",
         "value_loss",
         "policy_entropy",
         "approx_kl_divergence",
         "clip_fraction",
         "grad_norm",
         "advantage_norm",
         "explained_variance",
     )
     return [AveragingNamedMetric(name) for name in tracked]
Example 4
    def run(self):
        """
        Run a learning-rate range test ("LR finder") with the supplied
        configuration.

        Sweeps the learning rate from ``self.start_lr`` to ``self.end_lr``
        over ``self.num_it`` mini-batches (treated as a single epoch),
        records the tracked metric after every batch, optionally stops early
        on divergence, and finally plots both the LR schedule and the
        metric-vs-LR curve.
        """
        device = torch.device(self.model_config.device)
        learner = Learner(device, self.model.instantiate())

        # Per-iteration learning rates interpolated between start_lr and
        # end_lr ('linear' or 'logscale', depending on self.interpolation)
        lr_schedule = interp.interpolate_series(self.start_lr, self.end_lr,
                                                self.num_it,
                                                self.interpolation)

        if self.freeze:
            learner.model.freeze()

        # Optimizer should be created after freeze, so it only sees the
        # parameters that remain trainable
        optimizer = self.optimizer_factory.instantiate(learner.model)

        iterator = iter(self.source.train_loader())

        # Metrics to track through this training; 'lr' is added so the
        # learning rate is recorded alongside the learner's own metrics
        metrics = learner.metrics() + [AveragingNamedMetric("lr")]

        learner.train()

        # Lowest metric value seen so far — used as the divergence baseline
        best_value = None

        training_info = TrainingInfo(start_epoch_idx=0, metrics=metrics)

        # Treat it all as one epoch
        epoch_info = EpochInfo(training_info,
                               global_epoch_idx=1,
                               batches_per_epoch=1,
                               optimizer=optimizer)

        for iteration_idx, lr in enumerate(tqdm.tqdm(lr_schedule)):
            batch_info = BatchInfo(epoch_info, iteration_idx)

            # First, set the learning rate, the same for each parameter group
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

            # Restart the data loader when exhausted — the sweep may be
            # longer than a single pass over the dataset
            try:
                data, target = next(iterator)
            except StopIteration:
                iterator = iter(self.source.train_loader())
                data, target = next(iterator)

            learner.train_batch(batch_info, data, target)

            batch_info['lr'] = lr

            # METRIC RECORDING PART
            epoch_info.result_accumulator.calculate(batch_info)

            current_value = epoch_info.result_accumulator.intermediate_value(
                self.metric)

            final_metrics = {
                'epoch_idx': iteration_idx,
                self.metric: current_value,
                'lr': lr
            }

            if best_value is None or current_value < best_value:
                best_value = current_value

            # Stop on divergence: NaN, or metric exceeding the best value by
            # the configured multiplier.
            # NOTE(review): breaking here skips the history.add below, so the
            # diverging iteration itself is never recorded — confirm intended.
            if self.stop_dv and (np.isnan(current_value) or current_value >
                                 best_value * self.divergence_threshold):
                break

            training_info.history.add(final_metrics)

        frame = training_info.history.frame()

        # Left plot: LR schedule over iterations; right: metric vs LR
        fig, ax = plt.subplots(1, 2)

        ax[0].plot(frame.index, frame.lr)
        ax[0].set_title("LR Schedule")
        ax[0].set_xlabel("Num iterations")
        ax[0].set_ylabel("Learning rate")

        # NOTE(review): the nonposy/nonposx keywords were removed in
        # matplotlib 3.3 (replaced by 'nonpositive') — verify pinned version
        if self.interpolation == 'logscale':
            ax[0].set_yscale("log", nonposy='clip')

        ax[1].plot(frame.lr, frame[self.metric], label=self.metric)
        # ax[1].plot(frame.lr, frame[self.metric].ewm(com=20).mean(), label=self.metric + ' smooth')
        ax[1].set_title(self.metric)
        ax[1].set_xlabel("Learning rate")
        ax[1].set_ylabel(self.metric)
        # ax[1].legend()

        if self.interpolation == 'logscale':
            ax[1].set_xscale("log", nonposx='clip')

        plt.show()
Example 5 — File: ddpg.py, Project: yulkang/vel
 def metrics(self) -> list:
     """ Metrics gathered and averaged while this learning process runs """
     tracked = (
         "value_loss",
         "policy_loss",
     )
     return [AveragingNamedMetric(name) for name in tracked]
Example 6
 def metrics(self) -> list:
     """ Metrics gathered and averaged while this learning process runs """
     tracked = (
         "q_loss",
         "policy_entropy",
         "policy_loss",
         "policy_gradient_loss",
         "policy_gradient_bias_correction",
         "explained_variance",
         "advantage_norm",
         "grad_norm",
         "model_prob_std",
         "rollout_prob_std",
         "avg_q_selected",
         "avg_q_retraced",
     )
     return [AveragingNamedMetric(name) for name in tracked]