Example #1
    def on_validation_epoch_end(self, trainer, pl_module):
        x = pl_module.generate(1000)
        y_hat = pl_module.forward(x)

        # Log class balance of predictions #
        # f_fake = pred_label_fraction(y_hat, 0)
        # pl_module.log("generator/f_fake", f_fake, on_step=False, on_epoch=True)

        # Softmax entropy #
        H = entropy(y_hat)
        trainer.logger.experiment.log({"softmax entropy": wandb.Histogram(H)})

        H = H.tolist()
        data = [[h, pl_module.current_epoch] for h in H]
        table = wandb.Table(data=data, columns=["entropy", "epoch"])
        trainer.logger.experiment.log(
            {
                "generator/entropy": wandb.plot.histogram(
                    table, "entropy", title="softmax entropy (fake)"
                )
            }
        )

        if config["train"]["fid"]:
            fid_l = pl_module.fid(pl_module.trainer.datamodule.train_l)
            fid_u = pl_module.fid(pl_module.trainer.datamodule.train_u)
            pl_module.log("u/fid", fid_u)
            pl_module.log("l/fid", fid_l)
Example #2
    def training_step(self, data, batch_idx):
        _, y_i_, y_j_ = self(data['QM9'])
        mse1 = self.mse_loss(input=y_i_.flatten(), target=data['QM9']['y_i'].y_norm)
        mse2 = self.mse_loss(input=y_j_.flatten(), target=data['QM9']['y_j'].y_norm)
        mse = mse1 + mse2

        y_ij, _, _ = self(data['DDI'])
        y_pred = y_ij.squeeze()
        y_true = data['DDI'].binary_y.float()
        bce = self.bce_loss(input=y_pred, target=y_true)

        loss = mse + bce

        wandb.log({"train/loss": loss})

        wandb.log({'train/y_i_pred': y_i_.flatten()})
        wandb.log({'train/y_i_true': data['QM9']['y_i'].y_norm})
        wandb.log({'train/y_j_pred': y_j_.flatten()})
        wandb.log({'train/y_j_true': data['QM9']['y_j'].y_norm})

        wandb.log({"train/y_pred": wandb.Histogram(y_pred.cpu().detach())})
        wandb.log({"train/y_true": wandb.Histogram(y_true.cpu().detach())})

        return {'loss': loss}  # , 'train_accuracy': acc, 'train_f1': f1}
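# Note on the raw-tensor logs above: wandb generally renders long numeric
# arrays as histograms on its own, but wrapping them explicitly (after
# detaching and moving to CPU) is safer. Minimal sketch, assuming an active run:
import torch
import wandb
preds = torch.randn(256)
wandb.log({"train/y_i_pred": wandb.Histogram(preds.detach().cpu())})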
Example #3
def log_angle_distributions(args, pred_ang, src_seq):
    """ Logs a histogram of predicted angles to wandb. """
    # Remove batch-level masking
    batch_mask = src_seq.ne(VOCAB.pad_id)
    pred_ang = pred_ang[batch_mask]
    inv_ang = inverse_trig_transform(
        pred_ang.view(1, pred_ang.shape[0], -1)).cpu().detach().numpy()
    pred_ang = pred_ang.cpu().detach().numpy()

    wandb.log(
        {
            "Predicted Angles (sin cos)":
            wandb.Histogram(np_histogram=np.histogram(pred_ang)),
            "Predicted Angles (radians)":
            wandb.Histogram(np_histogram=np.histogram(inv_ang))
        },
        commit=False)

    for sincos_idx in range(pred_ang.shape[-1]):
        wandb.log(
            {
                f"Predicted Angles (sin cos) - {sincos_idx:02}":
                wandb.Histogram(
                    np_histogram=np.histogram(pred_ang[:, sincos_idx]))
            },
            commit=False)

    for rad_idx in range(inv_ang.shape[-1]):
        wandb.log(
            {
                f"Predicted Angles (radians) - {rad_idx:02}":
                wandb.Histogram(
                    np_histogram=np.histogram(inv_ang[0, :, rad_idx]))
            },
            commit=False)
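# Example #3 passes precomputed (counts, bin_edges) tuples via np_histogram,
# which keeps wandb from re-binning the data. Minimal sketch, assuming an
# active run:
import numpy as np
import wandb
angles = np.random.uniform(-np.pi, np.pi, size=1000)
wandb.log({"angles": wandb.Histogram(np_histogram=np.histogram(angles, bins=64))})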
Example #4
  def _on_step(self, plot=True) -> bool:
    """Evaluate the current policy for self.eval_episodes, then take a render
    and report all stats to W&B

    Args:
      plot: Enable matplotlib plotting behavior. Should be set to True unless 
        testing. Defaults to True.

    Returns:
      True, as per API requirements
    """
    mean_rewards, std_rewards = evaluate_policy(
      self.model, self.env, n_eval_episodes=self.eval_episodes)
    
    images = []
    rewards = []
    actions = []
    obses = []
    step_cnt = 0
    done, state = False, None
    obs = self.env.reset()
    while not done:
      if step_cnt % self.render_freq == 0:
        images.append(self.env.render(mode='rgb_array'))

      action, state = self.model.predict(obs, state=state, deterministic=True)
      obs, reward, done, _ = self.env.step(action)

      rewards.append(reward)
      actions.append(action)
      obses.append(obs)
      step_cnt += 1

    render = np.array(images)
    render = np.transpose(render, (0, 3, 1, 2))

    actions = np.array(actions).flatten()
    observes = np.array(obses).flatten()

    rewards = np.array(rewards)
    if plot:
      plt.clf()
      plt.plot(np.arange(len(rewards)), rewards)
      plt.xlabel('timesteps')
      plt.ylabel('rewards')
      plt.title('Timestep {}'.format(self.num_timesteps))

    log_dict = {
      'test_reward_mean': mean_rewards,
      'test_reward_std': std_rewards,
      'render': wandb.Video(render, format='gif', fps=self.fps),
      'global_step': self.num_timesteps,
      'evaluations': self.n_calls,
      'reward_distribution': wandb.Histogram(rewards),
      'action_distribution': wandb.Histogram(actions),
      'observation_distribution': wandb.Histogram(observes),
    }
    if plot:
      # only log the matplotlib figure when plotting is enabled
      log_dict['reward_vs_time'] = wandb.Image(plt)
    wandb.log(log_dict, step=self.num_timesteps)

    return True
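# wandb.Video expects numpy input shaped (time, channels, height, width),
# which is why _on_step transposes the (time, H, W, 3) frames it collects.
# Minimal sketch with synthetic frames, assuming an active run:
import numpy as np
import wandb
frames = np.random.randint(0, 255, (16, 64, 64, 3), dtype=np.uint8)
wandb.log({"render": wandb.Video(np.transpose(frames, (0, 3, 1, 2)),
                                 fps=4, format="gif")})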
Example #5
 def on_episode_end(self) -> None:
     self.num_episodes += 1
     action_hist = wandb.Histogram(
         [step['action'] for step in self.history])
     random_hist = wandb.Histogram(
         [int(step['was_random']) for step in self.history])
     randomness = sum([int(step['was_random']) for step in self.history])
     frames = len(self.history)
     episode_reward = sum([step['reward'] for step in self.history])
     log = {
         'buffer_size': len(self.runner.ReplayBuffer),
         'reward': episode_reward,
         'randomness': randomness / frames,
         'frames': frames,
         'actions': action_hist,
         'was_random': random_hist,
         'episode': self.num_episodes
     }
     self.highest_reward = max(self.highest_reward, episode_reward)
     if self.num_episodes % self.plot_every == 0:
         Q_values = np.concatenate([step['Q'] for step in self.history])
         log.update({
             'Q_activations':
             get_Q_value_fig(
                 Q_values, self.runner.env.unwrapped.get_action_meanings())
         })
     self.run.log(log, step=self.num_steps)
Example #6
 def plot_with_wandb(self, epoch, names, weights, sigmas):
     to_plot_weights = {}
     for name, weight in zip(names, weights):
         weight = weight.numpy()
         to_plot_weights[f"weights_{name}"] = wandb.Histogram(weight)
         if 'kernel' in name:
             try:
                 # flatten to a rank-2 matrix so singular values are defined
                 weight = weight.reshape((-1, weight.shape[-1]))
                 singular_values = scipy.linalg.svdvals(weight)
                 if 'spectral' in name:
                     # whole layer name, dropping the Variable's own name
                     prefix = '/'.join(name.split('/')[:-1])
                     sigma_name = prefix + '/sigma:0'
                     # retrieve the sigma associated with this layer
                     sigma = sigmas[sigma_name]
                     ratios = singular_values / sigma.numpy()
                     to_plot_weights[f"singular_ratio_{name}"] = wandb.Histogram(ratios)
                 else:
                     to_plot_weights[f"singular_{name}"] = wandb.Histogram(singular_values)
             except np.linalg.LinAlgError:
                 print('WARNING: svdvals did not converge')
     wandb.log(to_plot_weights,
               commit=False)  # wandb callback ensures the commit later
Example #7
 def model_weights(self, model):
     layer_num = 1
     for name, param in model.named_parameters():
         if param.numel() == 1:
             self.dict_to_scalar(
                 "weights",
                 {"layer{}-{}/value".format(layer_num, name): param.max()})
         else:
             self.dict_to_scalar(
                 "weights",
                 {"layer{}-{}/max".format(layer_num, name): param.max()})
             self.dict_to_scalar(
                 "weights",
                 {"layer{}-{}/min".format(layer_num, name): param.min()})
             self.dict_to_scalar(
                 "weights",
                 {"layer{}-{}/mean".format(layer_num, name): param.mean()})
             self.dict_to_scalar(
                 "weights",
                 {"layer{}-{}/std".format(layer_num, name): param.std()})
             self.log_dict["weights/layer{}-{}/param".format(
                 layer_num, name)] = wandb.Histogram(param)
             self.log_dict["weights/layer{}-{}/grad".format(
                 layer_num, name)] = wandb.Histogram(param.grad)
         layer_num += 1
Example #8
def wlog_weight(model: nn.Module) -> None:
    """Log weights on wandb."""
    wlog = dict()
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        layer_name, weight_type = name.rsplit(".", 1)

        # get params(weight, bias, weight_orig)
        if weight_type in ("weight", "bias", "weight_orig"):
            w_name = "params/" + layer_name + "." + weight_type
            weight = eval("model." + dot2bracket(layer_name) + "." + weight_type)
            weight = weight.cpu().data.numpy()
            wlog.update({w_name: wandb.Histogram(weight)})
        else:
            continue

        # get masked weights
        if weight_type == "weight_orig":
            w_name = "params/" + layer_name + ".weight"
            named_buffers = eval(
                "model." + dot2bracket(layer_name) + ".named_buffers()"
            )
            # the pruning mask is stored in the module's buffers
            mask = next(
                x for x in named_buffers if x[0] == "weight_mask"
            )[1].cpu().data.numpy()
            masked_weight = weight[np.where(mask == 1.0)]
            wlog.update({w_name: wandb.Histogram(masked_weight)})
    wandb.log(wlog, commit=False)
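# wlog_weight logs with commit=False, so its histograms are buffered and only
# written out with the next committing wandb.log call. Minimal sketch,
# assuming an active run:
import wandb
wandb.log({"params/fc.weight": wandb.Histogram([0.1, -0.2, 0.3])}, commit=False)
wandb.log({"loss": 0.42})  # this call also commits the buffered histogram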
Example #9
    def _calc_gradients(self, unlabel):
        with torch.no_grad():

            if self.use_gpu:
                unlabel = unlabel.cuda()

            embedding_unlabel = self.model_d(unlabel, feature=True)
            fake = self.model_g(unlabel.size()[0])
            fake = fake.view(unlabel.size()).detach()
            embedding_fake = self.model_d(fake, feature=True)

            self.discriminator_real = embedding_unlabel[0].detach().cpu().numpy()
            self.discriminator_fake = embedding_fake[0].detach().cpu().numpy()

            self.generator_bias = self.model_g.fc3.bias.detach().cpu().numpy()
            self.discriminator_weight = self.model_d.layers[-1].weight.detach().cpu().numpy()

            if not self.opt.SYSTEM.DEBUG:
                wandb.run.summary.update(
                    {"real_feature": wandb.Histogram(self.discriminator_real)})
                wandb.run.summary.update(
                    {"fake_feature": wandb.Histogram(self.discriminator_fake)})
                wandb.run.summary.update(
                    {"fc3_bias": wandb.Histogram(self.generator_bias)})
                wandb.run.summary.update({
                    "D_feature_weight":
                    wandb.Histogram(self.discriminator_weight)
                })
            return
Example #10
    def visualize_histograms(self, batch, y_hat_depth):
        batch = {
            k: v.cpu().detach()
            for k, v in batch.items() if torch.is_tensor(v)
        }
        y_hat_depth = y_hat_depth.cpu().detach()

        # visualize depth histograms to see how far the value distribution
        # deviates from a Gaussian
        depth_normalized = batch[MOD_DEPTH]
        # drop NaN entries (NaN != NaN)
        depth_normalized = depth_normalized[depth_normalized == depth_normalized]
        depth_meters = depth_normalized * self.depth_meters_stddev + self.depth_meters_mean
        y_hat_depth_meters = y_hat_depth * self.depth_meters_stddev + self.depth_meters_mean

        # Only lowercase letters work for the log names
        # Use commit=False to not increment the step counter
        self.logger.experiment[0].log(
            {
                'histograms/gt_depth_normalized':
                wandb.Histogram(depth_normalized, num_bins=64),
                'histograms/gt_depth_meters':
                wandb.Histogram(depth_meters, num_bins=64),
                'histograms/pred_depth_normalized':
                wandb.Histogram(y_hat_depth, num_bins=64),
                'histograms/pred_depth_meters':
                wandb.Histogram(y_hat_depth_meters, num_bins=64),
            },
            commit=False)
Example #11
def evaluate(dataset, LOG, metric_computer, dataloaders, model, opt, evaltypes, device,
             aux_store=None, make_recall_plot=False, store_checkpoints=True, log_key='Test'):
    """
    Parent function to compute evaluation metrics, print a summary string, and store checkpoint files / sample recall plots.
    """
    computed_metrics, extra_infos = metric_computer.compute_standard(opt, model, dataloaders[0], evaltypes, device)

    numeric_metrics = {}
    histogr_metrics = {}
    for main_key in computed_metrics.keys():
        for name,value in computed_metrics[main_key].items():
            if isinstance(value, np.ndarray):
                if main_key not in histogr_metrics: histogr_metrics[main_key] = {}
                histogr_metrics[main_key][name] = value
            else:
                if main_key not in numeric_metrics: numeric_metrics[main_key] = {}
                numeric_metrics[main_key][name] = value

    ###
    full_result_str = ''
    for evaltype in numeric_metrics.keys():
        full_result_str += 'Embed-Type: {}:\n'.format(evaltype)
        for i,(metricname, metricval) in enumerate(numeric_metrics[evaltype].items()):
            full_result_str += '{0}{1}: {2:4.4f}'.format(' | ' if i>0 else '',metricname, metricval)
        full_result_str += '\n'

    print(full_result_str)


    ###
    for evaltype in evaltypes:
        for storage_metric in opt.storage_metrics:
            parent_metric = evaltype+'_{}'.format(storage_metric.split('@')[0])
            if parent_metric not in LOG.progress_saver[log_key].groups.keys() or \
               numeric_metrics[evaltype][storage_metric]>np.max(LOG.progress_saver[log_key].groups[parent_metric][storage_metric]['content']):
               print('Saved weights for best {}: {}\n'.format(log_key, parent_metric))
               set_checkpoint(model, opt, LOG.progress_saver, LOG.prop.save_path+'/checkpoint_{}_{}_{}.pth.tar'.format(log_key, evaltype, storage_metric), aux=aux_store)


    ###
    if opt.log_online:
        import wandb
        for evaltype in histogr_metrics.keys():
            for eval_metric, hist in histogr_metrics[evaltype].items():
                bin_edges = list(np.arange(len(hist) + 1))
                wandb.log({log_key + ': ' + evaltype + '_{}'.format(eval_metric):
                           wandb.Histogram(np_histogram=(list(hist), bin_edges))},
                          step=opt.epoch)
                wandb.log({log_key + ': ' + evaltype + '_LOG-{}'.format(eval_metric):
                           wandb.Histogram(np_histogram=(list(np.log(hist) + 20), bin_edges))},
                          step=opt.epoch)

    ###
    for evaltype in numeric_metrics.keys():
        for eval_metric in numeric_metrics[evaltype].keys():
            parent_metric = evaltype+'_{}'.format(eval_metric.split('@')[0])
            LOG.progress_saver[log_key].log(eval_metric, numeric_metrics[evaltype][eval_metric],  group=parent_metric)

        ###
        if make_recall_plot:
            recover_closest_standard(extra_infos[evaltype]['features'],
                                     extra_infos[evaltype]['image_paths'],
                                     LOG.prop.save_path+'/sample_recoveries.png')
Example #12
 def add_histogram(tag, values, global_step):
     if len(values) == 2:
         wandb.log({tag: wandb.Histogram(np_histogram=values)},
                   step=global_step,
                   commit=False)
     else:
         wandb.log({tag: wandb.Histogram(values)},
                   step=global_step,
                   commit=False)
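# Usage sketch for add_histogram above. Note the len(values) == 2 heuristic:
# an np.histogram result (a 2-tuple) takes the np_histogram path, but a raw
# sequence of exactly two values would be misrouted there as well.
import numpy as np
add_histogram("raw_values", [0.1, 0.2, 0.3], global_step=10)
add_histogram("prebinned", np.histogram([0.1, 0.2, 0.3], bins=4), global_step=10)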
Example #13
    def plot_sampling_frequency(self, simulator_agent, agent_policy, timesteps,
                                plot_path, log_path):
        logger.debug('Plotting Sampling Frequency...')
        for dimension in range(simulator_agent.nparams):
            plt.figure(figsize=(16, 9))
            dimension_name = self.randomized_env.get_dimension_name(dimension)
            sampled_regions = np.array(
                simulator_agent.sampled_regions[dimension]).flatten()

            np.savez('{}.npz'.format(
                os.path.join(
                    log_path,
                    'sampled_regions-{}-{}'.format(dimension, timesteps))),
                     sampled_regions=sampled_regions)

            scaled_data = self.randomized_env.rescale(dimension,
                                                      sampled_regions)
            if len(scaled_data) > 0:
                wandb_histograms = {}
                cumulative = np.histogram(scaled_data, bins=self.npoints)
                normalized = np.histogram(scaled_data, bins=self.npoints, density=True)
                wandb_histograms[f"Sampling Frequency for {dimension_name} - Cumulative"] = \
                    wandb.Histogram(np_histogram=cumulative)
                wandb_histograms[f"Sampling Frequency for {dimension_name} - Cumulative Normalized"] = \
                    wandb.Histogram(np_histogram=normalized)
                if hasattr(self, "prev_scaled_data"):
                    hist_curr, bin1 = np.histogram(scaled_data,
                                                   bins=self.npoints)
                    hist_past, bin2 = np.histogram(self.prev_scaled_data,
                                                   bins=self.npoints)
                    if not np.array_equal(bin1, bin2):
                        logger.warning("PROBLEM: Bins don't match!")
                        logger.warning(f"Bin 1: {bin1}")
                        logger.warning(f"Bin 2: {bin2}")
                    wandb_histograms[
                        f"Sampling Frequency for {dimension_name}"] = wandb.Histogram(
                            np_histogram=(hist_curr - hist_past, bin1))
                self.prev_scaled_data = scaled_data
                wandb_histograms["Total # Environments Sampled"] = len(
                    scaled_data)
                wandb.log(wandb_histograms)
            plt.hist(scaled_data, bins=self.npoints)

            if self.config.get('hist_xlims') is not None:
                xlims = self.config.get('hist_xlims')
                plt.xlim(xlims[0], xlims[1])

            plt.ylim(0, self.config['hist_ylim_high'])
            plt.ylabel('Number of environment instances seen')
            plt.xlabel('Sampling frequency for {}'.format(dimension_name))
            plt.savefig('{}.png'.format(
                os.path.join(plot_path, '{}-{}'.format(dimension, timesteps))))
            plt.close()
Example #14
    def log_pre_update(self):
        """
        Initialize the info dictionary to be logged in wandb and collect base metrics
        Returns info dictionary.
        """

        # Initialize and update the info dict for logging
        info = dict()
        info["ppo/advantage_mean"] = self.buf_advantages.mean()
        info["ppo/advantage_std"] = self.buf_advantages.std()
        info["ppo/return_mean"] = self.buf_returns.mean()
        info["ppo/return_std"] = self.buf_returns.std()
        info["ppo/value_est_mean"] = self.rollout.buf_vpreds.mean()
        info["ppo/value_est_std"] = self.rollout.buf_vpreds.std()
        info["ppo/explained_variance"] = explained_variance(
            self.rollout.buf_vpreds.flatten(),  # TODO: switch to ravel if pytorch>=1.9
            self.buf_returns.flatten()  # TODO: switch to ravel if pytorch >= 1.9
        )
        info["ppo/reward_mean"] = torch.mean(self.rollout.buf_rewards)

        if self.rollout.best_ext_return is not None:
            info["performance/best_ext_return"] = self.rollout.best_ext_return
        # TODO: maybe add extra flag for detailed logging so runs are not slowed down
        if not self.debugging:
            feature_stats, stacked_act_feat = self.get_activation_stats(
                self.rollout.buf_acts_features, "activations_features/"
            )
            hidden_stats, stacked_act_pi = self.get_activation_stats(
                self.rollout.buf_acts_pi, "activations_hidden/"
            )
            info.update(feature_stats)
            info.update(hidden_stats)

            info["activations_features/raw_act_distribution"] = wandb.Histogram(
                to_numpy(stacked_act_feat)
            )
            info["activations_hidden/raw_act_distribution"] = wandb.Histogram(
                to_numpy(stacked_act_pi)
            )

            info["ppo/action_distribution"] = wandb.Histogram(
                to_numpy(self.rollout.buf_acs).flatten()
            )

            if self.vlog_freq >= 0 and self.n_updates % self.vlog_freq == 0:
                print(str(self.n_updates) + " updates - logging video.")
                # Reshape images such that they have shape [time,channels,width,height]
                sample_video = torch.moveaxis(self.rollout.buf_obs[0], 3, 1)
                # Log buffer video from first env
                info["observations"] = wandb.Video(
                    to_numpy(sample_video), fps=12, format="gif"
                )

        return info
Example #15
 def train(self, replay_buffer, batch_size, t, log=False, sub_actor=None):
     state, action, next_state, reward, done, state_seq, action_seq = replay_buffer.sample(batch_size)
     if self.offpolicy and self.name == 'meta':
         action = off_policy_correction(self.subgoal_ranges, self.target_dim, sub_actor, action, state,
                                        next_state, self.no_candidates, self.c_step, state_seq, action_seq)
     self._train_step(state, action, next_state, reward, done)
     self.total_it.assign_add(1)
     if log:
         wandb.log({f'{self.name}/mean_weights_actor': wandb.Histogram([tf.reduce_mean(x).numpy() for x in self.actor.weights])}, commit=False)
         wandb.log({f'{self.name}/mean_weights_critic': wandb.Histogram([tf.reduce_mean(x).numpy() for x in self.critic.weights])}, commit=False)
     return self.actor_loss.numpy(), self.critic_loss.numpy(), self.ac_gr_norm.numpy(), self.cr_gr_norm.numpy(), self.ac_gr_std.numpy(), self.cr_gr_std.numpy()
Example #16
def wandb_minigrid(capt, pred, gate, diags):
    # import pdb; pdb.set_trace()
    logz = dict()
    for k in diags:
        data = torch.from_numpy(np.array(diags[k]))
        # import pdb; pdb.set_trace()
        logz["capt_%s" % k] = wandb.Histogram(data[1 - gate])
        logz["pred_%s" % k] = wandb.Histogram(data[gate])
        logz[k] = data.mean()

    wandb.log(logz)
Example #17
 def _log_weights(self):
     metrics = {}
     for layer in self.model.layers:
         weights = layer.get_weights()
         if len(weights) == 1:
             metrics["parameters/" + layer.name + ".weights"] = wandb.Histogram(weights[0])
         elif len(weights) == 2:
             metrics["parameters/" + layer.name + ".weights"] = wandb.Histogram(weights[0])
             metrics["parameters/" + layer.name + ".bias"] = wandb.Histogram(weights[1])
     return metrics
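# _log_weights relies on tf.keras layer.get_weights() returning [kernel] or
# [kernel, bias]. A standalone sketch of the same pattern (assumes an active
# run; the tiny model here is illustrative only):
import tensorflow as tf
import wandb
model = tf.keras.Sequential([tf.keras.Input(shape=(8,)),
                             tf.keras.layers.Dense(4, name="dense")])
wandb.log({f"parameters/{layer.name}.weights": wandb.Histogram(layer.get_weights()[0])
           for layer in model.layers if layer.get_weights()})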
Example #18
def parallel_hill_climber(matrix, pop_size=3, total_steps=100, master_seed=0):
    parent_indices = {}
    print("init...")
    rng = np.random.default_rng(seed=master_seed)
    for i in range(pop_size):
        parent_indices[i] = np.arange(matrix.shape[0])
        # start with random indices
        rng.shuffle(parent_indices[i])

    print("start")
    rng = np.random.default_rng(seed=master_seed)
    for step in range(total_steps):
        possible_swaps = {}
        children_indices = {}
        num_detected = {}
        num_swapped = {}
        LAs = {}
        for i in range(pop_size):
            parent = load_matrix_from_indices(matrix, parent_indices[i]) # lazy instantiate to save memory

            gpu_random_seed = rng.integers(low=0, high=10000000)
            possible_swaps[i] = _detect_possible_swaps(parent, gpu_random_seed=gpu_random_seed)
            num_detected[i] = possible_swaps[i].shape[0]

            swap_random_seed = rng.integers(low=0, high=10000000)
            child, children_indices[i], num_swapped[i] = _apply_swaps(matrix=parent, indices=parent_indices[i],
                                                            detected_pairs=possible_swaps[i], seed=swap_random_seed)

            LAs[i] = loss_gpu(child)

        parent_indices = children_indices

        _f = list(LAs.values())
        _s = list(num_swapped.values())
        _d = list(num_detected.values())
        record = {
            "step": step,
            "LA/all": wandb.Histogram(_f),
            "LA/min": np.min(_f),
            "LA/mean": np.mean(_f),
            "LA/std": np.std(_f),
            "num_swapped/all": wandb.Histogram(_s),
            "num_swapped/mean": np.mean(_s),
            "num_detected/all": wandb.Histogram(_d),
            "num_detected/mean": np.mean(_d),
        }
        wandb.log(record)
        bestsofar = load_matrix_from_indices(matrix, parent_indices[np.argmin(_f)])
        save_pic(bestsofar, parent_indices[np.argmin(_f)], f"10.0/best_step_{step:04}")
        
        print(f"step {step}: min LAs {np.min(_f)}")
Example #19
 def log_sampling_distr(self):
     import wandb
     import numpy as np
     wandb.log({
         'Dist. Distr.':
         wandb.Histogram(np_histogram=(np.array(self.distr),
                                       np.array(self.support)))
     })
     wandb.log({
         'Log Dist. Distr.':
         wandb.Histogram(np_histogram=(
             np.log(np.clip(np.array(self.distr), 1e-20, None)) -
             np.log(1e-20), np.array(self.support)))
     })
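# For hand-built distributions like the one above, np_histogram expects a
# (counts, bin_edges) pair with len(bin_edges) == len(counts) + 1, so
# self.support presumably holds one more entry than self.distr. Sketch:
import numpy as np
import wandb
counts = np.array([3, 5, 2])
edges = np.array([0.0, 1.0, 2.0, 3.0])  # one more edge than counts
wandb.log({'Dist. Distr.': wandb.Histogram(np_histogram=(counts, edges))})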
Example #20
 def train(self,
           replay_buffer,
           batch_size,
           t,
           log=False,
           sub_actor=None,
           sub_agent=None,
           FM=None):
     state, action, reward, next_state, done, state_seq, action_seq = replay_buffer.sample(
         1000)
     self._maybe_save_attention()
     self._maybe_get_attention_gradients(state, action, reward, next_state)
     state, action, reward, next_state, done, state_seq, action_seq = replay_buffer.sample(
         batch_size)
     action = self._maybe_offpol_correction(sub_actor, action, state,
                                            next_state, state_seq,
                                            action_seq)
     td_error = self._train_critic(state, action, reward, next_state, done,
                                   log, replay_buffer.is_weight)
     if self._per:
         self._prioritized_experience_update(self._per, td_error,
                                             next_state, action, reward,
                                             replay_buffer)
     #state, action, reward, next_state, done, state_seq, action_seq = replay_buffer.sample_low(batch_size)
     self._train_actor(state, action, reward, next_state, done, log,
                       replay_buffer.is_weight, sub_agent)
     #td_error = self._compute_td_error(state, action, reward, next_state, done)
     #self._prioritized_experience_update(self._per, td_error, next_state, action, reward, replay_buffer)
     self.total_it.assign_add(1)
     if log:
         wandb.log(
             {
                 f'{self._name}/mean_weights_actor':
                 wandb.Histogram([
                     tf.reduce_mean(x).numpy() for x in self.actor.weights
                 ])
             },
             commit=False)
         wandb.log(
             {
                 f'{self._name}/mean_weights_critic':
                 wandb.Histogram([
                     tf.reduce_mean(x).numpy() for x in self.critic.weights
                 ])
             },
             commit=False)
     return (self.actor_loss.numpy(), self.critic_loss.numpy(),
             self.ac_gr_norm.numpy(), self.cr_gr_norm.numpy(),
             self.ac_gr_std.numpy(), self.cr_gr_std.numpy())
Example #21
def get_pair_extra_info(
        targets_max_prob: Tensor, i_indices: Tensor, j_indices: Tensor,
        similarities: Tensor,
        final_mask: Tensor) -> Tuple[LogDictType, PlotDictType]:
    def mean_std_max_min(
            t: Union[Tensor, np.ndarray],
            prefix: str = "") -> Dict[str, Union[Tensor, np.ndarray]]:
        return {
            f"{prefix}/mean":
            t.mean() if t.numel() > 0 else to_tensor(0, tensor_like=t),
            f"{prefix}/std":
            t.std() if t.numel() > 0 else to_tensor(0, tensor_like=t),
            f"{prefix}/max":
            t.max() if t.numel() > 0 else to_tensor(0, tensor_like=t),
            f"{prefix}/min":
            t.min() if t.numel() > 0 else to_tensor(0, tensor_like=t),
        }

    targets_i_max_prob = targets_max_prob[i_indices]
    targets_j_max_prob = targets_max_prob[j_indices]

    selected_sim = similarities[final_mask]
    selected_i_conf = targets_i_max_prob[final_mask]
    selected_j_conf = targets_j_max_prob[final_mask]

    selected_i_conf_stat = mean_std_max_min(selected_i_conf,
                                            prefix="selected_i_conf")
    selected_j_conf_stat = mean_std_max_min(selected_j_conf,
                                            prefix="selected_j_conf")
    selected_sim_stat = mean_std_max_min(selected_sim, prefix="selected_sim")

    selected_i_conf_hist = wandb.Histogram(_detorch(selected_i_conf))
    selected_j_conf_hist = wandb.Histogram(_detorch(selected_j_conf))
    selected_sim_hist = wandb.Histogram(_detorch(selected_sim))

    log_info = {
        **selected_i_conf_stat,
        **selected_j_conf_stat,
        **selected_sim_stat,
    }
    plot_info = {
        "selected_i_conf_hist": selected_i_conf_hist,
        "selected_j_conf_hist": selected_j_conf_hist,
        "selected_sim_hist": selected_sim_hist,
    }

    return {f"pair_loss/{k}": v for k, v in log_info.items()}, \
           {f"pair_loss/{k}": v for k, v in plot_info.items()}
Example #22
    def _log_gradients(self):
        if not self.training_data:
            raise ValueError(
                "Need to pass in training data if logging gradients")

        X_train = self.training_data[0]
        y_train = self.training_data[1]
        metrics = {}
        weights = self.model.trainable_weights  # weight tensors
        # filter down weights tensors to only ones which are trainable
        weights = [
            weight for weight in weights
            if self.model.get_layer(weight.name.split('/')[0]).trainable
        ]

        gradients = self.model.optimizer.get_gradients(
            self.model.total_loss, weights)  # gradient tensors
        input_tensors = [
            self.model.inputs[0],  # input data
            # how much to weight each sample by
            self.model.sample_weights[0],
            self.model.targets[0],  # labels
            K.learning_phase(),  # train or test mode
        ]

        get_gradients = K.function(inputs=input_tensors, outputs=gradients)

        grads = get_gradients([X_train, np.ones(len(y_train)), y_train])

        for (weight, grad) in zip(weights, grads):
            metrics["gradients/" + weight.name.split(':')[0] +
                    ".gradient"] = wandb.Histogram(grad)

        return metrics
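# _log_gradients uses the legacy Keras K.function/get_gradients path. A
# hedged TF2 sketch of the same idea with tf.GradientTape; model, loss_fn,
# X_train and y_train are assumed to exist as in the method above:
import tensorflow as tf
import wandb

with tf.GradientTape() as tape:
    preds = model(X_train, training=True)
    loss = loss_fn(y_train, preds)
grads = tape.gradient(loss, model.trainable_weights)
wandb.log({"gradients/" + w.name.split(':')[0] + ".gradient": wandb.Histogram(g.numpy())
           for w, g in zip(model.trainable_weights, grads)})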
Example #23
 def plot_with_wandb(self, epoch, names):
     grads = {}
     for i, grad_norm in enumerate(self.bag.grad_norms_hist):
         grad_norm = tf.stack(grad_norm).numpy()
         grads[f"gradients_{names[i]}"] = wandb.Histogram(grad_norm)
     wandb.log(grads,
               commit=False)  # wandb callback ensures the commit later
Example #24
    def on_validation_epoch_end(self):
        targets = torch.cat(self.targets, dim=0).cpu().view(-1)
        pred_prob = torch.cat(self.pred_probs, dim=0).cpu()
        preds = torch.argmax(pred_prob, dim=1)

        # make sure our masking is successful
        for i in range(targets.shape[0]):
            assert targets[i] == 0 or targets[i] == 1, f"target is {targets[i]}"
        # we don't predict -1, and don't train on them

        self._reset_aggregates()

        # we can predict ROC?
        # Target scores, can either be probability estimates of the positive
        #  class
        roc_pred_prob = pred_prob[:, :2]  # label index 3 is -1 (no prediction)
        # normalize
        roc_pred_prob = roc_pred_prob / torch.sum(roc_pred_prob, dim=1, keepdim=True)
        roc_pred_prob = roc_pred_prob[:, 1]

        log = {
            # 'val_correctness_acc': skm.accuracy_score(filtered_targets, filtered_preds),
            'val_acc_epoch': skm.accuracy_score(targets, preds),
            'val_roc_epoch': skm.roc_auc_score(targets, roc_pred_prob),
            'epoch': self.current_epoch,
        }
        print(log)

        if self.first_epoch:
            log['true_distr'] = wandb.Histogram(targets)
            self.first_epoch = False

        self.logger.experiment.log(log)
Example #25
    def log_histogram(self, tensor: Tensor, name: str) -> None:
        """
        Override this method to customize the logging of histograms.
        Detaches the tensor from the graph and moves it to the CPU for logging.

        Args:
            tensor: The tensor for which to log a histogram
            name: The name of the tensor as determined by the callback. Example: ``input/0/[64, 1, 28, 28]``
        """
        logger = self._trainer.logger
        tensor = tensor.detach().cpu()
        if isinstance(logger, TensorBoardLogger):
            logger.experiment.add_histogram(
                tag=name, values=tensor, global_step=self._trainer.global_step
            )

        if isinstance(logger, WandbLogger):
            if not _WANDB_AVAILABLE:  # pragma: no cover
                raise ModuleNotFoundError(
                    "You want to use `wandb` which is not installed yet."
                )

            logger.experiment.log(
                data={name: wandb.Histogram(tensor)}, commit=False,
            )
Example #26
    def on_result(self, result):
        step = result.get(TIMESTEPS_TOTAL) or result[TRAINING_ITERATION]

        # Log scalars
        logged_results = ['episode_reward_max', 'episode_reward_mean', 'episode_reward_min', 'episode_len_mean',
                          'custom_metrics', 'sampler_perf', 'info', 'perf']
        result_copy = result.copy()
        for key, val in result.items():
            if key not in logged_results:
                del result_copy[key]
        flat_result = flatten_dict(result_copy, delimiter="/")
        self.wandb_run.log(flat_result, step=step, sync=False)

        # Log histograms
        for key, val in result['hist_stats'].items():
            try:
                if key != '_robot_coordinates':
                    self.wandb_run.log({"Histograms/"+key: wandb.Histogram(val)}, step=step, sync=False)
            except ValueError:
                logger.warning("Unable to log histogram for {}".format(key))

        # Log trajectories
        traj_fig = plot_trajectories(result['hist_stats']['_robot_coordinates'])
        traj_fig.savefig("Trajectory.png")
        self.wandb_run.log({'Episode Trajectories': wandb.Image(traj_fig)}, step=step, sync=False)
        plt.close(traj_fig)
Example #27
    def _log_train(self, step, train_info, ep_info, prefix="", env_step=None):
        if env_step is None:
            env_step = step
        if (step // self._config.num_workers) % self._config.log_interval == 0:
            for k, v in train_info.items():
                if np.isscalar(v) or (hasattr(v, "shape")
                                      and np.prod(v.shape) == 1):
                    wandb.log({"train_rl/%s" % k: v}, step=step)
                elif isinstance(v, np.ndarray) or isinstance(v, list):
                    wandb.log({"train_rl/%s" % k: wandb.Histogram(v)},
                              step=step)
                else:
                    wandb.log({"train_rl/%s" % k: [wandb.Image(v)]}, step=step)

        for k, v in ep_info.items():
            wandb.log(
                {
                    prefix + "train_ep/%s" % k: np.mean(v),
                    "global_step": env_step
                },
                step=step,
            )
            wandb.log(
                {
                    prefix + "train_ep_max/%s" % k: np.max(v),
                    "global_step": env_step
                },
                step=step,
            )
        if self._config.vis_replay:
            if step % self._config.vis_replay_interval == 0:
                self._vis_replay_buffer(step)
Example #28
    def histo_summary(self, tag, values, step, bins=1000):
        """Log a histogram of the tensor of values."""
        # NOTE: wandb caps histograms at 512 bins, so very large `bins`
        # values may be rejected; 512 or fewer is safer.
        wandb.log({
            tag:
            wandb.Histogram(np_histogram=np.histogram(values, bins=bins))
        }, step=step)
Example #29
    def train(self,
              replay_buffer,
              batch_size,
              t,
              log=False,
              sub_actor=None,
              sub_agent=None):
        state, action, reward, next_state, done, state_seq, action_seq = replay_buffer.sample(
            batch_size)
        td_error = self._train_critic(state, action, reward, next_state,
                                      done, log, replay_buffer.is_weight)
        if self._per:
            self._prioritized_experience_update(self._per, td_error,
                                                next_state, action, reward,
                                                replay_buffer)
        #state, action, reward, next_state, done, state_seq, action_seq = replay_buffer.sample_low(batch_size)
        self.total_it.assign_add(1)
        if log:
            wandb.log(
                {
                    f'{self.name}/mean_weights_critic':
                    wandb.Histogram([
                        tf.reduce_mean(x).numpy() for x in self.critic.weights
                    ])
                },
                commit=False)

        return (self.critic_loss.numpy(), self.cr_gr_norm.numpy(),
                self.cr_gr_std.numpy())
Example #30
def plot_grad_flow(named_parameters):
    '''Plots the gradients flowing through the different layers of the net during training.
    Can be used to check for possible vanishing / exploding gradient problems.

    Usage: call this in the Trainer class after loss.backward() as
    "plot_grad_flow(self.model.named_parameters())" to visualize the gradient flow.'''
    plt.figure(figsize=(10,10))

    ave_grads = []
    max_grads = []
    layers = []
    for n, p in named_parameters:
        if p.requires_grad and ("bias" not in n):
            layers.append(n)
            ave_grads.append(p.grad.abs().mean())
            max_grads.append(p.grad.abs().max())
            wandb.log({n: wandb.Histogram(p.grad)})
    plt.bar(np.arange(len(max_grads)), max_grads, alpha=0.1, lw=1, color="c")
    plt.bar(np.arange(len(max_grads)), ave_grads, alpha=0.1, lw=1, color="b")
    plt.hlines(0, 0, len(ave_grads)+1, lw=2, color="k" )
    plt.xticks(range(0,len(ave_grads), 1), layers, rotation=45)
    plt.xlim(left=0, right=len(ave_grads))
    # plt.ylim(bottom = -0.001, top=0.02) # zoom in on the lower gradient regions
    # plt.ylim(bottom = -0.001, top=1.1)
    plt.xlabel("Layers")
    plt.ylabel("average gradient")
    plt.title("Gradient flow")
    plt.grid(True)
    plt.legend([Line2D([0], [0], color="c", lw=4),
                Line2D([0], [0], color="b", lw=4),
                Line2D([0], [0], color="k", lw=4)], ['max-gradient', 'mean-gradient', 'zero-gradient'])
    
    plt.tight_layout()
    wandb.log({"gradients": wandb.Image(plt)})