Example #1
class SummaryHook(Hook):
    """SummaryHook: writes TensorBoard summaries during training."""

    def __init__(self, log_root, freq, rank):
        """Create a SummaryHook object.

        Args:
            log_root: TensorBoard summary root path.
            freq: step interval between summary writes.
            rank: GPU rank; only rank 0 writes summaries.
        """
        self.writer = SummaryWriter(log_root) if rank == 0 else None
        self.freq = freq

    def after_train_iter(self, task, step, epoch):
        if self.writer is None:
            return
        if step == 0 or (step + 1) % self.freq == 0:
            global_step = step + epoch * task.step_per_epoch
            scalars = task.summary.get('scalars', {})
            for k, v in scalars.items():
                if isinstance(v, list):
                    for i, x in enumerate(v):
                        self.writer.add_scalar('%s_%d' % (k, i),
                                               x.val,
                                               global_step=global_step)
                elif isinstance(v, AverageMeter):
                    self.writer.add_scalar(k, v.val, global_step=global_step)

            histograms = task.summary.get('histograms', {})
            for k, v in histograms.items():
                if isinstance(v, list):
                    for i, x in enumerate(v):
                        self.writer.add_histogram('%s_%d' % (k, i),
                                                  x.val,
                                                  global_step=global_step)
                elif isinstance(v, AverageMeter):
                    self.writer.add_histogram(k,
                                              v.val,
                                              global_step=global_step)

    def after_run(self, *args):
        if self.writer:
            self.writer.close()
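
A minimal usage sketch for the hook above, under stated assumptions: Hook, task (with a step_per_epoch attribute and a summary dict of AverageMeter values) come from the surrounding framework, and train_one_iter and num_epochs are hypothetical placeholders.

hook = SummaryHook(log_root='runs/exp1', freq=50, rank=0)
for epoch in range(num_epochs):
    for step in range(task.step_per_epoch):
        train_one_iter(task)  # hypothetical: one step that updates task.summary
        hook.after_train_iter(task, step, epoch)
hook.after_run()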
Example #2
def update_summaries(step: int,
                     writer: SummaryWriter,
                     metrics: dict[str, torch.Tensor],
                     pre: str = None):
    for key, val in metrics.items():
        if pre is not None:
            key = '/'.join([pre, key])

        if not isinstance(val, torch.Tensor):
            val = torch.tensor(val, dtype=DTYPE)

        writer.add_scalar(key, val.mean(), global_step=step)
        if len(val.shape) > 1:
            if torch.any(val.isnan()):
                # Drop NaNs so the histogram call does not fail; write the
                # cleaned tensor, and skip the key entirely if nothing is left.
                val = drop_nans_from_tensor(val)
                if val.shape[0] == 0:
                    continue

            writer.add_histogram(key, val, global_step=step)
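
A hedged usage sketch for update_summaries; DTYPE and drop_nans_from_tensor are assumed to be defined in the surrounding module, as in the snippet above.

writer = SummaryWriter('runs/metrics')
metrics = {
    'loss': torch.rand(4, 32),  # 2-d tensor: logged as scalar mean and histogram
    'acc': 0.93,                # plain float: converted to tensor, logged as scalar
}
update_summaries(step=10, writer=writer, metrics=metrics, pre='train')
writer.close()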
Example #3
def write_summaries(
        metrics: dict,
        writer: SummaryWriter,
        step: int,
        pre: str = 'ftHMC'
):
    """Write summaries of items in `metrics` using `writer`."""
    for key, val in metrics.items():
        if key == 'traj':
            continue
        if isinstance(val, float):
            writer.add_scalar(f'{pre}/{key}', val, global_step=step)
        elif isinstance(val, torch.Tensor):
            val = val.detach().type(torch.get_default_dtype())
            if len(val.shape) > 1:
                writer.add_histogram(f'{pre}/{key}', val, global_step=step)
            else:
                # 0-d and 1-d tensors are logged as their mean.
                writer.add_scalar(f'{pre}/{key}', val.mean(), global_step=step)
        else:
            val = torch.tensor(val, dtype=DTYPE)
            writer.add_scalar(f'{pre}/{key}', val.mean(), global_step=step)
Example #4
def log_values(
    cost,
    grad_norms,
    bl_val,
    epoch,
    batch_id,
    step,
    log_likelihood,
    reinforce_loss,
    bl_loss,
    log_p,
    logger: SummaryWriter,
    args,
):
    avg_cost = cost.mean().item()
    bl_cost = bl_val.mean().item()
    grad_norms, grad_norms_clipped = grad_norms

    # Log values to screen
    print("epoch: {}, train_batch_id: {}, avg_cost: {}, baseline predict: {}".
          format(epoch, batch_id, avg_cost, bl_cost))

    print("grad_norm: {}, clipped: {}".format(grad_norms, grad_norms_clipped))

    # Log values to tensorboard
    logger.add_scalar("avg_cost", avg_cost, step)

    logger.add_scalar("grad_norm", grad_norms[0], step)
    logger.add_scalar("grad_norm_clipped", grad_norms_clipped[0], step)

    logger.add_scalar("actor_loss", reinforce_loss.item(), step)
    logger.add_scalar("nll", -log_likelihood.mean().item(), step)
    # if args.baseline == "critic":
    #     logger.add_scalar("critic_loss", bl_loss.item(), step)
    if batch_id % 100 == 0:
        _, num_step, _ = log_p.shape  # (num_graph, num_step, num_node)
        logger.add_histogram("first_step_prob",
                             log_p.cpu()[0][0].exp().squeeze(), step)
        logger.add_histogram("mid_step_prob",
                             log_p.cpu()[0][num_step // 2].exp().squeeze(),
                             step)
        logger.add_histogram("last_step_prob",
                             log_p.cpu()[0][-1].exp().squeeze(), step)
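
A hedged call sketch for log_values; all tensors here are fabricated shapes for illustration, and tb_logger stands in for the real writer. args is unused by the active code paths, so None is passed.

tb_logger = SummaryWriter('runs/reinforce')
cost = torch.rand(512)                     # per-instance cost
bl_val = torch.rand(512)                   # baseline predictions
log_likelihood = torch.randn(512)          # log-likelihood of sampled solutions
log_p = torch.rand(512, 20, 20).log()      # (num_graph, num_step, num_node)
grad_norms = ([torch.tensor(1.5)], [torch.tensor(1.0)])
log_values(cost, grad_norms, bl_val, epoch=0, batch_id=0, step=0,
           log_likelihood=log_likelihood, reinforce_loss=torch.tensor(0.3),
           bl_loss=torch.tensor(0.1), log_p=log_p, logger=tb_logger, args=None)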
Example #5
class TensorboardSummaryHook:
    """
    Logging object allowing Tensorboard summaries to be automatically exported to the tensorboard. Much of its
    functionality is automated. This means that the hook will export as much information as possible to the
    tensorboard.

    Losses, Metrics, Inputs and Outputs are all interpreted and exported according to their dimensionality. Vectors
    results in mean and standard deviation estimates as well as histograms; Pictures results in image summaries and
    histograms; etc.

    There is also the possibily of comparing inputs and outputs pair. This needs to be specified during object
    instantiation.

    Once the user instantiates this object, the workflow corresponding to the ID passes as argument will be
    tracked and the results of the workflow will be exported to the tensorboard.

    .. code-block:: python

            from eisen.utils.logging import TensorboardSummaryHook

            workflow = # Eg. An instance of Training workflow

            logger = TensorboardSummaryHook(workflow.id, 'Training', '/artifacts/dir')
    """

    def __init__(
        self,
        workflow_id,
        phase,
        artifacts_dir,
        comparison_pairs=None,
        show_all_axes=False,
    ):
        """
        This method instantiates an object of type TensorboardSummaryHook. The signature of this method is similar to
        that of every other hook. There is one additional parameter called `comparison_pairs` which is meant to
        hold a list of lists each containing a pair of input/output names that share the same dimensionality and can be
        compared to each other.

        A typical use of `comparison_pairs` is when users want to plot a pr_curve or a confusion matrix by comparing
        some input with some output. Eg. by comparing the labels with the predictions.

        .. code-block:: python

            from eisen.utils.logging import TensorboardSummaryHook

            workflow = # Eg. An instance of Training workflow

            logger = TensorboardSummaryHook(
                workflow_id=workflow.id,
                phase='Training',
                artifacts_dir='/artifacts/dir',
                comparison_pairs=[['labels', 'predictions']]
            )

        :param workflow_id: string containing the workflow id of the workflow being monitored (workflow_instance.id)
        :type workflow_id: UUID
        :param phase: string containing the name of the phase (training, testing, ...) of the workflow monitored
        :type phase: str
        :param artifacts_dir: path of the directory where the TensorBoard summaries will be saved
        :type artifacts_dir: str
        :param comparison_pairs: list of lists of pairs, which are names of inputs and outputs to be compared directly
        :type comparison_pairs: list of lists of strings
        :param show_all_axes: whether any volumetric data should be shown as axial + sagittal + coronal
        :type show_all_axes: bool

        <json>
        [
            {"name": "comparison_pairs", "type": "list:list:string", "value": ""},
            {"name": "show_all_axes", "type": "bool", "value": "false"}
        ]
        </json>
        """
        self.workflow_id = workflow_id
        self.phase = phase

        self.comparison_pairs = comparison_pairs
        self.show_all_axes = show_all_axes

        if not os.path.exists(artifacts_dir):
            raise ValueError("The directory specified to save artifacts does not exist!")

        dispatcher.connect(self.end_epoch, signal=EISEN_END_EPOCH_EVENT, sender=workflow_id)

        self.artifacts_dir = os.path.join(artifacts_dir, "summaries", phase)

        if not os.path.exists(self.artifacts_dir):
            os.makedirs(self.artifacts_dir)

        self.writer = SummaryWriter(log_dir=self.artifacts_dir)

    def end_epoch(self, message):
        epoch = message["epoch"]


        for typ in ["losses", "metrics"]:
            for dct in message[typ]:
                for key in dct.keys():
                    self.write_vector(typ + "/{}".format(key), dct[key], epoch)

        for typ in ["inputs", "outputs"]:
            for key in message[typ].keys():
                if message[typ][key].ndim == 5:
                    # Volumetric image (N, C, W, H, D)
                    self.write_volumetric_image(typ + "/{}".format(key), message[typ][key], epoch)

                if message[typ][key].ndim == 4:
                    self.write_image(typ + "/{}".format(key), message[typ][key], epoch)

                if message[typ][key].ndim == 3:
                    self.write_embedding(typ + "/{}".format(key), message[typ][key], epoch)

                if message[typ][key].ndim == 2:
                    self.write_class_probabilities(typ + "/{}".format(key), message[typ][key], epoch)

                if message[typ][key].ndim == 1:
                    self.write_vector(typ + "/{}".format(key), message[typ][key], epoch)

                if message[typ][key].ndim == 0:
                    self.write_scalar(typ + "/{}".format(key), message[typ][key], epoch)

        if self.comparison_pairs:
            for inp, out in self.comparison_pairs:
                assert message["inputs"][inp].ndim == message["outputs"][out].ndim

                if message["inputs"][inp].ndim == 1:
                    # in case of binary classification >> PR curve
                    if np.max(message["inputs"][inp]) <= 1 and np.max(message["outputs"][out]) <= 1:
                        self.write_pr_curve(
                            "{}_Vs_{}/pr_curve".format(inp, out),
                            message["inputs"][inp],
                            message["outputs"][out],
                            epoch,
                        )

                    # in any case for classification >> Confusion Matrix
                    self.write_confusion_matrix(
                        "{}_Vs_{}/confusion_matrix".format(inp, out),
                        message["inputs"][inp],
                        message["outputs"][out],
                        epoch,
                    )

    def write_volumetric_image(self, name, value, global_step):
        self.writer.add_scalar(name + "/mean", np.mean(value), global_step=global_step)
        self.writer.add_scalar(name + "/std", np.std(value), global_step=global_step)
        self.writer.add_histogram(name + "/histogram", value.flatten(), global_step=global_step)

        # Render the (N, C, W, H, D) volume as a video along one axis, or along
        # all three axes when show_all_axes is set.
        permutations = [[0, 2, 1, 3, 4]]
        if self.show_all_axes:
            permutations += [[0, 3, 1, 2, 4], [0, 4, 1, 2, 3]]

        for axis, permutation in enumerate(permutations, start=1):
            v = np.transpose(value, permutation)

            # Collapse the channel dimension to a single channel unless it is already 1 or 3.
            if v.shape[2] != 3 and v.shape[2] != 1:
                v = np.average(v, axis=2, weights=np.arange(0, 1, 1 / v.shape[2]))[:, :, np.newaxis]

            torch_value = torch.tensor(v).float()

            self.writer.add_video(name + "_axis_%d" % axis, torch_value, fps=10, global_step=global_step)

    def write_image(self, name, value, global_step):
        self.writer.add_scalar(name + "/mean", np.mean(value), global_step=global_step)
        self.writer.add_scalar(name + "/std", np.std(value), global_step=global_step)
        self.writer.add_histogram(name + "/histogram", value.flatten(), global_step=global_step)
        self.writer.add_images(name, value, global_step=global_step, dataformats="NCHW")

    def write_embedding(self, name, value, global_step):
        # Embeddings (3-dimensional data) are currently not exported.
        pass

    def write_pr_curve(self, name, labels, predictions, global_step):
        # `name` already carries the "/pr_curve" suffix from the caller.
        self.writer.add_pr_curve(name, labels, predictions, global_step)

    def write_confusion_matrix(self, name, labels, predictions, global_step):
        cnf_matrix = confusion_matrix(labels, predictions)
        image = plot_confusion_matrix(cnf_matrix, range(np.max(labels) + 1), normalize=True, title=name)[:, :, 0:3]
        self.writer.add_image(
            name,
            image.astype(float) / 255.0,
            global_step=global_step,
            dataformats="HWC",
        )

    def write_class_probabilities(self, name, value, global_step):
        self.writer.add_image(name, value, global_step=global_step, dataformats="HW")
        # Histogram of predicted class indices (argmax over the class axis).
        self.writer.add_histogram(name + "/distribution", np.argmax(value, axis=1), global_step=global_step)

    def write_vector(self, name, value, global_step):
        self.writer.add_histogram(name, value, global_step=global_step)
        self.writer.add_scalar(name + "/mean", np.mean(value), global_step=global_step)
        self.writer.add_scalar(name + "/std", np.std(value), global_step=global_step)

    def write_scalar(self, name, value, global_step):
        self.writer.add_scalar(name, value, global_step=global_step)
Example #6
class GeneratorTensorboardHook(TrainingHook):
    def __init__(self, log_dir, img_log_freq=10, max_batchsize=16):
        super().__init__()
        self.log_dir = ensure_dir(log_dir)
        self.img_log_freq = img_log_freq
        self.max_batchsize = max_batchsize

        self.writer = SummaryWriter(self.log_dir)

    def _image_transform(self, img):
        nneg_img = torch.abs(img)
        return 255 * nneg_img / nneg_img.sum()

    def call(self, *args, **kwargs):
        self.writer.add_scalar('Loss/generator', kwargs['generator_loss_cpu'],
                               self.step)
        self.notify('Writing loss %s.' %
                    round(kwargs['generator_loss_cpu'].item(), 5))

        self.writer.add_scalar('LossVariance/generator',
                               kwargs['generator_batch_loss_var_cpu'],
                               self.step)
        if kwargs['draw_activation_hist']:
            self._write_activation_histograms(
                buffer_dict=kwargs['buffers_cpu'])
        # Note (torch/numpy as of 2019-09-07): torch.utils.tensorboard._utils.make_grid
        # initializes its grid with np.zeros and no explicit dtype, so the grid defaults
        # to float. torch.utils.tensorboard.summary.image then derives its scale factor
        # from that dtype, so uint8 input ends up scaled by 255 instead of 1. The easiest
        # fix is to convert uint8 images to float and divide by 255 here. This costs a
        # float32 -> uint8 -> float32 round-trip out of the model, but lets the rest of
        # the code treat the images as uint8.

        if self.step % self.img_log_freq == 0:
            self.notify('Sending batch images to tensorboard...')
            if kwargs['generated_images_cpu'].dtype == torch.uint8:
                self.writer.add_images(
                    'Generated Images',
                    kwargs['generated_images_cpu'][:self.max_batchsize].to(
                        torch.float32) / 255., self.step)
                self.writer.add_images(
                    'Sampled Images',
                    kwargs['sampled_images_cpu'][:self.max_batchsize].to(
                        torch.float32) / 255., self.step)
            else:
                self.writer.add_images(
                    'Generated Images',
                    kwargs['generated_images_cpu'][:self.max_batchsize],
                    self.step)
                self.writer.add_images(
                    'Sampled Images',
                    kwargs['sampled_images_cpu'][:self.max_batchsize],
                    self.step)
            self.notify('Done. Sent %d images.' %
                        (min(kwargs['generated_images_cpu'].shape[0],
                             self.max_batchsize) +
                         min(kwargs['sampled_images_cpu'].shape[0],
                             self.max_batchsize)))

            self.notify('Sending last layer histogram to tensorboard...')
            self.writer.add_histogram(
                'Generator/Last Layer',
                kwargs['model'].generator.output_layer.convt_layer.weight,
                self.step)
            self.notify('Done.')

            self._write_batchnorm_histograms(kwargs['model'].generator)
            '''
            self.notify('Sending layer 0 filter weights to tensorboard...')
            conv_0_weight = kwargs['model'].generator.conv.conv_layers[0].weight
            write_conv_kernel_to_image_summaries(self.writer,'Generator/First_Kernel',conv_0_weight,self.step,self._image_transform)
            self.notify('Done.')
            
            self.notify('Sending layer -1 filter weights to tensorboard...')
            conv_final_weight = kwargs['model'].generator.conv.conv_layers[-1].weight
            write_conv_kernel_to_image_summaries(self.writer,'Generator/Last_Kernel',conv_final_weight,self.step,self._image_transform)
            self.notify('Done.')
            '''

    def _write_batchnorm_histograms(self, model):
        weights, biases, names = [], [], []
        for i, m in enumerate(model.modules()):
            if 'BatchNorm' in m.__class__.__name__:
                if m.weight is not None:
                    weights.append(m.weight)
                    biases.append(m.bias)
                    names.append('%s_size%d_index%d' %
                                 (m.__class__.__name__, m.weight.numel(), i))

        self.notify('Sending %d batchnorm histograms to tensorboard...' %
                    (len(weights) + len(biases)))
        for w, b, n in zip(weights, biases, names):
            self.writer.add_histogram('BatchNorm/%s_weight' % n, w, self.step)
            self.writer.add_histogram('BatchNorm/%s_bias' % n, b, self.step)
        self.notify('Done.')

    def _write_activation_histograms(self, buffer_dict, md_suffix='_other'):
        self._write_prebinned_histograms(buffer_dict, 'activation_histogram',
                                         md_suffix)

    def _write_prebinned_histograms(self,
                                    buffer_dict,
                                    identifying_substr,
                                    md_suffix='_other'):
        num_histograms_written = 0
        self.notify('Writing activation histograms...')
        for k, v in buffer_dict.items():
            if (identifying_substr in k) and not k.endswith(md_suffix):
                # The md_suffix buffer holds [min, max, numel, sum, sum of squares].
                hist_min, hist_max, hist_num, hist_sum, hist_sum_squares = \
                    buffer_dict[k + md_suffix]
                step_size = float(hist_max - hist_min) / v.numel()
                bucket_limits = torch.cat(
                    (hist_min + torch.arange(v.numel() - 1) * step_size,
                     torch.Tensor([hist_max])),
                    dim=0)
                self.writer.add_histogram_raw('Activations/%s' % k, hist_min,
                                              hist_max, hist_num, hist_sum,
                                              hist_sum_squares, bucket_limits,
                                              v, self.step)
                num_histograms_written += 1

        self.notify('Done. Wrote %d histograms' % num_histograms_written)
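
The pre-binned pattern used by _write_prebinned_histograms above can be reproduced standalone with SummaryWriter.add_histogram_raw. A minimal, self-contained sketch (the buffer_dict plumbing is specific to the hook and omitted); the metadata mirrors the [min, max, numel, sum, sum of squares] layout noted in the code.

import torch
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/prebinned_demo')
values = torch.randn(10000)
num_buckets = 64

counts = torch.histc(values, bins=num_buckets)          # bucket counts
edges = torch.linspace(values.min(), values.max(),
                       num_buckets + 1)                 # bucket boundaries

writer.add_histogram_raw(
    'demo/prebinned',
    min=values.min().item(),
    max=values.max().item(),
    num=values.numel(),
    sum=values.sum().item(),
    sum_squares=(values ** 2).sum().item(),
    bucket_limits=edges[1:],   # upper edge of each bucket; same length as counts
    bucket_counts=counts,
    global_step=0,
)
writer.close()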
Example #7
def make_mcmc_ensemble(model,
                       action_fn,
                       batch_size,
                       num_samples,
                       writer: SummaryWriter = None):
    xarr = []  # for holding the configurations
    names = ['x', 'q', 'dqsq', 'logq', 'logp', 'acc']
    history = {name: [] for name in names}

    # Build Markov chain
    sample_gen = serial_sample_generator(model, action_fn, batch_size,
                                         num_samples)
    step = 0
    with torch.no_grad():
        for x_new, logq_new, logp_new in sample_gen:
            # Always accept the first proposal
            if len(history['logp']) == 0:
                accepted = True
                q_old = qed.topo_charge(x_new[None, :])
            else:
                q_old = qed.topo_charge(xarr[-1][None, :])
                logp_old = history['logp'][-1]
                logq_old = history['logq'][-1]
                # Metropolis-Hastings acceptance probability for the
                # independence proposal drawn from the flow model.
                p_accept = torch.exp((logp_new - logq_new) -
                                     (logp_old - logq_old))
                p_accept = min(1, p_accept)
                draw = torch.rand(1)  # ~ [0, 1]
                if draw < p_accept:
                    accepted = True
                else:
                    # Reject: keep the previous configuration and its log-densities.
                    accepted = False
                    x_new = xarr[-1]
                    logp_new = logp_old
                    logq_new = logq_old

            q_new = qed.topo_charge(x_new[None, :])
            # Update Markov Chain
            xarr.append(x_new)
            metrics = {
                'q': q_new,
                'dqsq': (q_new - q_old)**2,
                'logp': logp_new,
                'logq': logq_new,
                'acc': float(accepted),
            }

            for key, val in metrics.items():
                history[key].append(val)

                if writer is not None:
                    v = torch.tensor(val)
                    if len(v.shape) > 1:
                        writer.add_histogram(f'inference/{key}',
                                             v,
                                             global_step=step)
                    else:
                        writer.add_scalar(f'inference/{key}',
                                          v.mean(),
                                          global_step=step)

            step += 1

    # Return the configurations collected in xarr under the 'x' key.
    history['x'] = xarr
    history_ = {k: (torch.stack(v) if k == 'x' else torch.Tensor(v)).cpu().numpy()
                for k, v in history.items()}
    return history_
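
A hedged usage sketch for make_mcmc_ensemble; model, action_fn, serial_sample_generator, and the qed module all come from the surrounding lattice field theory codebase and are assumed here.

writer = SummaryWriter('runs/mcmc')
history = make_mcmc_ensemble(model, action_fn, batch_size=64,
                             num_samples=1024, writer=writer)
writer.close()
print('acceptance rate:', history['acc'].mean())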