Example #1
0
    def _activation_stats_cb(self, module, input, output):
        """Record per-batch activation statistics of 'module'.

        This is a callback from the forward() of 'module'.  Per-sample
        min/max/mean/std/L2 values for the current batch are appended to
        module.statistics_records.
        """
        def to_np(stats):
            # torch.min/torch.max with a `dim` argument return a
            # (values, indices) pair; keep only the values in that case.
            if isinstance(stats, tuple):
                return stats[0].detach().cpu().numpy()
            else:
                return stats.detach().cpu().numpy()

        # We get a batch of activations, from which we collect statistics
        if not output.is_contiguous():
            output = output.contiguous()
        # Flatten each sample: (batch, ...) -> (batch, features)
        act = output.view(output.size(0), -1)
        batch_min_list = to_np(torch.min(act, dim=1)).tolist()
        batch_max_list = to_np(torch.max(act, dim=1)).tolist()
        batch_mean_list = to_np(torch.mean(act, dim=1)).tolist()
        # If activation contains only a single element, standard-deviation is meaningless (and std() returns NaN)
        # Return 0 instead
        if act.shape[0] == act.numel():
            batch_std_list = to_np(torch.zeros(act.shape[0])).tolist()
        else:
            batch_std_list = to_np(torch.std(act, dim=1)).tolist()
        batch_l2_list = to_np(torch.norm(act, p=2, dim=1)).tolist()

        module.statistics_records['min'].extend(batch_min_list)
        module.statistics_records['max'].extend(batch_max_list)
        module.statistics_records['mean'].extend(batch_mean_list)
        module.statistics_records['std'].extend(batch_std_list)
        module.statistics_records['l2'].extend(batch_l2_list)
        module.statistics_records['shape'] = distiller.size2str(output)
        # NOTE(review): the nested helper below is defined after all of the
        # statements above and is never called within this callback as shown.
        # It looks like dead code here (possibly pasted from a related
        # collector) — confirm before relying on it.
        def update_record(record, tensor):
            if not tensor.is_contiguous():
                tensor = tensor.contiguous()
            # Flatten each sample: (batch, ...) -> (batch, features)
            act = tensor.view(tensor.size(0), -1)
            min_per_sample = act.min(dim=1)[0]
            max_per_sample = act.max(dim=1)[0]
            # Running global extrema across all batches seen so far.
            record['min'] = min(record['min'], min_per_sample.min().item())
            record['max'] = max(record['max'], max_per_sample.max().item())
            try:
                # update_mean / update_std are defined elsewhere in this file.
                record['avg_min'] = update_mean(record['avg_min'],
                                                min_per_sample.mean().item())
                record['avg_max'] = update_mean(record['avg_max'],
                                                max_per_sample.mean().item())
                new_mean = update_mean(record['mean'], act.mean().item())
                record['std'] = update_std(tensor, record['std'],
                                           record['mean'], new_mean)
            except RuntimeError:
                # Fallback: do the reductions in NumPy on the CPU —
                # presumably for dtypes/devices where the torch reductions
                # raise (TODO confirm which case triggers this).
                record['avg_min'] = update_mean(
                    record['avg_min'],
                    min_per_sample.cpu().numpy().mean().item(0))
                record['avg_max'] = update_mean(
                    record['avg_max'],
                    max_per_sample.cpu().numpy().mean().item(0))
                new_mean = update_mean(record['mean'],
                                       act.cpu().numpy().mean().item(0))
                record['std'] = update_std(tensor.cpu().numpy(), record['std'],
                                           record['mean'], new_mean)
            # Commit the new running mean only AFTER update_std consumed
            # both the old mean (record['mean']) and the new one.
            record['mean'] = new_mean

            if not record['shape']:
                record['shape'] = distiller.size2str(tensor)
 def annotate_op_node(op):
     """Build extra display labels for a graph op node.

     For "Conv" ops, returns a list with the kernel shape and group count;
     for every other op type, returns an empty string.
     """
     if op["type"] != "Conv":
         return ""
     attrs = op["attrs"]
     kernel_label = "sh={}".format(distiller.size2str(attrs["kernel_shape"]))
     group_label = "g={}".format(str(attrs["group"]))
     return [kernel_label, group_label]
Example #4
0
 def annotate_op_node(op):
     """Return annotation labels for 'op': kernel shape and group count
     for Conv nodes, otherwise an empty string."""
     if op['type'] == 'Conv':
         shape_str = distiller.size2str(op['attrs']['kernel_shape'])
         group_str = str(op['attrs']['group'])
         return ['sh={}'.format(shape_str), 'g={}'.format(group_str)]
     return ''
Example #5
0
        def update_record(record, tensor):
            """Fold one batch of activations into the running stats in 'record'."""
            if not tensor.is_contiguous():
                tensor = tensor.contiguous()
            # Flatten each sample: (batch, ...) -> (batch, features)
            act = tensor.view(tensor.size(0), -1)
            if self.collecting_laplace:
                # Laplace pass: only the 'b' parameter is updated (it needs
                # the mean collected on an earlier pass); skip the rest.
                record['b'] = update_b(act, record['b'], record['mean'])
                return

            # In the general case, the average min/max that we're collecting are averages over the per-sample
            # min/max values. That is - we first calculate the min/max for each sample in the batch, then average
            # over that.
            # But - If each sample contains just a single value, then such a per-sample calculation we'll result in
            # avg_min = avg_max. So in that case we "revert" to calculating "global" values, for the whole batch,
            # instead of per-sample values
            dim = 0 if act.numel() == act.shape[0] else 1

            min_per_sample = act.min(dim=dim)[0]
            max_per_sample = act.max(dim=dim)[0]
            # Running global extrema across all batches seen so far.
            record['min'] = min(record['min'], min_per_sample.min().item())
            record['max'] = max(record['max'], max_per_sample.max().item())
            try:
                # update_mean / update_std are defined elsewhere in this file.
                record['avg_min'] = update_mean(record['avg_min'],
                                                min_per_sample.mean().item())
                record['avg_max'] = update_mean(record['avg_max'],
                                                max_per_sample.mean().item())
                new_mean = update_mean(record['mean'], act.mean().item())
                record['std'] = update_std(tensor, record['std'],
                                           record['mean'], new_mean)
            except RuntimeError:
                # Fallback: do the reductions in NumPy on the CPU —
                # presumably for dtypes/devices where the torch reductions
                # raise (TODO confirm which case triggers this).
                record['avg_min'] = update_mean(
                    record['avg_min'],
                    min_per_sample.cpu().numpy().mean().item(0))
                record['avg_max'] = update_mean(
                    record['avg_max'],
                    max_per_sample.cpu().numpy().mean().item(0))
                new_mean = update_mean(record['mean'],
                                       act.cpu().numpy().mean().item(0))
                record['std'] = update_std(tensor.cpu().numpy(), record['std'],
                                           record['mean'], new_mean)
            # Commit the new running mean only AFTER update_std consumed
            # both the old mean (record['mean']) and the new one.
            record['mean'] = new_mean

            if not record['shape']:
                record['shape'] = distiller.size2str(tensor)
Example #6
0
        def update_record(record, tensor):
            """Fold one batch of activations into the running stats in 'record'."""
            if tensor.dtype not in [
                    torch.float16, torch.float32, torch.float64
            ]:
                # Mean function only works for float tensors
                tensor = tensor.to(torch.float32)
            if not tensor.is_contiguous():
                tensor = tensor.contiguous()
            # Flatten each sample: (batch, ...) -> (batch, features)
            act = tensor.view(tensor.size(0), -1)
            numel = act.numel()
            if self.collecting_second_pass:
                # Second pass: update only 'b' and 'std' (both depend on the
                # mean gathered in the first pass), then accumulate the
                # element count and bail out early.
                record['b'] = update_b(act, record['b'], record['mean'],
                                       record['total_numel'])
                record['std'] = update_std(act, record['std'], record['mean'],
                                           record['total_numel'])
                record['total_numel'] += numel
                return

            # In the general case, the average min/max that we're collecting are averages over the per-sample
            # min/max values. That is - we first calculate the min/max for each sample in the batch, then average
            # over that.
            # But - If each sample contains just a single value, then such a per-sample calculation we'll result in
            # avg_min = avg_max. So in that case we "revert" to calculating "global" values, for the whole batch,
            # instead of per-sample values
            dim = 0 if numel == act.shape[0] else 1

            min_per_sample = act.min(dim=dim)[0]
            max_per_sample = act.max(dim=dim)[0]
            # Running global extrema across all batches seen so far.
            record['min'] = min(record['min'], min_per_sample.min().item())
            record['max'] = max(record['max'], max_per_sample.max().item())
            # update_running_mean is defined elsewhere in this file; it is
            # weighted by the element count accumulated so far.
            record['avg_min'] = update_running_mean(min_per_sample,
                                                    record['avg_min'],
                                                    record['total_numel'])
            record['avg_max'] = update_running_mean(max_per_sample,
                                                    record['avg_max'],
                                                    record['total_numel'])
            new_mean = update_running_mean(act, record['mean'],
                                           record['total_numel'])
            record['mean'] = new_mean
            # Accumulate count AFTER the mean updates, which weight by the
            # pre-batch total.
            record['total_numel'] += numel

            if not record['shape']:
                record['shape'] = distiller.size2str(tensor)