def _activation_stats_cb(self, module, input, output):
    """Record the activation sparsity of 'module'

    This is a forward-hook callback: it appends per-batch activation
    statistics (per-sample min/max/mean/std and L2 norm, plus the
    activation shape) to ``module.statistics_records``.
    """
    def as_numpy(result):
        # torch reductions called with ``dim=`` return a (values, indices)
        # tuple; plain reductions return a bare tensor.
        values = result[0] if isinstance(result, tuple) else result
        return values.detach().cpu().numpy()

    # Flatten the batch of activations to (batch, features) so every
    # statistic reduces over dim 1.
    flat = output if output.is_contiguous() else output.contiguous()
    flat = flat.view(flat.size(0), -1)

    records = module.statistics_records
    records['min'].extend(as_numpy(torch.min(flat, dim=1)).tolist())
    records['max'].extend(as_numpy(torch.max(flat, dim=1)).tolist())
    records['mean'].extend(as_numpy(torch.mean(flat, dim=1)).tolist())
    # A single-element activation has no meaningful std (std() yields NaN),
    # so record zeros in that case.
    if flat.shape[0] == flat.numel():
        records['std'].extend(as_numpy(torch.zeros(flat.shape[0])).tolist())
    else:
        records['std'].extend(as_numpy(torch.std(flat, dim=1)).tolist())
    records['l2'].extend(as_numpy(torch.norm(flat, p=2, dim=1)).tolist())
    records['shape'] = distiller.size2str(output)
def update_record(record, tensor):
    """Fold one batch ``tensor`` into the running statistics in ``record``.

    Updates the global min/max, the running averages of the per-sample
    min/max, the running mean and the running std.  If the torch
    reductions raise ``RuntimeError``, the same updates are redone on a
    CPU/numpy copy of the data.
    """
    if not tensor.is_contiguous():
        tensor = tensor.contiguous()
    flat = tensor.view(tensor.size(0), -1)
    per_sample_min = flat.min(dim=1)[0]
    per_sample_max = flat.max(dim=1)[0]
    record['min'] = min(record['min'], per_sample_min.min().item())
    record['max'] = max(record['max'], per_sample_max.max().item())
    try:
        record['avg_min'] = update_mean(record['avg_min'], per_sample_min.mean().item())
        record['avg_max'] = update_mean(record['avg_max'], per_sample_max.mean().item())
        new_mean = update_mean(record['mean'], flat.mean().item())
        # update_std needs both the previous mean (still in record['mean'])
        # and the new one, so record['mean'] is only overwritten afterwards.
        record['std'] = update_std(tensor, record['std'], record['mean'], new_mean)
    except RuntimeError:
        # Fallback path: move to CPU/numpy and redo the same updates there.
        record['avg_min'] = update_mean(
            record['avg_min'], per_sample_min.cpu().numpy().mean().item(0))
        record['avg_max'] = update_mean(
            record['avg_max'], per_sample_max.cpu().numpy().mean().item(0))
        new_mean = update_mean(record['mean'], flat.cpu().numpy().mean().item(0))
        record['std'] = update_std(tensor.cpu().numpy(), record['std'],
                                   record['mean'], new_mean)
    record['mean'] = new_mean
    if not record['shape']:
        record['shape'] = distiller.size2str(tensor)
def annotate_op_node(op):
    """Return extra graph-node annotations for a Conv op (kernel shape and
    group count); every other op type gets an empty annotation."""
    if op["type"] != "Conv":
        return ""
    attrs = op["attrs"]
    return [
        "sh={}".format(distiller.size2str(attrs["kernel_shape"])),
        "g={}".format(str(attrs["group"])),
    ]
def annotate_op_node(op):
    """Return extra graph-node annotations for a Conv op (kernel shape and
    group count); every other op type gets an empty annotation."""
    if op['type'] == 'Conv':
        shape_label = "sh={}".format(distiller.size2str(op['attrs']['kernel_shape']))
        group_label = "g={}".format(str(op['attrs']['group']))
        return [shape_label, group_label]
    return ''
def update_record(record, tensor):
    """Accumulate one batch of activations into ``record``.

    On the Laplace-collection pass only the 'b' parameter is updated (it
    needs the mean gathered earlier); otherwise min/max, the per-sample
    avg_min/avg_max, the running mean and the running std are folded in.
    """
    if not tensor.is_contiguous():
        tensor = tensor.contiguous()
    flat = tensor.view(tensor.size(0), -1)
    if self.collecting_laplace:
        record['b'] = update_b(flat, record['b'], record['mean'])
        return
    # In the general case, the average min/max that we're collecting are averages over the per-sample
    # min/max values. That is - we first calculate the min/max for each sample in the batch, then average
    # over that.
    # But - If each sample contains just a single value, then such a per-sample calculation we'll result in
    # avg_min = avg_max. So in that case we "revert" to calculating "global" values, for the whole batch,
    # instead of per-sample values
    reduce_dim = 0 if flat.numel() == flat.shape[0] else 1
    per_sample_min = flat.min(dim=reduce_dim)[0]
    per_sample_max = flat.max(dim=reduce_dim)[0]
    record['min'] = min(record['min'], per_sample_min.min().item())
    record['max'] = max(record['max'], per_sample_max.max().item())
    try:
        record['avg_min'] = update_mean(record['avg_min'], per_sample_min.mean().item())
        record['avg_max'] = update_mean(record['avg_max'], per_sample_max.mean().item())
        new_mean = update_mean(record['mean'], flat.mean().item())
        # update_std needs the previous mean, so record['mean'] is only
        # overwritten after this call.
        record['std'] = update_std(tensor, record['std'], record['mean'], new_mean)
    except RuntimeError:
        # CPU/numpy fallback when the torch reduction raises.
        record['avg_min'] = update_mean(
            record['avg_min'], per_sample_min.cpu().numpy().mean().item(0))
        record['avg_max'] = update_mean(
            record['avg_max'], per_sample_max.cpu().numpy().mean().item(0))
        new_mean = update_mean(record['mean'], flat.cpu().numpy().mean().item(0))
        record['std'] = update_std(tensor.cpu().numpy(), record['std'],
                                   record['mean'], new_mean)
    record['mean'] = new_mean
    if not record['shape']:
        record['shape'] = distiller.size2str(tensor)
def update_record(record, tensor):
    """Accumulate one batch of activations into ``record``.

    First pass: global min/max, running per-sample avg_min/avg_max and the
    running mean.  Second pass (``self.collecting_second_pass``): the 'b'
    parameter and the std, both of which need the first-pass mean.
    ``record['total_numel']`` tracks how many elements the running values
    already cover.
    """
    if tensor.dtype not in [torch.float16, torch.float32, torch.float64]:
        # Mean function only works for float tensors
        tensor = tensor.to(torch.float32)
    if not tensor.is_contiguous():
        tensor = tensor.contiguous()
    flat = tensor.view(tensor.size(0), -1)
    count = flat.numel()
    if self.collecting_second_pass:
        record['b'] = update_b(flat, record['b'], record['mean'], record['total_numel'])
        record['std'] = update_std(flat, record['std'], record['mean'], record['total_numel'])
        record['total_numel'] += count
        return
    # In the general case, the average min/max that we're collecting are averages over the per-sample
    # min/max values. That is - we first calculate the min/max for each sample in the batch, then average
    # over that.
    # But - If each sample contains just a single value, then such a per-sample calculation we'll result in
    # avg_min = avg_max. So in that case we "revert" to calculating "global" values, for the whole batch,
    # instead of per-sample values
    reduce_dim = 0 if count == flat.shape[0] else 1
    per_sample_min = flat.min(dim=reduce_dim)[0]
    per_sample_max = flat.max(dim=reduce_dim)[0]
    record['min'] = min(record['min'], per_sample_min.min().item())
    record['max'] = max(record['max'], per_sample_max.max().item())
    record['avg_min'] = update_running_mean(per_sample_min, record['avg_min'],
                                            record['total_numel'])
    record['avg_max'] = update_running_mean(per_sample_max, record['avg_max'],
                                            record['total_numel'])
    record['mean'] = update_running_mean(flat, record['mean'], record['total_numel'])
    # Only extend the element count after all running values have consumed
    # the previous total.
    record['total_numel'] += count
    if not record['shape']:
        record['shape'] = distiller.size2str(tensor)