def test_fuse_modules_with_pre_exist_adj_map():
    model = WrappedSequential(DummyA(), DummyB(), DummyD())

    # fuse_modules must be given either a dummy input or a pre-computed adjacency map
    with pytest.raises(ValueError):
        mt.fuse_modules(model, types_sequence, fuse_fn, dummy_input=None, adjacency_map=None)

    dummy_input = torch.randn(10, 10)
    sg = SummaryGraph(deepcopy(model), dummy_input)
    adj_map = sg.adjacency_map()

    # Passing only a dummy input
    fused_dummy_input = mt.fuse_modules(deepcopy(model), types_sequence, fuse_fn,
                                        dummy_input=dummy_input, adjacency_map=None)
    compare_models(fused_dummy_input, fused_reference())

    # Passing only a pre-computed adjacency map
    fused_pre_sg = mt.fuse_modules(deepcopy(model), types_sequence, fuse_fn,
                                   dummy_input=None, adjacency_map=adj_map)
    compare_models(fused_pre_sg, fused_reference())

    # Passing both
    fused_both = mt.fuse_modules(deepcopy(model), types_sequence, fuse_fn,
                                 dummy_input=dummy_input, adjacency_map=adj_map)
    compare_models(fused_both, fused_reference())
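# The helpers referenced above (WrappedSequential, DummyA/DummyB/DummyD, types_sequence, fuse_fn,
# compare_models, fused_reference, and the 'mt' module alias) are defined elsewhere in the test
# module. As a hedged illustration of the pattern being exercised - computing the SummaryGraph
# adjacency map once and reusing it instead of re-tracing the model - here is a small sketch that
# uses only names already appearing in this file:
def adjacency_map_reuse_demo():
    model = nn.Sequential(nn.Linear(10, 10), nn.ReLU(), nn.Linear(10, 10))
    dummy_input = torch.randn(1, 10)
    adj_map = SummaryGraph(deepcopy(model), dummy_input).adjacency_map()
    # adj_map can now be handed to any API that accepts a pre-computed adjacency map
    # (e.g. mt.fuse_modules above) so the model does not have to be traced again.
    return adj_map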
def test_scope_name_workarounds():
    class ModelWithGemms(nn.Module):
        def __init__(self):
            super(ModelWithGemms, self).__init__()
            self.drop1 = nn.Dropout()
            self.fc1 = nn.Linear(100, 50)
            self.relu1 = nn.ReLU(inplace=True)
            self.drop2 = nn.Dropout()
            self.fc2 = nn.Linear(50, 25)
            self.relu2 = nn.ReLU(inplace=True)
            self.fc3 = nn.Linear(25, 1)
            self.drop3 = nn.Dropout()

        def forward(self, x):
            x = self.drop1(x)
            x = self.fc1(x)
            x = self.relu1(x)
            x = self.drop2(x)
            x = self.fc2(x)
            x = self.relu2(x)
            x = self.fc3(x)
            x = self.drop3(x)
            return x

    m = ModelWithGemms()
    dummy_input = distiller.get_dummy_input(input_shape=(1, 100))
    expected_types = ('Gemm', 'Relu', 'Gemm', 'Relu', 'Gemm')

    # We have workarounds for 2 issues:
    #   1. GEMM ops get the scope name of the op that came before them
    #   2. Ops that come before a dropout op get the scope name of the dropout op
    # If both conditions apply, empirically it is issue #2 that manifests.
    # For the model above we expect the ops in the graph to be named (in order):
    #   'fc1', 'relu1', 'fc2', 'relu2', 'fc3'
    # (note that dropout ops are dropped)
    #
    # But without our workarounds in place, we'll get:
    #   'drop1', 'drop2', 'drop2__1', 'relu2', 'drop3'
    #
    # What happens is:
    #   * 'fc1' - issue #1 applies, so 'fc1' --> 'drop1'
    #   * 'relu1' - issue #2 applies, so 'relu1' --> 'drop2'
    #   * 'fc2' - issue #1 applies, so 'fc2' --> 'drop2__1' ('__1' suffix because 'drop2' already exists)
    #   * 'relu2' should be ok as-is
    #   * 'fc3' is susceptible to both issues - it's a GEMM op AND it comes before a dropout. As mentioned
    #     above, issue #2 "wins", so 'fc3' --> 'drop3'

    # We test without the workarounds as a means of checking whether the issues still exist. New PyTorch
    # versions may fix them, in which case we can remove the workarounds
    sg = SummaryGraph(m, dummy_input, apply_scope_name_workarounds=False)
    names, types = zip(*[(op_name, op['type']) for op_name, op in sg.ops.items()])
    assert names == ('drop1', 'drop2', 'drop2__1', 'relu2', 'drop3')
    assert types == expected_types

    # Now test with the workarounds
    sg = SummaryGraph(m, dummy_input)
    names, types = zip(*[(op_name, op['type']) for op_name, op in sg.ops.items()])
    assert names == ('fc1', 'relu1', 'fc2', 'relu2', 'fc3')
    assert types == expected_types
def named_params_layers_test_aux(dataset, arch, dataparallel: bool):
    model = create_model(False, dataset, arch, parallel=dataparallel)
    sgraph = SummaryGraph(model, get_input(dataset))
    sgraph_layer_names = set(k for k, i, j in sgraph.named_params_layers())
    for layer_name in sgraph_layer_names:
        assert sgraph.find_op(layer_name) is not None, \
            '{} was not found in summary graph'.format(layer_name)
def test_named_params_layers(dataset, arch, parallel):
    model = create_model(False, dataset, arch, parallel=parallel)
    sgraph = SummaryGraph(model, distiller.get_dummy_input(dataset))
    sgraph_layer_names = set(k for k, i, j in sgraph.named_params_layers())
    for layer_name in sgraph_layer_names:
        assert sgraph.find_op(layer_name) is not None, \
            '{} was not found in summary graph'.format(layer_name)
def test_weights_size_attr(dataset, arch, parallel):
    model = create_model(False, dataset, arch, parallel=parallel)
    sgraph = SummaryGraph(model, distiller.get_dummy_input(dataset))

    distiller.assign_layer_fq_names(model)
    for name, mod in model.named_modules():
        if isinstance(mod, (nn.Conv2d, nn.Linear)):
            op = sgraph.find_op(name)
            assert op is not None
            assert op['attrs']['weights_vol'] == distiller.volume(mod.weight)
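# The (dataset, arch, parallel) arguments consumed by the tests above are normally injected via
# pytest parametrization defined elsewhere in the test module. The sketch below is a hypothetical,
# minimal example of driving one of them; the dataset/arch values are illustrative only.
@pytest.mark.parametrize('dataset, arch, parallel', [('cifar10', 'resnet20_cifar', False),
                                                     ('cifar10', 'resnet20_cifar', True)])
def test_weights_size_attr_demo(dataset, arch, parallel):
    test_weights_size_attr(dataset, arch, parallel)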
def test_scope_name_workarounds():
    class ModelWithGemms(nn.Module):
        def __init__(self):
            super(ModelWithGemms, self).__init__()
            self.drop1 = nn.Dropout()
            self.fc1 = nn.Linear(100, 50)
            self.relu1 = nn.ReLU(inplace=True)
            self.drop2 = nn.Dropout()
            self.fc2 = nn.Linear(50, 25)
            self.relu2 = nn.ReLU(inplace=True)
            self.fc3 = nn.Linear(25, 1)
            self.drop3 = nn.Dropout()

        def forward(self, x):
            x = self.drop1(x)
            x = self.fc1(x)
            x = self.relu1(x)
            x = self.drop2(x)
            x = self.fc2(x)
            x = self.relu2(x)
            x = self.fc3(x)
            x = self.drop3(x)
            return x

    m = ModelWithGemms()
    dummy_input = distiller.get_dummy_input(input_shape=(1, 100))
    expected_types = ('Gemm', 'Relu', 'Gemm', 'Relu', 'Gemm')

    # We have a workaround for the following issue (there used to be 2 issues, but one was fixed in
    # PyTorch 1.2):
    #   * Ops that come before a dropout op get the scope name of the dropout op
    # For the model above we expect the ops in the graph to be named (in order):
    #   'fc1', 'relu1', 'fc2', 'relu2', 'fc3'
    # (note that dropout ops are dropped)
    #
    # But since 'relu1' and 'fc3' come before a dropout op, without the workaround in place we'll get:
    #   'fc1', 'drop2', 'fc2', 'relu2', 'drop3'

    # We test without the workaround as a means of checking whether the issue still exists. New PyTorch
    # versions may fix it, in which case we can remove the workaround
    sg = SummaryGraph(m, dummy_input, apply_scope_name_workarounds=False)
    names, types = zip(*[(op_name, op['type']) for op_name, op in sg.ops.items()])
    assert names == ('fc1', 'drop2', 'fc2', 'relu2', 'drop3')
    assert types == expected_types

    # Now test with the workaround
    sg = SummaryGraph(m, dummy_input)
    names, types = zip(*[(op_name, op['type']) for op_name, op in sg.ops.items()])
    assert names == ('fc1', 'relu1', 'fc2', 'relu2', 'fc3')
    assert types == expected_types
def test_gemm_nodes_scope_names():
    class ModelWithGemms(nn.Module):
        def __init__(self):
            super(ModelWithGemms, self).__init__()
            self.drop1 = nn.Dropout()
            self.fc1 = nn.Linear(100, 50)
            self.relu1 = nn.ReLU(inplace=True)
            self.drop2 = nn.Dropout()
            self.fc2 = nn.Linear(50, 25)
            self.relu2 = nn.ReLU(inplace=True)
            self.fc3 = nn.Linear(25, 1)

        def forward(self, x):
            # Isn't this pretty...
            return self.fc3(self.relu2(self.fc2(self.drop2(self.relu1(self.fc1(self.drop1(x)))))))

    m = ModelWithGemms()
    sg = SummaryGraph(m, distiller.get_dummy_input(input_shape=(1, 100)))

    # For the model above we expect the ops to be named (in order):
    #   'drop1', 'fc1', 'relu1', 'drop2', 'fc2', 'relu2', 'fc3'
    # But without our workaround in place, they'll be named:
    #   'drop1', 'drop1__1', 'relu1', 'drop2', 'drop2__1', 'relu2', 'relu2__1'
    # (that is - each FC node gets the name of the node that precedes it)
    names, types = zip(*[(op_name, op['type']) for op_name, op in sg.ops.items()])
    assert names == ('drop1', 'fc1', 'relu1', 'drop2', 'fc2', 'relu2', 'fc3')
    assert types == ('Dropout', 'Gemm', 'Relu', 'Dropout', 'Gemm', 'Relu', 'Gemm')
def create_graph(dataset, arch):
    dummy_input = get_input(dataset)
    assert dummy_input is not None, "Unsupported dataset ({}) - aborting draw operation".format(dataset)

    model = create_model(False, dataset, arch, parallel=False)
    assert model is not None
    return SummaryGraph(model, dummy_input)
def collect_conv_details(model, dataset, perform_thinning, layers_to_prune=None):
    dummy_input = distiller.get_dummy_input(dataset)
    g = SummaryGraph(model, dummy_input)
    conv_layers = OrderedDict()
    total_macs = 0
    total_params = 0
    for id, (name, m) in enumerate(model.named_modules()):
        if isinstance(m, torch.nn.Conv2d):
            conv = SimpleNamespace()
            conv.t = len(conv_layers)
            conv.k = m.kernel_size[0]
            conv.stride = m.stride

            # Use the SummaryGraph to obtain some other details of the model
            conv_op = g.find_op(normalize_module_name(name))
            assert conv_op is not None

            conv.weights_vol = conv_op['attrs']['weights_vol']
            total_params += conv.weights_vol
            conv.macs = conv_op['attrs']['MACs']
            conv_pname = name + ".weight"
            conv_p = distiller.model_find_param(model, conv_pname)
            if not perform_thinning:
                #conv.macs *= distiller.density_ch(conv_p)   # Channel pruning
                conv.macs *= distiller.density_3D(conv_p)    # Filter pruning
            total_macs += conv.macs

            conv.ofm_h = g.param_shape(conv_op['outputs'][0])[2]
            conv.ofm_w = g.param_shape(conv_op['outputs'][0])[3]
            conv.ifm_h = g.param_shape(conv_op['inputs'][0])[2]
            conv.ifm_w = g.param_shape(conv_op['inputs'][0])[3]

            conv.name = name
            conv.id = id
            if layers_to_prune is None or name in layers_to_prune:
                conv_layers[len(conv_layers)] = conv
    return conv_layers, total_macs, total_params
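# A minimal usage sketch of collect_conv_details() (assumes the same imports as the code above:
# distiller, create_model, etc.). The 'cifar10' / 'resnet20_cifar' pair is just one example of a
# dataset/arch combination that create_model accepts.
def collect_conv_details_demo():
    model = create_model(False, 'cifar10', 'resnet20_cifar', parallel=False)
    conv_layers, total_macs, total_params = collect_conv_details(model, 'cifar10', perform_thinning=True)
    for idx, conv in conv_layers.items():
        print('{}: {}  MACs={}  weights={}'.format(idx, conv.name, conv.macs, conv.weights_vol))
    print('total MACs={}  total params={}'.format(total_macs, total_params))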
def test_merge_pad_avgpool():
    class ModelWithAvgPool(nn.Module):
        def __init__(self):
            super(ModelWithAvgPool, self).__init__()
            self.conv = nn.Conv2d(3, 10, 5)
            self.avgpool = nn.AvgPool2d(2)

        def forward(self, input):
            return self.avgpool(self.conv(input))

    m = ModelWithAvgPool()
    sg = SummaryGraph(m, distiller.get_dummy_input(input_shape=(1, 3, 50, 50)))

    avgpool_ops = [op_name for op_name in sg.ops if 'avgpool' in op_name]
    assert len(avgpool_ops) == 1
    assert sg.ops[avgpool_ops[0]]['name'] == 'avgpool'
    assert sg.ops[avgpool_ops[0]]['type'] == 'AveragePool'
def create_graph(dataset, arch, parallel=False):
    dummy_input = distiller.get_dummy_input(dataset)
    model = create_model(False, dataset, arch, parallel)
    assert model is not None
    return SummaryGraph(model, dummy_input)
def test_adjacency_map(parallel, dedicated_modules):
    class TestModel(nn.Module):
        def __init__(self):
            super(TestModel, self).__init__()
            self.conv = nn.Conv2d(3, 10, 5)
            self.bn = nn.BatchNorm2d(10)
            self.post_conv_bn = nn.ModuleList([nn.Tanh(), nn.ReLU()])

        def forward(self, x):
            res = self.conv(x)
            y = self.bn(res)
            for m in self.post_conv_bn:
                y = m(y)
            return y + res

    def check_adj_entry(actual, expected):
        assert actual.op_meta == expected.op_meta
        assert actual.predecessors == expected.predecessors
        assert actual.successors == expected.successors

    prefix = 'module.' if parallel else ''

    m = TestModel()
    if parallel:
        m = nn.DataParallel(m)
    sg = SummaryGraph(m, distiller.get_dummy_input(input_shape=(1, 3, 10, 10)))
    adj_map = sg.adjacency_map(dedicated_modules_only=dedicated_modules)

    if dedicated_modules:
        assert len(adj_map) == 4
    else:
        assert len(adj_map) == 5

    conv_op_meta = OpSimpleMetadata(prefix + 'conv', 'Conv')
    bn_op_meta = OpSimpleMetadata(prefix + 'bn', 'BatchNormalization')
    tanh_op_meta = OpSimpleMetadata(prefix + 'post_conv_bn.0', 'Tanh')
    relu_op_meta = OpSimpleMetadata(prefix + 'post_conv_bn.1', 'Relu')
    add_op_meta = OpSimpleMetadata('top_level_op', 'Add')

    name = conv_op_meta.name
    assert name in adj_map
    expected = AdjacentsEntry(conv_op_meta)
    expected.successors = [bn_op_meta] if dedicated_modules else [bn_op_meta, add_op_meta]
    check_adj_entry(adj_map[name], expected)

    name = bn_op_meta.name
    assert name in adj_map
    expected = AdjacentsEntry(bn_op_meta)
    expected.predecessors = [conv_op_meta]
    expected.successors = [tanh_op_meta]
    check_adj_entry(adj_map[name], expected)

    name = tanh_op_meta.name
    assert name in adj_map
    expected = AdjacentsEntry(tanh_op_meta)
    expected.predecessors = [bn_op_meta]
    expected.successors = [relu_op_meta]
    check_adj_entry(adj_map[name], expected)

    name = relu_op_meta.name
    assert name in adj_map
    expected = AdjacentsEntry(relu_op_meta)
    expected.predecessors = [tanh_op_meta]
    expected.successors = [] if dedicated_modules else [add_op_meta]
    check_adj_entry(adj_map[name], expected)

    name = add_op_meta.name
    if dedicated_modules:
        assert name not in adj_map
    else:
        assert name in adj_map
        expected = AdjacentsEntry(add_op_meta)
        expected.predecessors = [relu_op_meta, conv_op_meta]
        check_adj_entry(adj_map[name], expected)
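# A minimal usage sketch of SummaryGraph.adjacency_map() (assumes the same imports as the test
# above: nn, distiller, SummaryGraph). The tiny Sequential model is hypothetical; the loop just
# prints the AdjacentsEntry structure (op_meta, predecessors, successors) that the test verifies.
def adjacency_map_demo():
    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
    sg = SummaryGraph(model, distiller.get_dummy_input(input_shape=(1, 3, 10, 10)))
    for name, entry in sg.adjacency_map().items():
        preds = [p.name for p in entry.predecessors]
        succs = [s.name for s in entry.successors]
        print('{} ({}): predecessors={} successors={}'.format(name, entry.op_meta.type, preds, succs))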
def get_network_details(model, dataset, dependency_type, layers_to_prune=None):
    def make_conv(model, conv_module, g, name, seq_id, layer_id):
        conv = SimpleNamespace()
        conv.name = name
        conv.id = layer_id
        conv.t = seq_id
        conv.k = conv_module.kernel_size[0]
        conv.stride = conv_module.stride

        # Use the SummaryGraph to obtain some other details of the model
        conv_op = g.find_op(normalize_module_name(name))
        assert conv_op is not None

        conv.weights_vol = conv_op['attrs']['weights_vol']
        conv.macs = conv_op['attrs']['MACs']
        conv.n_ofm = conv_op['attrs']['n_ofm']
        conv.n_ifm = conv_op['attrs']['n_ifm']
        conv_pname = name + ".weight"
        conv_p = distiller.model_find_param(model, conv_pname)
        conv.ofm_h = g.param_shape(conv_op['outputs'][0])[2]
        conv.ofm_w = g.param_shape(conv_op['outputs'][0])[3]
        conv.ifm_h = g.param_shape(conv_op['inputs'][0])[2]
        conv.ifm_w = g.param_shape(conv_op['inputs'][0])[3]
        return conv

    def make_fc(model, fc_module, g, name, seq_id, layer_id):
        fc = SimpleNamespace()
        fc.name = name
        fc.id = layer_id
        fc.t = seq_id

        # Use the SummaryGraph to obtain some other details of the model
        fc_op = g.find_op(normalize_module_name(name))
        assert fc_op is not None

        fc.weights_vol = fc_op['attrs']['weights_vol']
        fc.macs = fc_op['attrs']['MACs']
        fc.n_ofm = fc_op['attrs']['n_ofm']
        fc.n_ifm = fc_op['attrs']['n_ifm']
        fc_pname = name + ".weight"
        fc_p = distiller.model_find_param(model, fc_pname)
        return fc

    dummy_input = distiller.get_dummy_input(dataset)
    g = SummaryGraph(model, dummy_input)
    all_layers = OrderedDict()
    pruned_indices = list()
    dependent_layers = set()
    total_macs = 0
    total_params = 0

    layers = OrderedDict({mod_name: m for mod_name, m in model.named_modules()
                          if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear))})
    for layer_id, (name, m) in enumerate(layers.items()):
        if isinstance(m, torch.nn.Conv2d):
            conv = make_conv(model, m, g, name, seq_id=len(pruned_indices), layer_id=layer_id)
            all_layers[layer_id] = conv
            total_params += conv.weights_vol
            total_macs += conv.macs

            if layers_to_prune is None or name in layers_to_prune:
                pruned_indices.append(layer_id)
                # Find the data-dependent layers of this convolution
                from utils.data_dependencies import find_dependencies
                conv.dependencies = list()
                find_dependencies(dependency_type, g, all_layers, name, conv.dependencies)
                dependent_layers.add(tuple(conv.dependencies))

        elif isinstance(m, torch.nn.Linear):
            fc = make_fc(model, m, g, name, seq_id=len(pruned_indices), layer_id=layer_id)
            all_layers[layer_id] = fc
            total_macs += fc.macs
            total_params += fc.weights_vol

    def convert_layer_names_to_indices(layer_names):
        """Args:
               layer_names - list of layer names
           Returns:
               list of layer indices
        """
        layer_indices = [index for name in layer_names for index, layer in all_layers.items()
                         if layer.name == name[0]]
        return layer_indices

    dependent_indices = convert_layer_names_to_indices(dependent_layers)
    return all_layers, pruned_indices, dependent_indices, total_macs, total_params
def layers_topological_order(model, dummy_input, recurrent=False):
    """
    Prepares an ordered list of layers to quantize sequentially. This list has all the layers ordered by
    their topological order in the graph.
    Args:
        model (nn.Module): the model to quantize.
        dummy_input (torch.Tensor): an input to be passed through the model.
        recurrent (bool): indication on whether the model might have recurrent connections.
    """
    class _OpRank:
        def __init__(self, adj_entry, rank=None):
            self.adj_entry = adj_entry
            self._rank = rank or 0

        @property
        def rank(self):
            return self._rank

        @rank.setter
        def rank(self, val):
            self._rank = max(val, self._rank)

        def __repr__(self):
            return '_OpRank(\'%s\' | %d)' % (self.adj_entry.op_meta.name, self.rank)

    adj_map = SummaryGraph(model, dummy_input).adjacency_map()
    ranked_ops = {k: _OpRank(v, 0) for k, v in adj_map.items()}

    def _recurrent_ancestor(ranked_ops_dict, dest_op_name, src_op_name):
        def _is_descendant(parent_op_name, dest_op_name):
            successors_names = [op.name for op in adj_map[parent_op_name].successors]
            if dest_op_name in successors_names:
                return True
            for succ_name in successors_names:
                if _is_descendant(succ_name, dest_op_name):
                    return True
            return False

        return _is_descendant(dest_op_name, src_op_name) and \
            (0 < ranked_ops_dict[dest_op_name].rank < ranked_ops_dict[src_op_name].rank)

    def rank_op(ranked_ops_dict, op_name, rank):
        ranked_ops_dict[op_name].rank = rank
        for child_op in adj_map[op_name].successors:
            # In recurrent models: if a successor is also an ancestor - we don't increment its rank.
            if not recurrent or not _recurrent_ancestor(ranked_ops_dict, child_op.name, op_name):
                rank_op(ranked_ops_dict, child_op.name, ranked_ops_dict[op_name].rank + 1)

    roots = [k for k, v in adj_map.items() if len(v.predecessors) == 0]
    for root_op_name in roots:
        rank_op(ranked_ops, root_op_name, 0)

    # Take only the modules from the original model
    # module_dict = dict(model.named_modules())  # Neta
    ret = sorted([k for k in ranked_ops.keys()], key=lambda k: ranked_ops[k].rank)

    # Check that only the actual roots have a rank of 0
    assert {k for k in ret if ranked_ops[k].rank == 0} <= set(roots)
    return ret
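# A minimal usage sketch of layers_topological_order() (assumes the same imports as the code above:
# nn, distiller). The tiny Sequential model is hypothetical and only illustrates the call.
def layers_topological_order_demo():
    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 8, 3), nn.ReLU())
    dummy_input = distiller.get_dummy_input(input_shape=(1, 3, 32, 32))
    ordered_op_names = layers_topological_order(model, dummy_input)
    print(ordered_op_names)  # op names sorted by topological rank, roots first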