Example #1
    def successors_f(self,
                     node_name,
                     successors_types,
                     done_list=None,
                     logging=None):
        """Returns a list of <op>'s successors, if they match the <successors_types> criteria.

        Traverse the graph, starting at node <node_name>, and search for successor
        nodes that have one of the node types listed in <successors_types>.
        If none is found, an empty list is returned.

        <node_name> and the returned list of successors are strings (node names),
        since the graph is traversed by name rather than by node object.
        """
        node_name = distiller.normalize_module_name(node_name)
        node = self.find_op(node_name)
        node_is_an_op = True
        if node is None:
            node_is_an_op = False
            node = self.find_param(node_name)
            if node is None:
                # Node not found as an op or as a parameter - nothing to traverse
                return []

        if done_list is None:
            done_list = []

        done_list.append(node_name)

        if not isinstance(successors_types, list):
            successors_types = [successors_types]

        if node_is_an_op:
            # We check if we found the type of node we're looking for,
            # and that this is not the first node in our search.
            if node['type'] in successors_types and len(done_list) > 1:
                return [
                    distiller.denormalize_module_name(self._src_model,
                                                      node_name)
                ]

            # This is an operation node
            succs = [
                edge.dst for edge in self.edges
                if (edge.src == node_name and edge.dst not in done_list)
            ]
        else:
            # This is a data node
            succs = [
                edge.dst for edge in self.edges
                if (edge.src == node_name and edge.dst not in done_list)
            ]
        ret = []
        for successor in succs:
            ret += self.successors_f(successor, successors_types, done_list,
                                     logging)

        return [
            distiller.denormalize_module_name(self._src_model, node)
            for node in ret
        ]
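
A minimal usage sketch for successors_f (assuming a Distiller installation; distiller.SummaryGraph is the class these methods belong to, and the exact import path and layer names may vary by model and Distiller version):

import torch
import torchvision
import distiller

model = torchvision.models.resnet18()
sgraph = distiller.SummaryGraph(model, torch.randn(1, 3, 224, 224))

# Starting from the first convolution, walk forward through the graph until
# ops of type 'Conv' are reached; the returned names are denormalized, so they
# can be looked up directly in model.named_modules().
print(sgraph.successors_f('conv1', ['Conv']))
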
Example #2
def create_thinning_recipe_channels(sgraph, model, zeros_mask_dict):
    """Create a recipe for removing channels from Convolution layers.

    The 4D weights of the model parameters (i.e. the convolution parameters) are
    examined one by one, to determine which have channels that are all zeros.
    For each weights tensor that has at least one zero-channel, we create a
    "thinning recipe".
    The thinning recipe contains meta-instructions of how the model
    should be changed in order to remove the channels.
    """
    msglogger.info("Invoking create_thinning_recipe_channels")

    thinning_recipe = ThinningRecipe(modules={}, parameters={})
    layers = {mod_name : m for mod_name, m in model.named_modules()}

    # Traverse all of the model's parameters, search for zero-channels, and
    # create a thinning recipe that describes the required changes to the model.
    for param_name, param in model.named_parameters():
        # We are only interested in 4D weights (of Convolution layers)
        if param.dim() != 4:
            continue

        num_channels = param.size(1)
        nonzero_channels = find_nonzero_channels(param, param_name)

        # If all channels in this tensor are non-zero, there is nothing to prune - skip to the next tensor
        if num_channels <= len(nonzero_channels):
            continue

        # We are removing channels, so update the number of incoming channels (IFMs)
        # in the convolutional layer
        layer_name = param_name_2_layer_name(param_name)
        assert isinstance(layers[layer_name], torch.nn.modules.Conv2d)
        append_module_directive(thinning_recipe, layer_name, key='in_channels', val=len(nonzero_channels))

        # Select only the non-zero filters
        indices = nonzero_channels.data.squeeze()
        append_param_directive(thinning_recipe, param_name, (1, indices))

        # Find all instances of Convolution layers that immediately precede this layer
        predecessors = sgraph.predecessors_f(normalize_module_name(layer_name), ['Conv'])
        # Convert the layer names to PyTorch's convoluted naming scheme (when DataParallel is used)
        predecessors = [denormalize_module_name(model, predecessor) for predecessor in predecessors]
        for predecessor in predecessors:
            # For each of the convolutional layers that precede this one, reduce the number of output channels.
            append_module_directive(thinning_recipe, predecessor, key='out_channels', val=len(nonzero_channels))

            # Now remove the pruned channels' filters (dim 0) from the weights tensor of the predecessor conv
            append_param_directive(thinning_recipe, predecessor+'.weight', (0, indices))

        # Now handle the BatchNormalization layer that precedes the convolution
        bn_layers = sgraph.predecessors_f(normalize_module_name(layer_name), ['BatchNormalization'])
        if len(bn_layers) > 0:
            assert len(bn_layers) == 1
            # Thinning of the BN layer that precedes the convolution
            bn_layer_name = denormalize_module_name(model, bn_layers[0])
            bn_thinning(thinning_recipe, layers, bn_layer_name, len_thin_features=len(nonzero_channels), thin_features=indices)

    return thinning_recipe
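
find_nonzero_channels, used above, is not shown in this example. The following is a simplified sketch of what such a helper might do (the helper name and shapes are illustrative; this is not the library implementation):

import torch

def find_nonzero_channels_sketch(param, param_name):
    """Return the indices of input channels of a 4D conv weight that are not all-zero."""
    num_filters, num_channels = param.size(0), param.size(1)
    # Sum |w| over the kernel for every (filter, channel) pair, then over the filters,
    # leaving one L1 magnitude per input channel.
    channel_l1 = param.detach().abs().view(num_filters, num_channels, -1).sum(dim=(0, 2))
    return torch.nonzero(channel_l1)          # shape: (num_nonzero_channels, 1)

w = torch.randn(8, 4, 3, 3)
w[:, 2, :, :] = 0                             # zero-out input channel 2 everywhere
print(find_nonzero_channels_sketch(w, 'conv1.weight').squeeze())   # tensor([0, 1, 3])
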
Example #3
    def predecessors_f(self,
                       node_name,
                       predecessors_types,
                       done_list=None,
                       logging=None):
        """Returns a list of <op>'s predecessors, if they match the <predecessors_types> criteria.
        """
        node_name = distiller.normalize_module_name(node_name)
        node = self.find_op(node_name)
        node_is_an_op = True
        if node is None:
            node_is_an_op = False
            node = self.find_param(node_name)
            if node is None:
                msglogger.warning(
                    "predecessors_f: Could not find node {}".format(node_name))
                return []

        if done_list is None:
            done_list = []

        done_list.append(node_name)

        if not isinstance(predecessors_types, list):
            predecessors_types = [predecessors_types]

        if node_is_an_op:
            # We check if we found the type of node we're looking for,
            # and that this is not the first node in our search.
            if node['type'] in predecessors_types and len(done_list) > 1:
                return [
                    distiller.denormalize_module_name(self._src_model,
                                                      node_name)
                ]

            # This is an operation node
            preds = [
                edge.src for edge in self.edges
                if (edge.dst == node_name and edge.src not in done_list)
            ]
        else:
            # This is a data node
            preds = [
                edge.src for edge in self.edges
                if (edge.dst == node_name and edge.src not in done_list)
            ]
        ret = []
        for predecessor in preds:
            ret += self.predecessors_f(predecessor, predecessors_types,
                                       done_list, logging)

        return [
            distiller.denormalize_module_name(self._src_model, node)
            for node in ret
        ]
Example #4
    def successors(self, node, depth, done_list=None, denorm_names=True):
        """Returns a list of <op>'s successors"""
        if done_list is None:
            done_list = []

        node_name = node['name'] if isinstance(node, dict) else node
        succs = [
            edge.dst for edge in self.edges
            if (edge.src == node_name and edge.dst not in done_list)
        ]
        done_list += succs

        if depth == 1:
            ret = succs
        else:
            ret = []
            for successor in succs:
                ret += self.successors(successor, depth - 1, done_list,
                                       denorm_names)

        if denorm_names:
            ret = [
                distiller.denormalize_module_name(self._src_model, x)
                for x in ret
            ]
        return ret
Example #5
def append_module_directive(model, thinning_recipe, module_name, key, val):
    msglogger.debug("\t[recipe] setting {}.{} = {}".format(
        module_name, key, val))
    module_name = denormalize_module_name(model, module_name)
    mod_directive = thinning_recipe.modules.get(module_name, {})
    mod_directive[key] = val
    thinning_recipe.modules[module_name] = mod_directive
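
For context, the recipe these directives are written into is a simple namedtuple of two dicts, as the ThinningRecipe(modules={}, parameters={}) calls in the other examples suggest. A small sketch with a hypothetical layer name:

from collections import namedtuple

# Mirrors the container used in the thinning examples: 'modules' holds attribute
# changes per layer, 'parameters' holds tensor-slicing directives per parameter.
ThinningRecipe = namedtuple('ThinningRecipe', ['modules', 'parameters'])

recipe = ThinningRecipe(modules={}, parameters={})

# The same update that append_module_directive performs, written out by hand:
mod_directive = recipe.modules.get('features.conv2', {})
mod_directive['in_channels'] = 12
recipe.modules['features.conv2'] = mod_directive

print(recipe.modules)   # {'features.conv2': {'in_channels': 12}}
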
Example #6
    def successors(self, node, depth, done_list=None):
        """Returns a list of <op>'s successors"""
        if done_list is None:
            done_list = []

        if isinstance(node, dict):
            # This is an operation node
            succs = [
                edge.dst
                for edge in self.edges
                if (edge.src == node["name"] and edge.dst not in done_list)
            ]
            done_list += succs
        else:
            # This is a data node
            succs = [
                edge.dst
                for edge in self.edges
                if (edge.src == node and edge.dst not in done_list)
            ]
            done_list += succs

        if depth == 1:
            ret = succs
        else:
            ret = []
            for successor in succs:
                ret += self.successors(successor, depth - 1, done_list)

        return [distiller.denormalize_module_name(self._src_model, x) for x in ret]
Example #7
    def predecessors(self, op, depth, done_list=None):
        """Returns a list of <op>'s predecessors"""
        if done_list is None:
            done_list = []

        if isinstance(op, dict):
            preds = [
                edge.src
                for edge in self.edges
                if (edge.dst == op["name"] and edge.src not in done_list)
            ]
            done_list += preds
        else:
            preds = [
                edge.src
                for edge in self.edges
                if (edge.dst == op and edge.src not in done_list)
            ]
            done_list += preds

        if depth == 1:
            ret = preds
        else:
            ret = []
            for predecessor in preds:
                ret += self.predecessors(predecessor, depth - 1, done_list)

        return [distiller.denormalize_module_name(self._src_model, x) for x in ret]
Example #8
    def named_params_layers(self):
        for param_name, param in self._src_model.named_parameters():
            # Remove the extension of param_name (e.g. ".weight"), and then normalize it
            # to create a normalized layer name
            normalized_layer_name = distiller.normalize_module_name(
                '.'.join(param_name.split('.')[:-1]))
            sgraph_layer_name = distiller.denormalize_module_name(
                self._src_model, normalized_layer_name)
            yield sgraph_layer_name, param_name, param
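
The normalize/denormalize pair used throughout these examples strips and restores the "module." component that torch.nn.DataParallel inserts into module names. A minimal round-trip sketch (the toy model is hypothetical):

import torch.nn as nn
import distiller

class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 8, kernel_size=3)

parallel_model = nn.DataParallel(TinyNet())

# DataParallel prefixes every submodule name with "module."
print([name for name, _ in parallel_model.named_modules()])
# ['', 'module', 'module.conv1']

normalized = distiller.normalize_module_name('module.conv1')              # 'conv1'
restored = distiller.denormalize_module_name(parallel_model, normalized)  # 'module.conv1'
print(normalized, restored)
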
Example #9
def name_test(dataset, arch):
    model = create_model(False, dataset, arch, parallel=False)
    modelp = create_model(False, dataset, arch, parallel=True)
    assert model is not None and modelp is not None

    mod_names   = [mod_name for mod_name, _ in model.named_modules()]
    mod_names_p = [mod_name for mod_name, _ in modelp.named_modules()]
    assert mod_names is not None and mod_names_p is not None
    assert len(mod_names)+1 == len(mod_names_p)

    for i in range(len(mod_names)-1):
        assert mod_names[i+1] == normalize_module_name(mod_names_p[i+2])
        logging.debug("{} {} {}".format(mod_names_p[i+2], mod_names[i+1], normalize_module_name(mod_names_p[i+2])))
        assert mod_names_p[i+2] == denormalize_module_name(modelp, mod_names[i+1])
Example #10
    def adjacency_map(self, dedicated_modules_only=False):
        """Returns a mapping from each op in the graph to its immediate predecessors and successors.

        The keys in the generated mapping are op names, and the values are instances of AdjacentsEntry.

        The op names are "de-normalized", meaning they can be used directly with the underlying model's
        named_modules(), for example.

        Args:
            dedicated_modules_only (bool): If set, the generated mapping will not include any ops that can't be
              associated with a dedicated module within the underlying model. Examples of this will be
              functional calls, such as "F.relu()", and tensor operations, such as "t3 = t1 + t2".
        """
        adj_map = OrderedDict()

        for op_name, op in self.ops.items():

            def dedicated_module_check(n):
                module_name = self.ops[distiller.normalize_module_name(n)]['module-name']
                return len(self.module_ops_map[module_name]) == 1 or not dedicated_modules_only

            if not dedicated_module_check(op_name):
                continue

            entry = AdjacentsEntry()
            # Find the immediate preceding and succeeding modules. Depth of 1 gets us the
            # input and output tensors, depth of 2 gets the actual modules
            entry.predecessors = [
                n for n in self.predecessors(op, 2)
                if dedicated_module_check(n)
            ]
            entry.successors = [
                n for n in self.successors(op, 2) if dedicated_module_check(n)
            ]

            adj_map[distiller.denormalize_module_name(self._src_model,
                                                      op_name)] = entry

        return adj_map
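
A hedged usage sketch for adjacency_map, built on the same hypothetical SummaryGraph setup as the sketch after Example #1. In this version of the method, predecessors and successors are plain, denormalized op-name strings (other Distiller versions wrap them in metadata objects):

import torch
import torchvision
import distiller

model = torchvision.models.resnet18()
sgraph = distiller.SummaryGraph(model, torch.randn(1, 3, 224, 224))

# Only keep ops that map to a dedicated module in the model (skips functional
# calls and tensor ops such as additions).
adj_map = sgraph.adjacency_map(dedicated_modules_only=True)
for op_name, entry in list(adj_map.items())[:3]:
    print(op_name, '| preds:', entry.predecessors, '| succs:', entry.successors)
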
Example #11
def append_module_directive(model, thinning_recipe, module_name, key, val):
    module_name = denormalize_module_name(model, module_name)
    mod_directive = thinning_recipe.modules.get(module_name, {})
    mod_directive[key] = val
    thinning_recipe.modules[module_name] = mod_directive
Example #12
def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):
    """Create a recipe for removing filters from Convolution layers.
    The 4D weights of the model parameters (i.e. the convolution parameters) are
    examined one by one, to determine which have filters that are all zeros.
    For each weights tensor that has at least one zero-filter, we create a
    "thinning recipe".
    The thinning recipe contains meta-instructions of how the model
    should be changed in order to remove the filters.
    """
    msglogger.info("Invoking create_thinning_recipe_filters")

    thinning_recipe = ThinningRecipe(modules={}, parameters={})
    layers = {mod_name: m for mod_name, m in model.named_modules()}

    for param_name, param in model.named_parameters():
        # We are only interested in 4D weights
        if param.dim() != 4:
            continue

        # Find the number of zero-valued filters in this weights tensor
        filter_view = param.view(param.size(0), -1)
        num_filters = filter_view.size()[0]
        nonzero_filters = torch.nonzero(filter_view.abs().sum(dim=1))
        num_nnz_filters = nonzero_filters.nelement()
        if num_nnz_filters == 0:
            raise ValueError(
                "Trying to set zero filters for parameter %s is not allowed" %
                param_name)
        # If all filters in this tensor are non-zero, there is nothing to prune - skip to the next tensor
        if num_filters <= num_nnz_filters:
            msglogger.debug("Skipping {} shape={}".format(
                param_name_2_layer_name(param_name), param.shape))
            continue

        msglogger.info("In tensor %s found %d/%d zero filters", param_name,
                       num_filters - num_nnz_filters, num_filters)

        # We are removing filters, so update the number of outgoing channels (OFMs)
        # in the convolutional layer
        layer_name = param_name_2_layer_name(param_name)
        assert isinstance(layers[layer_name], torch.nn.modules.Conv2d)
        append_module_directive(model,
                                thinning_recipe,
                                layer_name,
                                key='out_channels',
                                val=num_nnz_filters)

        # Select only the non-zero filters
        indices = nonzero_filters.data.squeeze()
        append_param_directive(thinning_recipe, param_name, (0, indices))

        if layers[layer_name].bias is not None:
            # This convolution has bias coefficients
            append_param_directive(thinning_recipe, layer_name + '.bias',
                                   (0, indices))

        # Find all instances of Convolution or FC (GEMM) layers that immediately follow this layer
        successors = sgraph.successors_f(normalize_module_name(layer_name),
                                         ['Conv', 'Gemm'])
        # Convert the layer names to PyTorch's convoluted naming scheme (when DataParallel is used)
        successors = [
            denormalize_module_name(model, successor)
            for successor in successors
        ]
        for successor in successors:

            if isinstance(layers[successor], torch.nn.modules.Conv2d):
                # For each of the convolutional layers that follow, we have to reduce the number of input channels.
                append_module_directive(model,
                                        thinning_recipe,
                                        successor,
                                        key='in_channels',
                                        val=num_nnz_filters)
                msglogger.debug("[recipe] {}: setting in_channels = {}".format(
                    successor, num_nnz_filters))

                # Now remove channels from the weights tensor of the successor conv
                append_param_directive(
                    thinning_recipe,
                    denormalize_module_name(model, successor) + '.weight',
                    (1, indices))

            elif isinstance(layers[successor], torch.nn.modules.Linear):
                # If a Linear (Fully-Connected) layer follows, we need to update its in_features member
                fm_size = layers[successor].in_features // layers[
                    layer_name].out_channels
                in_features = fm_size * num_nnz_filters
                append_module_directive(model,
                                        thinning_recipe,
                                        successor,
                                        key='in_features',
                                        val=in_features)
                msglogger.debug("[recipe] {}: setting in_features = {}".format(
                    successor, in_features))

                # Now remove channels from the weights tensor of the successor FC layer:
                # This is a bit tricky:
                fm_height = fm_width = int(math.sqrt(fm_size))
                view_4D = (layers[successor].out_features,
                           layers[layer_name].out_channels, fm_height,
                           fm_width)
                view_2D = (layers[successor].out_features, in_features)
                append_param_directive(
                    thinning_recipe,
                    denormalize_module_name(model, successor) + '.weight',
                    (1, indices, view_4D, view_2D))

        # Now handle the BatchNormalization layer that follows the convolution
        bn_layers = sgraph.successors_f(normalize_module_name(layer_name),
                                        ['BatchNormalization'])
        if len(bn_layers) > 0:
            assert len(bn_layers) == 1
            # Thinning of the BN layer that follows the convolution
            bn_layer_name = denormalize_module_name(model, bn_layers[0])
            bn_thinning(thinning_recipe,
                        layers,
                        bn_layer_name,
                        len_thin_features=num_nnz_filters,
                        thin_features=indices)
    return thinning_recipe
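
To make the Linear-layer bookkeeping above concrete, here is a small worked sketch with illustrative numbers (not taken from any specific model); it mirrors the fm_size / view_4D / view_2D computation that the recipe records, applied directly to a random weight tensor:

import math
import torch

# Illustrative numbers only: a conv with 64 output filters feeding a Linear
# layer whose in_features is 3136, i.e. a 7x7 feature map per filter.
out_channels, in_features, out_features = 64, 3136, 10
fm_size = in_features // out_channels              # 49 elements per feature map
fm_height = fm_width = int(math.sqrt(fm_size))     # 7 x 7

keep = torch.arange(0, out_channels, 2)            # pretend 32 filters survive pruning
new_in_features = fm_size * keep.nelement()        # 49 * 32 = 1568

fc_weight = torch.randn(out_features, in_features)
view_4D = fc_weight.view(out_features, out_channels, fm_height, fm_width)
thinned = view_4D.index_select(1, keep)            # drop the columns of pruned filters
fc_weight_new = thinned.contiguous().view(out_features, new_in_features)
print(fc_weight_new.shape)                         # torch.Size([10, 1568])
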
Example #13
    def __init__(self, model, dummy_input, apply_scope_name_workarounds=True):
        self._src_model = model
        model_clone = distiller.make_non_parallel_copy(model)

        # Switch all instances of torch.nn.ModuleList in the model to our DistillerModuleList
        # See documentation of _DistillerModuleList class for details on why this is done
        model_clone, converted_module_names_map = _to_distiller_modulelist(model_clone)

        with torch.onnx.set_training(model_clone, False):
            
            device = distiller.model_device(model_clone)
            dummy_input = distiller.convert_tensors_recursively_to(dummy_input, device=device)
            trace, _ = jit.get_trace_graph(model_clone, dummy_input, _force_outplace=True)

            # As of PyTorch 1.1.0, ONNX trace optimization has two issues that result in incorrect scope names
            # of nodes in the trace graph.
            # These can make it impossible, in some cases, to derive the connectivity of the model using the original
            # module names. So we try to detect these cases and apply workarounds

            # Issue #1:
            #   Gemm ops (aka "Linear" / "addmm" / "FC") get the scope name of the last non-Gemm node
            #   that came before them.
            #   Note that if the node prior to the Gemm node isn't the result of a dedicated module call,
            #   then this issue doesn't occur. For simplicity we just track all Gemms.
            # TODO: This should be fixed in PyTorch 1.2.0, revisit when it's released
            aten_addmm_nodes_scope_names = []
            onnx_gemm_count = 0

            # Issue #2:
            #   Dropout ops are removed by ONNX trace optimization. However, the op BEFORE the original dropout op
            #   gets the scope name of the dropout op
            pre_dropout_nodes_scope_names = OrderedDict()

            prev_non_dropout_op = None
            for node in trace.graph().nodes():
                kind = node.kind()
                if 'aten' not in kind:
                    continue
                if kind == 'aten::dropout':
                    if prev_non_dropout_op:
                        pre_dropout_nodes_scope_names[node.scopeName()] = prev_non_dropout_op.scopeName()
                else:
                    prev_non_dropout_op = node
                    if kind == 'aten::addmm':
                        aten_addmm_nodes_scope_names.append(node.scopeName())

            # Let ONNX do the heavy lifting: fusing the convolution nodes; fusing the nodes
            # composing a GEMM operation; etc.
            torch.onnx._optimize_trace(trace, torch.onnx.OperatorExportTypes.ONNX)

            graph = trace.graph()
            self.ops = OrderedDict()
            self.module_ops_map = defaultdict(list)
            self.params = OrderedDict()
            self.edges = []
            self.temp = OrderedDict()

            in_out = list(graph.inputs()) + list(graph.outputs())
            for param in in_out:
                self.__add_param(param)

            for node in graph.nodes():
                new_op = self.__create_op(node)

                if apply_scope_name_workarounds:
                    # Here we apply the workaround to the Gemm nodes scope name issue mentioned above
                    if new_op['type'] == 'Gemm':
                        new_op['orig-name'] = aten_addmm_nodes_scope_names[onnx_gemm_count]
                        new_op['name'] = new_op['orig-name']
                        onnx_gemm_count += 1

                    # Here we apply the workaround to the issue of dropout op scope name overriding previous op's
                    # scope name
                    if new_op['name'] in pre_dropout_nodes_scope_names:
                        new_op['orig-name'] = pre_dropout_nodes_scope_names[new_op['name']]
                        new_op['name'] = new_op['orig-name']

                # Convert the graph node's scope name to a PyTorch module name
                module_name = onnx_name_2_pytorch_name(new_op['orig-name'])

                # Get name from before conversion to DistillerModuleList
                module_name = converted_module_names_map[module_name]

                if len(module_name) == 0:
                    # Special case where the module name is an empty string - this happens
                    # when the op is called from the "top-level" of the model
                    new_op['name'] = 'top_level_op'
                else:
                    new_op['name'] = module_name

                # Save the calling module name in the op dict. Denormalize it so it can
                # be directly matched with the actual model
                module_name = distiller.denormalize_module_name(self._src_model, module_name)
                new_op['module-name'] = module_name

                # The node's scope name in the graph corresponds to the module from which the op was called.
                # This means that when ops are invoked from the same module via functional calls or direct
                # operations on tensors, these ops will have the SAME MODULE NAME associated with them.
                # For example:
                #   t = t1 + t2
                #   t = F.relu(t)
                # In this case the add operation and the ReLU operation will have the same name, which is
                # derived from the module they're contained in.
                #
                # Another case where different ops will have the same module name is when a module is reused:
                #   out = self.conv1(x)
                #   out = self.relu(out)    <=== First use of self.relu
                #   out = self.conv2(out)
                #   out = self.relu(out)    <=== Second use of self.relu
                # In this case the graph will have 2 distinct ReLU nodes, with the same scope name.
                #
                # Operators with the same name create very confusing graphs (in ResNet, for example),
                # so we "unroll" them.
                same_module_cnt = len(self.module_ops_map[module_name])
                if same_module_cnt:
                    new_op['name'] += "__" + str(same_module_cnt)
                self.module_ops_map[module_name].append(new_op['name'])

                # Finally we register the new op in the ops collection
                msglogger.debug("new sgraph node - Scope name: {} ; Type: {} ; Display name {}".format(
                    new_op['orig-name'], new_op['type'], new_op['name']))
                self.ops[new_op['name']] = new_op

                for input_ in node.inputs():
                    self.__add_input(new_op, input_)
                    self.edges.append(SummaryGraph.Edge(input_.uniqueName(), new_op['name']))

                for output in node.outputs():
                    self.__add_output(new_op, output)
                    self.edges.append(SummaryGraph.Edge(new_op['name'], output.uniqueName()))

                new_op['attrs'] = OrderedDict([(attr_name, node[attr_name]) for attr_name in node.attributeNames()])

        self.__merge_pad_avgpool()
        self.add_macs_attr()
        self.add_footprint_attr()
        self.add_arithmetic_intensity_attr()
        del model_clone
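
The module-reuse "unrolling" described in the comments above can be shown in isolation. A minimal sketch of the counting scheme (mirroring the loop above), not the SummaryGraph code itself:

from collections import defaultdict

# For every module, count how many ops were already attributed to it and append
# a "__<n>" suffix for each additional use.
module_ops_map = defaultdict(list)

def register_op(module_name):
    op_name = module_name
    same_module_cnt = len(module_ops_map[module_name])
    if same_module_cnt:
        op_name += "__" + str(same_module_cnt)
    module_ops_map[module_name].append(op_name)
    return op_name

print(register_op("layer1.relu"))   # layer1.relu
print(register_op("layer1.relu"))   # layer1.relu__1   (module reused)
print(register_op("layer1.relu"))   # layer1.relu__2
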
Example #14
    def op_meta(n):
        return OpSimpleMetadata(distiller.denormalize_module_name(self._src_model, n), self.ops[n]['type'])
Example #15
def create_thinning_recipe_channels(sgraph, model, zeros_mask_dict):
    """Create a recipe for removing channels from Convolution layers.
    The 4D weights of the model parameters (i.e. the convolution parameters) are
    examined one by one, to determine which have channels that are all zeros.
    For each weights tensor that has at least one zero-channel, we create a
    "thinning recipe".
    The thinning recipe contains meta-instructions of how the model
    should be changed in order to remove the channels.
    """
    msglogger.info("Invoking create_thinning_recipe_channels")

    thinning_recipe = ThinningRecipe(modules={}, parameters={})
    layers = {mod_name: m for mod_name, m in model.named_modules()}

    # Traverse all of the model's parameters, search for zero-channels, and
    # create a thinning recipe that describes the required changes to the model.
    for param_name, param in model.named_parameters():
        # We are only interested in 4D weights (of Convolution layers)
        if param.dim() != 4:
            continue

        num_channels = param.size(1)
        nonzero_channels = find_nonzero_channels(param, param_name)
        num_nnz_channels = nonzero_channels.nelement()
        if num_nnz_channels == 0:
            raise ValueError(
                "Trying to set zero channels for parameter %s is not allowed" %
                param_name)
        # If all channels in this tensor are non-zero, there is nothing to prune - skip to the next tensor
        if num_channels <= num_nnz_channels:
            continue

        # We are removing channels, so update the number of incoming channels (IFMs)
        # in the convolutional layer
        layer_name = param_name_2_layer_name(param_name)
        assert isinstance(layers[layer_name], torch.nn.modules.Conv2d)
        append_module_directive(model,
                                thinning_recipe,
                                layer_name,
                                key='in_channels',
                                val=num_nnz_channels)

        # Select only the non-zero filters
        indices = nonzero_channels.data.squeeze()
        append_param_directive(thinning_recipe, param_name, (1, indices))

        # Find all instances of Convolution layers that immediately precede this layer
        predecessors = sgraph.predecessors_f(normalize_module_name(layer_name),
                                             ['Conv'])
        # Keep the predecessor names in their normalized form; they are denormalized below, where needed
        predecessors = [
            normalize_module_name(predecessor) for predecessor in predecessors
        ]
        if len(predecessors) == 0:
            msglogger.info(
                "Could not find predecessors for name={} normal={} {}".format(
                    layer_name, normalize_module_name(layer_name),
                    denormalize_module_name(model, layer_name)))
        for predecessor in predecessors:
            # For each of the convolutional layers that precede this one, reduce the number of output channels.
            append_module_directive(model,
                                    thinning_recipe,
                                    predecessor,
                                    key='out_channels',
                                    val=num_nnz_channels)

            # Now remove channels from the weights tensor of the predecessor conv
            append_param_directive(
                thinning_recipe,
                denormalize_module_name(model, predecessor) + '.weight',
                (0, indices))

            if layers[denormalize_module_name(model,
                                              predecessor)].bias is not None:
                # This convolution has bias coefficients
                append_param_directive(
                    thinning_recipe,
                    denormalize_module_name(model, predecessor) + '.bias',
                    (0, indices))

        # Now handle the BatchNormalization layer that precedes the convolution
        bn_layers = sgraph.predecessors_f(normalize_module_name(layer_name),
                                          ['BatchNormalization'])
        if len(bn_layers) > 0:
            # if len(bn_layers) != 1:
            #     raise RuntimeError("{} should have exactly one BN predecessors, but has {}".format(layer_name, len(bn_layers)))
            for bn_layer in bn_layers:
                # Thinning of the BN layer that precedes the convolution
                bn_layer_name = denormalize_module_name(model, bn_layer)
                msglogger.debug(
                    "[recipe] {}: predecessor BN module = {}".format(
                        layer_name, bn_layer_name))
                append_bn_thinning_directive(
                    thinning_recipe,
                    layers,
                    bn_layer_name,
                    len_thin_features=num_nnz_channels,
                    thin_features=indices)

    msglogger.debug(thinning_recipe)
    return thinning_recipe
Example #16
def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):
    """Create a recipe for removing filters from Convolution layers.
    The 4D weights of the model parameters (i.e. the convolution parameters) are
    examined one by one, to determine which have filters that are all zeros.
    For each weights tensor that has at least one zero-filter, we create a
    "thinning recipe".
    The thinning recipe contains meta-instructions of how the model
    should be changed in order to remove the filters.
    """
    msglogger.info("Invoking create_thinning_recipe_filters")
    msglogger.debug(sgraph.ops.keys())

    thinning_recipe = ThinningRecipe(modules={}, parameters={})
    layers = {mod_name: m for mod_name, m in model.named_modules()}
    """
    log 2018-09-19 CKH
        如果是1x1和dwconv3x3相连, 都要做pruning filter, 那在thinning的时候, 对于dwconv3x3, 它要按照前向结点thinning一次param, 再按本身
    的recipe再thinning一次, 但是dwconv3x3实际上param只有一个维度, 比如32x32的dwconv3x3, parameter只有[32,1], 那就不可能thinning两次
    因为按前向结点thinning的时候, 要thinning一个filter, in和out同时都被thinning掉了, 因为in&out之间只有一条线相连, 所以这种情况就不要做
    dwconv3x3的thinning, 做1x1的thinning, 就达到对3x3 filter pruning的目的了
    """

    for param_name, param in model.named_parameters():
        # We are only interested in 4D weights
        if param.dim() != 4:
            continue

        # Find the number of zero-valued filters in this weights tensor
        filter_view = param.view(param.size(0), -1)
        num_filters = filter_view.size()[0]
        nonzero_filters = torch.nonzero(filter_view.abs().sum(dim=1))
        num_nnz_filters = nonzero_filters.nelement()
        if num_nnz_filters == 0:
            raise ValueError(
                "Trying to set zero filters for parameter %s is not allowed" %
                param_name)
        # If all filters in this tensor are non-zero, there is nothing to prune - skip to the next tensor
        if num_filters <= num_nnz_filters:
            msglogger.debug("Skipping {} shape={}".format(
                param_name_2_layer_name(param_name), param.shape))
            continue

        msglogger.info("In tensor %s found %d/%d zero filters", param_name,
                       num_filters - num_nnz_filters, num_filters)

        # We are removing filters, so update the number of outgoing channels (OFMs)
        # in the convolutional layer
        layer_name = param_name_2_layer_name(param_name)
        assert isinstance(layers[layer_name], torch.nn.modules.Conv2d)
        # Change the architecture's in_ch and out_ch
        append_module_directive(model,
                                thinning_recipe,
                                layer_name,
                                key='out_channels',
                                val=num_nnz_filters)

        # Select only the non-zero filters
        indices = nonzero_filters.data.squeeze()
        # Change the size of the parameter tensor's dimensions
        append_param_directive(thinning_recipe, param_name, (0, indices))

        if layers[layer_name].bias is not None:
            # This convolution has bias coefficients
            append_param_directive(thinning_recipe, layer_name + '.bias',
                                   (0, indices))

        # Find all instances of Convolution or FC (GEMM) layers that immediately follow this layer
        msglogger.debug("{} => {}".format(layer_name,
                                          normalize_module_name(layer_name)))

        # Append the op type name before passing the name to successors_f  (2018-09-19 CKH)
        norm_module_name = normalize_module_name(layer_name)

        # Consider using isinstance(layers[successor], torch.nn.modules.Conv2d) to determine the module type  (2018-09-19 CKH)
        if isinstance(layers[norm_module_name], torch.nn.modules.Conv2d):
            norm_module_name += '.Conv'

        successors = sgraph.successors_f(norm_module_name, ['Conv', 'Gemm'])
        # Convert the layer names to PyTorch's convoluted naming scheme (when DataParallel is used)
        successors = ['.'.join(succs.split('.')[0:-1]) for succs in successors]
        successors = [
            denormalize_module_name(model, successor)
            for successor in successors
        ]
        for successor in successors:

            if isinstance(layers[successor], torch.nn.modules.Conv2d):
                # If the successor is a depthwise conv, we also need to change its out_ch (architecture only) and
                # the in_ch of the dwconv's downstream node (both the architecture and the parameter dimensions).
                # For now, assume a dwconv is never immediately followed by another dwconv.  2018-09-19 CKH
                successor_norm_module_name = normalize_module_name(successor)
                if isinstance(layers[successor_norm_module_name],
                              torch.nn.modules.Conv2d):
                    successor_norm_module_name += '.Conv'

                if layers[successor].groups == layers[successor].in_channels:
                    append_module_directive(model,
                                            thinning_recipe,
                                            successor,
                                            key='in_channels',
                                            val=num_nnz_filters)
                    append_module_directive(model,
                                            thinning_recipe,
                                            successor,
                                            key='out_channels',
                                            val=num_nnz_filters)
                    layers[successor].groups = num_nnz_filters
                    msglogger.debug(
                        "[recipe] {}: setting in_channels = {}".format(
                            successor, num_nnz_filters))

                    # Now remove channels from the weights tensor of the successor conv
                    append_param_directive(
                        thinning_recipe,
                        denormalize_module_name(model, successor) + '.weight',
                        (0, indices))

                    # For a depthwise 3x3 that follows a 1x1, once the 1x1's output channels are pruned we must
                    # (1) prune its input channels, (2) prune its output channels and (3) prune its bias. The masks
                    # for all three operations are the same as the 1x1's mask - whenever the 1x1's output channels
                    # change, the whole depthwise 3x3 that follows must change with them.  2018-09-20 CKH
                    if layers[successor].bias is not None:
                        # This convolution has bias coefficients
                        append_param_directive(thinning_recipe,
                                               successor + '.bias',
                                               (0, indices))

                    successors2 = sgraph.successors_f(
                        successor_norm_module_name, ['Conv', 'Gemm'])
                    # Convert the layer names to PyTorch's convoluted naming scheme (when DataParallel is used)
                    successors2 = [
                        '.'.join(succs.split('.')[0:-1])
                        for succs in successors2
                    ]
                    successors2 = [
                        denormalize_module_name(model, succ2)
                        for succ2 in successors2
                    ]

                    for successor2 in successors2:
                        if isinstance(layers[successor2],
                                      torch.nn.modules.Conv2d):
                            append_module_directive(model,
                                                    thinning_recipe,
                                                    successor2,
                                                    key='in_channels',
                                                    val=num_nnz_filters)
                            msglogger.debug(
                                "[recipe] {}: setting in_channels = {}".format(
                                    successor2, num_nnz_filters))
                            append_param_directive(
                                thinning_recipe,
                                denormalize_module_name(model, successor2) +
                                '.weight', (1, indices))
                else:
                    # For each of the convolutional layers that follow, we have to reduce the number of input channels.
                    append_module_directive(model,
                                            thinning_recipe,
                                            successor,
                                            key='in_channels',
                                            val=num_nnz_filters)
                    msglogger.debug(
                        "[recipe] {}: setting in_channels = {}".format(
                            successor, num_nnz_filters))

                    # Now remove channels from the weights tensor of the successor conv
                    append_param_directive(
                        thinning_recipe,
                        denormalize_module_name(model, successor) + '.weight',
                        (1, indices))

            elif isinstance(layers[successor], torch.nn.modules.Linear):
                # If a Linear (Fully-Connected) layer follows, we need to update its in_features member
                fm_size = layers[successor].in_features // layers[
                    layer_name].out_channels
                in_features = fm_size * num_nnz_filters
                append_module_directive(model,
                                        thinning_recipe,
                                        successor,
                                        key='in_features',
                                        val=in_features)
                msglogger.debug("[recipe] {}: setting in_features = {}".format(
                    successor, in_features))

                # Now remove channels from the weights tensor of the successor FC layer:
                # This is a bit tricky:
                fm_height = fm_width = int(math.sqrt(fm_size))
                view_4D = (layers[successor].out_features,
                           layers[layer_name].out_channels, fm_height,
                           fm_width)
                view_2D = (layers[successor].out_features, in_features)
                append_param_directive(
                    thinning_recipe,
                    denormalize_module_name(model, successor) + '.weight',
                    (1, indices, view_4D, view_2D))

        # Now handle the BatchNormalization layer that follows the convolution
        bn_layers = sgraph.successors_f(normalize_module_name(layer_name),
                                        ['BatchNormalization'])
        if len(bn_layers) > 0:
            assert len(bn_layers) == 1
            # Thinning of the BN layer that follows the convolution
            bn_layer_name = denormalize_module_name(model, bn_layers[0])
            bn_thinning(thinning_recipe,
                        layers,
                        bn_layer_name,
                        len_thin_features=num_nnz_filters,
                        thin_features=indices)
    return thinning_recipe
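
The groups == in_channels test above is how this version identifies a depthwise convolution, whose weight tensor has a channel dimension of size 1 (which is why it cannot be thinned twice, as the translated note explains). A tiny shape sketch with hypothetical layer sizes:

import torch.nn as nn

dw = nn.Conv2d(32, 32, kernel_size=3, groups=32)   # depthwise 3x3
pw = nn.Conv2d(32, 64, kernel_size=1)              # pointwise 1x1

print(dw.weight.shape)                 # torch.Size([32, 1, 3, 3]) - one channel per filter
print(pw.weight.shape)                 # torch.Size([64, 32, 1, 1])
print(dw.groups == dw.in_channels)     # True: the depthwise case handled above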