Example #1
def test_normalize_module_name():
    def name_test(dataset, arch):
        model = create_model(False, dataset, arch, parallel=False)
        modelp = create_model(False, dataset, arch, parallel=True)
        assert model is not None and modelp is not None

        mod_names = [mod_name for mod_name, _ in model.named_modules()]
        mod_names_p = [mod_name for mod_name, _ in modelp.named_modules()]
        assert mod_names is not None and mod_names_p is not None
        assert len(mod_names) + 1 == len(mod_names_p)

        for i in range(len(mod_names) - 1):
            assert mod_names[i + 1] == normalize_module_name(mod_names_p[i +
                                                                         2])
            logging.debug("{} {} {}".format(
                mod_names_p[i + 2], mod_names[i + 1],
                normalize_module_name(mod_names_p[i + 2])))
            assert mod_names_p[i + 2] == denormalize_module_name(
                modelp, mod_names[i + 1])

    assert normalize_module_name("features.module.0") == "features.0"
    assert normalize_module_name("module.features.0") == "features.0"
    assert normalize_module_name("features.module") == "features"
    assert normalize_module_name('module') == ''
    assert normalize_module_name(
        'no.parallel.modules') == 'no.parallel.modules'
    name_test('imagenet', 'vgg19')
    name_test('cifar10', 'resnet20_cifar')
    name_test('imagenet', 'alexnet')
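The assertions above pin down the naming contract: normalize_module_name strips the 'module' path segments that torch.nn.DataParallel inserts, and denormalize_module_name maps a normalized name back to the wrapped model's naming. A minimal sketch consistent with those assertions (an illustration only, not necessarily distiller's implementation):

def normalize_module_name(layer_name):
    # Drop any 'module' path segments added by DataParallel wrapping
    return '.'.join(seg for seg in layer_name.split('.') if seg != 'module')

def denormalize_module_name(parallel_model, normalized_name):
    # Find a fully-qualified name in the parallel model whose normalized form matches
    fully_qualified = [name for name, _ in parallel_model.named_modules()
                       if normalize_module_name(name) == normalized_name]
    return fully_qualified[-1] if fully_qualified else normalized_name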
Example #2
def test_normalize_module_name():
    assert "features.0" == normalize_module_name("features.module.0")
    assert "features.0" == normalize_module_name("module.features.0")
    assert "features" == normalize_module_name("features.module")
    # name_test is the nested helper defined in Example #1 / Example #7
    name_test('imagenet', 'vgg19')
    name_test('cifar10', 'resnet20_cifar')
    name_test('imagenet', 'alexnet')
Example #3
def create_thinning_recipe_channels(sgraph, model, zeros_mask_dict):
    """Create a recipe for removing channels from Convolution layers.

    The 4D weights of the model parameters (i.e. the convolution parameters) are
    examined one by one, to determine which of them have channels that are all zeros.
    For each weights tensor that has at least one zero-channel, we create a
    "thinning recipe".
    The thinning recipe contains meta-instructions of how the model
    should be changed in order to remove the channels.
    """
    msglogger.info("Invoking create_thinning_recipe_channels")

    thinning_recipe = ThinningRecipe(modules={}, parameters={})
    layers = {mod_name : m for mod_name, m in model.named_modules()}

    # Traverse all of the model's parameters, search for zero-channels, and
    # create a thinning recipe that describes the required changes to the model.
    for param_name, param in model.named_parameters():
        # We are only interested in 4D weights (of Convolution layers)
        if param.dim() != 4:
            continue

        num_channels = param.size(1)
        nonzero_channels = find_nonzero_channels(param, param_name)

        # If there are no all-zero channels in this tensor, move on to the next tensor
        if num_channels <= len(nonzero_channels):
            continue

        # We are removing channels, so update the number of incoming channels (IFMs)
        # in the convolutional layer
        layer_name = param_name_2_layer_name(param_name)
        assert isinstance(layers[layer_name], torch.nn.modules.Conv2d)
        append_module_directive(thinning_recipe, layer_name, key='in_channels', val=len(nonzero_channels))

        # Select only the non-zero filters
        indices = nonzero_channels.data.squeeze()
        append_param_directive(thinning_recipe, param_name, (1, indices))

        # Find all instances of Convolution layers that immediately precede this layer
        predecessors = sgraph.predecessors_f(normalize_module_name(layer_name), ['Conv'])
        # Convert the layer names to PyTorch's convoluted naming scheme (when DataParallel is used)
        predecessors = [denormalize_module_name(model, predecessor) for predecessor in predecessors]
        for predecessor in predecessors:
            # For each of the convolutional layers that precede this layer, we have to reduce the number of output channels.
            append_module_directive(thinning_recipe, predecessor, key='out_channels', val=len(nonzero_channels))

            # Now remove the corresponding filters from the weights tensor of the predecessor conv
            append_param_directive(thinning_recipe, predecessor+'.weight', (0, indices))

        # Now handle the BatchNormalization layer that precedes this convolution
        bn_layers = sgraph.predecessors_f(normalize_module_name(layer_name), ['BatchNormalization'])
        if len(bn_layers) > 0:
            assert len(bn_layers) == 1
            # Thinning of the BN layer that precedes the convolution
            bn_layer_name = denormalize_module_name(model, bn_layers[0])
            bn_thinning(thinning_recipe, layers, bn_layer_name, len_thin_features=len(nonzero_channels), thin_features=indices)

    return thinning_recipe
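Example #3 relies on a find_nonzero_channels helper that is not shown. A minimal sketch consistent with how its result is used above (a torch.nonzero result holding the indices of the surviving input channels); this is an illustration only and the helper in distiller may differ:

import torch

def find_nonzero_channels(param, param_name):
    # param is a 4D Conv weight of shape (out_channels, in_channels, k_h, k_w).
    # A channel counts as zero when all of its coefficients, across every filter, are zero.
    num_channels = param.size(1)
    channel_view = param.transpose(0, 1).contiguous().view(num_channels, -1)
    return torch.nonzero(channel_view.abs().sum(dim=1))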
Example #4
def get_normalized_recipe(recipe):
    return ThinningRecipe(
        modules={
            distiller.normalize_module_name(k): v for k, v in recipe.modules.items()
        },
        parameters={
            distiller.normalize_module_name(k): v for k, v in recipe.parameters.items()
        },
    )
Example #5
def test_simplenet():
    g = create_graph('cifar10', 'simplenet_cifar')
    assert g is not None
    preds = g.predecessors_f(normalize_module_name('module.conv1'), 'Conv')
    logging.debug("[simplenet_cifar]: preds of module.conv1 = {}".format(preds))
    assert len(preds) == 0

    preds = g.predecessors_f(normalize_module_name('module.conv2'), 'Conv')
    logging.debug("[simplenet_cifar]: preds of module.conv2 = {}".format(preds))
    assert len(preds) == 1
Example #6
def get_normalized_recipe(recipe):
    new_recipe = ThinningRecipe(modules={
        normalize_module_name(k): v
        for k, v in recipe.modules.items()
    },
                                parameters={
                                    normalize_module_name(k): v
                                    for k, v in recipe.parameters.items()
                                })
    return new_recipe
Example #7
def name_test(dataset, arch):
    model = create_model(False, dataset, arch, parallel=False)
    modelp = create_model(False, dataset, arch, parallel=True)
    assert model is not None and modelp is not None

    mod_names   = [mod_name for mod_name, _ in model.named_modules()]
    mod_names_p = [mod_name for mod_name, _ in modelp.named_modules()]
    assert mod_names is not None and mod_names_p is not None
    assert len(mod_names)+1 == len(mod_names_p)

    for i in range(len(mod_names)-1):
        assert mod_names[i+1] == normalize_module_name(mod_names_p[i+2])
        logging.debug("{} {} {}".format(mod_names_p[i+2], mod_names[i+1], normalize_module_name(mod_names_p[i+2])))
        assert mod_names_p[i+2] == denormalize_module_name(modelp, mod_names[i+1])
Example #8
    def log_model_buffers(self, model, buffer_names, tag_prefix, epoch, completed, total, freq):
        """Logs values of model buffers.

        Notes:
            1. Each buffer provided in 'buffer_names' is displayed in a separate table.
            2. Within each table, each value is displayed in a separate column.
        """
        datas = {name: [] for name in buffer_names}
        maxlens = {name: 0 for name in buffer_names}
        for n, m in model.named_modules():
            for buffer_name in buffer_names:
                try:
                    p = getattr(m, buffer_name)
                except AttributeError:
                    continue
                data = datas[buffer_name]
                values = p if isinstance(p, (list, torch.nn.ParameterList)) else p.view(-1).tolist()
                data.append([distiller.normalize_module_name(n) + '.' + buffer_name, *values])
                maxlens[buffer_name] = max(maxlens[buffer_name], len(values))

        for name in buffer_names:
            if datas[name]:
                headers = ['Layer'] + ['Val_' + str(i) for i in range(maxlens[name])]
                t = tabulate.tabulate(datas[name], headers=headers, tablefmt='psql', floatfmt='.4f')
                msglogger.info('\n' + name.upper() + ': (Epoch {0}, Step {1})\n'.format(epoch, completed) + t)
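A hypothetical call to the table-based logger above, where logger is an instance of the surrounding logger class and 'my_buffer' is a made-up buffer name used only for illustration:

logger.log_model_buffers(model, buffer_names=['my_buffer'], tag_prefix='',
                         epoch=1, completed=100, total=500, freq=10)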
Example #9
    def log_model_buffers(self, model, buffer_names, tag_prefix, epoch, completed, total, freq):
        """Logs values of model buffers.

        Notes:
            1. Each buffer provided is logged in a separate CSV file.
            2. Each CSV file is continuously updated during the run.
            3. In each call, a line is appended for each layer (i.e. module) containing the named buffers.
        """
        with ExitStack() as stack:
            files = {}
            writers = {}
            for buf_name in buffer_names:
                fname = self.get_fname(buf_name)
                new = not os.path.isfile(fname)
                files[buf_name] = stack.enter_context(open(fname, 'a'))
                writer = csv.writer(files[buf_name])
                if new:
                    writer.writerow(['Layer', 'Epoch', 'Step', 'Total', 'Values'])
                writers[buf_name] = writer

            for n, m in model.named_modules():
                for buffer_name in buffer_names:
                    try:
                        p = getattr(m, buffer_name)
                    except AttributeError:
                        continue
                    writer = writers[buffer_name]
                    if isinstance(p, (list, torch.nn.ParameterList)):
                        values = []
                        for v in p:
                            values += v.view(-1).tolist()
                    else:
                        values = p.view(-1).tolist()
                    writer.writerow([distiller.normalize_module_name(n) + '.' + buffer_name,
                                     epoch, completed, int(total)] + values)
Example #10
def collect_conv_details(model, dataset):
    dummy_input = get_dummy_input(dataset)
    g = SummaryGraph(model.cuda(), dummy_input.cuda())
    conv_layers = OrderedDict()
    total_macs = 0
    total_nnz = 0
    for id, (name, m) in enumerate(model.named_modules()):
        if isinstance(m, torch.nn.Conv2d):
            conv = SimpleNamespace()
            conv.t = len(conv_layers)
            conv.k = m.kernel_size[0]
            conv.stride = m.stride

            # Use the SummaryGraph to obtain some other details of the model
            conv_op = g.find_op(normalize_module_name(name))
            assert conv_op is not None

            conv.weights_vol = conv_op['attrs']['weights_vol']
            total_nnz += conv.weights_vol
            conv.macs = conv_op['attrs']['MACs']
            conv_pname = name + ".weight"
            conv_p = distiller.model_find_param(model, conv_pname)
            conv.macs *= distiller.density_ch(conv_p)
            total_macs += conv.macs

            conv.ofm_h = g.param_shape(conv_op['outputs'][0])[2]
            conv.ofm_w = g.param_shape(conv_op['outputs'][0])[3]
            conv.ifm_h = g.param_shape(conv_op['inputs'][0])[2]
            conv.ifm_w = g.param_shape(conv_op['inputs'][0])[3]

            conv.name = name
            conv.id = id
            conv_layers[len(conv_layers)] = conv

    return conv_layers, total_macs, total_nnz
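A hypothetical usage of collect_conv_details, assuming the create_model factory from Example #7 and an available CUDA device (the function moves the model and dummy input to .cuda()):

model = create_model(False, 'cifar10', 'resnet20_cifar', parallel=False)
conv_layers, total_macs, total_nnz = collect_conv_details(model, 'cifar10')
for conv in conv_layers.values():
    print("{}: k={} macs={:.0f}".format(conv.name, conv.k, conv.macs))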
Example #11
File: ADC.py  Project: chanmi168/distiller
def collect_conv_details(model, dataset):
    if dataset == 'imagenet':
        dummy_input = torch.randn(1, 3, 224, 224)
    elif dataset == 'cifar10':
        dummy_input = torch.randn(1, 3, 32, 32)
    else:
        raise ValueError("dataset %s is not supported" % dataset)

    g = SummaryGraph(model.cuda(), dummy_input.cuda())
    conv_layers = OrderedDict()
    total_macs = 0
    for id, (name, m) in enumerate(model.named_modules()):
        if isinstance(m, torch.nn.Conv2d):
            conv = SimpleNamespace()
            conv.t = len(conv_layers)
            conv.k = m.kernel_size[0]
            conv.stride = m.stride

            # Use the SummaryGraph to obtain some other details of the model
            conv_op = g.find_op(normalize_module_name(name))
            assert conv_op is not None

            conv.macs = conv_op['attrs']['MACs']
            total_macs += conv.macs
            conv.ofm_h = g.param_shape(conv_op['outputs'][0])[2]
            conv.ofm_w = g.param_shape(conv_op['outputs'][0])[3]
            conv.ifm_h = g.param_shape(conv_op['inputs'][0])[2]
            conv.ifm_w = g.param_shape(conv_op['inputs'][0])[3]

            conv.name = name
            conv.id = id
            conv_layers[len(conv_layers)] = conv

    return conv_layers, total_macs
Example #12
    def successors_f(self,
                     node_name,
                     successors_types,
                     done_list=None,
                     logging=None,
                     denorm_names=True):
        """Returns a list of <op>'s successors, if they match the <successors_types> criteria.

        Traverse the graph, starting at node <node_name>, and search for successor
        nodes, that have one of the node types listed in <successors_types>.
        If none is found, then return an empty list.

        <node_name> and the returned list of successors are module-name strings.
        """
        node_name = distiller.normalize_module_name(node_name)
        node = self.find_op(node_name)
        node_is_an_op = True
        if node is None:
            node_is_an_op = False
            node = self.find_param(node_name)
            if node is None:
                msglogger.warning(
                    "successors_f: Could not find node {}".format(node_name))
                return []

        if done_list is None:
            done_list = []

        done_list.append(node_name)

        if not isinstance(successors_types, list):
            successors_types = [successors_types]

        if node_is_an_op:
            # We check if we found the type of node we're looking for,
            # and that this is not the first node in our search.
            if node['type'] in successors_types and len(done_list) > 1:
                return [
                    distiller.denormalize_module_name(self._src_model,
                                                      node_name)
                    if denorm_names else node_name
                ]

            # This is an operation node
            succs = [
                edge.dst for edge in self.edges
                if (edge.src == node_name and edge.dst not in done_list)
            ]
        else:
            # This is a data node
            succs = [
                edge.dst for edge in self.edges
                if (edge.src == node_name and edge.dst not in done_list)
            ]
        ret = []
        for successor in succs:
            ret += self.successors_f(successor, successors_types, done_list,
                                     logging, denorm_names)

        return ret
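An illustrative call on a SummaryGraph instance g (the layer name is hypothetical): collect the Conv ops reachable downstream of a given layer, in the naming of the source model.

downstream_convs = g.successors_f('layer1.0.conv1', ['Conv'])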
Example #13
    def named_params_layers(self):
        for param_name, param in self._src_model.named_parameters():
            # remove the extension of param_name, and then normalize it
            # to create a normalized layer name
            normalized_layer_name = distiller.normalize_module_name(
                '.'.join(param_name.split('.')[:-1]))
            sgraph_layer_name = distiller.denormalize_module_name(
                self._src_model, normalized_layer_name)
            yield sgraph_layer_name, param_name, param
Example #14
    def predecessors_f(self,
                       node_name,
                       predecessors_types,
                       done_list=None,
                       logging=None):
        """Returns a list of <op>'s predecessors, if they match the <predecessors_types> criteria.
        """
        node_name = distiller.normalize_module_name(node_name)
        node = self.find_op(node_name)
        node_is_an_op = True
        if node is None:
            node_is_an_op = False
            node = self.find_param(node_name)
            if node is None:
                msglogger.warning(
                    "predecessors_f: Could not find node {}".format(node_name))
                return []

        if done_list is None:
            done_list = []

        done_list.append(node_name)

        if not isinstance(predecessors_types, list):
            predecessors_types = [predecessors_types]

        if node_is_an_op:
            # We check if we found the type of node we're looking for,
            # and that this is not the first node in our search.
            if node['type'] in predecessors_types and len(done_list) > 1:
                return [
                    distiller.denormalize_module_name(self._src_model,
                                                      node_name)
                ]

            # This is an operation node
            preds = [
                edge.src for edge in self.edges
                if (edge.dst == node_name and edge.src not in done_list)
            ]
        else:
            # This is a data node
            preds = [
                edge.src for edge in self.edges
                if (edge.dst == node_name and edge.src not in done_list)
            ]
        ret = []
        for predecessor in preds:
            ret += self.predecessors_f(predecessor, predecessors_types,
                                       done_list, logging)

        return [
            distiller.denormalize_module_name(self._src_model, node)
            for node in ret
        ]
Example #15
def get_model_compute_budget(model, dataset, layers_to_prune=None):
    """Return the compute budget of the Convolution layers in an image-classifier.
    """
    dummy_input = distiller.get_dummy_input(dataset)
    g = SummaryGraph(model, dummy_input)
    total_macs = 0
    for name, m in model.named_modules():
        if isinstance(m, torch.nn.Conv2d):
            # Use the SummaryGraph to obtain some other details of the model
            conv_op = g.find_op(normalize_module_name(name))
            assert conv_op is not None
            total_macs += conv_op['attrs']['MACs']
    del g
    return total_macs
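A hypothetical usage, reusing the create_model factory from Example #7:

model = create_model(False, 'cifar10', 'resnet20_cifar', parallel=False)
total_macs = get_model_compute_budget(model, 'cifar10')
print("Total Conv MACs: {:,}".format(int(total_macs)))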
Example #16
    def make_fc(model, fc_module, g, name, seq_id, layer_id):
        fc = SimpleNamespace()
        fc.name = name
        fc.id = layer_id
        fc.t = seq_id

        # Use the SummaryGraph to obtain some other details of the model
        fc_op = g.find_op(normalize_module_name(name))
        assert fc_op is not None

        fc.weights_vol = fc_op['attrs']['weights_vol']
        fc.macs = fc_op['attrs']['MACs']
        fc.n_ofm = fc_op['attrs']['n_ofm']
        fc.n_ifm = fc_op['attrs']['n_ifm']
        fc_pname = name + ".weight"
        fc_p = distiller.model_find_param(model, fc_pname)
        return fc
Example #17
File: ADC.py  Project: zhepengfei/distiller
def collect_conv_details(model,
                         dataset,
                         perform_thinning,
                         layers_to_prune=None):
    dummy_input = distiller.get_dummy_input(dataset)
    g = SummaryGraph(model, dummy_input)
    conv_layers = OrderedDict()
    total_macs = 0
    total_params = 0
    for id, (name, m) in enumerate(model.named_modules()):
        if isinstance(m, torch.nn.Conv2d):
            conv = SimpleNamespace()
            conv.t = len(conv_layers)
            conv.k = m.kernel_size[0]
            conv.stride = m.stride

            # Use the SummaryGraph to obtain some other details of the model
            conv_op = g.find_op(normalize_module_name(name))
            assert conv_op is not None

            conv.weights_vol = conv_op['attrs']['weights_vol']
            total_params += conv.weights_vol
            conv.macs = conv_op['attrs']['MACs']
            conv_pname = name + ".weight"
            conv_p = distiller.model_find_param(model, conv_pname)
            if not perform_thinning:
                #conv.macs *= distiller.density_ch(conv_p)  # Channel pruning
                conv.macs *= distiller.density_3D(conv_p)  # Filter pruning
            total_macs += conv.macs

            conv.ofm_h = g.param_shape(conv_op['outputs'][0])[2]
            conv.ofm_w = g.param_shape(conv_op['outputs'][0])[3]
            conv.ifm_h = g.param_shape(conv_op['inputs'][0])[2]
            conv.ifm_w = g.param_shape(conv_op['inputs'][0])[3]

            conv.name = name
            conv.id = id
            if layers_to_prune is None or name in layers_to_prune:
                conv_layers[len(conv_layers)] = conv
    return conv_layers, total_macs, total_params
Example #18
    def make_conv(model, conv_module, g, name, seq_id, layer_id):
        conv = SimpleNamespace()
        conv.name = name
        conv.id = layer_id
        conv.t = seq_id
        conv.k = conv_module.kernel_size[0]
        conv.stride = conv_module.stride

        # Use the SummaryGraph to obtain some other details of the model
        conv_op = g.find_op(normalize_module_name(name))
        assert conv_op is not None

        conv.weights_vol = conv_op['attrs']['weights_vol']
        conv.macs = conv_op['attrs']['MACs']
        conv.n_ofm = conv_op['attrs']['n_ofm']
        conv.n_ifm = conv_op['attrs']['n_ifm']
        conv_pname = name + ".weight"
        conv_p = distiller.model_find_param(model, conv_pname)
        conv.ofm_h = g.param_shape(conv_op['outputs'][0])[2]
        conv.ofm_w = g.param_shape(conv_op['outputs'][0])[3]
        conv.ifm_h = g.param_shape(conv_op['inputs'][0])[2]
        conv.ifm_w = g.param_shape(conv_op['inputs'][0])[3]
        return conv
Example #19
    def dedicated_module_check(n):
        module_name = self.ops[distiller.normalize_module_name(n)]['module-name']
        return len(self.module_ops_map[module_name]) == 1 or not dedicated_modules_only
Example #20
def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):
    """Create a recipe for removing filters from Convolution layers.
    The 4D weights of the model parameters (i.e. the convolution parameters) are
    examined one by one, to determine which of them have filters that are all zeros.
    For each weights tensor that has at least one zero-filter, we create a
    "thinning recipe".
    The thinning recipe contains meta-instructions of how the model
    should be changed in order to remove the filters.
    """
    msglogger.info("Invoking create_thinning_recipe_filters")
    msglogger.debug(sgraph.ops.keys())

    thinning_recipe = ThinningRecipe(modules={}, parameters={})
    layers = {mod_name: m for mod_name, m in model.named_modules()}
    """
    log 2018-09-19 CKH
        如果是1x1和dwconv3x3相连, 都要做pruning filter, 那在thinning的时候, 对于dwconv3x3, 它要按照前向结点thinning一次param, 再按本身
    的recipe再thinning一次, 但是dwconv3x3实际上param只有一个维度, 比如32x32的dwconv3x3, parameter只有[32,1], 那就不可能thinning两次
    因为按前向结点thinning的时候, 要thinning一个filter, in和out同时都被thinning掉了, 因为in&out之间只有一条线相连, 所以这种情况就不要做
    dwconv3x3的thinning, 做1x1的thinning, 就达到对3x3 filter pruning的目的了
    """

    for param_name, param in model.named_parameters():
        # We are only interested in 4D weights
        if param.dim() != 4:
            continue

        # Find the number of zero-valued filters in this weights tensor
        filter_view = param.view(param.size(0), -1)
        num_filters = filter_view.size()[0]
        nonzero_filters = torch.nonzero(filter_view.abs().sum(dim=1))
        num_nnz_filters = nonzero_filters.nelement()
        if num_nnz_filters == 0:
            raise ValueError(
                "Trying to set zero filters for parameter %s is not allowed" %
                param_name)
        # If there are no all-zero filters in this tensor, move on to the next tensor
        if num_filters <= num_nnz_filters:
            msglogger.debug("SKipping {} shape={}".format(
                param_name_2_layer_name(param_name), param.shape))
            continue

        msglogger.info("In tensor %s found %d/%d zero filters", param_name,
                       num_filters - num_nnz_filters, num_filters)

        # We are removing filters, so update the number of outgoing channels (OFMs)
        # in the convolutional layer
        layer_name = param_name_2_layer_name(param_name)
        assert isinstance(layers[layer_name], torch.nn.modules.Conv2d)
        # Change the architecture's in_ch and out_ch
        append_module_directive(model,
                                thinning_recipe,
                                layer_name,
                                key='out_channels',
                                val=num_nnz_filters)

        # Select only the non-zero filters
        indices = nonzero_filters.data.squeeze()
        # Change the dimensions of the parameter tensor
        append_param_directive(thinning_recipe, param_name, (0, indices))

        if layers[layer_name].bias is not None:
            # This convolution has bias coefficients
            append_param_directive(thinning_recipe, layer_name + '.bias',
                                   (0, indices))

        # Find all instances of Convolution or FC (GEMM) layers that immediately follow this layer
        msglogger.debug("{} => {}".format(layer_name,
                                          normalize_module_name(layer_name)))

        # Add the type name before passing to successors_f 2018-09-19 CKH
        norm_module_name = normalize_module_name(layer_name)

        # Consider using isinstance(layers[successor], torch.nn.modules.Conv2d) to determine the module's type 2018-09-19 CKH
        if isinstance(layers[norm_module_name], torch.nn.modules.Conv2d):
            norm_module_name += '.Conv'

        successors = sgraph.successors_f(norm_module_name, ['Conv', 'Gemm'])
        # Convert the layer names to PyTorch's convoluted naming scheme (when DataParallel is used)
        successors = ['.'.join(succs.split('.')[0:-1]) for succs in successors]
        successors = [
            denormalize_module_name(model, successor)
            for successor in successors
        ]
        for successor in successors:

            if isinstance(layers[successor], torch.nn.modules.Conv2d):
                # If the successor is a depthwise conv, its out_ch must also be changed (architecture only),
                # and the in_ch of the node that follows the depthwise conv must be changed as well
                # (both the architecture and the parameter dimensions). For now, assume a depthwise conv is
                # never immediately followed by another depthwise conv. 2018-09-19 CKH
                successor_norm_module_name = normalize_module_name(successor)
                if isinstance(layers[successor_norm_module_name],
                              torch.nn.modules.Conv2d):
                    successor_norm_module_name += '.Conv'

                if layers[successor].groups == layers[successor].in_channels:
                    append_module_directive(model,
                                            thinning_recipe,
                                            successor,
                                            key='in_channels',
                                            val=num_nnz_filters)
                    append_module_directive(model,
                                            thinning_recipe,
                                            successor,
                                            key='out_channels',
                                            val=num_nnz_filters)
                    layers[successor].groups = num_nnz_filters
                    msglogger.debug(
                        "[recipe] {}: setting in_channels = {}".format(
                            successor, num_nnz_filters))

                    # Now remove channels from the weights tensor of the successor conv
                    append_param_directive(
                        thinning_recipe,
                        denormalize_module_name(model, successor) + '.weight',
                        (0, indices))

                    # For a depthwise 3x3 that follows a 1x1, once the 1x1's output channels are pruned we must
                    # (1) prune its input channels, (2) prune its output channels, and (3) prune its bias.
                    # All three operations use the same mask as the 1x1: whenever the 1x1's output channels
                    # change, the depthwise 3x3 that follows must change with them. 2018-09-20 CKH
                    if layers[successor].bias is not None:
                        # This convolution has bias coefficients
                        append_param_directive(thinning_recipe,
                                               successor + '.bias',
                                               (0, indices))

                    successors2 = sgraph.successors_f(
                        successor_norm_module_name, ['Conv', 'Gemm'])
                    # Convert the layer names to PyTorch's convoluted naming scheme (when DataParallel is used)
                    successors2 = [
                        '.'.join(succs.split('.')[0:-1])
                        for succs in successors2
                    ]
                    successors2 = [
                        denormalize_module_name(model, successor)
                        for successor in successors2
                    ]

                    for successor2 in successors2:
                        if isinstance(layers[successor2],
                                      torch.nn.modules.Conv2d):
                            append_module_directive(model,
                                                    thinning_recipe,
                                                    successor2,
                                                    key='in_channels',
                                                    val=num_nnz_filters)
                            msglogger.debug(
                                "[recipe] {}: setting in_channels = {}".format(
                                    successor, num_nnz_filters))
                            append_param_directive(
                                thinning_recipe,
                                denormalize_module_name(model, successor2) +
                                '.weight', (1, indices))
                else:
                    # For each of the convolutional layers that follow, we have to reduce the number of input channels.
                    append_module_directive(model,
                                            thinning_recipe,
                                            successor,
                                            key='in_channels',
                                            val=num_nnz_filters)
                    msglogger.debug(
                        "[recipe] {}: setting in_channels = {}".format(
                            successor, num_nnz_filters))

                    # Now remove channels from the weights tensor of the successor conv
                    append_param_directive(
                        thinning_recipe,
                        denormalize_module_name(model, successor) + '.weight',
                        (1, indices))

            elif isinstance(layers[successor], torch.nn.modules.Linear):
                # If a Linear (Fully-Connected) layer follows, we need to update its in_features member
                fm_size = layers[successor].in_features // layers[
                    layer_name].out_channels
                in_features = fm_size * num_nnz_filters
                append_module_directive(model,
                                        thinning_recipe,
                                        successor,
                                        key='in_features',
                                        val=in_features)
                msglogger.debug("[recipe] {}: setting in_features = {}".format(
                    successor, in_features))

                # Now remove channels from the weights tensor of the successor FC layer:
                # This is a bit tricky:
                fm_height = fm_width = int(math.sqrt(fm_size))
                view_4D = (layers[successor].out_features,
                           layers[layer_name].out_channels, fm_height,
                           fm_width)
                view_2D = (layers[successor].out_features, in_features)
                append_param_directive(
                    thinning_recipe,
                    denormalize_module_name(model, successor) + '.weight',
                    (1, indices, view_4D, view_2D))

        # Now handle the BatchNormalization layer that follows the convolution
        bn_layers = sgraph.successors_f(normalize_module_name(layer_name),
                                        ['BatchNormalization'])
        if len(bn_layers) > 0:
            assert len(bn_layers) == 1
            # Thinning of the BN layer that follows the convolution
            bn_layer_name = denormalize_module_name(model, bn_layers[0])
            bn_thinning(thinning_recipe,
                        layers,
                        bn_layer_name,
                        len_thin_features=num_nnz_filters,
                        thin_features=indices)
    return thinning_recipe
Example #21
def create_thinning_recipe_filters(sgraph, model, zeros_mask_dict):
    """Create a recipe for removing filters from Convolution layers.
    The 4D weights of the model parameters (i.e. the convolution parameters) are
    examined one by one, to determine which of them have filters that are all zeros.
    For each weights tensor that has at least one zero-filter, we create a
    "thinning recipe".
    The thinning recipe contains meta-instructions of how the model
    should be changed in order to remove the filters.
    """
    msglogger.info("Invoking create_thinning_recipe_filters")

    thinning_recipe = ThinningRecipe(modules={}, parameters={})
    layers = {mod_name: m for mod_name, m in model.named_modules()}

    for param_name, param in model.named_parameters():
        # We are only interested in 4D weights
        if param.dim() != 4:
            continue

        # Find the number of zero-valued filters in this weights tensor
        filter_view = param.view(param.size(0), -1)
        num_filters = filter_view.size()[0]
        nonzero_filters = torch.nonzero(filter_view.abs().sum(dim=1))
        num_nnz_filters = nonzero_filters.nelement()
        if num_nnz_filters == 0:
            raise ValueError(
                "Trying to set zero filters for parameter %s is not allowed" %
                param_name)
        # If there are no all-zero filters in this tensor, move on to the next tensor
        if num_filters <= num_nnz_filters:
            msglogger.debug("SKipping {} shape={}".format(
                param_name_2_layer_name(param_name), param.shape))
            continue

        msglogger.info("In tensor %s found %d/%d zero filters", param_name,
                       num_filters - num_nnz_filters, num_filters)

        # We are removing filters, so update the number of outgoing channels (OFMs)
        # in the convolutional layer
        layer_name = param_name_2_layer_name(param_name)
        assert isinstance(layers[layer_name], torch.nn.modules.Conv2d)
        append_module_directive(model,
                                thinning_recipe,
                                layer_name,
                                key='out_channels',
                                val=num_nnz_filters)

        # Select only the non-zero filters
        indices = nonzero_filters.data.squeeze()
        append_param_directive(thinning_recipe, param_name, (0, indices))

        if layers[layer_name].bias is not None:
            # This convolution has bias coefficients
            append_param_directive(thinning_recipe, layer_name + '.bias',
                                   (0, indices))

        # Find all instances of Convolution or FC (GEMM) layers that immediately follow this layer
        successors = sgraph.successors_f(normalize_module_name(layer_name),
                                         ['Conv', 'Gemm'])
        # Convert the layer names to PyTorch's convoluted naming scheme (when DataParallel is used)
        successors = [
            denormalize_module_name(model, successor)
            for successor in successors
        ]
        for successor in successors:

            if isinstance(layers[successor], torch.nn.modules.Conv2d):
                # For each of the convolutional layers that follow, we have to reduce the number of input channels.
                append_module_directive(model,
                                        thinning_recipe,
                                        successor,
                                        key='in_channels',
                                        val=num_nnz_filters)
                msglogger.debug("[recipe] {}: setting in_channels = {}".format(
                    successor, num_nnz_filters))

                # Now remove channels from the weights tensor of the successor conv
                append_param_directive(
                    thinning_recipe,
                    denormalize_module_name(model, successor) + '.weight',
                    (1, indices))

            elif isinstance(layers[successor], torch.nn.modules.Linear):
                # If a Linear (Fully-Connected) layer follows, we need to update its in_features member
                fm_size = layers[successor].in_features // layers[
                    layer_name].out_channels
                in_features = fm_size * num_nnz_filters
                append_module_directive(model,
                                        thinning_recipe,
                                        successor,
                                        key='in_features',
                                        val=in_features)
                msglogger.debug("[recipe] {}: setting in_features = {}".format(
                    successor, in_features))

                # Now remove channels from the weights tensor of the successor FC layer:
                # This is a bit tricky:
                fm_height = fm_width = int(math.sqrt(fm_size))
                view_4D = (layers[successor].out_features,
                           layers[layer_name].out_channels, fm_height,
                           fm_width)
                view_2D = (layers[successor].out_features, in_features)
                append_param_directive(
                    thinning_recipe,
                    denormalize_module_name(model, successor) + '.weight',
                    (1, indices, view_4D, view_2D))

        # Now handle the BatchNormalization layer that follows the convolution
        bn_layers = sgraph.successors_f(normalize_module_name(layer_name),
                                        ['BatchNormalization'])
        if len(bn_layers) > 0:
            assert len(bn_layers) == 1
            # Thinning of the BN layer that follows the convolution
            bn_layer_name = denormalize_module_name(model, bn_layers[0])
            bn_thinning(thinning_recipe,
                        layers,
                        bn_layer_name,
                        len_thin_features=num_nnz_filters,
                        thin_features=indices)
    return thinning_recipe
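The (dim, indices) tuples recorded by append_param_directive are meta-instructions; executing one essentially amounts to an index_select along the given dimension. A simplified sketch of that step (it ignores the optional view_4D/view_2D reshaping used for the FC case and is not distiller's actual execute_thinning_recipe):

import torch

def apply_param_directive(param, directive):
    # directive is (dim, indices) as recorded above; returns the thinned tensor
    dim, indices = directive[0], directive[1]
    return torch.index_select(param.data, dim, indices)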
Example #22
    def find_op(self, lost_op_name):
        return self.ops.get(distiller.normalize_module_name(lost_op_name), None)
Example #23
def create_thinning_recipe_channels(sgraph, model, zeros_mask_dict):
    """Create a recipe for removing channels from Convolution layers.
    The 4D weights of the model parameters (i.e. the convolution parameters) are
    examined one by one, to determine which of them have channels that are all zeros.
    For each weights tensor that has at least one zero-channel, we create a
    "thinning recipe".
    The thinning recipe contains meta-instructions of how the model
    should be changed in order to remove the channels.
    """
    msglogger.info("Invoking create_thinning_recipe_channels")

    thinning_recipe = ThinningRecipe(modules={}, parameters={})
    layers = {mod_name: m for mod_name, m in model.named_modules()}

    # Traverse all of the model's parameters, search for zero-channels, and
    # create a thinning recipe that describes the required changes to the model.
    for param_name, param in model.named_parameters():
        # We are only interested in 4D weights (of Convolution layers)
        if param.dim() != 4:
            continue

        num_channels = param.size(1)
        nonzero_channels = find_nonzero_channels(param, param_name)
        num_nnz_channels = nonzero_channels.nelement()
        if num_nnz_channels == 0:
            raise ValueError(
                "Trying to set zero channels for parameter %s is not allowed" %
                param_name)
        # If there are no all-zero channels in this tensor, move on to the next tensor
        if num_channels <= num_nnz_channels:
            continue

        # We are removing channels, so update the number of incoming channels (IFMs)
        # in the convolutional layer
        layer_name = param_name_2_layer_name(param_name)
        assert isinstance(layers[layer_name], torch.nn.modules.Conv2d)
        append_module_directive(model,
                                thinning_recipe,
                                layer_name,
                                key='in_channels',
                                val=num_nnz_channels)

        # Select only the non-zero filters
        indices = nonzero_channels.data.squeeze()
        append_param_directive(thinning_recipe, param_name, (1, indices))

        # Find all instances of Convolution layers that immediately precede this layer
        predecessors = sgraph.predecessors_f(normalize_module_name(layer_name),
                                             ['Conv'])
        # Convert the layer names to PyTorch's convoluted naming scheme (when DataParallel is used)
        predecessors = [
            normalize_module_name(predecessor) for predecessor in predecessors
        ]
        if len(predecessors) == 0:
            msglogger.info(
                "Could not find predecessors for name={} normal={} {}".format(
                    layer_name, normalize_module_name(layer_name),
                    denormalize_module_name(model, layer_name)))
        for predecessor in predecessors:
            # For each of the convolutional layers that precede this layer, we have to reduce the number of output channels.
            append_module_directive(model,
                                    thinning_recipe,
                                    predecessor,
                                    key='out_channels',
                                    val=num_nnz_channels)

            # Now remove channels from the weights tensor of the predecessor conv
            append_param_directive(
                thinning_recipe,
                denormalize_module_name(model, predecessor) + '.weight',
                (0, indices))

            if layers[denormalize_module_name(model,
                                              predecessor)].bias is not None:
                # This convolution has bias coefficients
                append_param_directive(
                    thinning_recipe,
                    denormalize_module_name(model, predecessor) + '.bias',
                    (0, indices))

        # Now handle the BatchNormalization layer that precedes this convolution
        bn_layers = sgraph.predecessors_f(normalize_module_name(layer_name),
                                          ['BatchNormalization'])
        if len(bn_layers) > 0:
            # if len(bn_layers) != 1:
            #     raise RuntimeError("{} should have exactly one BN predecessors, but has {}".format(layer_name, len(bn_layers)))
            for bn_layer in bn_layers:
                # Thinning of the BN layer that precedes the convolution
                bn_layer_name = denormalize_module_name(model, bn_layer)
                msglogger.debug(
                    "[recipe] {}: predecessor BN module = {}".format(
                        layer_name, bn_layer_name))
                append_bn_thinning_directive(
                    thinning_recipe,
                    layers,
                    bn_layer_name,
                    len_thin_features=num_nnz_channels,
                    thin_features=indices)

    msglogger.debug(thinning_recipe)
    return thinning_recipe
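The append_module_directive and append_param_directive helpers used throughout these recipes are not shown. A minimal sketch of how they might record instructions into the ThinningRecipe's dictionaries, consistent with how the recipe is built above (an illustration only; the signatures vary slightly between the examples):

def append_module_directive(model, thinning_recipe, module_name, key, val):
    # Record an attribute change (e.g. 'in_channels', 'out_channels') for a module
    thinning_recipe.modules.setdefault(module_name, {})[key] = val

def append_param_directive(thinning_recipe, param_name, directive):
    # Record a (dim, indices[, view_4D, view_2D]) selection for a parameter tensor
    thinning_recipe.parameters.setdefault(param_name, []).append(directive)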