Example No. 1
def get_network_madds(backbone, neck, head, input_size, logger, search=False):
    input_data = torch.randn((2, 3) + input_size).cuda()
    backbone_madds, backbone_data = comp_multadds_fw(backbone, input_data)
    backbone_params = count_parameters_in_MB(backbone)
    if neck is not None:
        neck_madds, neck_data = comp_multadds_fw(neck, backbone_data)
        neck_params = count_parameters_in_MB(neck)
    else:
        neck_madds = 0.
        neck_params = 0.
        neck_data = backbone_data
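    # Temporarily switch the head out of search mode so its measured cost
    # reflects the derived (fixed) architecture rather than the search path.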
    if hasattr(head, 'search') and search:
        head.search = False
    head_madds, _ = comp_multadds_fw(head, neck_data)
    head_params = count_parameters_in_MB(head)
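    # Restore the head's search mode after measuring.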
    if hasattr(head, 'search') and search:
        head.search = True
    total_madds = backbone_madds + neck_madds + head_madds
    total_params = backbone_params + neck_params + head_params

    logger.info(
        "Derived Mult-Adds: [Backbone] %.2fGB [Neck] %.2fGB [Head] %.2fGB [Total] %.2fGB",
        backbone_madds / 1e3, neck_madds / 1e3, head_madds / 1e3,
        total_madds / 1e3)
    logger.info(
        "Derived Num Params: [Backbone] %.2fMB [Neck] %.2fMB [Head] %.2fMB [Total] %.2fMB",
        backbone_params, neck_params, head_params, total_params)
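The helpers comp_multadds_fw and count_parameters_in_MB are used but not shown in this example. As a rough sketch of what a forward-hook-based mult-add counter of that kind might look like (the name count_multadds_fw, the hook coverage, and the millions-of-mult-adds unit are assumptions made here, not the original helper):

import torch
import torch.nn as nn

def count_multadds_fw(module, input_data):
    # Return (mult-adds in millions, output tensor) for one forward pass.
    madds = []

    def conv_hook(m, inp, out):
        # Each output element of a convolution costs k_h * k_w * (C_in / groups) mult-adds.
        kh, kw = m.kernel_size
        madds.append(out.numel() * kh * kw * m.in_channels // m.groups)

    def linear_hook(m, inp, out):
        madds.append(out.numel() * m.in_features)

    hooks = []
    for sub in module.modules():
        if isinstance(sub, nn.Conv2d):
            hooks.append(sub.register_forward_hook(conv_hook))
        elif isinstance(sub, nn.Linear):
            hooks.append(sub.register_forward_hook(linear_hook))

    with torch.no_grad():
        output = module(input_data)
    for hook in hooks:
        hook.remove()
    return sum(madds) / 1e6, output

get_network_madds above chains such calls through backbone, neck and head, feeding each stage's output tensor into the next stage before summing the per-stage costs.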
Example No. 2
    def get_flops_list(self, input_shape):
        data = torch.randn(input_shape)
        block_flops = []
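        # The first two blocks are only run forward to produce correctly shaped
        # features; their FLOPs are not recorded.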
        data = self.blocks[0](data)
        data = self.blocks[1](data)

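        # Collect FLOPs per block -> per searchable layer -> per candidate op.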
        for block in self.blocks[2:]:
            layer_flops = []
            if hasattr(block, 'layers'):
                for layer in block.layers:
                    op_flops = []
                    for op in layer._ops:
                        flops, op_data = comp_multadds_fw(op, data, 'B', 'cpu')
                        op_flops.append(flops)
                    data = op_data
                    layer_flops.append(op_flops)
                block_flops.append(layer_flops)
        return block_flops
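The nested list returned here (per block, per searchable layer, per candidate operation) is the kind of table a differentiable search can consume by weighting each candidate's FLOPs with its architecture probability. A minimal sketch of that usage, assuming softmax-normalized architecture logits (the names expected_layer_flops and alpha are introduced here for illustration only):

import torch
import torch.nn.functional as F

def expected_layer_flops(op_flops, alpha):
    # Softmax-weighted sum of candidate-op FLOPs for one searchable layer.
    # op_flops: one inner list from get_flops_list(); alpha: architecture logits.
    weights = F.softmax(alpha, dim=-1)
    return (weights * torch.tensor(op_flops, dtype=weights.dtype)).sum()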
Example No. 3
    def get_cost_list(self,
                      data_shape,
                      cost_type='flops',
                      use_gpu=True,
                      meas_times=1000):
        cost_list = []
        block_datas = []
        total_cost = 0
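        # Pick the per-module cost function: FLOPs counting or repeated latency measurement.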
        if cost_type == 'flops':
            cost_func = lambda module, data: comp_multadds_fw(
                module, data, use_gpu)
        elif cost_type == 'latency':
            cost_func = lambda module, data: latency_measure_fw(
                module, data, meas_times)
        else:
            raise NotImplementedError

        if len(data_shape) == 3:
            input_data = torch.randn((1, ) + tuple(data_shape))
        else:
            input_data = torch.randn(tuple(data_shape))
        if use_gpu:
            input_data = input_data.cuda()

        cost, block_data = cost_func(self.input_block, input_data)
        cost_list.append(cost)
        block_datas.append(block_data)
        total_cost += cost
        if hasattr(self, 'head_block'):
            cost, block_data = cost_func(self.head_block, block_data)
            cost_list[0] += cost
            block_datas[0] = block_data

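        # For each searchable block, record the cost of every candidate op in its
        # head branches and (if present) its stack layers.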
        block_flops = []
        for block_id, block in enumerate(self.blocks):
            input_config = self.input_configs[block_id]
            inputs = [block_datas[i] for i in input_config['in_block_idx']]

            head_branch_flops = []
            for branch_id, head_branch in enumerate(
                    block.head_layer.head_branches):
                op_flops = []
                for op in head_branch._ops:
                    cost, block_data = cost_func(op, inputs[branch_id])
                    op_flops.append(cost)
                    total_cost += cost

                head_branch_flops.append(op_flops)

            stack_layer_flops = []
            if block.stack_layers.stack_layers is not None:
                for stack_layer in block.stack_layers.stack_layers:
                    op_flops = []
                    for op in stack_layer._ops:
                        cost, block_data = cost_func(op, block_data)
                        if isinstance(op, operations.Skip) and \
                                self.config.optim.sub_obj.skip_reg:
                            # skip_reg regularizes the cost assigned to the skip op, whose
                            # measured cost would otherwise be negligibly small.
                            cost = op_flops[0] / 10.
                        op_flops.append(cost)
                        total_cost += cost
                    stack_layer_flops.append(op_flops)
            block_flops.append([head_branch_flops, stack_layer_flops])
            block_datas.append(block_data)

        cost_list.append(block_flops)

        conv1_1_flops = []
        input_config = self.input_configs[-1]
        inputs = [block_datas[i] for i in input_config['in_block_idx']]
        for branch_id, branch in enumerate(
                self.conv1_1_block.conv1_1_branches):
            cost, block_data = cost_func(branch, inputs[branch_id])
            conv1_1_flops.append(cost)
            total_cost += cost
        block_datas.append(block_data)

        cost_list.append(conv1_1_flops)
        out = block_datas[-1]
        out = self.global_pooling(out)

        cost, out = cost_func(self.classifier, out.view(out.size(0), -1))
        cost_list.append(cost)
        total_cost += cost

        return cost_list, total_cost
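When cost_type='latency', the example relies on latency_measure_fw, which is not shown. A sketch of what a repeated-forward latency measurement of that style might look like (the function name, the warm-up count, and the millisecond unit are assumptions):

import time

import torch

def measure_latency_fw(module, input_data, meas_times=1000, warmup=10):
    # Return (average forward latency in milliseconds, output tensor).
    module.eval()
    with torch.no_grad():
        for _ in range(warmup):
            output = module(input_data)
        if input_data.is_cuda:
            torch.cuda.synchronize()
        start = time.time()
        for _ in range(meas_times):
            output = module(input_data)
        if input_data.is_cuda:
            torch.cuda.synchronize()
        latency_ms = (time.time() - start) / meas_times * 1000.0
    return latency_ms, output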
Example No. 4
    def get_flops_list(self, data_shape):
        flops_list = []
        block_datas = []
        total_flops = 0

        input_data = torch.randn((1, ) + tuple(data_shape)).cuda()
        flops, block_data = comp_multadds_fw(self.input_block, input_data)
        flops_list.append(flops)
        block_datas.append(block_data)
        total_flops += flops
        if hasattr(self, 'head_block'):
            flops, block_data = comp_multadds_fw(self.head_block, block_data)
            flops_list[0] += flops
            block_datas[0] = block_data

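        # Traverse each searchable block, recording per-candidate-op FLOPs for its
        # head branches and stack layers.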
        block_flops = []
        for block_id, block in enumerate(self.blocks):
            input_config = self.input_configs[block_id]
            inputs = [block_datas[i] for i in input_config['in_block_idx']]

            head_branch_flops = []
            for branch_id, head_branch in enumerate(
                    block.head_layer.head_branches):
                op_flops = []
                for op in head_branch._ops:
                    flops, block_data = comp_multadds_fw(op, inputs[branch_id])
                    op_flops.append(flops)
                    total_flops += flops

                head_branch_flops.append(op_flops)

            stack_layer_flops = []
            if block.stack_layers.stack_layers is not None:
                for stack_layer in block.stack_layers.stack_layers:
                    op_flops = []
                    for op in stack_layer._ops:
                        flops, block_data = comp_multadds_fw(op, block_data)
                        if flops == 0:
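                            # A zero-cost candidate (e.g. a skip connection) is assigned a
                            # tenth of the first candidate's FLOPs as a regularizer, mirroring
                            # the skip_reg handling in the previous example.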
                            flops = op_flops[0] / 10.
                        op_flops.append(flops)
                        total_flops += flops
                    stack_layer_flops.append(op_flops)
            block_flops.append([head_branch_flops, stack_layer_flops])
            block_datas.append(block_data)

        flops_list.append(block_flops)

        conv1_1_flops = []
        input_config = self.input_configs[-1]
        inputs = [block_datas[i] for i in input_config['in_block_idx']]
        for branch_id, branch in enumerate(
                self.conv1_1_block.conv1_1_branches):
            flops, block_data = comp_multadds_fw(branch, inputs[branch_id])
            conv1_1_flops.append(flops)
            total_flops += flops
        block_datas.append(block_data)

        flops_list.append(conv1_1_flops)
        out = block_datas[-1]
        out = self.global_pooling(out)

        flops, out = comp_multadds_fw(self.classifier,
                                      out.view(out.size(0), -1))
        flops_list.append(flops)
        total_flops += flops

        self.flops_list = flops_list
        return flops_list, total_flops
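This variant returns the same nested structure as the previous example, with FLOPs only and the input placed on the GPU. A small, assumed helper for collapsing such a nested list into a single number (useful as a sanity check; note that flops_list[0] includes the optional head_block while total_flops, as written above, does not):

def sum_nested_flops(flops_list):
    # Recursively sum a nested list of per-op FLOPs, e.g. the flops_list returned above.
    if isinstance(flops_list, (list, tuple)):
        return sum(sum_nested_flops(item) for item in flops_list)
    return flops_list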