Exemple #1
0
    def test_raise_exception_bitwidth(self):
        """Packing must fail with ValueError when a value does not fit in the bitwidth."""
        # The value 4 needs three bits, but the packer below is built for 2.
        values = np.zeros(64, dtype=np.float32)
        values[:-1:2] = 1
        values[:-1:4] = 4

        two_bit_packer = Packer(2, 32)
        with self.assertRaises(ValueError):
            two_bit_packer.run(values)
Exemple #2
0
    def test_raise_exception_wordsize(self):
        """Packing must fail with ValueError when the input size is not a multiple of the word size."""
        # 83 elements is not a multiple of the 32 passed as the word size.
        # NOTE(review): the fixture also contains the value 4, which does not
        # fit in 2 bits, so a bit-width check could raise the same error -
        # confirm the size check is what this test actually exercises.
        values = np.zeros(83, dtype=np.float32)
        values[:-1:2] = 1
        values[:-1:4] = 4

        two_bit_packer = Packer(2, 32)
        with self.assertRaises(ValueError):
            two_bit_packer.run(values)
Exemple #3
0
    def test_bw1_dividable_by_wordsize(self):
        """Pack a 1-bit tensor whose size is an exact multiple of the word size."""
        bits = np.zeros(32, dtype=np.float32)
        bits[:6] = [0, 1, 0, 1, 0, 1]

        packed = Packer(1, 32).run(bits)

        # 42 == 0b101010: bits 1, 3 and 5 are set, the same positions as the
        # ones in the input.
        self.assertEqual(packed[0], 42)
Exemple #4
0
    def test_bw1_not_dividable_by_wordsize(self):
        """Pack a 37-element 1-bit tensor with a packer configured for a word size of 37."""
        bits = np.zeros(37, dtype=np.float32)
        bits[::2] = 1  # every even-indexed element is set

        packed = Packer(1, 37).run(bits)

        # 1431655765 == 0x55555555, i.e. every even bit of a 32-bit word.
        np.testing.assert_array_equal(packed[0], [1431655765])
Exemple #5
0
    def test_bw2_dividable_by_wordsize(self):
        """Pack a 2-bit tensor whose size is an exact multiple of the word size."""
        values = np.zeros(32, dtype=np.float32)
        values[:6] = [0, 3, 0, 3, 0, 3]

        packed = Packer(2, 32).run(values)

        # The value 3 has both of its bits set, so two words are expected and
        # each one equals 0b101010 == 42.
        np.testing.assert_array_equal(packed[0], [42, 42])
Exemple #6
0
def pass_lookup(graph: Graph) -> None:
    """Replace a gather-based lookup branch feeding a quantizer with one ``Lookup`` op.

    For every supported quantizer whose first-input chain is
    ``Reshape <- Reshape <- Gather <- Gather`` and whose innermost gather
    table has rank 2 and 256 rows, the table is quantized ahead of time, each
    quantized row is packed into words, and the first two packed words of
    every row are stored in two 256-entry constants (``lsb`` and ``msb``).
    A ``Lookup`` operator reading the first ``Input`` node of the graph and
    those two constants then takes the place of the quantizer, and the nodes
    of the replaced branch are removed from the graph.

    Parameters
    ----------
    graph : Graph
        The input graph. It will be modified in-place.
    """
    quantization_types = [
        'QTZ_binary_mean_scaling', 'QTZ_linear_mid_tread_half',
        'QTZ_binary_channel_wise_mean_scaling'
    ]
    # Op types expected when walking upwards from the quantizer through the
    # first input of each node.
    expected_chain = ('Reshape', 'Reshape', 'Gather', 'Gather')

    to_be_removed = []
    sorted_nodes = list(sort_graph(graph))
    exec_list = [n for n in sorted_nodes if n.op_type in quantization_types]
    # Exact comparison: ``op_type in 'Input'`` would be a substring test and
    # would also accept '', 'In', 'put', ...
    placeholder = [n for n in sorted_nodes if n.op_type == 'Input']

    for quantizer in exec_list:
        node = quantizer
        chain_matches = True
        for expected_type in expected_chain:
            node = node.input_nodes[0]
            if node.op_type != expected_type:
                chain_matches = False
                break
        if not chain_matches:
            continue

        gather_params = node.input_nodes[0]
        if gather_params.rank != 2 or gather_params.shape[0] != 256:
            continue

        # Quantize the whole lookup table once, at optimization time.
        qtz_data = quantizer.run(data=gather_params.data)['data']

        word_size = 32
        lu_bitwidth = quantizer.nbit
        packer = Packer(lu_bitwidth, word_size)

        lsb = np.zeros((256, ), np.uint32)
        msb = np.zeros((256, ), np.uint32)

        for idx, row in enumerate(qtz_data):
            packed = packer.run(row.astype(np.float32), row.shape).flatten()
            # Only the first two packed words of each row are kept.
            lsb[idx] = packed[0]
            msb[idx] = packed[1]

        pe_lsb = Constant('pe_lsb_new',
                          QUANTIZED_PACKED_KERNEL(),
                          lsb,
                          dimension_format='TC',
                          packed=True,
                          actual_shape=[256, word_size])
        pe_msb = Constant('pe_msb_new',
                          QUANTIZED_PACKED_KERNEL(),
                          msb,
                          dimension_format='TC',
                          packed=True,
                          actual_shape=[256, word_size])

        graph_input = placeholder[0]
        _, h, w, _ = quantizer.shape
        shape = [1, h, w, 2, word_size]
        pe = Lookup('Lookup',
                    shape,
                    QUANTIZED_PACKED(), {
                        'input': graph_input,
                        'lsb': pe_lsb,
                        'msb': pe_msb
                    },
                    dimension_format='ChHWBCl')

        # Collect the replaced branch before rewiring the graph input.
        get_nodes_in_branch(quantizer, graph_input, to_be_removed)
        graph_input.remove_output('output')
        graph_input.add_output('output', pe)
        pe.add_outputs(quantizer.output_ops)

        output_op = quantizer.output_op_list[0]

        # Find the input slot of the consumer that the quantizer occupied;
        # falls back to 'X' when none matches.
        target_input_name = 'X'
        for input_name in output_op._input_names:
            if quantizer.equals(output_op._input_ops[input_name]):
                target_input_name = input_name
                break

        output_op.add_input(target_input_name, pe)

        graph.add_op(pe_lsb)
        graph.add_op(pe_msb)
        graph.add_op(pe)

    for op in to_be_removed:
        graph.remove_op(op)
Exemple #7
0
def _pad_axis_to_multiple(tensor, axis, multiple):
    """Return ``tensor`` zero-padded at the end of ``axis`` up to a multiple of ``multiple``."""
    missing = -tensor.shape[axis] % multiple
    if not missing:
        return tensor
    pad_shape = list(tensor.shape)
    pad_shape[axis] = missing
    return np.append(tensor, np.zeros(pad_shape), axis=axis)


def pass_pack_weights(graph: Graph) -> None:
    """Given a Quantized convolution node C, it will pack the weights of C into 32 bit words.
       If the node Q that apply quantization to the weights of C quantizes, for example, into 1 bit values
       then one 32 bit word will contain 32 weights.

    The quantized weights are zero-padded on axes 0 and 3 to multiples of 32
    and packed in three element orders: the padded order itself, a blocked
    order of shape ``[oc // 32, kd // 32, kh, kw, 32, 32]`` and an order of
    shape ``[kh, kw, oc, kd]``.  The resulting constant replaces the weight
    quantizer as input of its consumers, and the quantizer branch is removed.

    Parameters
    ----------
    graph : Graph
        The input graph. It will be modified in-place.
    """
    exec_list = [n for n in sort_graph(graph) if n.op_type == 'Conv']
    quantization_types = [
        'QTZ_binary_mean_scaling', 'QTZ_linear_mid_tread_half',
        'QTZ_binary_channel_wise_mean_scaling'
    ]

    word_size = 32
    weight_bitwidth = 1
    packer = Packer(weight_bitwidth, word_size)
    to_be_removed = []
    b = 32
    word_mask = (0x1 << 32) - 1

    for conv_node in exec_list:
        # check if this is a quantized convolution
        if not conv_node.quantizer or not conv_node.a_quantizer:
            continue

        # Check if we support this kind of quantizer
        weight_quantizer = conv_node.quantizer
        if weight_quantizer.op_type not in quantization_types:
            continue

        # Quantize the weights
        weight_quantizer.run_forward()

        padded = np.copy(weight_quantizer.data)
        for axis in (0, 3):
            padded = _pad_axis_to_multiple(padded, axis, b)

        oc, kh, kw, kd = padded.shape

        # Blocked order: split axis 0 into (g, o) and axis 3 into (p, d),
        # then emit the elements in (g, p, h, w, o, d) order.
        tca_output = padded.reshape(oc // b, b, kh, kw, kd // b, b) \
            .transpose(0, 4, 2, 3, 1, 5).flatten()

        # (h, w, o, i) order; kept as float64 like the zero-initialised
        # buffer this replaces.
        kn2row_output = padded.transpose(1, 2, 0, 3).flatten().astype(
            np.float64)

        padded_data = padded.flatten()

        op_data = weight_quantizer.binarizer(padded_data)
        data = packer.run(op_data.astype(np.float32),
                          weight_quantizer.dimension)

        tca_binarized_data = weight_quantizer.binarizer(tca_output)
        tca_packed_data = packer.run(tca_binarized_data.astype(np.float32),
                                     weight_quantizer.dimension)

        kn2row_binarized_data = weight_quantizer.binarizer(kn2row_output)
        kn2row_data = packer.run(kn2row_binarized_data.astype(np.float32),
                                 weight_quantizer.dimension)

        shape = [oc, kh, kw, kd]
        tca_shape = [oc // b, kd // b, kh, kw, b, b]
        kn2row_shape = [kh, kw, oc, kd]

        # Create the new constant with the quantized weights.
        # ``data`` and ``transposed_data`` are stored bit-inverted within
        # 32 bits; ``kn2row_data`` is stored as packed.
        quantized_constant = Constant(
            weight_quantizer.name + '_new',
            PackedUint32(),
            data=np.vectorize(lambda k: (~k) & word_mask)(data),
            dimension_format="NHWC",
            transposed_dimension_format="OhIhHWOlIl",
            packed=True,
            actual_shape=shape,
            transposed_shape=tca_shape,
            transposed_data=[(~k) & word_mask
                             for k in tca_packed_data.flatten()],
            kn2row_data=list(kn2row_data.flatten()),
            kn2row_shape=kn2row_shape,
            kn2row_dimension_format="HWNC")

        # get nodes to be removed after being disconnected
        get_nodes_in_branch(weight_quantizer, None, to_be_removed)

        # Add the constant to the graph and connect the new constant
        graph.add_op(quantized_constant)
        quantized_constant.add_outputs(weight_quantizer.output_ops)
        for consumer_list in weight_quantizer.output_ops.values():
            for consumer_node in consumer_list:
                for input_name, input_node in consumer_node.input_ops.items():
                    if input_node == weight_quantizer:
                        # Stop right after rewiring: the input mapping is
                        # being iterated at this point.
                        consumer_node.add_input(input_name, quantized_constant)
                        break

    for op in to_be_removed:
        graph.remove_op(op)
Exemple #8
0
    def run_forward_conv(self, node: Conv, **kwargs: Any) -> None:
        """Precompute a convolution or pack its prunable inputs.

        When hard quantization is enabled and ``node`` is listed in
        ``kwargs['qconv']``:

        * if every input already has a precomputed value, the node is run
          forward, marked as prunable and inherits the quantizer of its
          quantized input;
        * otherwise the node is marked as quantized, the quantizer of its
          quantized input is attached to it, and every prunable input is
          binarized, packed and replaced by a new packed ``Constant``.

        In every other case the inputs are handled by
        ``_precompute_or_prune_inputs``.

        Raises
        ------
        ValueError
            If more than one input of ``node`` has a registered quantizer.
        """
        # Keep each input name together with its operator so that a missing
        # input cannot shift the name/operator pairing.
        named_ops = [(name, node.input_ops[name])
                     for name in node.input_names
                     if node.input_ops.get(name)]
        ops: List[Operator] = [op for _, op in named_ops]

        if not (self._hard_quantized and node in kwargs['qconv']):
            self._precompute_or_prune_inputs(node)
            return

        # data is to be packed
        ops_have_precomp_values = [
            self._has_precompute_value(op) for op in ops
        ]

        if all(ops_have_precomp_values):  # all input has concrete values
            node.run_forward()
            self._precomp_dic[node.name] = True  # this node can be pruned
            quantizers = {
                op.name: self._quantizers[op.name]
                for op in ops if self._quantizers.get(op.name)
            }
            if len(quantizers) > 1:
                raise ValueError(
                    f'{node.name}: multiple quantized inputs with {node.op_type} are not supported.'
                )
            self._quantizers[node.name] = list(quantizers.values())[0]
            return

        # an input (must be weight) is to be quantized and packed
        self._precomp_dic[node.name] = False
        node.is_quantized = True
        packer = Packer(self._quantized_bitwidth, self._wordsize)
        quantizers = {
            op.name: self._quantizers[op.name]
            for op in ops if self._quantizers.get(op.name)
        }
        if len(quantizers) > 1:
            raise ValueError(
                f'{node.name}: multiple quantized inputs with {node.op_type} are not supported.'
            )
        node.quantizer = list(quantizers.values())[0]

        for key, op in named_ops:
            if not self._is_prunable(op):
                continue

            # Replace the prunable input by a constant holding its
            # binarized and packed data.
            op_data = node.quantizer.binarizer(op.data)
            data = packer.run(op_data.astype(np.float32), op.dimension)
            new_op = Constant(op.name + '_new',
                              op.dtype,
                              data,
                              packed=True,
                              actual_shape=op.shape)
            node.add_input(key, new_op)
            self._graph.add_op(new_op)
            self._prune(op)
Exemple #9
0
    def create_quantized_graph2(self, data1: np.ndarray, data2: np.ndarray,
                                data3: np.ndarray) \
            -> Tuple[Graph, np.float32, np.float32]:
        """Build a graph with one float convolution feeding two quantized ones.

        ``conv1`` convolves the input with the binarized-and-rescaled
        ``data1``; its result goes through an activation quantizer whose
        output is shared by ``conv2`` and ``conv3``.  Those two use packed
        weights made from ``data2`` and ``data3`` and each one ends in its
        own ``Output`` node.

        Returns
        -------
        tuple
            The graph, the scaling factor computed for ``data2`` and the
            scaling factor computed for ``data3``.
        """
        graph = Graph()

        # input
        x = Input(
            'placeholder',
            [1, 5, 5, 3],
            Float32(),
        )

        # constant and internal nodes
        scaling1, qdata1 = self.binary_mean_scaling(data1)
        w = Constant('weight', Float32(), qdata1 * scaling1)

        # This quantizer is only connected through its input ``w``; it is not
        # set as ``conv1.quantizer``.
        q = QTZ_binary_mean_scaling('qtz1', [3, 2, 2, 3], Float32(),
                                    {'input': w})

        # Conv
        conv1 = Conv('conv1', [1, 4, 4, 3],
                     Float32(), {
                         'X': x,
                         'W': w
                     },
                     kernel_shape=[2, 2])

        s1 = Constant('aq_const1', Float32(), np.array(1))

        s2 = Constant('aq_const2', Float32(), np.array(2))

        # Activation quantizer shared by both quantized convolutions.
        aq = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3],
                                       QUANTIZED_NOT_PACKED(), {
                                           'X': conv1,
                                           'Y': s1,
                                           'Z': s2
                                       })

        from modules.packer import Packer
        packer = Packer(1, 32)
        scaling2, qdata2 = self.binary_mean_scaling(data2)
        w2 = Constant('weight2',
                      Uint32(),
                      packer.run(qdata2),
                      packed=True,
                      actual_shape=[3, 2, 2, 3])

        q2 = QTZ_binary_mean_scaling('qtz2', [3, 2, 2, 3], Float32(),
                                     {'input': w2})
        q2.scaling_factor = scaling2

        conv2 = Conv(
            'conv2',
            [1, 3, 3, 3],
            Float32(),
            {
                'X': aq,
                'W': w2
            },
            kernel_shape=[2, 2],
            quantized=True,
        )
        conv2.quantizer = q2

        scaling3, qdata3 = self.binary_mean_scaling(data3)
        # NOTE(review): this constant reuses the name 'weight2' of ``w2``
        # above - confirm whether 'weight3' was intended.
        w3 = Constant('weight2',
                      Uint32(),
                      packer.run(qdata3),
                      packed=True,
                      actual_shape=[3, 2, 2, 3])

        q3 = QTZ_binary_mean_scaling('qtz3', [3, 2, 2, 3], Float32(),
                                     {'input': w3})
        q3.scaling_factor = scaling3

        conv3 = Conv('conv3', [1, 3, 3, 3],
                     Float32(), {
                         'X': aq,
                         'W': w3
                     },
                     kernel_shape=[2, 2],
                     quantized=True)
        conv3.quantizer = q3

        y1 = Output('output1', [1, 3, 3, 3], Float32(), {'input': conv2})

        y2 = Output('output2', [1, 3, 3, 3], Float32(), {'input': conv3})

        # add ops to the graph
        graph.add_op_and_inputs(y1)
        graph.add_op_and_inputs(y2)

        return graph, scaling2, scaling3
Exemple #10
0
    def create_quantized_graph(self, data: np.ndarray, data2: np.ndarray, data3: np.ndarray) \
            -> Tuple[Graph, np.float32, np.float32]:
        """Build a linear three-convolution graph with a quantized middle layer.

        The chain is ``conv1`` (float weights derived from ``data``), an
        activation quantizer, an identity-permutation ``Transpose``, ``conv2``
        (quantized, packed weights derived from ``data2``), a second
        activation quantizer and ``conv3`` (float weights ``data3``).

        Returns the graph together with the scaling factors computed for
        ``data`` and ``data2``.
        """
        graph = Graph()

        # graph input
        placeholder = Input('placeholder', [1, 5, 5, 3], Float32())

        from modules.packer import Packer
        packer = Packer(1, 32)

        # first layer: binarized weights scaled back to float
        kernel1 = data.transpose([3, 2, 1, 0])
        scaling, binarized1 = self.binary_mean_scaling(kernel1)
        kernel1_shape = list(kernel1.shape)
        weight1 = Constant('weight', Float32(), binarized1 * scaling)

        weight1_quantizer = QTZ_binary_mean_scaling(
            'qtz1', kernel1_shape, Float32(), {'input': weight1})
        weight1_quantizer.scaling_factor = scaling

        conv1_inputs = {'X': placeholder, 'W': weight1}
        conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), conv1_inputs,
                     kernel_shape=[2, 2])

        # first activation quantizer and its two scalar constants
        aq1_y = Constant('aq_const1', Float32(), np.array(1))
        aq1_z = Constant('aq_const2', Float32(), np.array(2))
        aq1_inputs = {'X': conv1, 'Y': aq1_y, 'Z': aq1_z}
        act_quantizer1 = QTZ_linear_mid_tread_half(
            'aqtz1', [1, 4, 4, 3], QUANTIZED_NOT_PACKED(), aq1_inputs)

        # transpose with the identity permutation
        passthrough = Transpose('dummy', [1, 4, 4, 3], QUANTIZED_NOT_PACKED(),
                                {'data': act_quantizer1},
                                perm=[0, 1, 2, 3])

        # second layer: packed binary weights
        scaling2, binarized2 = self.binary_mean_scaling(data2)
        packed2 = packer.run(binarized2)
        weight2 = Constant('weight2', Uint32(), packed2,
                           packed=True,
                           actual_shape=[3, 2, 2, 3])

        # quantizer connected to conv2 as 'conv2.quantizer'
        weight2_quantizer = QTZ_binary_mean_scaling(
            'qtz2', [3, 2, 2, 3], Uint32(), {'input': weight2})
        weight2_quantizer.scaling_factor = scaling2

        conv2_inputs = {'X': passthrough, 'W': weight2}
        conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), conv2_inputs,
                     kernel_shape=[2, 2],
                     quantized=True)
        conv2.quantizer = weight2_quantizer

        # second activation quantizer; its constants carry the same names as
        # the ones of the first quantizer
        aq2_y = Constant('aq_const1', Float32(), np.array(1))
        aq2_z = Constant('aq_const2', Float32(), np.array(2))
        aq2_inputs = {'X': conv2, 'Y': aq2_y, 'Z': aq2_z}
        act_quantizer2 = QTZ_linear_mid_tread_half(
            'aqtz2', [1, 3, 3, 3], Float32(), aq2_inputs)

        # third layer: plain float weights
        weight3 = Constant('weight3', Float32(), data3)
        conv3_inputs = {'X': act_quantizer2, 'W': weight3}
        conv3 = Conv('conv3', [1, 2, 2, 3], Float32(), conv3_inputs,
                     kernel_shape=[2, 2])

        # single output
        output = Output('output', [1, 2, 2, 3], Float32(), {'input': conv3})

        # add ops to the graph
        graph.add_op_and_inputs(output)

        return graph, scaling, scaling2
Exemple #11
0
def pass_pack_weights(graph: Graph) -> None:
    """Pack the weights of every quantized convolution into 32 bit words.

    For each ``Conv`` node that has both a weight quantizer and an activation
    quantizer, and whose weight quantizer is of a supported type, the
    quantizer is run, its data is binarized and packed with a bit width of 1,
    and a new packed constant takes the quantizer's place as input of its
    consumers.  With 1 bit values, one 32 bit word holds 32 weights.

    Parameters
    ----------
    graph : Graph
        The input graph. It will be modified in-place.
    """
    conv_nodes = [
        node for node in sort_graph(graph) if node.op_type == 'Conv'
    ]
    supported_quantizers = (
        'QTZ_binary_mean_scaling',
        'QTZ_linear_mid_tread_half',
        'QTZ_binary_channel_wise_mean_scaling',
    )

    bits_per_weight = 1
    bits_per_word = 32
    packer = Packer(bits_per_weight, bits_per_word)
    stale_nodes = []

    for conv in conv_nodes:
        # only quantized convolutions are handled
        if not (conv.quantizer and conv.a_quantizer):
            continue

        quantizer = conv.quantizer
        # skip quantizers of an unsupported kind
        if quantizer.op_type not in supported_quantizers:
            continue

        # quantize, binarize and pack the weights
        quantizer.run_forward()
        binarized = quantizer.binarizer(quantizer.data)
        packed = packer.run(binarized.astype(np.float32), quantizer.dimension)

        # build the replacement constant, including the transposed kernels
        out_h = conv.height
        out_w = conv.width
        out_c = conv.channel
        kernel_h = conv.kernel_height
        kernel_w = conv.kernel_width
        in_c = conv.input_ops['X'].channel
        packed_weights = Constant(
            quantizer.name + '_new',
            Uint32(),
            packed,
            packed=True,
            actual_shape=quantizer.shape,
            transposed_data=_transpose_kernels(packed, out_h, out_w, out_c,
                                               kernel_h, kernel_w, in_c))

        # remember the branch that gets disconnected below
        get_nodes_in_branch(quantizer, None, stale_nodes)

        # register the constant and hand it the quantizer's consumers
        graph.add_op(packed_weights)
        packed_weights.add_outputs(quantizer.output_ops)
        for _, consumers in quantizer.output_ops.items():
            for consumer in consumers:
                for slot, producer in consumer.input_ops.items():
                    if producer == quantizer:
                        consumer.add_input(slot, packed_weights)
                        break

    for stale in stale_nodes:
        graph.remove_op(stale)