コード例 #1
0
    def test_linear_quant_batchedmatmul_8bit(self):
        """Check 8-bit weight quantization of a single batched-matmul layer.

        Builds a one-layer network with a random 32x32 weight matrix drawn
        from [-1, 1) and a random bias, runs ``quantize_weights`` with 8 bits,
        and then verifies that:

        * the quantized layer no longer holds float weights and instead
          carries a non-empty raw byte payload, and
        * the quantized model's prediction stays within an absolute
          tolerance of 0.1 of the unquantized ``data @ W + bias`` reference.
        """
        # Fixed seed so the weights, bias and input are reproducible.
        np.random.seed(1988)
        W = np.random.rand(32, 32) * 2.0 - 1
        bias = np.random.rand(32)

        input_features = [("data", datatypes.Array(2, 32))]
        output_features = [("out", None)]
        builder = NeuralNetworkBuilder(
            input_features, output_features, disable_rank5_shape_mapping=True
        )
        builder.add_batched_mat_mul(
            name="batched_matmul",
            input_names=["data"],
            output_name="out",
            weight_matrix_rows=32,
            weight_matrix_columns=32,
            W=W,
            bias=bias,
        )
        mlmodel = MLModel(builder.spec)
        q_mlmodel = quantize_weights(mlmodel, 8)
        q_spec = q_mlmodel.get_spec()
        q_layer = q_spec.neuralNetwork.layers[0].batchedMatmul

        # After quantization the float storage must be empty and the raw
        # (quantized) storage populated.
        self.assertEqual(len(q_layer.weights.floatValue), 0)
        self.assertGreater(len(q_layer.weights.rawValue), 0)

        data = np.random.rand(2, 32)
        data_dict = {"data": data}
        out = q_mlmodel.predict(data_dict, useCPUOnly=True)["out"]
        expected_out = np.matmul(data, W) + bias
        self.assertEqual(out.shape, expected_out.shape)
        # rtol / equal_nan are pinned to np.allclose's defaults so the
        # acceptance criterion is unchanged; only the failure report improves.
        np.testing.assert_allclose(
            out.flatten(),
            expected_out.flatten(),
            rtol=1e-5,
            atol=0.1,
            equal_nan=False,
        )
コード例 #2
0
    def test_linear_quant_batchedmatmul_5bit(self):
        """Check prediction of a batched-matmul layer given 5-bit linear weights.

        The 2x3 weight matrix is supplied already quantized (packed to 5 bits
        per value) together with one linear scale and one linear bias per
        weight column. The test reconstructs the float weights as
        ``scale * W + quant_bias`` and asserts that the model output matches
        ``data @ W_unquantized + bias``.
        """
        W = np.array([[31, 20, 11], [1, 0, 8]], dtype=np.uint8)
        quant_scale = np.array([[10.0, 2.0, 3.0]])
        quant_bias = np.array([[-2.0, -10.0, 6.0]])
        # The (1, 3) scale/bias rows broadcast over both rows of W.
        W_unquantized = quant_scale * W + quant_bias
        bias = np.array([1.0, 2.0, 3.0])

        input_features = [('data', datatypes.Array(2, 2))]
        output_features = [('out', None)]
        builder = NeuralNetworkBuilder(
            input_features, output_features, disable_rank5_shape_mapping=True
        )
        builder.add_batched_mat_mul(
            name='batched_matmul',
            input_names=['data'],
            output_name='out',
            weight_matrix_rows=2,
            weight_matrix_columns=3,
            W=_convert_array_to_nbit_quantized_bytes(W.flatten(), 5).tobytes(),
            bias=bias,
            is_quantized_weight=True,
            quantization_type='linear',
            nbits=5,
            quant_scale=quant_scale.flatten(),
            quant_bias=quant_bias.flatten(),
        )
        mlmodel = MLModel(builder.spec)

        data = np.array([[5, 6], [10, 12]], dtype=np.float32)
        data_dict = {'data': data}
        out = mlmodel.predict(data_dict, useCPUOnly=True)['out']
        expected_out = np.matmul(data, W_unquantized) + bias
        self.assertEqual(out.shape, expected_out.shape)
        # Tolerances mirror np.allclose's defaults so the acceptance criterion
        # is unchanged; assert_allclose just reports the mismatch on failure.
        np.testing.assert_allclose(
            out.flatten(),
            expected_out.flatten(),
            rtol=1e-5,
            atol=1e-8,
            equal_nan=False,
        )