import collections

import numpy as np

from caffe2.python import core, workspace
from caffe2.quantization.server import dnnlowp_pybind11


def test_dnnlowp_quantize(self, size, is_empty, absorb, gc, dc):
    if is_empty:
        size = 0
    min_ = -10.0
    max_ = 20.0
    X = (np.random.rand(size) * (max_ - min_) + min_).astype(np.float32)

    # Guard against empty inputs when computing the quantization range.
    X_min = 0 if X.size == 0 else X.min()
    X_max = 1 if X.size == 0 else X.max()
    # Pick scale/zero_point so the representable range always includes 0.
    X_scale = (max(X_max, 0) - min(X_min, 0)) / 255
    X_zero = np.round(-X_min / X_scale)

    op_type_list = ["Quantize", "Int8Quantize"]
    engine = "DNNLOWP"

    for op_type in op_type_list:
        net = core.Net("test_net")

        quantize = core.CreateOperator(
            op_type, ["X"], ["X_q"], engine=engine, device_option=gc
        )
        net.Proto().op.extend([quantize])

        dnnlowp_pybind11.CreateInt8QuantParamsBlob(
            "quant_param", float(X_scale), int(X_zero)
        )
        quantize_2 = core.CreateOperator(
            op_type,
            ["X", "quant_param"],
            ["X_q_2"],
            engine=engine,
            device_option=gc,
        )
        net.Proto().op.extend([quantize_2])

        if absorb:
            net_str = dnnlowp_pybind11.freeze_quantization_params(
                net.Proto().SerializeToString()
            )
            net.Proto().ParseFromString(net_str)

        workspace.FeedBlob("X", X, device_option=gc)
        workspace.RunNetOnce(net)
        X_q = workspace.FetchInt8Blob("X_q")[0]
        X_q_2 = workspace.FetchInt8Blob("X_q_2")[0]

        # Dequantize results and measure quantization error against inputs
        X_dq = X_scale * (X_q - X_zero)
        X_dq_2 = X_scale * (X_q_2 - X_zero)

        # The bound would be X_scale / 2 in exact arithmetic; divide by
        # 1.9 instead to allow for finite floating-point precision.
        atol = X_scale / 1.9
        np.testing.assert_allclose(X_dq, X, atol=atol, rtol=0)
        np.testing.assert_allclose(X_dq_2, X, atol=atol, rtol=0)
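# For reference: a minimal numpy sketch (an illustration, not the DNNLOWP
# implementation) of the affine uint8 quantization whose round-trip error
# the test above bounds. It uses the same scale/zero_point formulas and
# assumes X spans a nonzero range; the helper name _reference_quantize_uint8
# is ours, not part of the library.
def _reference_quantize_uint8(X):
    X_min = min(float(X.min()), 0.0)
    X_max = max(float(X.max()), 0.0)
    scale = (X_max - X_min) / 255.0
    # zero_point is integral, so rounding before or after the shift is
    # equivalent.
    zero_point = np.round(-X_min / scale)
    X_q = np.clip(np.round(X / scale) + zero_point, 0, 255).astype(np.uint8)
    return X_q, scale, zero_point


# Round-to-nearest keeps the round-trip error within half a quantization
# step, so the same X_scale / 1.9 tolerance used above applies:
#   X = (np.random.rand(1024) * 30.0 - 10.0).astype(np.float32)
#   X_q, scale, zero_point = _reference_quantize_uint8(X)
#   X_dq = scale * (X_q.astype(np.float32) - zero_point)
#   assert np.abs(X_dq - X).max() <= scale / 1.9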
def run_conv_or_fc(
    test_case,
    init_net,
    net,
    X,
    W,
    b,
    op_type,
    engine,
    order,
    gc,
    outputs,
    scale=None,
    zero_point=None,
):
    if order:
        # Conv
        Output = collections.namedtuple(
            "Output", ["Y", "op_type", "engine", "order"]
        )
    else:
        # FC
        Output = collections.namedtuple("Output", ["Y", "op_type", "engine"])

    # Run DNNLOWP ops multiple times to cover their first run, which
    # populates caches and therefore exercises different code paths from
    # subsequent runs.
    # test_case.ws.run re-creates the operators every time, so this also
    # covers the case of multiple nets sharing the same workspace.
    test_case.ws.create_blob("X").feed(X, device_option=gc)
    test_case.ws.create_blob("W").feed(W, device_option=gc)
    test_case.ws.create_blob("b").feed(b, device_option=gc)
    if scale is not None and zero_point is not None:
        with workspace.WorkspaceGuard(test_case.ws):
            dnnlowp_pybind11.CreateInt8QuantParamsBlob(
                "quant_param", float(scale), int(zero_point)
            )
    if init_net:
        test_case.ws.run(init_net)
    for _ in range(1 if engine == "" else 2):
        test_case.ws.run(net)
    Y = test_case.ws.blobs["Y"].fetch()
    if order:
        outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
    else:
        outputs.append(Output(Y=Y, op_type=op_type, engine=engine))

    # workspace.CreateNet + workspace.RunNet reuses the same operators.
    if engine != "":
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("W", W)
        workspace.FeedBlob("b", b)
        if scale is not None and zero_point is not None:
            dnnlowp_pybind11.CreateInt8QuantParamsBlob(
                "quant_param", float(scale), int(zero_point)
            )
        if init_net:
            workspace.RunNetOnce(init_net)
        workspace.CreateNet(net)
        for _ in range(2):
            workspace.RunNet(net)
        Y = workspace.FetchBlob("Y")
        if order:
            outputs.append(
                Output(Y=Y, op_type=op_type, engine=engine, order=order)
            )
        else:
            outputs.append(Output(Y=Y, op_type=op_type, engine=engine))
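# A hypothetical standalone driver for run_conv_or_fc, sketched under the
# assumption that Caffe2 is installed and the helper above is in scope.
# _FakeTestCase stands in for a HypothesisTestCase and is not part of the
# library; only its .ws attribute is used. With engine="" only the
# Workspace-object path above runs, and it runs once.
if __name__ == "__main__":
    from caffe2.proto import caffe2_pb2

    class _FakeTestCase(object):
        def __init__(self):
            self.ws = workspace.C.Workspace()

    X = np.random.rand(4, 8).astype(np.float32)
    W = np.random.rand(16, 8).astype(np.float32)
    b = np.random.rand(16).astype(np.float32)

    net = core.Net("fc_test_net")
    net.Proto().op.extend([core.CreateOperator("FC", ["X", "W", "b"], ["Y"])])

    outputs = []
    gc = caffe2_pb2.DeviceOption()  # default (CPU) device
    run_conv_or_fc(
        _FakeTestCase(), None, net, X, W, b, "FC", "", None, gc, outputs
    )
    # Caffe2's FC computes Y = X * W^T + b, so the expected shape is (4, 16).
    print(outputs[0].Y.shape)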