def test_load_quantized():
    from megengine.core.tensor import dtype

    data_shape = (2, 28)
    data = tensor(np.random.random(data_shape), dtype="float32")
    data = data.astype(dtype.qint8(0.1))
    mlp = MLP()
    quantize_qat(mlp)
    quantize(mlp)
    mlp.dense0.weight = Parameter(mlp.dense0.weight.astype(dtype.qint8(0.001)).numpy())
    mlp.dense1.weight = Parameter(mlp.dense1.weight.astype(dtype.qint8(0.0002)).numpy())
    mlp.eval()
    pred0 = mlp(data)

    with BytesIO() as fout:
        mge.save(mlp.state_dict(), fout)
        fout.seek(0)
        checkpoint = mge.load(fout)

    # change mlp weight.
    mlp.dense0.weight = Parameter(mlp.dense0.weight.astype(dtype.qint8(0.00001)).numpy())
    mlp.dense1.weight = Parameter(mlp.dense1.weight.astype(dtype.qint8(0.2)).numpy())
    mlp.load_state_dict(checkpoint)
    pred1 = mlp(data)

    np.testing.assert_allclose(
        pred0.astype("float32").numpy(),
        pred1.astype("float32").numpy(),
        atol=5e-6,
    )
def __init__(self):
    super().__init__()
    self.quant = QAT.QuantStub()
    self.linear = Float.Sequential(QAT.Linear(3, 3), QAT.Linear(3, 3))
    self.dequant = QAT.DequantStub()
    self.linear[0].bias[...] = Parameter(np.random.rand(3))
    self.linear[1].bias[...] = Parameter(np.random.rand(3))
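# A minimal sketch of the matching forward pass (not part of the original
# snippet): quantize the input, run the two linear layers, then dequantize.
# The method name and call order are assumptions based on the attributes
# defined in the __init__ above.
def forward(self, x):
    x = self.quant(x)
    x = self.linear(x)
    return self.dequant(x)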
def test_conv_transpose2d():
    SH, SW = 3, 1
    PH, PW = 2, 0
    N, IC, IH, IW = 4, 5, 8, 6
    KH, KW = 3, 4
    OC = 3
    BIAS = False

    def getsize(inp, kern, stride):
        return (inp - 1) * stride + kern

    OH = getsize(IH, KH, SH)
    OW = getsize(IW, KW, SW)
    inp = np.random.normal(size=(N, IC, IH, IW)).astype(np.float32)
    out = np.zeros((N, OC, OH, OW), dtype=np.float32)
    weight = np.random.normal(size=(IC, OC, KH, KW)).astype(np.float32)
    bias = np.random.normal(size=(1, OC, 1, 1)).astype(np.float32)

    # naive calculation using numpy
    for n, ic, ih, iw in itertools.product(*map(range, [N, IC, IH, IW])):
        oh, ow = ih * SH, iw * SW
        out[n, :, oh : oh + KH, ow : ow + KW] += inp[n, ic, ih, iw] * weight[ic]
    out = out[:, :, PH : OH - PH, PW : OW - PW]
    if BIAS:
        out += bias

    # megengine conv_transpose2d calculation
    conv_transpose2d = ConvTranspose2d(IC, OC, (KH, KW), (SH, SW), (PH, PW), bias=BIAS)
    conv_transpose2d.weight = Parameter(weight, dtype=np.float32)
    if BIAS:
        conv_transpose2d.bias = Parameter(bias, dtype=np.float32)
    y = conv_transpose2d(tensor(inp))
    np.testing.assert_almost_equal(out, y.numpy(), 2e-6)
def run(
    N, IC, OC, IH, IW, KH, KW, PH, PW, SH, SW, has_bias=True,
):
    inp_v = np.random.normal(size=(N, IC, IH, IW))
    w_v = np.random.normal(size=(N, OC, IC, KH, KW))
    b_v = np.random.normal(size=(1, OC, 1, 1))
    inp_scale = dtype.get_scale(inp_dtype)
    w_scale = dtype.get_scale(w_dtype)
    b_scale = dtype.get_scale(b_dtype)

    inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
    wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype)
    bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

    inp_int8 = tensor(inpv, dtype=inp_dtype)
    w_int8 = Parameter(wv, dtype=w_dtype)
    b_int32 = Parameter(bv, dtype=b_dtype)

    inp_fp32 = inp_int8.astype("float32")
    w_fp32 = w_int8.astype("float32")
    b_fp32 = b_int32.astype("float32")

    def run_batch_conv_bias(inp, w, b):
        b = b if has_bias else Parameter(np.zeros_like(b.numpy()))
        result = F.quantized.batch_conv_bias_activation(
            inp, w, b, stride=(SH, SW), padding=(PH, PW), dtype=out_dtype,
        )
        return result.astype("float32")

    expected = F.conv2d(inp_fp32, w_fp32[0], b_fp32 if has_bias else None)[0]
    expected = expected.astype(out_dtype).astype("float32")
    expected = F.flatten(expected)

    result = run_batch_conv_bias(inp_int8, w_int8, b_int32)
    result = F.flatten(result)

    np.testing.assert_allclose(result.numpy(), expected.numpy(), atol=outp_scale)
def test_module_api_hooks():
    net = MyModule()
    pre_hook_num = 0
    post_hook_num = 0
    hooks = []

    def pre_hook(module, inputs):
        nonlocal pre_hook_num
        pre_hook_num += 1
        modified_inputs = tuple(inp + 1 for inp in inputs)
        return modified_inputs

    def post_hook(module, inputs, outputs):
        nonlocal post_hook_num
        post_hook_num += 1
        outputs += 1
        return outputs

    net.apply(lambda module: hooks.append(module.register_forward_pre_hook(pre_hook)))
    net.apply(lambda module: hooks.append(module.register_forward_hook(post_hook)))

    shape = (1, 4, 1, 1)
    x = tensor(np.zeros(shape, dtype=np.float32))
    y = net(x)
    assert pre_hook_num == 4
    assert post_hook_num == 4

    mean1 = Parameter(np.zeros(shape), dtype=np.float32)
    bn1 = F.batch_norm(
        x + 3, mean1, Parameter(np.ones(shape), dtype=np.float32), training=True
    )
    np.testing.assert_allclose(
        net.i.bn.running_mean.numpy(), mean1.numpy(),
    )
    mean2 = Parameter(np.zeros(shape), dtype=np.float32)
    bn2 = F.batch_norm(
        bn1 + 3, mean2, Parameter(np.ones(shape), dtype=np.float32), training=True
    )
    np.testing.assert_allclose(
        net.bn.running_mean.numpy(), mean2.numpy(),
    )
    np.testing.assert_allclose((bn2 + 2).numpy(), y.numpy())

    assert len(hooks) == 8
    for handler in hooks:
        handler.remove()
    y = net(x)
    assert pre_hook_num == 4
    assert post_hook_num == 4
def __init__(self, num_channels, eps=1e-05, affine=True):
    super().__init__()
    self.num_channels = num_channels
    self.eps = eps
    self.affine = affine
    if self.affine:
        self.weight = Parameter(np.ones(num_channels, dtype="float32"))
        self.bias = Parameter(np.zeros(num_channels, dtype="float32"))
    else:
        self.weight = None
        self.bias = None
    self.reset_parameters()
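# A minimal sketch of the reset_parameters() referenced above; it is not part
# of the original snippet. Assumes `from megengine.module import init` and the
# usual convention of ones for weight and zeros for bias when affine is enabled.
def reset_parameters(self):
    if self.affine:
        init.ones_(self.weight)
        init.zeros_(self.bias)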
def __init__(self, num_features, eps=1e-5):
    super().__init__()
    self.num_features = num_features
    self.eps = eps
    self.weight = Parameter(np.ones(num_features, dtype=np.float32))
    self.bias = Parameter(np.zeros(num_features, dtype=np.float32))
    self.running_mean = Parameter(np.zeros((1, num_features, 1, 1), dtype=np.float32))
    self.running_var = Parameter(np.ones((1, num_features, 1, 1), dtype=np.float32))
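# A minimal sketch of how a frozen-batch-norm style forward could use the
# statistics above (not part of the original snippet); broadcasting weight and
# bias to (1, C, 1, 1) assumes NCHW inputs.
def forward(self, x):
    scale = self.weight.reshape(1, -1, 1, 1) * (
        1.0 / (self.running_var + self.eps) ** 0.5
    )
    shift = self.bias.reshape(1, -1, 1, 1) - self.running_mean * scale
    return x * scale + shift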
def __init__(self, num_groups, num_channels, eps=1e-5, affine=True):
    super().__init__()
    assert num_channels % num_groups == 0
    self.num_groups = num_groups
    self.num_channels = num_channels
    self.eps = eps
    self.affine = affine
    if self.affine:
        self.weight = Parameter(np.ones(num_channels, dtype=np.float32))
        self.bias = Parameter(np.zeros(num_channels, dtype=np.float32))
    else:
        self.weight = None
        self.bias = None
    self.reset_parameters()
def test_conv(module):
    normal_net = getattr(Float, module)(3, 3, 3, 1, 1, 1, bias=True)
    normal_net.eval()

    qat_net = getattr(QAT, module)(3, 3, 3, 1, 1, 1, bias=True)
    qat_net.eval()
    disable_observer(qat_net)

    propagate_qconfig(qat_net, min_max_fakequant_qconfig)
    init_qat_net(qat_net)

    x = mge.tensor(np.random.normal(size=(1, 3, 3, 3)).astype("float32"))
    inp_scale = gen_inp_scale()
    x = fake_quant_act(x, inp_scale)
    x.qparams.update(create_qparams(QuantMode.SYMMERTIC, "qint8", inp_scale))

    x_int8 = quant(x, inp_scale)

    weight = np.random.normal(size=(3, 3, 3, 3)).astype("float32")
    bias = np.random.normal(size=(1, 3, 1, 1)).astype("float32")
    if module in ("ConvBn2d", "ConvBnRelu2d"):
        normal_net.conv.weight[...] = fake_quant_weight(weight, weight_scale)
        normal_net.conv.bias[...] = fake_quant_bias(bias, inp_scale * weight_scale)
        qat_net.conv.weight[...] = Parameter(weight)
        qat_net.conv.bias[...] = Parameter(bias)
    else:
        normal_net.weight[...] = fake_quant_weight(weight, weight_scale)
        normal_net.bias[...] = fake_quant_bias(bias, inp_scale * weight_scale)
        qat_net.weight[...] = Parameter(weight)
        qat_net.bias[...] = Parameter(bias)

    qat_from_float = getattr(QAT, module).from_float_module(normal_net)
    qat_from_float.eval()
    disable_observer(qat_from_float)
    disable_fake_quant(qat_from_float)

    q_net = getattr(Q, module).from_qat_module(qat_net)
    q_net.eval()

    normal = normal_net(x)
    qat_without_fakequant = qat_from_float(x)
    fake_quant_normal = fake_quant_act(normal_net(x), act_scale)
    qat = qat_net(x)
    q = q_net(x_int8).numpy() * act_scale
    np.testing.assert_allclose(qat_without_fakequant, normal, atol=1e-5)
    np.testing.assert_allclose(qat, fake_quant_normal, atol=act_scale)
    np.testing.assert_allclose(q, fake_quant_normal.numpy(), atol=act_scale)
def init_qat_net():
    net = QATNet()
    propagate_qconfig(net, min_max_fakequant_qconfig)

    min_val = np.random.randint(-127, 0, size=(3,))
    max_val = np.random.randint(1, 127, size=(3,))

    net.quant.act_observer.min_val[...] = Parameter(min_val[0])
    net.quant.act_observer.max_val[...] = Parameter(max_val[0])

    net.linear[0].weight_observer.min_val[...] = Parameter(min_val[1])
    net.linear[0].weight_observer.max_val[...] = Parameter(max_val[1])
    net.linear[0].act_observer.min_val[...] = Parameter(min_val[2])
    net.linear[0].act_observer.max_val[...] = Parameter(max_val[2])

    net.linear[1].weight_observer.min_val[...] = Parameter(min_val[1])
    net.linear[1].weight_observer.max_val[...] = Parameter(max_val[1])
    net.linear[1].act_observer.min_val[...] = Parameter(min_val[2])
    net.linear[1].act_observer.max_val[...] = Parameter(max_val[2])

    return net
def __init__(self, normalized_shape, eps=1e-05, affine=True, **kwargs):
    super().__init__(**kwargs)
    if isinstance(normalized_shape, int):
        normalized_shape = (normalized_shape,)
    self.normalized_shape = tuple(normalized_shape)
    self.eps = eps
    self.affine = affine
    if self.affine:
        self.weight = Parameter(np.ones(self.normalized_shape, dtype="float32"))
        self.bias = Parameter(np.zeros(self.normalized_shape, dtype="float32"))
    else:
        self.weight = None
        self.bias = None
    self.reset_parameters()
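# A minimal LayerNorm-style forward consistent with the attributes above (not
# part of the original snippet); normalizing over the trailing axes with plain
# tensor ops is an assumption, real implementations may call a fused kernel.
def forward(self, x):
    axes = tuple(range(x.ndim - len(self.normalized_shape), x.ndim))
    mean = F.mean(x, axis=axes, keepdims=True)
    var = F.mean((x - mean) ** 2, axis=axes, keepdims=True)
    x = (x - mean) / F.sqrt(var + self.eps)
    if self.affine:
        x = x * self.weight + self.bias
    return x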
def test_tensor_serialization():
    with TemporaryFile() as f:
        data = np.random.randint(low=0, high=7, size=[233])
        a = Tensor(data, device="cpu0", dtype=np.int32)
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f)
        np.testing.assert_equal(a.numpy(), data)
        assert b.device.logical_name == "cpu0:0"
        assert b.dtype == np.int32

    with TemporaryFile() as f:
        a = Parameter(np.random.random(size=(233, 2)).astype(np.float32))
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f)
        assert isinstance(b, Parameter)
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f)
        assert type(b) is Tensor
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f, map_location="cpux")
        assert type(b) is Tensor
        assert "cpu" in str(b.device)
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        if mge.is_cuda_available():
            device_org = mge.get_default_device()
            mge.set_default_device("gpu0")
            a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
            mge.save(a, f)
            f.seek(0)
            mge.set_default_device("cpux")
            b = mge.load(f, map_location={"gpu0": "cpu0"})
            assert type(b) is Tensor
            assert "cpu0" in str(b.device)
            np.testing.assert_equal(a.numpy(), b.numpy())
            mge.set_default_device(device_org)

    with TemporaryFile() as f:
        a = Tensor(0)
        a.qparams.scale = Tensor(1.0)
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f)
        assert isinstance(b.qparams.scale, Tensor)
        np.testing.assert_equal(b.qparams.scale.numpy(), 1.0)
def test_func(
    batch_size,
    in_channels,
    out_channels,
    input_height,
    input_width,
    kernel_size,
    stride,
    padding,
    dilation,
    groups,
):
    local_conv2d = LocalConv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        input_height=input_height,
        input_width=input_width,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
    )
    inputs = np.random.normal(
        size=(batch_size, in_channels, input_height, input_width)
    ).astype(np.float32)
    output_height = (input_height + padding * 2 - kernel_size) // stride + 1
    output_width = (input_width + padding * 2 - kernel_size) // stride + 1
    weights = np.random.normal(
        size=(
            groups,
            output_height,
            output_width,
            in_channels // groups,
            kernel_size,
            kernel_size,
            out_channels // groups,
        )
    ).astype(np.float32)
    local_conv2d.weight = Parameter(weights)
    outputs = local_conv2d(tensor(inputs))

    # naive calculation using numpy;
    # only tests the case output_height == input_height, output_width == input_width
    inputs = np.pad(inputs, ((0, 0), (0, 0), (1, 1), (1, 1)))
    expected = np.zeros(
        (batch_size, out_channels, output_height, output_width), dtype=np.float32,
    )
    ic_group_size = in_channels // groups
    oc_group_size = out_channels // groups
    for n, oc, oh, ow in itertools.product(
        *map(range, [batch_size, out_channels, output_height, output_width])
    ):
        ih, iw = oh * stride, ow * stride
        g_id = oc // oc_group_size
        expected[n, oc, ih, iw] = np.sum(
            inputs[
                n,
                g_id * ic_group_size : (g_id + 1) * ic_group_size,
                ih : ih + kernel_size,
                iw : iw + kernel_size,
            ]
            * weights[g_id, oh, ow, :, :, :, oc % oc_group_size]
        )
    np.testing.assert_almost_equal(outputs.numpy(), expected, 1e-5)
def test_local_conv2d():
    batch_size = 10
    in_channels = 4
    out_channels = 8
    input_height = 8
    input_width = 8
    kernel_size = 3
    stride = 1
    padding = 1
    dilation = 1
    groups = 1
    local_conv2d = LocalConv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        input_height=input_height,
        input_width=input_width,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
    )
    inputs = np.random.normal(
        size=(batch_size, in_channels, input_height, input_width)
    ).astype(np.float32)
    output_height = (input_height + padding * 2 - kernel_size) // stride + 1
    output_width = (input_width + padding * 2 - kernel_size) // stride + 1
    weights = np.random.normal(
        size=(
            groups,
            output_height,
            output_width,
            in_channels // groups,
            kernel_size,
            kernel_size,
            out_channels // groups,
        )
    ).astype(np.float32)
    local_conv2d.weight = Parameter(weights)
    outputs = local_conv2d(tensor(inputs))

    # naive calculation using numpy;
    # only tests the case output_height == input_height, output_width == input_width, groups == 1
    inputs = np.pad(inputs, ((0, 0), (0, 0), (1, 1), (1, 1)))
    expected = np.zeros(
        (batch_size, out_channels, output_height, output_width), dtype=np.float32,
    )
    for n, oc, oh, ow in itertools.product(
        *map(range, [batch_size, out_channels, output_height, output_width])
    ):
        ih, iw = oh * stride, ow * stride
        expected[n, oc, ih, iw] = np.sum(
            inputs[n, :, ih : ih + kernel_size, iw : iw + kernel_size]
            * weights[0, oh, ow, :, :, :, oc]
        )
    assertTensorClose(outputs.numpy(), expected, max_err=1e-5)
def test_set_value():
    v0 = np.random.random((2, 3)).astype(np.float32)
    param = Parameter(v0)
    v1 = np.random.random((2, 3)).astype(np.float32)
    param.set_value(v1)
    np.testing.assert_allclose(param.numpy(), v1, atol=5e-6)

    v2 = np.random.random((3, 3)).astype(np.float32)
    # TODO: add this
    # with pytest.raises(ValueError):
    #     param.set_value(v2)
    np.testing.assert_allclose(param.numpy(), v1, atol=5e-6)
def run_batch_conv_bias(inp, w, b):
    b = b if has_bias else Parameter(np.zeros_like(b.numpy()))
    result = F.quantized.batch_conv_bias_activation(
        inp, w, b, stride=(SH, SW), padding=(PH, PW), dtype=out_dtype,
    )
    return result.astype("float32")
def test_linear():
    normal_net = Float.Linear(3, 3, bias=True)
    normal_net.eval()

    qat_net = QAT.Linear(3, 3, bias=True)
    qat_net.eval()
    disable_observer(qat_net)

    propagate_qconfig(qat_net, min_max_fakequant_qconfig)
    init_qat_net(qat_net)

    x = mge.tensor(np.random.normal(size=(3, 3)).astype("float32"))
    inp_scale = gen_inp_scale()
    x = fake_quant_act(x, inp_scale)
    x.qparams.update(create_qparams(QuantMode.SYMMERTIC, "qint8", inp_scale))

    x_int8 = quant(x, inp_scale)

    weight = np.random.normal(size=(3, 3)).astype("float32")
    bias = np.random.normal(size=(3,)).astype("float32")
    normal_net.weight[...] = fake_quant_weight(weight, weight_scale)
    normal_net.bias[...] = fake_quant_bias(bias, inp_scale * weight_scale)
    qat_net.weight[...] = Parameter(weight)
    qat_net.bias[...] = Parameter(bias)

    qat_from_float = QAT.Linear.from_float_module(normal_net)
    qat_from_float.eval()
    disable_fake_quant(qat_from_float)
    disable_observer(qat_from_float)

    q_net = Q.Linear.from_qat_module(qat_net)
    q_net.eval()

    normal = normal_net(x)
    qat_without_fakequant = qat_from_float(x)
    fake_quant_normal = fake_quant_act(normal_net(x), act_scale)
    qat = qat_net(x)
    q = q_net(x_int8).numpy() * act_scale
    np.testing.assert_allclose(qat_without_fakequant, normal)
    np.testing.assert_allclose(qat, fake_quant_normal.numpy())
    np.testing.assert_allclose(q, fake_quant_normal.numpy())
def test_tensor_serialization():
    def tensor_eq(a, b):
        assert a.dtype == b.dtype
        assert a.device == b.device
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        data = np.random.randint(low=0, high=7, size=[233])
        a = Tensor(data, device="xpux", dtype=np.int32)
        pickle.dump(a, f)
        f.seek(0)
        b = pickle.load(f)
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        a = Parameter(np.random.random(size=(233, 2)).astype(np.float32))
        pickle.dump(a, f)
        f.seek(0)
        b = pickle.load(f)
        assert isinstance(b, Parameter)
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
        pickle.dump(a, f)
        f.seek(0)
        b = pickle.load(f)
        assert type(b) is Tensor
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
        mge.save(a, f)
        f.seek(0)
        b = mge.load(f, map_location="cpux")
        assert type(b) is Tensor
        assert "cpu" in str(b.device)
        np.testing.assert_equal(a.numpy(), b.numpy())

    with TemporaryFile() as f:
        if mge.is_cuda_available():
            device_org = mge.get_default_device()
            mge.set_default_device("gpu0")
            a = Tensor(np.random.random(size=(2, 233)).astype(np.float32))
            mge.save(a, f)
            f.seek(0)
            mge.set_default_device("cpux")
            b = mge.load(f, map_location={"gpu0": "cpu0"})
            assert type(b) is Tensor
            assert "cpu0" in str(b.device)
            np.testing.assert_equal(a.numpy(), b.numpy())
            mge.set_default_device(device_org)
def test_func(
    N, IC, ID, IH, IW, OC, KD, KH, KW, SD, SH, SW, PD, PH, PW, DD, DH, DW, bias=True,
):
    conv_transpose3d = ConvTranspose3d(
        in_channels=IC,
        out_channels=OC,
        kernel_size=(KD, KH, KW),
        stride=(SD, SH, SW),
        padding=(PD, PH, PW),
        dilation=(DD, DH, DW),
        bias=bias,
    )

    OD = getsize(ID, KD, SD, DD)
    OH = getsize(IH, KH, SH, DH)
    OW = getsize(IW, KW, SW, DW)

    inp = np.random.normal(size=(N, IC, ID, IH, IW))
    weight = np.random.normal(size=(IC, OC, KD, KH, KW))
    out_np = np.zeros((N, OC, OD, OH, OW), dtype=np.float32)

    for n, ic, idepth, ih, iw in itertools.product(*map(range, [N, IC, ID, IH, IW])):
        od, oh, ow = idepth * SD, ih * SH, iw * SW
        out_np[n, :, od : od + KD, oh : oh + KH, ow : ow + KW] += (
            inp[n, ic, idepth, ih, iw] * weight[ic]
        )
    out_np = out_np[:, :, PD : OD - PD, PH : OH - PH, PW : OW - PW]

    conv_transpose3d.weight = Parameter(weight)
    out_meg = conv_transpose3d.forward(tensor(inp))

    np.testing.assert_almost_equal(out_meg.numpy(), out_np, 1e-5)
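# getsize() is defined elsewhere in the original test module. A plausible
# definition for the dilated transposed-conv output size used above would be
# the following (an assumption, not the verbatim helper):
def getsize(inp, kernel, stride, dilate):
    return (inp - 1) * stride + dilate * (kernel - 1) + 1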
def run_conv_bias(inp, w, b, format="NCHW"):
    b = b if has_bias else Parameter(np.zeros_like(b.numpy()))
    if format == "NCHW4":
        inp = convert_to_nchw4(inp)
        w = convert_to_nchw4(w)
        b = convert_to_nchw4(b)
    return F.quantized.conv_bias_activation(
        inp,
        w,
        b,
        stride=(SH, SW),
        padding=(PH, PW),
        dtype=out_dtype,
        nonlinear_mode=nonlinear_mode,
    )
def test_elemwise_fuse_in_grad(trace_mode):
    w = Parameter(np.ones([4, 6]), dtype="float32")

    gm = GradManager().attach(w)
    opt = optim.SGD([w], lr=0.01, momentum=0.9, weight_decay=5e-4)

    # explicitly declare opt_level as 2
    @trace(symbolic=trace_mode, opt_level=2)
    def f():
        with gm:
            wm = F.sum(w ** 2, axis=1) ** 0.5
            loss = wm.mean()
            gm.backward(loss)
            opt.step().clear_grad()
        return loss

    for i in range(3):
        y = f()
        y.numpy()
def __init__(self):
    super().__init__()
    self.params = [Parameter(1.0, dtype=np.float32) for i in range(10)]
def run(
    N, IC, OC, IH, IW, KH, KW, PH, PW, SH, SW, has_bias=True, nonlinear_mode="identity",
):
    inp_v = np.random.normal(size=(N, IC, IH, IW))
    w_v = np.random.normal(size=(OC, IC, KH, KW))
    b_v = np.random.normal(size=(1, OC, 1, 1))
    inp_scale = dtype.get_scale(inp_dtype)
    w_scale = dtype.get_scale(w_dtype)
    b_scale = dtype.get_scale(b_dtype)

    inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
    wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype)
    bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

    inp_int8 = tensor(inpv, dtype=inp_dtype)
    w_int8 = Parameter(wv, dtype=w_dtype)
    b_int32 = Parameter(bv, dtype=b_dtype)

    inp_fp32 = inp_int8.astype("float32")
    w_fp32 = w_int8.astype("float32")
    b_fp32 = b_int32.astype("float32")

    def convert_to_nchw4(var):
        var = F.reshape(
            var, (var.shape[0], var.shape[1] // 4, 4, var.shape[2], var.shape[3])
        )
        var = F.transpose(var, (0, 1, 3, 4, 2))
        return var

    def run_conv2d(inp, w, b):
        O = F.conv2d(
            inp, w, b if has_bias else None, stride=(SH, SW), padding=(PH, PW),
        )
        if nonlinear_mode == "relu":
            return F.relu(O)
        else:
            return O

    def run_conv_bias(inp, w, b, format="NCHW"):
        b = b if has_bias else Parameter(np.zeros_like(b.numpy()))
        if format == "NCHW4":
            inp = convert_to_nchw4(inp)
            w = convert_to_nchw4(w)
            b = convert_to_nchw4(b)
        return F.quantized.conv_bias_activation(
            inp,
            w,
            b,
            stride=(SH, SW),
            padding=(PH, PW),
            dtype=out_dtype,
            nonlinear_mode=nonlinear_mode,
        )

    format = "NCHW4" if is_cuda_available() else "NCHW"

    expected = run_conv2d(inp_fp32, w_fp32, b_fp32)
    expected = expected.astype(out_dtype).astype("float32")
    result = run_conv_bias(inp_int8, w_int8, b_int32, format=format).astype("float32")
    if format == "NCHW4":
        result = F.transpose(result, (0, 1, 4, 2, 3))
    expected = F.flatten(expected)
    result = F.flatten(result)
    np.testing.assert_allclose(result.numpy(), expected.numpy(), atol=outp_scale)
def __init__(self, param_shape):
    super().__init__()
    self.params = [
        Parameter(np.ones(param_shape), dtype=np.float32) for i in range(10)
    ]
def __init__(self):
    super().__init__()
    self.a = Parameter([1.0], dtype=np.float32)
def __init__(self, a, b):
    super().__init__()
    self.a = Parameter(a, dtype=np.float32)
    self.b = Parameter(b, dtype=np.float32)
    self.layer1 = MulFunc()
def __init__(self, hidden_size, eps=1e-12):
    super(BertLayerNorm, self).__init__()
    self.weight = Parameter(np.ones(hidden_size).astype(np.float32))
    self.bias = Parameter(np.zeros(hidden_size).astype(np.float32))
    self.variance_epsilon = eps
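# A minimal sketch of the usual BERT-style layer-norm forward matching the
# parameters above (not part of the original snippet); the last-axis reduction
# and use of plain tensor ops are assumptions.
def forward(self, x):
    u = F.mean(x, axis=-1, keepdims=True)
    s = F.mean((x - u) ** 2, axis=-1, keepdims=True)
    x = (x - u) / F.sqrt(s + self.variance_epsilon)
    return self.weight * x + self.bias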
def __init__(self):
    super().__init__()
    self.i = self.InnerModule()
    self.bn = BatchNorm2d(4)
    self.param = Parameter(np.ones(1, dtype=np.float32))
    self.buff = Tensor(np.ones(1, dtype=np.float32))
def run( N, IC, OC, IH, IW, KH, KW, PH, PW, SH, SW, has_bias=True, nonlinear_mode="IDENTITY", ): inp_v = np.random.normal(size=(N, IC, IH, IW)) w_v = np.random.normal(size=(OC, IC, KW, KW)) b_v = np.random.normal(size=(1, OC, 1, 1)) inp_scale = mgb.dtype.get_scale(inp_dtype) w_scale = mgb.dtype.get_scale(w_dtype) b_scale = mgb.dtype.get_scale(b_dtype) inpv = mgb.dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype) wv = mgb.dtype.convert_to_qint8(w_v * w_scale, w_dtype) bv = mgb.dtype.convert_to_qint32(b_v * b_scale, b_dtype) inp_int8 = tensor(inpv, dtype=inp_dtype) w_int8 = Parameter(wv, dtype=w_dtype) b_int32 = Parameter(bv, dtype=b_dtype) inp_fp32 = inp_int8.astype("float32") w_fp32 = w_int8.astype("float32") b_fp32 = b_int32.astype("float32") jit.trace.enabled = True b_symbolic = True def convert_to_nchw4(var): return var.reshape(var.shapeof(0), var.shapeof(1) // 4, 4, var.shapeof(2), var.shapeof(3)).dimshuffle(0, 1, 3, 4, 2) @jit.trace(symbolic=b_symbolic) def run_conv2d(inp, w, b): O = F.conv2d( inp, w, b if has_bias else None, stride=(SH, SW), padding=(PH, PW), ) if nonlinear_mode == "RELU": return F.relu(O) else: return O @jit.trace(symbolic=b_symbolic) def run_conv_bias(inp, w, b, format="NCHW"): b = b if has_bias else np.zeros_like(b) if format == "NCHW4": inp = convert_to_nchw4(inp) w = convert_to_nchw4(w) b = F.flatten(b) return F.conv_bias_activation( inp, w, b, stride=(SH, SW), padding=(PH, PW), dtype=out_dtype, nonlinear_mode=nonlinear_mode, ) format = "NCHW4" if is_cuda_available() else "NCHW" expected = run_conv2d(inp_fp32, w_fp32, b_fp32) expected = expected.astype(out_dtype).astype("float32") result = run_conv_bias(inp_int8, w_int8, b_int32, format=format).astype("float32") if format == "NCHW4": result = result.dimshuffle(0, 1, 4, 2, 3) expected = F.flatten(expected) result = F.flatten(result) assertTensorClose(result.numpy(), expected.numpy())
def __init__(self):
    super().__init__()
    self.a = Parameter([1.23], dtype="float32")