def build_packed_bnn(input_image, w_conv1, bn_t1, w_conv2, bn_t2, w_fc1, b_fc1, w_fc2, b_fc2):  # 1*16*16
    """Build the BNN dataflow graph, packed or unpacked per PACK_CONV.

    When PACK_CONV is set, the conv/batch-norm stages use the bit-packed
    kernels and the max-pool stages keep the packed layout; otherwise the
    plain kernels run and the pools unpack, with an explicit hcl.pack
    before the first dense layer.
    """
    # Pick the conv + threshold-batch-norm kernel pair once, up front.
    if PACK_CONV:
        conv_op = bnn.packed_conv2d_nchw
        bn_op = bnn.packed_batch_norm_threshold
    else:
        conv_op = bnn.conv2d_nchw
        bn_op = bnn.batch_norm_threshold

    # Stage 1: conv -> threshold BN -> 2x2 max-pool.
    conv1 = conv_op(input_image, w_conv1, padding=[1, 1],
                    name="conv1", out_dtype=qtype_int)  # 16*16*16
    bn1 = bn_op(conv1, bn_t1, name="bn1")
    maxpool1 = bnn.packed_max_pool2d_nchw(bn1, [2, 2], [2, 2],
                                          name="maxpool1",
                                          unpack=not PACK_CONV)  # 16*8*8

    # Stage 2: conv -> threshold BN -> 2x2 max-pool.
    conv2 = conv_op(maxpool1, w_conv2, padding=[1, 1],
                    name="conv2", out_dtype=qtype_int)  # 32*8*8
    bn2 = bn_op(conv2, bn_t2, name="bn2")
    maxpool2 = bnn.packed_max_pool2d_nchw(bn2, [2, 2], [2, 2],
                                          name="maxpool2",
                                          unpack=not PACK_CONV)  # 32*4*4=512

    # Flatten (already packed, or flatten then pack 32 bits per word).
    if PACK_CONV:
        pack = bnn.packed_flatten(maxpool2, name="packed_flatten")
    else:
        flat = bnn.flatten(maxpool2, name="flatten")
        pack = hcl.pack(flat, axis=1, factor=32,
                        dtype=qtype_packed, name="pack")  # 512/32=16

    # Two packed dense layers; only the first binarizes its output.
    fc1 = bnn.packed_dense(pack, w_fc1, b_fc1, True, name="fc1")  # 512/32->256/32
    fc2 = bnn.packed_dense(fc1, w_fc2, b_fc2, False, name="fc2")  # 256/32->10
    return fc2
def build_packed_bnn(*arrays):  # 1*16*16
    """Build the fully packed BNN, copying each host array on-chip first.

    Argument order: input_image, w_conv1, bn_t1, w_conv2, bn_t2,
    w_fc1, b_fc1, w_fc2, b_fc2. Each array is materialized with
    hcl.compute under a per-position dtype before use.
    """
    # Per-position on-chip dtypes; anything not listed is float.
    # 0/1: binary input and conv1 weights; 3: 16-bit packed conv2 weights;
    # 5/7: word-packed FC weights.
    dtype_by_pos = {0: qtype_bit, 1: qtype_bit, 3: hcl.UInt(16),
                    5: qtype_packed, 7: qtype_packed}
    hcl_comp = []
    for i, array in enumerate(arrays):
        copied = hcl.compute(array.shape, lambda *dim: array[dim],
                             name="copy_{}".format(i),
                             dtype=dtype_by_pos.get(i, qtype_float))
        hcl_comp.append(copied)

    input_image = hcl_comp[0]
    w_conv1 = hcl_comp[1]
    bn_t1 = hcl_comp[2]
    w_conv2 = hcl_comp[3]
    bn_t2 = hcl_comp[4]
    w_fc1 = hcl_comp[5]
    b_fc1 = hcl_comp[6]
    w_fc2 = hcl_comp[7]
    b_fc2 = hcl_comp[8]

    # Stage 1: packed conv -> threshold BN -> line-buffered max-pool.
    conv1 = bnn.packed_conv2d_nchw(input_image, w_conv1, padding=[1, 1],
                                   name="conv1", out_dtype=qtype_int)  # 16*16*16
    bn1 = bnn.packed_batch_norm_threshold(conv1, bn_t1, name="bn1")
    maxpool1 = bnn.packed_max_pool2d_LB(bn1, [2, 2], [2, 2], name="maxpool1")

    # Stage 2: packed conv -> threshold BN -> line-buffered max-pool.
    conv2 = bnn.packed_conv2d_nchw(maxpool1, w_conv2, padding=[1, 1],
                                   name="conv2", out_dtype=qtype_int)  # 32*8*8
    bn2 = bnn.packed_batch_norm_threshold(conv2, bn_t2, name="bn2")
    maxpool2 = bnn.packed_max_pool2d_LB(bn2, [2, 2], [2, 2],
                                        name="maxpool2")  # 32*4*4=512

    # Classifier head: packed flatten, then two packed dense layers.
    pack = bnn.packed_flatten(maxpool2, name="packed_flatten")
    fc1 = bnn.packed_dense(pack, w_fc1, b_fc1, True, name="fc1")  # 512/32->256/32
    fc2 = bnn.packed_dense(fc1, w_fc2, b_fc2, False, name="fc2")  # 256/32->10
    return fc2
def kernel(A, F):
    """Apply one stride-2 packed binary convolution of F over A.

    MAC optimization is disabled (mac=False); the result is widened
    to UInt(16).
    """
    strided_conv = bnn.packed_conv2d_nchw(A, F,
                                          padding=[1, 1],
                                          strides=[2, 2],
                                          name="layer2_0_conv1",
                                          out_dtype=hcl.UInt(16),
                                          mac=False)
    return strided_conv
def build_packed_bnn(input_image, w_conv1, bn_t1, w_conv2, bn_t2, w_fc1, b_fc1, w_fc2, b_fc2):  # 1*16*16
    """Build the fully bit-packed BNN: two conv/BN/pool stages plus
    a two-layer packed dense classifier head.
    """
    # Stage 1: packed conv -> threshold BN -> line-buffered 2x2 max-pool.
    conv1 = bnn.packed_conv2d_nchw(input_image, w_conv1, padding=[1, 1],
                                   name="conv1", out_dtype=qtype_int)  # 16*16*16
    bn1 = bnn.packed_batch_norm_threshold(conv1, bn_t1, name="bn1")
    maxpool1 = bnn.packed_max_pool2d_LB(bn1, [2, 2], [2, 2], name="maxpool1")

    # Stage 2: same pattern at doubled channel count.
    conv2 = bnn.packed_conv2d_nchw(maxpool1, w_conv2, padding=[1, 1],
                                   name="conv2", out_dtype=qtype_int)  # 32*8*8
    bn2 = bnn.packed_batch_norm_threshold(conv2, bn_t2, name="bn2")
    maxpool2 = bnn.packed_max_pool2d_LB(bn2, [2, 2], [2, 2],
                                        name="maxpool2")  # 32*4*4=512

    # Head: flatten in packed layout, then two packed dense layers;
    # only fc1 re-binarizes its output.
    pack = bnn.packed_flatten(maxpool2, name="packed_flatten")
    fc1 = bnn.packed_dense(pack, w_fc1, b_fc1, True, name="fc1")  # 512/32->256/32
    fc2 = bnn.packed_dense(fc1, w_fc2, b_fc2, False, name="fc2")  # 256/32->10
    return fc2
def kernel(A, F):
    """Apply one stride-1 packed binary convolution of F over A and
    print the resulting shape/dtype for inspection.
    """
    result = bnn.packed_conv2d_nchw(A, F,
                                    padding=[1, 1],
                                    strides=[1, 1],
                                    name="layer3_0_conv1",
                                    out_dtype=hcl.UInt(16),
                                    mac=False)
    print(result.shape, result.dtype)
    return result
def build_packed_bnn(input_image):  # 1*16*16
    """Build the packed BNN with weights baked in as constant tensors.

    All parameters are pulled from the module-level packed_params dict
    and embedded via hcl.const_tensor, so the only runtime input is
    the image.
    """
    # (key, on-chip dtype) for every constant parameter tensor.
    param_specs = [
        ("w_conv1", qtype_bit),
        ("bn_t1", qtype_float),
        ("w_conv2", hcl.UInt(16)),
        ("bn_t2", qtype_float),
        ("w_fc1", qtype_packed),
        ("b_fc1", qtype_float),
        ("w_fc2", qtype_packed),
        ("b_fc2", qtype_float),
    ]
    consts = {}
    for key, dt in param_specs:
        consts[key] = hcl.const_tensor(packed_params[key], key, dt)

    # Stage 1: 1-bit-packed conv -> threshold BN -> line-buffered pool.
    conv1 = bnn.packed_conv2d_nchw(input_image, consts["w_conv1"],
                                   padding=[1, 1], name="conv1",
                                   out_dtype=qtype_int, bitwidth=1)  # 16*16*16
    bn1 = bnn.packed_batch_norm_threshold(conv1, consts["bn_t1"], name="bn1")
    maxpool1 = bnn.packed_max_pool2d_LB(bn1, [2, 2], [2, 2],
                                        name="maxpool1")  # 16*8*8

    # Stage 2: 16-bit-packed conv -> threshold BN -> line-buffered pool.
    conv2 = bnn.packed_conv2d_nchw(maxpool1, consts["w_conv2"],
                                   padding=[1, 1], name="conv2",
                                   out_dtype=qtype_int, bitwidth=16)  # 32*8*8
    bn2 = bnn.packed_batch_norm_threshold(conv2, consts["bn_t2"], name="bn2")
    maxpool2 = bnn.packed_max_pool2d_LB(bn2, [2, 2], [2, 2],
                                        name="maxpool2")  # 32*4*4=512

    # Head: packed flatten, then two packed dense layers; fc2 emits
    # the final logits in dtype_out.
    pack = bnn.packed_flatten(maxpool2, name="packed_flatten")
    fc1 = bnn.packed_dense(pack, consts["w_fc1"], consts["b_fc1"], True,
                           name="fc1")  # 512/32->256/32
    fc2 = bnn.packed_dense(fc1, consts["w_fc2"], consts["b_fc2"], False,
                           name="fc2", dtype=dtype_out)  # 256/32->10
    return fc2
def forward(self, x):
    """Forward pass of a packed ReActNet-style basic block.

    Two residual sub-blocks:
      1. RSign -> binary conv (stride = self.stride) -> BN, added to a
         shortcut; when the block downsamples (stride != 1) or
         self.flag is set, the shortcut is an avg-pool of x with its
         channels duplicated, otherwise it is x itself.
      2. RPReLU -> RSign -> binary conv (stride 1) -> BN, added back to
         the first sub-block's RPReLU output, then a final RPReLU.

    Returns the second RPReLU output (qtype_float).
    """
    # --- 1st residual sub-block ---
    rsign1 = packed_RSign(x, self.params["rsign1"],
                          name=self.name + "_rsign1")
    conv1 = bnn.packed_conv2d_nchw(rsign1, self.params["conv1"],
                                   padding=[1, 1],
                                   strides=[self.stride, self.stride],
                                   name=self.name + "_conv1",
                                   out_dtype=qtype_int,
                                   mac=False)  # no bias!
    bn1, _, _ = nn.batch_norm(conv1, *self.params["bn1"],
                              name=self.name + "_bn1", dtype=qtype_float)

    if self.stride != 1 or self.flag:
        # Downsampling shortcut: avg-pool, then double the channels by
        # repeating the pooled tensor (c % shape[1] maps the second half
        # of the channel range back onto the first).
        avgpool = nn.avg_pool2d_LB(x, pooling=[2, 2], stride=[2, 2],
                                   padding=[0, 0],
                                   name=self.name + "_avgpool",
                                   dtype=qtype_float)
        # dont use nn.concatenate!
        shape = avgpool.shape
        # Lambda indices renamed (b, c, w, h) so they no longer shadow
        # the module-level `nn` helper inside the lambda scope.
        shortcut = hcl.compute(
            (shape[0], shape[1] * 2, shape[2], shape[3]),
            lambda b, c, w, h: avgpool[b, c % shape[1], w, h],
            name=self.name + "_concat", dtype=qtype_float)
    else:
        shortcut = x

    residual1 = hcl.compute(
        bn1.shape,
        lambda b, c, w, h: bn1[b, c, w, h] + shortcut[b, c, w, h],
        name=self.name + "_residual1", dtype=qtype_float)

    # --- 2nd residual sub-block ---
    rprelu1 = RPReLU(residual1, *self.params["rprelu1"],
                     name=self.name + "_rprelu1", dtype=qtype_float)
    rsign2 = packed_RSign(rprelu1, self.params["rsign2"],
                          name=self.name + "_rsign2")
    conv2 = bnn.packed_conv2d_nchw(rsign2, self.params["conv2"],
                                   strides=[1, 1], padding=[1, 1],
                                   name=self.name + "_conv2",
                                   out_dtype=qtype_int, mac=False)
    bn2, _, _ = nn.batch_norm(conv2, *self.params["bn2"],
                              name=self.name + "_bn2", dtype=qtype_float)
    residual2 = hcl.compute(
        rprelu1.shape,
        lambda b, c, w, h: bn2[b, c, w, h] + rprelu1[b, c, w, h],
        name=self.name + "_residual2", dtype=qtype_float)
    rprelu2 = RPReLU(residual2, *self.params["rprelu2"],
                     name=self.name + "_rprelu2", dtype=qtype_float)
    return rprelu2