print('a * b =') print(mul_nd.asnumpy()) np.testing.assert_allclose(mul_nd.asnumpy(), a_np * b_np.astype(dtype_w)) print('a / b =') print(div_nd.asnumpy()) # numpy always round down, while in c, the numerator will be rounded to zero. #np.testing.assert_allclose(div_nd.asnumpy(), a_np / b_np) print('b / a =') print(rdiv_nd.asnumpy()) print('max(a, b)=') print(gtm_nd.asnumpy()) if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description='test of NNPU Op') parser.add_argument('--sim', type=str, help='the simulator to use', default='S0', choices=['S0', 'S1', 'SC']) args = parser.parse_args() with nnpu.Environment('./nnpu_config.yaml'): env = nnpu.get_env() nnpu.set_device(env, type=args.sim) test()
def max_pooling(inshape, outshape, cell_shape, innp, outdetype):
    """Reference max pooling over non-overlapping square windows.

    Output element ``[w, h, l]`` is the maximum of
    ``innp[w * cell_shape + j, h * cell_shape + k, l]`` over all
    ``0 <= j, k < cell_shape``, floored at the lowest finite value of
    ``outdetype``.

    Args:
        inshape: Unused; kept so existing call sites keep working.
        outshape: 3-element shape of the result.
        cell_shape: Side length of the square pooling window.
        innp: Indexable 3-D input; converted with ``np.asarray``.
        outdetype: dtype of the result. Integer and floating dtypes are
            both accepted.

    Returns:
        A new ``numpy.ndarray`` of shape ``outshape`` and dtype ``outdetype``.

    Raises:
        ValueError: If ``innp`` is too small to supply ``outshape`` windows of
            side ``cell_shape``.
    """
    out_h, out_w, out_c = outshape
    dtype = np.dtype(outdetype)

    # np.iinfo only understands integer dtypes; floating dtypes need np.finfo.
    if np.issubdtype(dtype, np.floating):
        lowest = np.finfo(dtype).min
    else:
        lowest = np.iinfo(dtype).min

    # Keep exactly the region the output needs, then split each spatial axis
    # into (window index, offset inside window) so one reduction over the two
    # offset axes yields every window maximum at once.
    region = np.asarray(innp)[:out_h * cell_shape, :out_w * cell_shape, :out_c]
    tiles = region.astype(dtype).reshape(
        out_h, cell_shape, out_w, cell_shape, out_c)

    # `initial` both floors the result and keeps the reduction well defined
    # when the window is empty (cell_shape == 0).
    return tiles.max(axis=(1, 3), initial=lowest)


# reduce max
with nnpu.Environment(cfg_path):
    env = nnpu.get_env()
    nnpu.set_device(env, type=args.sim)
    nnpu.set_dump(False)

    #==================================#
    # ------ first define shapes ------
    #==================================#

    # input data layout: HWC
    in_shape = (40, 40, 256)
    # pooling windows size, height == width.
    cell_shape = 2
    # in this demo we don't do padding, so input data height and width must
    # be divisible by the pooling window size.
    assert in_shape[0] % cell_shape == 0, 'error'
    assert in_shape[1] % cell_shape == 0, 'error'
help='enable profiling', default=True) parser.add_argument('--parallel', type=bool, help='enable parallel', default=False) parser.add_argument('--dimx', type=int, help='tile size of x', default=2) parser.add_argument('--dimy', type=int, help='tile size of y', default=2) args = parser.parse_args() cfg_path = './nnpu_config-opt.yaml' if (args.profile): profile_dir = '/home/jian/Documents/nnpu_profile' nnpu.set_profile(['timeline', 'memory_access_latency'], profile_dir) with ScheduleProcHelper(), nnpu.Environment(cfg_path): env = nnpu.get_env() nnpu.set_device(env, type=args.sim) shape1 = (128, 256) shape2 = (128, 256) macops = shape1[0] * shape1[1] * shape2[0] gemm_shape = (8, 8, 8) factor = gemm_shape[1] assert shape1[1] == shape2[1], \ 'gemm do dot product between rows, so the shape[1] of inputs should match' assert shape1[0] % gemm_shape[ 0] == 0, 'gemm insn require size of input 1 be x{0}'.format( gemm_shape[0]) assert shape2[0] % gemm_shape[ 2] == 0, 'gemm insn require size of input 2 be x{0}'.format(
import nnpu
import tvm
import topi
from nnpu.utils import ScheduleProcHelper
import numpy as np
import math
import argparse

# Command line: only the simulator backend is selectable.
parser = argparse.ArgumentParser(description='test of activation functions')
parser.add_argument('--sim', type=str, help='the simulator to use',
                    default='S0', choices=['S0', 'S1', 'SC'])
args = parser.parse_args()

with ScheduleProcHelper(), nnpu.Environment('./nnpu_config_fp32.yaml'):
    env = nnpu.get_env()
    nnpu.set_device(env, type=args.sim)
    dtype_n, dtype_w = env.cfg['dtype_n'], env.cfg['dtype_w']

    assert dtype_w in ['float32', 'float16'], 'when testing activation function, float dtype is needed'

    shape = (64, )
    a = tvm.placeholder(shape, dtype_w, 'a')
    # element-wise copy of the input placeholder
    a_buf = tvm.compute(shape, lambda *i: a(*i), 'a_buf')

    # One element-wise stage per activation function. Each stage carries its
    # own name so the stages can be told apart in lowered/dumped output.
    exp = tvm.compute(shape, lambda i: tvm.exp(a_buf[i]), 'exp')
    log = tvm.compute(shape, lambda i: tvm.log(a_buf[i]), 'log')
    tanh = tvm.compute(shape, lambda i: tvm.tanh(a_buf[i]), 'tanh')
    sigmoid = tvm.compute(shape, lambda i: tvm.sigmoid(a_buf[i]), 'sigmoid')

    # k = tvm.reduce_axis((0, 16), 'k0')
    # sum = tvm.compute((1, ), lambda i: tvm.sum(sigmoid[k], axis=k), 'sum')
def _str2bool(text):
    """Convert a command-line token to a bool.

    ``type=bool`` would call ``bool(text)``, which is True for every non-empty
    string, so ``--profile False`` would still enable profiling. The empty
    string maps to False, as it did under ``bool``.

    Raises:
        ValueError: If the token is not a recognised boolean spelling
            (argparse reports this as an invalid argument value).
    """
    token = text.strip().lower()
    if token in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if token in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise ValueError('expected a boolean value, got {0!r}'.format(text))


parser.add_argument('--profile', type=_str2bool,
                    help='enable profiling', default=True)
args = parser.parse_args()

if args.profile:
    # NOTE(review): absolute, machine-specific output directory — confirm it
    # exists on the machine running this script.
    profile_dir = '/home/jian/Documents/nnpu_profile'
    nnpu.set_profile(['timeline', 'memory_access_latency'], profile_dir)


def roundup(x, d):
    """Return the smallest multiple of ``d`` that is >= ``x``.

    Intended for integer ``x`` and positive integer ``d``.
    """
    return (x + d - 1) // d * d


with ScheduleProcHelper(), nnpu.Environment('./nnpu_config-dense.yaml'):
    env = nnpu.get_env()
    nnpu.set_device(env, type=args.sim)

    shape1 = (128, 512)
    shape2 = (129, 512)
    # computed from the unpadded shapes, before shape2 is rounded up below
    macops = shape1[0] * shape1[1] * shape2[0]

    # upround shapes
    gemm_shape = (8, 8, 8)
    # shape1 = (roundup(shape1[0], gemm_shape[0]), shape1[1])
    shape2 = (roundup(shape2[0], gemm_shape[2]), shape2[1])
    bias_shape = (shape2[0], )

    factor = gemm_shape[1]