Example #1
    print('a * b =')
    print(mul_nd.asnumpy())
    np.testing.assert_allclose(mul_nd.asnumpy(), a_np * b_np.astype(dtype_w))

    print('a / b =')
    print(div_nd.asnumpy())
    # NumPy floor-divides (rounds toward negative infinity), while C-style
    # integer division truncates the quotient toward zero, so the two can
    # disagree for negative operands and this check is disabled:
    # np.testing.assert_allclose(div_nd.asnumpy(), a_np / b_np)
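    # a hedged alternative (assumption drawn from the comment above: the
    # device truncates the quotient toward zero, like C), emulated on the
    # host with np.trunc so the check can be enabled:
    np.testing.assert_allclose(
        div_nd.asnumpy(),
        np.trunc(a_np.astype('float64') / b_np).astype(dtype_w))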

    print('b / a =')
    print(rdiv_nd.asnumpy())

    print('max(a, b)=')
    print(gtm_nd.asnumpy())
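    # reference check for the element-wise maximum (a sketch: it assumes
    # gtm_nd holds max(a, b) computed on the device, as the print suggests):
    np.testing.assert_allclose(gtm_nd.asnumpy(),
                               np.maximum(a_np, b_np).astype(dtype_w))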


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='test of NNPU Op')
    parser.add_argument('--sim',
                        type=str,
                        help='the simulator to use',
                        default='S0',
                        choices=['S0', 'S1', 'SC'])
    args = parser.parse_args()
    with nnpu.Environment('./nnpu_config.yaml'):
        env = nnpu.get_env()
        nnpu.set_device(env, type=args.sim)
        test()
Example #2
def max_pooling(inshape, outshape, cell_shape, innp, outdetype):
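    # reference max-pooling over HWC data: each output pixel takes the
    # maximum of its cell_shape x cell_shape window, per channel.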
    ret = np.full(outshape, np.iinfo(outdetype).min, dtype=outdetype)
    for w in range(outshape[0]):
        for h in range(outshape[1]):
            for j in range(cell_shape):
                for k in range(cell_shape):
                    for l in range(outshape[2]):
                        ret[w][h][l] = max(
                            ret[w][h][l],
                            innp[w * cell_shape + j][h * cell_shape + k][l])
    return ret
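
# a vectorized equivalent of max_pooling for cross-checking (a sketch; like
# the loop version it assumes H and W are multiples of cell_shape):
def max_pooling_np(innp, cell_shape):
    h, w, c = innp.shape
    return innp.reshape(h // cell_shape, cell_shape,
                        w // cell_shape, cell_shape, c).max(axis=(1, 3))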


# reduce max
with nnpu.Environment(cfg_path):
    env = nnpu.get_env()
    nnpu.set_device(env, type=args.sim)
    nnpu.set_dump(False)

    #==================================#
    # ------ first define shapes ------
    #==================================#

    # input data layout: HWC
    in_shape = (40, 40, 256)
    # pooling windows size, height == width.
    cell_shape = 2
    # in this demo we don't do padding, so the input height and width must be
    # divisible by the pooling window size.
    assert in_shape[0] % cell_shape == 0, 'input height must be divisible by the pooling window size'
    assert in_shape[1] % cell_shape == 0, 'input width must be divisible by the pooling window size'
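    # derived output layout under the shapes above (out_shape is a
    # hypothetical name; the rest of this example is not shown):
    out_shape = (in_shape[0] // cell_shape, in_shape[1] // cell_shape,
                 in_shape[2])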
Example #3
parser.add_argument('--profile',
                    type=bool,
                    help='enable profiling',
                    default=True)
parser.add_argument('--parallel',
                    type=bool,
                    help='enable parallel',
                    default=False)
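# note: argparse's type=bool treats any non-empty string (even 'False') as
# True, so these flags can only be disabled by passing an empty string.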
parser.add_argument('--dimx', type=int, help='tile size of x', default=2)
parser.add_argument('--dimy', type=int, help='tile size of y', default=2)
args = parser.parse_args()

cfg_path = './nnpu_config-opt.yaml'
if (args.profile):
    profile_dir = '/home/jian/Documents/nnpu_profile'
    nnpu.set_profile(['timeline', 'memory_access_latency'], profile_dir)

with ScheduleProcHelper(), nnpu.Environment(cfg_path):
    env = nnpu.get_env()
    nnpu.set_device(env, type=args.sim)

    shape1 = (128, 256)
    shape2 = (128, 256)
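    # total multiply-accumulate count: 128 * 256 * 128 = 4,194,304 MACs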
    macops = shape1[0] * shape1[1] * shape2[0]
    gemm_shape = (8, 8, 8)
    factor = gemm_shape[1]
    assert shape1[1] == shape2[1], \
        'gemm does a dot product between rows, so shape[1] of both inputs must match'
    assert shape1[0] % gemm_shape[0] == 0, \
        'the gemm instruction requires shape1[0] to be a multiple of {0}'.format(gemm_shape[0])
    assert shape2[0] % gemm_shape[2] == 0, \
        'the gemm instruction requires shape2[0] to be a multiple of {0}'.format(gemm_shape[2])
Example #4
import nnpu
import tvm
import topi
from nnpu.utils import ScheduleProcHelper
import numpy as np
import math
import argparse

parser = argparse.ArgumentParser(description='test of NNPU activation functions')
parser.add_argument('--sim', type=str, help='the simulator to use', 
                        default='S0', choices=['S0', 'S1', 'SC'])
args = parser.parse_args()

with ScheduleProcHelper(), nnpu.Environment('./nnpu_config_fp32.yaml'):
    env = nnpu.get_env()
    nnpu.set_device(env, type=args.sim)
    dtype_n, dtype_w = env.cfg['dtype_n'], env.cfg['dtype_w']

    assert dtype_w in ['float32', 'float16'], 'when testing activation function, float dtype is needed'

    shape = (64, )
    a = tvm.placeholder(shape, dtype_w, 'a')
    a_buf = tvm.compute(shape, lambda *i: a(*i), 'a_buf')

    exp = tvm.compute(shape, lambda i: tvm.exp(a_buf[i]), 'exp')
    log = tvm.compute(shape, lambda i: tvm.log(a_buf[i]), 'log')
    tanh = tvm.compute(shape, lambda i: tvm.tanh(a_buf[i]), 'tanh')
    sigmoid = tvm.compute(shape, lambda i: tvm.sigmoid(a_buf[i]), 'sigmoid')

    # k = tvm.reduce_axis((0, 16), 'k0')
    # sum = tvm.compute((1, ), lambda i: tvm.sum(sigmoid[k], axis=k), 'sum')
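
    # host-side reference values for the four activations (a numpy sketch;
    # a_np is a hypothetical input here, since the build/run part of this
    # example is not shown):
    a_np = np.random.uniform(1.0, 2.0, shape).astype(dtype_w)
    ref = {'exp': np.exp(a_np), 'log': np.log(a_np),
           'tanh': np.tanh(a_np), 'sigmoid': 1.0 / (1.0 + np.exp(-a_np))}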
Example #5
parser.add_argument('--profile',
                    type=bool,
                    help='enable profiling',
                    default=True)
args = parser.parse_args()

if (args.profile):
    profile_dir = '/home/jian/Documents/nnpu_profile'
    nnpu.set_profile(['timeline', 'memory_access_latency'], profile_dir)


def roundup(x, d):
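    # round x up to the nearest multiple of d, e.g. roundup(129, 8) == 136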
    return (x + d - 1) // d * d


with ScheduleProcHelper(), nnpu.Environment('./nnpu_config-dense.yaml'):
    env = nnpu.get_env()
    nnpu.set_device(env, type=args.sim)

    shape1 = (128, 512)
    shape2 = (129, 512)

    macops = shape1[0] * shape1[1] * shape2[0]

    # round shapes up to multiples of the gemm instruction shape
    gemm_shape = (8, 8, 8)
    # shape1 = (roundup(shape1[0], gemm_shape[0]), shape1[1])
    shape2 = (roundup(shape2[0], gemm_shape[2]), shape2[1])
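    # with the shapes above, roundup(129, 8) == 136, so shape2 becomes (136, 512)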

    bias_shape = (shape2[0], )
    factor = gemm_shape[1]