def test_sym_nnvm(batch_size, iter_num):
    logger = logging.getLogger("log.test.nnvm")
    logger.info("=== Log Test NNVM ===")

    sym_file, param_file, ext_file = load_fname("_darknet53_voc",
                                                "all.quantize", True)
    dump_sym, dump_params = load_fname("_darknet53_voc", "all.nnvm.compile")
    sym, params = mx.sym.load(sym_file), nd.load(param_file)
    inputs_ext, _ = sim.load_ext(ext_file)
    spass.mxnet_to_nnvm(sym, params, inputs_ext, dump_sym, dump_params)
def test_sym_nnvm(batch_size, iter_num):
    logger = logging.getLogger("log.test.nnvm")
    logger.info("=== Log Test NNVM ===")

    sym_file, param_file, ext_file = load_fname("_darknet53_voc", "mrt.all.quantize", True)
    sym, params = mx.sym.load(sym_file), nd.load(param_file)
    inputs_ext, _ = sim.load_ext(ext_file)
    nnvm_sym, nnvm_params = spass.mxnet_to_nnvm(sym, params, inputs_ext)
    spass.cvm_build(nnvm_sym, nnvm_params, inputs_ext, *load_fname("_darknet53_voc", "nnvm"))
def test_yxnet_mnist():
    mnist_sym = make_mnist_graph()

    inputs_ext = {
        'data': {
            'shape': (1, 1, 28, 28),
            'precision': 8,
        }
    }
    in_shape = (1, 1, 28, 28)
    arg_shapes, _, aux_shapes = mnist_sym.infer_shape(data=in_shape)
    args, auxs = mnist_sym.list_arguments(), mnist_sym.list_auxiliary_states()
    infer_shapes = {args[i]: arg_shapes[i] for i in range(len(args))}
    infer_shapes.update({auxs[i]: aux_shapes[i] for i in range(len(auxs))})

    root = "/home/serving/warehouse"
    _, bd = load_parameters(
        mnist_sym, infer_shapes,
        root + "/ca3d0286d5758697cdef653c1375960a868ac08a/data/params")
    mnist_sym, bd = spass.mx_set_precs(mnist_sym, bd, inputs_ext)

    dump_sym, dump_par = '/tmp/mnist_yxnet.symbol', '/tmp/mnist_yxnet.params'
    with open(dump_sym, 'w') as fout:
        fout.write(mnist_sym.tojson())
    nd.save(dump_par, bd)

    inputs = [mx.sym.var('data')]
    data = np.load(root + '/ba9fedfc87ccb6064fcd437fd2287f5edef1bd84/data')
    data = nd.array([data.astype(np.int8)])

    if False:
        graph = nn.SymbolBlock(mnist_sym, inputs)
        utils.load_parameters(graph, bd)
        res = graph.forward(data).astype('int32')
    else:
        prefix = "/tmp/yxnet/mnist"
        dump_sym, dump_params = prefix + ".json", prefix + ".params"
        print(sutils.sym_collect_attr(mnist_sym))
        spass.mxnet_to_nnvm(mnist_sym, bd, {'data': {
            'shape': (1, 1, 28, 28)
        }}, dump_sym, dump_params)
        exit()
    print(res.asnumpy().flatten()[:100])
def test_sym_nnvm(batch_size=10, iter_num=10):
    logger = logging.getLogger("log.test.nnvm")
    logger.info("=== Log Test NNVM ===")

    target = "llvm"
    tvm_ctx = tvm.context(target, 1)
    mx_ctx = mx.gpu(2)
    inputs_ext = {
        'data': {
            'shape': (batch_size, 3, 224, 224),
        }
    }
    inputs = [mx.sym.var(name) for name in inputs_ext]
    inputs_shape = {k: v['shape'] for k, v in inputs_ext.items()}

    data_iter = load_dataset(batch_size)

    def data_iter_func():
        data = data_iter.next()
        return data.data[0], data.label[0]

    data_iter_func()

    version = ""
    dump_sym, dump_params, dump_ext = load_fname(version, "sym.quantize", True)
    (inputs_ext, ) = sim.load_ext(dump_ext)
    sym, params = mx.sym.load(dump_sym), nd.load(dump_params)
    # sim.load_ins_ext(params, inputs_ext)

    # nnvm_sym, _ = nnvm.frontend.from_mxnet(sym)
    # with open('debug_nnvm_sym_after_load_from_mxnet.json', 'w') as fout:
    #    fout.write(nnvm_sym.debug_str())
    dump_sym, dump_params = load_fname(version, "nnvm.compile", False)
    spass.mxnet_to_nnvm(sym,
                        params,
                        inputs_ext,
                        dump_sym,
                        dump_params,
                        target='llvm')
Beispiel #5
0
def std_dump(sym,
             params,
             inputs_ext,
             data,
             model_name,
             is_mxnet=True,
             batch=False,
             data_dtype="int8",
             max_num=20,
             dump_ops=[]):
    if not batch:
        for k, v in inputs_ext.items():
            v['shape'] = (1, *v['shape'][1:])
        data = data[0].reshape(inputs_ext['data']['shape'])
    datadir = "/data/std_out/" + model_name
    os.makedirs(datadir, exist_ok=True)
    if is_mxnet:
        data = sim.load_real_data(data, 'data', inputs_ext)
        inputs_ext['data']['data'] = data
        spass.sym_dump_layer_outputs(sym,
                                     params,
                                     inputs_ext,
                                     datadir,
                                     data_dtype=data_dtype,
                                     max_num=max_num,
                                     dump_ops=dump_ops,
                                     ctx=mx.gpu(0))
        sym, params = spass.mxnet_to_nnvm(sym, params, inputs_ext)
    else:
        tvm_graph, tvm_params, lib = spass.cvm_build(sym,
                                                     params,
                                                     inputs_ext,
                                                     "/dev/null",
                                                     "/dev/null",
                                                     runtime="tvm",
                                                     target="llvm",
                                                     dtype="int32")
        model = graph_runtime.create(tvm_graph, lib, tvm.cpu())
        model.set_input(**params)
        model.set_input("data", data)
        model.run()
        np.save(datadir + "/data.npy", data.asnumpy().astype('int8'))
        for i in range(len(sym.list_output_names())):
            out = model.get_output(i).asnumpy()
            np.save("%s/result_%d.npy" % (datadir, i), out)

    return spass.cvm_build(sym, params, inputs_ext, datadir + "/symbol",
                           datadir + "/params")
Beispiel #6
0
def test_sym_pass(batch_size=10, iter_num=10, quantize=True):
    logger = logging.getLogger("log.test.sym.pass")
    calib_ctx = mx.gpu(1)
    ctx = [mx.gpu(int(i)) for i in "1,2,3,4".split(',') if i.strip()]
    inputs_ext = {
        'data': {
            'shape': (batch_size, 3, 224, 224),
        }
    }
    inputs = [mx.sym.var(name) for name in inputs_ext]

    logger.info("load dataset, symbol and parameters")
    # load dataset and iter function
    data_iter = ds.load_imagenet_rec(batch_size)

    def data_iter_func():
        data = data_iter.next()
        return data.data[0], data.label[0]

    data, _ = data_iter_func()

    # load original model for accuracy
    net1 = utils.load_model(*load_fname(version), inputs, ctx=ctx)
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)
    acc_top1.reset()
    acc_top5.reset()

    def shufflenet(data, label):
        data = gluon.utils.split_and_load(data,
                                          ctx_list=ctx,
                                          batch_axis=0,
                                          even_split=False)
        res = [net1.forward(d) for d in data]
        res = nd.concatenate(res)
        acc_top1.update(label, res)
        _, top1 = acc_top1.get()
        acc_top5.update(label, res)
        _, top5 = acc_top5.get()
        return "top1={:6.2%} top5={:6.2%}".format(top1, top5)

    if quantize:
        # load original model
        sym_fname, param_fname = load_fname(version)
        sym, params = mx.sym.load(sym_fname), nd.load(param_fname)
        sym, params = spass.sym_quant_prepare(sym, params, inputs_ext)

        # quantize process
        mrt = _mrt.MRT(sym, params, inputs_ext)  # initialize
        mrt.set_data('data', data)  # set input data
        mrt.calibrate(ctx=calib_ctx)  # calibration
        mrt.set_output_prec(8)  # set output prec, do nothing by default
        qsym, qparams, inputs_ext = mrt.quantize()  # quantization

        # dump quantized model
        dump_sym, dump_params, dump_ext = load_fname(version, "sym.quantize",
                                                     True)
        sim.save_ext(dump_ext, inputs_ext)
        nd.save(dump_params, qparams)
        open(dump_sym, "w").write(qsym.tojson())

    if False:
        # convert to cvm executor model
        inputs_ext['data']['shape'] = (1, 3, 224, 224)
        nnvm_sym, nnvm_params = spass.mxnet_to_nnvm(qsym, qparams, inputs_ext)
        spass.cvm_build(nnvm_sym, nnvm_params, inputs_ext,
                        *load_fname(version, "nnvm"))

    # load quantized model for accuracy
    dump_sym, dump_params, dump_ext = load_fname(version, "sym.quantize", True)
    (inputs_ext, ) = sim.load_ext(dump_ext)
    inputs = [mx.sym.var(n) for n in inputs_ext]
    net3 = utils.load_model(dump_sym, dump_params, inputs, ctx=ctx)

    # net3 = mx.gluon.nn.SymbolBlock(qsym, inputs)
    # utils.load_parameters(net3, qparams, ctx=ctx)
    qacc_top1 = mx.metric.Accuracy()
    qacc_top5 = mx.metric.TopKAccuracy(5)
    qacc_top1.reset()
    qacc_top5.reset()

    def cvm_quantize(data, label):
        data = sim.load_real_data(data, 'data', inputs_ext)
        data = gluon.utils.split_and_load(data,
                                          ctx_list=ctx,
                                          batch_axis=0,
                                          even_split=False)
        res = [net3.forward(d) for d in data]
        res = nd.concatenate(res)

        qacc_top1.update(label, res)
        _, top1 = qacc_top1.get()
        qacc_top5.update(label, res)
        _, top5 = qacc_top5.get()
        return "top1={:6.2%} top5={:6.2%}".format(top1, top5)

    # compare accuracy between models
    utils.multi_validate(shufflenet,
                         data_iter_func,
                         cvm_quantize,
                         iter_num=iter_num,
                         logger=logger)
Beispiel #7
0
def test_mx_quantize(batch_size=10, iter_num=10):
    logger = logging.getLogger("log.test.mx.quantize")

    ctx = [mx.gpu(int(i)) for i in "1,3".split(',') if i.strip()]
    inputs_ext = { 'data': {
        'shape': (batch_size, 3, 224, 224),
    }}
    inputs = [mx.sym.var(n) for n in inputs_ext]

    data_iter = ds.load_imagenet_rec(batch_size)
    def data_iter_func():
        data = data_iter.next()
        return data.data[0], data.label[0]
    data, _ = data_iter_func()

    net1 = utils.load_model(*load_fname(version), inputs, ctx=ctx)
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)
    acc_top1.reset()
    acc_top5.reset()
    def mobilenet(data, label):
        data = gluon.utils.split_and_load(data, ctx_list=ctx, batch_axis=0, even_split=False)
        res = [net1.forward(d) for d in data]
        res = nd.concatenate(res)
        acc_top1.update(label, res)
        _, top1 = acc_top1.get()
        acc_top5.update(label, res)
        _, top5 = acc_top5.get()
        return "top1={:6.2%} top5={:6.2%}".format(top1, top5)

    calib_ctx = mx.gpu(1)
    sym_fname, param_fname = load_fname(version)
    sym, params = mx.sym.load(sym_fname), nd.load(param_fname)
    sym, params = spass.sym_quant_prepare(sym, params, inputs_ext)
    if True:
        if True:
            mrt = _mrt.MRT(sym, params, inputs_ext)
            mrt.set_data('data', data)
            mrt.calibrate()
            # [ 0.0008745864 0.03330660510427334 ] 0.6670066884888368 0.7753906
            # mrt.set_threshold("mobilenet0_dense0_weight", 0.67)
            # # [ -0.0036011334 0.054821780899052534 ] 1.100036751338784 1.4626989
            # mrt.set_threshold("mobilenet0_conv24_batchnorm24_fwd_weight", 1.1)
            # # [ 0.013243316 1.7543557133786065 ] 70.18747185088569 94.66275
            # mrt.set_threshold("mobilenet0_conv23_batchnorm23_fwd_weight", 35.10)
            # # [ -0.0016149869 0.05713169649243355 ] 1.1442489167675376 1.7122083
            # mrt.set_threshold("mobilenet0_conv20_batchnorm20_fwd_weight", 1.144)
            # # [ -0.0015804865 0.04523811489343643 ] 0.9063427844084799 1.0745146
            # mrt.set_threshold("mobilenet0_conv16_batchnorm16_fwd_weight", 0.90)
            # # [ 0.4315614 2.447332109723772 ] 49.37820360490254 63.959927
            # mrt.set_threshold("mobilenet0_conv2_batchnorm2_fwd", 49.37)
            # # [ 0.9770754 1.3392452512468611 ] 27.761980422905516 40.729546
            # mrt.set_threshold("mobilenet0_relu2_fwd", 27.76)
            # [ 1.0975745 1.0489919010632773 ] 22.077412493692915 23.784576
            # mrt.set_threshold("mobilenet0_relu4_fwd", 22.08)
            # # [ 0.9885562 2.360489403014386 ] 48.19834426651407 69.22121
            # mrt.set_threshold("mobilenet0_conv5_batchnorm5_fwd", 48.2)
            # # [ 0.7895588 1.0544661745870065 ] 21.878882319617176 30.95745
            # mrt.set_threshold("mobilenet0_relu17_fwd", 21.88)
            # # [ 0.8717863 1.0887600296120434 ] 22.646986888608513 28.265652
            # mrt.set_threshold("mobilenet0_relu19_fwd", 22.65)
            # # [ 0.35124516 0.6501711574631898 ] 13.354668314135012 20.770807
            # mrt.set_threshold("mobilenet0_relu20_fwd", 13.35)
            # # [ 0.9378179 1.110470714216975 ] 23.147232155910086 27.886068
            # mrt.set_threshold("mobilenet0_relu21_fwd", 23.15)
            # # [ 0.36263302 0.6352599878026505 ] 13.067832775738754 17.18809
            # mrt.set_threshold("mobilenet0_relu22_fwd", 13.07)
            # # [ 0.19875833 0.49999100821358816 ] 10.198578498193196 16.625143
            # mrt.set_threshold("mobilenet0_relu24_fwd", 10.2)
            # # [ 0.32357717 1.6308352606637138 ] 65.55698759215218 75.84912
            # mrt.set_threshold("mobilenet0_conv25_batchnorm25_fwd", 32.94)
            # # [ 0.36793178 1.512995992388044 ] 30.62785163096019 49.464615
            # mrt.set_threshold("mobilenet0_relu26_fwd", 30.63)
            # # [ 18.028658 38.61970520019531 ] 790.4227619171143 805.51886
            # mrt.set_threshold("sum0", 790.423)
            mrt.set_output_prec(8)
            qsym, qparams, inputs_ext = mrt.quantize()
        else:
            inputs_ext['data']['data'] = data
            th_dict = calib.sym_calibrate(sym, params, inputs_ext, ctx=calib_ctx)
            qsym, qparams, precs, _ = calib.sym_simulate(sym, params, inputs_ext, th_dict)
            qsym, qparams = calib.sym_realize(qsym, qparams, inputs_ext, precs)
        dump_sym, dump_params, dump_ext = load_fname(version, "sym.quantize", True)
        sim.save_ext(dump_ext, inputs_ext)
        nd.save(dump_params, qparams)
        open(dump_sym, "w").write(qsym.tojson())

        dump_sym, dump_params = load_fname(version, "nnvm.compile")
        nnvm_sym, nnvm_params = spass.mxnet_to_nnvm(qsym, qparams, inputs_ext)
        spass.cvm_build(nnvm_sym, nnvm_params, inputs_ext, dump_sym, dump_params)

    dump_sym, dump_params, dump_ext = load_fname(version, "sym.quantize", True)
    (inputs_ext,) = sim.load_ext(dump_ext)
    net2 = utils.load_model(dump_sym, dump_params, inputs, ctx=ctx)
    qacc_top1 = mx.metric.Accuracy()
    qacc_top5 = mx.metric.TopKAccuracy(5)
    qacc_top1.reset()
    qacc_top5.reset()
    def cvm_quantize(data, label):
        data = sim.load_real_data(data, 'data', inputs_ext)
        data = gluon.utils.split_and_load(data, ctx_list=ctx, batch_axis=0, even_split=False)
        res = [net2.forward(d) for d in data]
        res = nd.concatenate(res)
        qacc_top1.update(label, res)
        _, top1 = qacc_top1.get()
        qacc_top5.update(label, res)
        _, top5 = qacc_top5.get()
        return "top1={:6.2%} top5={:6.2%}".format(top1, top5)

    utils.multi_validate(mobilenet, data_iter_func,
            cvm_quantize,
            iter_num=iter_num, logger=logger)
Beispiel #8
0
mrt = _mrt.MRT(sym, params, inputs_ext)  # initialize
mrt.set_data('data', data)  # set input data
mrt.calibrate(ctx=calib_ctx)  # calibration
mrt.set_output_prec(8)  # set output prec, do nothing by default
qsym, qparams, inputs_ext = mrt.quantize()  # quantization

if False:
    # dump quantized model
    dump_sym, dump_params, dump_ext = load_fname(version, "sym.quantize", True)
    sim.save_ext(dump_ext, inputs_ext)
    nd.save(dump_params, qparams)
    open(dump_sym, "w").write(qsym.tojson())

    # convert to cvm executor model
    inputs_ext['data']['shape'] = (1, 3, input_size, input_size)
    nnvm_sym, nnvm_params = spass.mxnet_to_nnvm(qsym, qparams, inputs_ext)
    spass.cvm_build(nnvm_sym, nnvm_params, inputs_ext,
                    *load_fname(version, "nnvm"))

# load quantized model for accuracy
net3 = mx.gluon.nn.SymbolBlock(qsym, inputs)
utils.load_parameters(net3, qparams, ctx=ctx)
qacc_top1 = mx.metric.Accuracy()
qacc_top5 = mx.metric.TopKAccuracy(5)
qacc_top1.reset()
qacc_top5.reset()


def cvm_quantize(data, label):
    data = sim.load_real_data(data, 'data', inputs_ext)
    data = gluon.utils.split_and_load(data,