def test_weight_async_reorder():
    """Run a two-convolution graph forward twice on the current context.

    Each convolution receives ``w + w`` as its weight, i.e. the weight input
    is the result of an elementwise add instead of a bare variable.  The test
    succeeds when all outputs can be read after both forward passes.
    """
    input_shape = (10, 16, 50, 50)

    data_sym = mx.sym.Variable("data")
    weight_1 = mx.sym.Variable("1_weight")
    weight_2 = mx.sym.Variable("2_weight")
    first_conv = mx.sym.Convolution(data=data_sym,
                                    weight=weight_1 + weight_1,
                                    num_filter=32,
                                    no_bias=True,
                                    kernel=(3, 3))
    second_conv = mx.sym.Convolution(data=first_conv,
                                     weight=weight_2 + weight_2,
                                     num_filter=32,
                                     no_bias=True,
                                     kernel=(1, 1))

    module = Module(symbol=second_conv,
                    label_names=None,
                    context=mx.current_context())
    module.bind(for_training=False, data_shapes=[('data', input_shape)])
    module.init_params(initializer=mx.init.Xavier(magnitude=2.))

    samples = [
        mx.random.uniform(-1.0, 1.0, shape=input_shape,
                          ctx=mx.current_context())
    ]
    batch = mx.io.DataBatch(samples, [])

    # Two passes over the same batch, blocking on every output each time.
    for _ in range(2):
        module.forward(batch, is_train=False)
        for out in module.get_outputs():
            out.wait_to_read()
def check_quantize(sym, data_shape, out_type, name='conv',
                   check_calibration=True, gluon_forward=False):
    """Quantize ``sym`` with an FC head and validate the quantized graph.

    Args:
        sym: fp32 symbol under test; a ``fc_softmax`` FullyConnected layer is
            appended to it.
        data_shape: shape bound to the ``data`` input.
        out_type: value forwarded as ``quantized_dtype``.
        name: key into ``config`` selecting the two subgraph pass names.
        check_calibration: when true, run ``check_qsym_calibrated``.
        gluon_forward: when ``True`` the graph ends at the FC layer and is
            checked through ``check_qsym_gluon_forward``; otherwise a
            ``SoftmaxOutput`` is added and outputs are compared with the
            fp32 reference using ``atol=1``.
    """
    pass_names = config[name]
    sg_pass_name = pass_names[SG_PASS_NAME]
    post_sg_pass_name = pass_names[POST_SG_PASS_NAME]
    # Keep the strict ``== True`` comparison used throughout this helper.
    use_gluon = gluon_forward == True

    fc = mx.sym.FullyConnected(data=sym, num_hidden=10, flatten=True,
                               name='fc_softmax')
    if use_gluon:
        sym = fc
        sym_sg = sym.get_backend_symbol(sg_pass_name)
        mod = Module(symbol=sym, label_names=[])
        mod.bind(for_training=False, data_shapes=[('data', data_shape)])
    else:
        sym = mx.sym.SoftmaxOutput(data=fc, name='softmax')
        sym_sg = sym.get_backend_symbol(sg_pass_name)
        label_shape = (data_shape[0], 10)
        mod = Module(symbol=sym)
        mod.bind(for_training=False,
                 data_shapes=[('data', data_shape)],
                 label_shapes=[('softmax_label', label_shape)])
    mod.init_params(mx.init.Normal(0.5))
    arg_params, aux_params = mod.get_params()

    # fp32 reference run.
    inputs = [
        mx.random.uniform(-1, 1, shape=shape, ctx=mx.current_context())
        for _, shape in mod.data_shapes
    ]
    batch = mx.io.DataBatch(inputs, [])
    mod.forward(batch, is_train=False)
    for out in mod.get_outputs():
        out.wait_to_read()
    ref_out = mod.get_outputs()

    excluded_sym_names = []
    if mx.current_context() == mx.cpu() and use_gluon:
        excluded_sym_names += ['sg_mkldnn_fully_connected_0']
        excluded_sym_names += ['fc_softmax']

    calib_data = mx.nd.random.uniform(shape=data_shape)
    calib_data = NDArrayIter(data=calib_data)
    calib_data = DummyIter(calib_data)

    def calib_layer(name):
        return name.endswith('_output')

    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym_sg,
        arg_params=arg_params,
        aux_params=aux_params,
        ctx=mx.current_context(),
        excluded_sym_names=excluded_sym_names,
        quantized_dtype=out_type,
        calib_mode='naive',
        calib_data=calib_data,
        calib_layer=calib_layer,
        num_calib_examples=5)
    qsym = qsym.get_backend_symbol(post_sg_pass_name)

    if check_calibration:
        check_qsym_calibrated(qsym, out_type, name=name)

    if use_gluon:
        check_qsym_gluon_forward(qsym, qarg_params, qaux_params, data_shape)
        return

    check_qsym_dummy_forward(qsym, batch, data_shape, label_shape)
    quantized_out = check_qsym_forward(qsym, qarg_params, qaux_params,
                                       batch, data_shape, label_shape)
    for idx, expected in enumerate(ref_out):
        assert_almost_equal(expected.asnumpy(),
                            quantized_out[idx].asnumpy(), atol=1)
def load_model(sym):
    """Bind ``sym`` for inference and load the weights named in ``system_dict``.

    Reads ``system_dict["gpu"]`` (GPU id, falsy selects CPU 0),
    ``system_dict["params"]`` (parameter file passed to ``load_param``) and
    ``system_dict["img_long_side"]`` (side of the square ``data`` shape).

    Returns:
        The bound ``Module`` with parameters initialised from the file.
    """
    gpu_id = system_dict["gpu"]
    ctx = mx.gpu(int(gpu_id)) if gpu_id else mx.cpu(0)

    arg_params, aux_params = load_param(system_dict["params"], ctx=ctx)

    # Bind with the largest shape that can occur.
    long_side = system_dict["img_long_side"]
    data_shapes = [('data', (1, 3, long_side, long_side)),
                   ('im_info', (1, 3))]
    check_shape(sym, data_shapes, arg_params, aux_params)

    mod = Module(sym, ['data', 'im_info'], None, context=ctx)
    mod.bind(data_shapes, None, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)
    return mod
def check_quantize_model(qdtype):
    """Quantize the fp32 test symbol without and with naive calibration.

    For both ``calib_mode='none'`` and ``calib_mode='naive'`` the returned
    parameters are compared with the expected ones; the calibrated symbol is
    additionally checked for calibration ranges and for ``out_type``.

    Args:
        qdtype: value forwarded as ``quantized_dtype`` and expected as the
            ``out_type`` attribute of quantize nodes.
    """

    def check_params(params, qparams, qsym=None):
        # Without a quantized symbol the parameters must be unchanged;
        # otherwise they must match what ``_quantize_params`` produces.
        if qsym is None:
            expected = params
        else:
            expected = mx.contrib.quant._quantize_params(qsym, params)
        assert len(qparams) == len(expected)
        for key, value in expected.items():
            assert key in qparams
            assert same(value.asnumpy(), qparams[key].asnumpy())

    def check_qsym_calibrated(qsym):
        for node_name, node_attrs in qsym.attr_dict().items():
            if 'requantize_' in node_name:
                assert 'min_calib_range' in node_attrs
                assert 'max_calib_range' in node_attrs

    def check_qsym_qdtype(qsym, qdtype):
        for node_name, node_attrs in qsym.attr_dict().items():
            if '_quantize' in node_name:
                assert 'out_type' in node_attrs
                assert node_attrs['out_type'] == qdtype

    sym = get_fp32_sym()
    mod = Module(symbol=sym)
    batch_size = 4
    data_shape = (batch_size, 4, 10, 10)
    label_shape = (batch_size, 10)
    mod.bind(data_shapes=[('data', data_shape)],
             label_shapes=[('softmax_label', label_shape)])
    mod.init_params()
    arg_params, aux_params = mod.get_params()

    # Pass 1: no calibration.
    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym,
        arg_params=arg_params,
        aux_params=aux_params,
        ctx=mx.current_context(),
        quantized_dtype=qdtype,
        calib_mode='none')
    check_params(arg_params, qarg_params, qsym)
    check_params(aux_params, qaux_params)

    # Pass 2: naive calibration on random data.
    calib_data = DummyIter(
        NDArrayIter(data=mx.nd.random.uniform(shape=data_shape)))
    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym,
        arg_params=arg_params,
        aux_params=aux_params,
        ctx=mx.current_context(),
        quantized_dtype=qdtype,
        calib_mode='naive',
        calib_data=calib_data,
        num_calib_examples=20)
    check_params(arg_params, qarg_params, qsym)
    check_params(aux_params, qaux_params)
    check_qsym_calibrated(qsym)
    check_qsym_qdtype(qsym, qdtype)
def check_quantize_whole_model(out_type):
    """Quantize a two-layer 1x1 convolution model and run it forward.

    The fp32 symbol is partitioned with the ``MKLDNN_QUANTIZE`` backend,
    quantized with naive calibration on a single random example,
    partitioned again, and handed to ``check_qsym_forward``.

    Args:
        out_type: value forwarded as ``quantized_dtype``.
    """
    backend = 'MKLDNN_QUANTIZE'
    batch_size = 4
    data_shape = (batch_size, 4, 10, 10)

    data = mx.sym.Variable('data')
    conv0 = mx.sym.Convolution(data, kernel=(1, 1), num_filter=16,
                               name='conv0')
    sym = mx.sym.Convolution(conv0, kernel=(1, 1), num_filter=16,
                             name='conv1')
    sym_sg = sym.get_backend_symbol(backend)

    mod = Module(symbol=sym, label_names=None)
    mod.bind(for_training=False, data_shapes=[('data', data_shape)])
    mod.init_params(mx.init.Normal(0.5))
    arg_params, aux_params = mod.get_params()

    excluded_sym_names = []

    calib_data = mx.nd.random.uniform(shape=data_shape)
    calib_data = mx.io.NDArrayIter(data=calib_data)
    calib_data = DummyIter(calib_data)

    def calib_layer(name):
        return name.endswith('_output')

    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym_sg,
        arg_params=arg_params,
        aux_params=aux_params,
        ctx=mx.current_context(),
        excluded_sym_names=excluded_sym_names,
        quantized_dtype=out_type,
        calib_mode='naive',
        calib_data=calib_data,
        calib_layer=calib_layer,
        label_names=None,
        num_calib_examples=1)
    qsym = qsym.get_backend_symbol(backend)
    check_qsym_forward(qsym, qarg_params, qaux_params, data_shape)
def convert_net(sym, args):
    """Load parameters into ``sym`` on CPU and save them as a checkpoint.

    Args:
        sym: network symbol to bind.
        args: namespace providing ``params`` (parameter file),
            ``img_long_side`` (side of the square ``data`` shape) and
            ``save_prefix`` (checkpoint prefix; written as epoch 0).
    """
    ctx = mx.cpu(0)

    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # Bind with the largest shape that can occur.
    long_side = args.img_long_side
    data_shapes = [('data', (1, 3, long_side, long_side)),
                   ('im_info', (1, 3))]
    check_shape(sym, data_shapes, arg_params, aux_params)

    mod = Module(sym, ['data', 'im_info'], None, context=ctx)
    mod.bind(data_shapes, None, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    mod.save_checkpoint(args.save_prefix, epoch=0)
def load_model(sym):
    '''
    User function: Load the trained model weights.

    Args:
        sym (mxnet model): Mxnet model returned from set_network() function

    Returns:
        mxnet model: Module bound for inference with the weights from
                     system_dict["params"] loaded
    '''
    gpu_setting = system_dict["gpu"]
    if gpu_setting:
        ctx = mx.gpu(int(gpu_setting))
    else:
        ctx = mx.cpu(0)

    # Parameters are loaded straight onto the selected device.
    arg_params, aux_params = load_param(system_dict["params"], ctx=ctx)

    # Largest input the module may see: a square of img_long_side.
    side = system_dict["img_long_side"]
    input_names = ['data', 'im_info']
    data_shapes = [('data', (1, 3, side, side)), ('im_info', (1, 3))]

    check_shape(sym, data_shapes, arg_params, aux_params)

    mod = Module(sym, input_names, None, context=ctx)
    mod.bind(data_shapes, None, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)
    return mod
def check_quantize_model(qdtype):
    """Check ``quantize_model`` output for the fp32 test symbol.

    Runs quantization once with ``calib_mode='none'`` and once with
    ``calib_mode='naive'``.  After each run the quantized arg/aux parameters
    are verified; after the calibrated run the symbol attributes are
    verified as well.

    Args:
        qdtype: quantized dtype passed to ``quantize_model`` and expected on
            quantize nodes.
    """

    def check_params(params, qparams, qsym=None):
        # ``qsym is None`` means the parameters are expected untouched.
        reference = (params if qsym is None
                     else mx.contrib.quant._quantize_params(qsym, params))
        assert len(qparams) == len(reference)
        for name, array in reference.items():
            assert name in qparams
            assert same(array.asnumpy(), qparams[name].asnumpy())

    def check_qsym_calibrated(qsym):
        for name, attrs in qsym.attr_dict().items():
            if 'requantize_' not in name:
                continue
            assert 'min_calib_range' in attrs
            assert 'max_calib_range' in attrs

    def check_qsym_qdtype(qsym, qdtype):
        for name, attrs in qsym.attr_dict().items():
            if '_quantize' not in name:
                continue
            assert 'out_type' in attrs
            assert attrs['out_type'] == qdtype

    batch_size = 4
    data_shape = (batch_size, 4, 10, 10)
    label_shape = (batch_size, 10)

    sym = get_fp32_sym()
    mod = Module(symbol=sym)
    mod.bind(data_shapes=[('data', data_shape)],
             label_shapes=[('softmax_label', label_shape)])
    mod.init_params()
    arg_params, aux_params = mod.get_params()

    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym, arg_params=arg_params, aux_params=aux_params,
        ctx=mx.current_context(), quantized_dtype=qdtype,
        calib_mode='none')
    check_params(arg_params, qarg_params, qsym)
    check_params(aux_params, qaux_params)

    random_input = mx.nd.random.uniform(shape=data_shape)
    calib_data = DummyIter(NDArrayIter(data=random_input))
    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym, arg_params=arg_params, aux_params=aux_params,
        ctx=mx.current_context(), quantized_dtype=qdtype,
        calib_mode='naive', calib_data=calib_data,
        num_calib_examples=20)
    check_params(arg_params, qarg_params, qsym)
    check_params(aux_params, qaux_params)
    check_qsym_calibrated(qsym)
    check_qsym_qdtype(qsym, qdtype)
def demo_net(sym, class_names, args):
    """Run detection on one image, print timing and detections.

    Args:
        sym: detection network symbol.
        class_names: sequence indexed by class id, used for printing.
        args: namespace with ``gpu``, ``image``, ``img_short_side``,
            ``img_long_side``, ``img_pixel_means``, ``img_pixel_stds``,
            ``params``, ``rcnn_bbox_stds``, ``rcnn_nms_thresh``,
            ``rcnn_conf_thresh``, ``vis_thresh`` and ``vis``.
    """
    print('called with args\n{}'.format(pprint.pformat(vars(args))))

    ctx = mx.gpu(int(args.gpu)) if args.gpu else mx.cpu(0)

    # Single test image -> data batch.
    im_tensor, im_info, im_orig = load_test(args.image,
                                            short=args.img_short_side,
                                            max_size=args.img_long_side,
                                            mean=args.img_pixel_means,
                                            std=args.img_pixel_stds)
    data_batch = generate_batch(im_tensor, im_info)

    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # Bind with the largest shape that can occur.
    long_side = args.img_long_side
    data_shapes = [('data', (1, 3, long_side, long_side)),
                   ('im_info', (1, 3))]
    check_shape(sym, data_shapes, arg_params, aux_params)

    mod = Module(sym, ['data', 'im_info'], None, context=ctx)
    mod.bind(data_shapes, None, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # Timed forward pass; blocks on ``rois`` before the clock is read.
    forward_starts = time.time()
    mod.forward(data_batch)
    rois, scores, bbox_deltas = mod.get_outputs()
    rois.wait_to_read()
    rois = rois[:, 1:]
    scores = scores[0]
    bbox_deltas = bbox_deltas[0]
    forward_costs = time.time() - forward_starts
    print("forward costs %.4f" % (forward_costs))

    im_info = im_info[0]
    det = im_detect(rois, scores, bbox_deltas, im_info,
                    bbox_stds=args.rcnn_bbox_stds,
                    nms_thresh=args.rcnn_nms_thresh,
                    conf_thresh=args.rcnn_conf_thresh)

    # Report non-background detections above the visualisation threshold.
    for [cls, conf, x1, y1, x2, y2] in det:
        if not (cls > 0 and conf > args.vis_thresh):
            continue
        print(class_names[int(cls)], conf, [x1, y1, x2, y2])

    if args.vis:
        vis_detection(im_orig, det, class_names,
                      thresh=args.vis_thresh, prefix=args.image)
def check_qsym_dummy_forward(qsym, batch, data_shape):
    """Run ``qsym`` forward with freshly initialised (Xavier) parameters.

    Args:
        qsym: quantized symbol to bind.
        batch: data batch fed to ``forward``.
        data_shape: shape bound to the ``data`` input.

    Returns:
        The module outputs, fetched after every output has been waited on.
    """
    module = Module(symbol=qsym, label_names=None,
                    context=mx.current_context())
    module.bind(for_training=False, data_shapes=[('data', data_shape)])
    module.init_params(initializer=mx.init.Xavier(magnitude=2.))
    module.forward(batch, is_train=False)
    for out in module.get_outputs():
        out.wait_to_read()
    return module.get_outputs()
def check_qsym_forward(qsym, qarg_params, qaux_params, batch, data_shape):
    """Run ``qsym`` forward with the given quantized parameters.

    Args:
        qsym: quantized symbol to bind.
        qarg_params: argument parameters set on the module.
        qaux_params: auxiliary parameters set on the module.
        batch: data batch fed to ``forward``.
        data_shape: shape bound to the ``data`` input.

    Returns:
        The module outputs, fetched after every output has been waited on.
    """
    module = Module(symbol=qsym, label_names=None,
                    context=mx.current_context())
    module.bind(for_training=False, data_shapes=[('data', data_shape)])
    module.set_params(qarg_params, qaux_params)
    module.forward(batch, is_train=False)
    for out in module.get_outputs():
        out.wait_to_read()
    return module.get_outputs()
def check_quantize(sym, data_shape, check_conv=True):
    """Quantize ``sym`` (plus FC + softmax head) to uint8 and compare outputs.

    The fp32 graph is run once as reference.  The ``MKLDNN`` partitioned
    graph is quantized with naive calibration, partitioned again with
    ``MKLDNN_POST_QUANTIZE``, and its outputs must match the reference
    within ``atol=1``.  Finally a forward pass with dummy parameters is run.

    Args:
        sym: fp32 symbol under test.
        data_shape: shape bound to the ``data`` input.
        check_conv: when true, run ``check_qsym_calibrated`` on the result.
    """
    fc = mx.sym.FullyConnected(data=sym, num_hidden=10, flatten=True,
                               name='fc')
    sym = mx.sym.SoftmaxOutput(data=fc, name='softmax')
    sym_sg = sym.get_backend_symbol("MKLDNN")
    label_shape = (data_shape[0], 10)

    mod = Module(symbol=sym)
    mod.bind(for_training=False,
             data_shapes=[('data', data_shape)],
             label_shapes=[('softmax_label', label_shape)])
    mod.init_params(mx.init.Normal(0.5))
    arg_params, aux_params = mod.get_params()

    # fp32 reference run.
    inputs = []
    for _, shape in mod.data_shapes:
        inputs.append(mx.random.uniform(-1, 1, shape=shape,
                                        ctx=mx.current_context()))
    batch = mx.io.DataBatch(inputs, [])
    mod.forward(batch, is_train=False)
    for out in mod.get_outputs():
        out.wait_to_read()
    ref_out = mod.get_outputs()

    excluded_sym_names = []
    if mx.current_context() == mx.cpu():
        excluded_sym_names += ['fc']

    calib_data = mx.nd.random.uniform(shape=data_shape)
    calib_data = NDArrayIter(data=calib_data)
    calib_data = DummyIter(calib_data)

    def calib_layer(name):
        return name.endswith('_output')

    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym_sg,
        arg_params=arg_params,
        aux_params=aux_params,
        ctx=mx.current_context(),
        excluded_sym_names=excluded_sym_names,
        quantized_dtype='uint8',
        calib_mode='naive',
        calib_data=calib_data,
        calib_layer=calib_layer,
        calib_quantize_op=True,
        num_calib_examples=5)
    qsym = qsym.get_backend_symbol("MKLDNN_POST_QUANTIZE")

    if check_conv:
        check_qsym_calibrated(qsym)

    quantized_out = check_qsym_forward(qsym, qarg_params, qaux_params,
                                       batch, data_shape, label_shape)
    for idx, expected in enumerate(ref_out):
        assert_almost_equal(expected.asnumpy(),
                            quantized_out[idx].asnumpy(), atol=1)
    check_qsym_dummy_forward(qsym, batch, data_shape, label_shape)
def check_quantize(sym, data_shape, out_type, name='conv',
                   check_calibration=True, gluon_forward=False,
                   check_scale_align=False):
    """Quantize ``sym`` and compare its outputs against the fp32 graph.

    Args:
        sym: fp32 symbol under test.
        data_shape: shape bound to the ``data`` input.
        out_type: quantized dtype; ``'uint8'`` draws inputs from [0, 1),
            anything else from [-1, 1).
        name: operator name, translated through ``config`` when present.
        check_calibration: when true, run ``check_qsym_calibrated``.
        gluon_forward: when ``True`` validate through
            ``check_qsym_gluon_forward`` instead of comparing outputs.
        check_scale_align: when true, run ``check_qsym_scale_align``.
    """
    if name in config:
        name = config[name][OP_NAME]
    # Keep the strict ``== True`` comparison used throughout this helper.
    use_gluon = gluon_forward == True

    sym_sg = sym.get_backend_symbol(QUANTIZE_SG_PASS_NAME)
    mod = Module(symbol=sym, label_names=None)
    mod.bind(for_training=False, data_shapes=[('data', data_shape)])
    mod.init_params(mx.init.Normal(0.5))
    arg_params, aux_params = mod.get_params()

    # Only the lower bound of the input range depends on the dtype.
    low = 0.0 if out_type == 'uint8' else -1.0
    inputs = [
        mx.random.uniform(low, 1.0, shape=shape, ctx=mx.current_context())
        for _, shape in mod.data_shapes
    ]
    batch = mx.io.DataBatch(inputs, [])

    # fp32 reference run.
    mod.forward(batch, is_train=False)
    for out in mod.get_outputs():
        out.wait_to_read()
    ref_out = mod.get_outputs()

    excluded_sym_names = []
    excluded_op_names = []
    if mx.current_context() == mx.cpu() and use_gluon:
        excluded_op_names += ['_sg_mkldnn_fully_connected']

    calib_data = CalibIter(batch, data_shape, 1)

    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym_sg,
        arg_params=arg_params,
        aux_params=aux_params,
        ctx=mx.current_context(),
        excluded_sym_names=excluded_sym_names,
        excluded_op_names=excluded_op_names,
        quantized_dtype=out_type,
        calib_mode='naive',
        calib_data=calib_data,
        calib_layer=None,
        label_names=None,
        num_calib_examples=1)
    qsym = qsym.get_backend_symbol(QUANTIZE_SG_PASS_NAME)

    if check_calibration:
        check_qsym_calibrated(qsym, out_type, name=name)
    if check_scale_align:
        check_qsym_scale_align(qsym)

    if use_gluon:
        check_qsym_gluon_forward(qsym, qarg_params, qaux_params, data_shape)
        return

    quantized_out = check_qsym_forward(qsym, qarg_params, qaux_params,
                                       batch, data_shape)
    for idx, expected in enumerate(ref_out):
        # Absolute tolerance is 10% of the largest reference magnitude.
        min_range = mx.nd.min(expected).asscalar()
        max_range = mx.nd.max(expected).asscalar()
        atol = 0.1 * max(abs(min_range), abs(max_range))
        assert_almost_equal_with_err(quantized_out[idx].asnumpy(),
                                     expected.asnumpy(),
                                     rtol=0.1, atol=atol, etol=0.2)
    check_qsym_dummy_forward(qsym, batch, data_shape)
def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape):
    """Run ``qsym`` forward on random data with the given parameters.

    Args:
        qsym: quantized symbol to bind.
        qarg_params: argument parameters set on the module.
        qaux_params: auxiliary parameters set on the module.
        data_shape: shape bound to the ``data`` input.
    """
    module = Module(symbol=qsym, label_names=None,
                    context=mx.current_context())
    module.bind(for_training=False, data_shapes=[('data', data_shape)])
    module.set_params(qarg_params, qaux_params)

    # One uniform [-1, 1) array per bound data input.
    inputs = []
    for _, shape in module.data_shapes:
        inputs.append(mx.random.uniform(-1.0, 1.0, shape=shape))
    batch = mx.io.DataBatch(inputs, [])

    module.forward(batch, is_train=False)
    for out in module.get_outputs():
        out.wait_to_read()
def demo_net(sym, class_names, args):
    """Run detection on one image and print (optionally draw) the results.

    Args:
        sym: detection network symbol.
        class_names: sequence indexed by class id, used for printing.
        args: namespace with ``gpu``, ``image``, ``img_short_side``,
            ``img_long_side``, ``img_pixel_means``, ``img_pixel_stds``,
            ``params``, ``rcnn_bbox_stds``, ``rcnn_nms_thresh``,
            ``rcnn_conf_thresh``, ``vis_thresh`` and ``vis``.
    """
    print('called with args\n{}'.format(pprint.pformat(vars(args))))

    ctx = mx.gpu(int(args.gpu)) if args.gpu else mx.cpu(0)

    # Single test image -> data batch.
    im_tensor, im_info, im_orig = load_test(args.image,
                                            short=args.img_short_side,
                                            max_size=args.img_long_side,
                                            mean=args.img_pixel_means,
                                            std=args.img_pixel_stds)
    data_batch = generate_batch(im_tensor, im_info)

    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # Bind with the largest shape that can occur.
    long_side = args.img_long_side
    data_shapes = [('data', (1, 3, long_side, long_side)),
                   ('im_info', (1, 3))]
    check_shape(sym, data_shapes, arg_params, aux_params)

    mod = Module(sym, ['data', 'im_info'], None, context=ctx)
    mod.bind(data_shapes, None, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    mod.forward(data_batch)
    rois, scores, bbox_deltas = mod.get_outputs()
    rois = rois[:, 1:]
    scores = scores[0]
    bbox_deltas = bbox_deltas[0]
    im_info = im_info[0]

    det = im_detect(rois, scores, bbox_deltas, im_info,
                    bbox_stds=args.rcnn_bbox_stds,
                    nms_thresh=args.rcnn_nms_thresh,
                    conf_thresh=args.rcnn_conf_thresh)

    # Report non-background detections above the visualisation threshold.
    for [cls, conf, x1, y1, x2, y2] in det:
        if not (cls > 0 and conf > args.vis_thresh):
            continue
        print(class_names[int(cls)], conf, [x1, y1, x2, y2])

    if args.vis:
        vis_detection(im_orig, det, class_names, thresh=args.vis_thresh)
def test_net(sym, imdb, args):
    """Run detection over every image in ``imdb`` and evaluate the result.

    Args:
        sym: detection network symbol.
        imdb: dataset object providing ``roidb``, ``num_images``,
            ``num_classes`` and ``evaluate_detections``.
        args: namespace with ``gpu``, ``img_short_side``, ``img_long_side``,
            ``img_pixel_means``, ``img_pixel_stds``, ``params``,
            ``rcnn_bbox_stds``, ``rcnn_nms_thresh`` and ``rcnn_conf_thresh``.
    """
    logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

    ctx = mx.gpu(args.gpu)

    test_data = TestLoader(imdb.roidb,
                           batch_size=1,
                           short=args.img_short_side,
                           max_size=args.img_long_side,
                           mean=args.img_pixel_means,
                           std=args.img_pixel_stds)

    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # Bind with the largest shape that can occur.
    long_side = args.img_long_side
    data_shapes = [('data', (1, 3, long_side, long_side)),
                   ('im_info', (1, 3))]
    check_shape(sym, data_shapes, arg_params, aux_params)

    mod = Module(sym, ['data', 'im_info'], None, context=ctx)
    mod.bind(data_shapes, None, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # all_boxes[cls][image] = N x 5 array of detections in
    # (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(imdb.num_images)]
                 for _ in range(imdb.num_classes)]

    with tqdm(total=imdb.num_images) as pbar:
        for image_idx, data_batch in enumerate(test_data):
            im_info = data_batch.data[1][0]
            mod.forward(data_batch)
            rois, scores, bbox_deltas = mod.get_outputs()
            rois = rois[:, 1:]
            scores = scores[0]
            bbox_deltas = bbox_deltas[0]

            det = im_detect(rois, scores, bbox_deltas, im_info,
                            bbox_stds=args.rcnn_bbox_stds,
                            nms_thresh=args.rcnn_nms_thresh,
                            conf_thresh=args.rcnn_conf_thresh)

            # Class 0 is skipped; rows are regrouped as box + score.
            for cls_idx in range(1, imdb.num_classes):
                rows = np.where(det[:, 0] == cls_idx)[0]
                boxes_and_scores = np.concatenate(
                    (det[:, -4:], det[:, [1]]), axis=-1)
                all_boxes[cls_idx][image_idx] = boxes_and_scores[rows, :]

            pbar.update(data_batch.data[0].shape[0])

    imdb.evaluate_detections(all_boxes)
def check_quantize(sym, data_shape, check_conv=True):
    """Compare a uint8-quantized version of ``sym`` with its fp32 original.

    An FC layer and a ``SoftmaxOutput`` are appended to ``sym``.  The fp32
    module produces reference outputs; the ``MKLDNN`` partitioned symbol is
    quantized with naive calibration, post-processed with
    ``MKLDNN_POST_QUANTIZE`` and must reproduce the reference within
    ``atol=1``.  A dummy-parameter forward pass is run at the end.

    Args:
        sym: fp32 symbol under test.
        data_shape: shape bound to the ``data`` input.
        check_conv: when true, run ``check_qsym_calibrated`` on the result.
    """
    head = mx.sym.FullyConnected(data=sym, num_hidden=10, flatten=True,
                                 name='fc')
    sym = mx.sym.SoftmaxOutput(data=head, name='softmax')
    sym_sg = sym.get_backend_symbol("MKLDNN")
    label_shape = (data_shape[0], 10)

    mod = Module(symbol=sym)
    mod.bind(for_training=False,
             data_shapes=[('data', data_shape)],
             label_shapes=[('softmax_label', label_shape)])
    mod.init_params(mx.init.Normal(0.5))
    arg_params, aux_params = mod.get_params()

    samples = [
        mx.random.uniform(-1, 1, shape=shape, ctx=mx.current_context())
        for _, shape in mod.data_shapes
    ]
    batch = mx.io.DataBatch(samples, [])

    mod.forward(batch, is_train=False)
    for out in mod.get_outputs():
        out.wait_to_read()
    ref_out = mod.get_outputs()

    # On CPU the FC layer is left unquantized.
    excluded_sym_names = []
    if mx.current_context() == mx.cpu():
        excluded_sym_names += ['fc']

    calib_data = DummyIter(
        NDArrayIter(data=mx.nd.random.uniform(shape=data_shape)))

    def calib_layer(name):
        return name.endswith('_output')

    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym_sg, arg_params=arg_params, aux_params=aux_params,
        ctx=mx.current_context(),
        excluded_sym_names=excluded_sym_names,
        quantized_dtype='uint8', calib_mode='naive',
        calib_data=calib_data, calib_layer=calib_layer,
        calib_quantize_op=True, num_calib_examples=5)
    qsym = qsym.get_backend_symbol("MKLDNN_POST_QUANTIZE")

    if check_conv:
        check_qsym_calibrated(qsym)

    quantized_out = check_qsym_forward(qsym, qarg_params, qaux_params,
                                       batch, data_shape, label_shape)
    for position, reference in enumerate(ref_out):
        assert_almost_equal(reference.asnumpy(),
                            quantized_out[position].asnumpy(), atol=1)
    check_qsym_dummy_forward(qsym, batch, data_shape, label_shape)
class Predictor(object):
    """Thin inference wrapper around a bound ``Module``."""

    def __init__(self, symbol, data_names, label_names, context=mx.cpu(),
                 max_data_shapes=None, provide_data=None, provide_label=None,
                 arg_params=None, aux_params=None):
        """Create, bind (inference only) and initialise the module.

        ``max_data_shapes`` is accepted but not used by this implementation;
        a commented-out ``MutableModule`` variant previously consumed it.
        """
        module = Module(symbol, data_names, label_names, context=context)
        module.bind(provide_data, provide_label, for_training=False)
        module.init_params(arg_params=arg_params, aux_params=aux_params)
        self._mod = module

    def predict(self, data_batch):
        """Run one forward pass and return ``{output_name: output}``."""
        self._mod.forward(data_batch)
        names = self._mod.output_names
        values = self._mod.get_outputs()
        return dict(zip(names, values))

# TODO
def check_quantize_model(qdtype):
    """Quantize two fp32 test symbols and run the quantized graphs forward.

    The residual symbol and a multiple-output symbol are each quantized with
    ``calib_mode='none'`` and ``calib_mode='naive'``, once with ``pool0``
    excluded and once with it included.  After every quantization the
    parameters are checked and a forward pass is run; after calibrated
    quantization the symbol attributes are checked as well.

    The test is skipped (prints a message and returns) for native CPU, for
    int8 on MKLDNN and for uint8 on GPU.

    Args:
        qdtype: quantized dtype passed to ``quantize_model`` and expected as
            the ``out_type`` attribute of quantize nodes.
    """
    if is_test_for_native_cpu():
        print('skipped testing test_quantize_model_with_forward for native cpu since it is not supported yet')
        return
    elif qdtype == 'int8' and is_test_for_mkldnn():
        print('skipped testing test_quantize_model_with_forward for mkldnn cpu int8 since it is not supported yet')
        return
    elif qdtype == 'uint8' and is_test_for_gpu():
        print('skipped testing test_quantize_model_with_forward for gpu uint8 since it is not supported yet')
        return

    def check_params(params, qparams, qsym=None):
        # Without a quantized symbol the parameters must be unchanged;
        # otherwise they must match what ``_quantize_params`` produces.
        if qsym is None:
            assert len(params) == len(qparams)
            for k, v in params.items():
                assert k in qparams
                assert same(v.asnumpy(), qparams[k].asnumpy())
        else:
            qparams_ground_truth = mx.contrib.quant._quantize_params(
                qsym, params, th_dict={})
            assert len(qparams) == len(qparams_ground_truth)
            for k, v in qparams_ground_truth.items():
                assert k in qparams
                assert same(v.asnumpy(), qparams[k].asnumpy())

    def check_qsym_calibrated(qsym):
        # Every requantize node must carry both calibration bounds.
        attrs = qsym.attr_dict()
        for k, v in attrs.items():
            if 'requantize_' in k:
                assert 'min_calib_range' in v
                assert 'max_calib_range' in v

    def check_qsym_qdtype(qsym, qdtype):
        # Every quantize node must emit the requested dtype.
        attrs = qsym.attr_dict()
        for k, v in attrs.items():
            if '_quantize' in k:
                assert 'out_type' in v
                assert v['out_type'] == qdtype

    def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape,
                           label_shape):
        # Bind, load the quantized parameters and run one random batch.
        mod = mx.mod.Module(symbol=qsym, context=mx.current_context())
        mod.bind(for_training=False,
                 data_shapes=[('data', data_shape)],
                 label_shapes=[('softmax_label', label_shape)])
        mod.set_params(qarg_params, qaux_params)
        data = [mx.random.uniform(-1.0, 1.0, shape=shape)
                for _, shape in mod.data_shapes]
        batch = mx.io.DataBatch(data, [])
        mod.forward(batch, is_train=False)
        for output in mod.get_outputs():
            output.wait_to_read()

    sym = get_fp32_residual()
    batch_size = 4
    data_shape = (batch_size, 4, 10, 10)
    label_shape = (batch_size, 10)

    length = batch_size  # specify num of outputs from split op
    msym = get_fp32_sym_with_multiple_outputs(length)
    msym_label_shape = (length, 10)
    msym_data_shape = (length, 4, 4, 10, 10)

    for s, dshape, lshape in zip((sym, msym),
                                 (data_shape, msym_data_shape),
                                 (label_shape, msym_label_shape)):
        mod = Module(symbol=s)
        mod.bind(data_shapes=[('data', dshape)],
                 label_shapes=[('softmax_label', lshape)])
        mod.init_params()
        arg_params, aux_params = mod.get_params()

        excluded_names = []
        if mx.current_context() == mx.cpu():
            excluded_names += ['fc']
        excluded_names += ['concat']

        optional_names = ['pool0']
        for skip_optional_names in [False, True]:
            # First iteration also excludes the optional layers; the second
            # lets them be quantized.
            if skip_optional_names:
                excluded_sym_names = excluded_names
            else:
                excluded_sym_names = excluded_names + optional_names

            qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
                sym=s,
                arg_params=arg_params,
                aux_params=aux_params,
                excluded_sym_names=excluded_sym_names,
                ctx=mx.current_context(),
                quantized_dtype=qdtype,
                calib_mode='none')
            check_params(arg_params, qarg_params, qsym)
            check_params(aux_params, qaux_params)
            check_qsym_forward(qsym, qarg_params, qaux_params, dshape, lshape)

            calib_data = mx.nd.random.uniform(shape=dshape)
            calib_data = NDArrayIter(data=calib_data, batch_size=batch_size)
            calib_data = DummyIter(calib_data)
            qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
                sym=s,
                arg_params=arg_params,
                aux_params=aux_params,
                excluded_sym_names=excluded_sym_names,
                ctx=mx.current_context(),
                quantized_dtype=qdtype,
                calib_mode='naive',
                calib_data=calib_data,
                num_calib_examples=20)
            check_params(arg_params, qarg_params, qsym)
            check_params(aux_params, qaux_params)
            check_qsym_calibrated(qsym)
            check_qsym_qdtype(qsym, qdtype)
            check_qsym_forward(qsym, qarg_params, qaux_params, dshape, lshape)
def check_quantize_model(qdtype):
    """Quantize the fp32 residual symbol and run the quantized graph.

    Quantization is performed with ``calib_mode='none'`` and with
    ``calib_mode='naive'``.  Parameters are verified and a forward pass is
    run after each; the calibrated symbol's attributes are verified too.

    The test is skipped (prints a message and returns) for native CPU, for
    int8 on MKLDNN and for uint8 on GPU.

    Args:
        qdtype: quantized dtype passed to ``quantize_model`` and expected as
            the ``out_type`` attribute of quantize nodes.
    """
    if is_test_for_native_cpu():
        print('skipped testing quantized_residual_unit for native cpu since it is not supported yet')
        return
    if qdtype == 'int8' and is_test_for_mkldnn():
        print('skipped testing quantized_residual_unit for mkldnn cpu int8 since it is not supported yet')
        return
    if qdtype == 'uint8' and is_test_for_gpu():
        print('skipped testing quantized_residual_unit for gpu uint8 since it is not supported yet')
        return

    def check_params(params, qparams, qsym=None):
        # ``qsym is None`` means the parameters are expected untouched.
        if qsym is None:
            expected = params
        else:
            expected = mx.contrib.quant._quantize_params(qsym, params)
        assert len(qparams) == len(expected)
        for key, value in expected.items():
            assert key in qparams
            assert same(value.asnumpy(), qparams[key].asnumpy())

    def check_qsym_calibrated(qsym):
        for node_name, node_attrs in qsym.attr_dict().items():
            if 'requantize_' in node_name:
                assert 'min_calib_range' in node_attrs
                assert 'max_calib_range' in node_attrs

    def check_qsym_qdtype(qsym, qdtype):
        for node_name, node_attrs in qsym.attr_dict().items():
            if '_quantize' in node_name:
                assert 'out_type' in node_attrs
                assert node_attrs['out_type'] == qdtype

    def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape,
                           label_shape):
        qmod = mx.mod.Module(symbol=qsym, context=mx.current_context())
        qmod.bind(for_training=False,
                  data_shapes=[('data', data_shape)],
                  label_shapes=[('softmax_label', label_shape)])
        qmod.set_params(qarg_params, qaux_params)
        inputs = []
        for _, shape in qmod.data_shapes:
            inputs.append(mx.random.uniform(-1.0, 1.0, shape=shape))
        batch = mx.io.DataBatch(inputs, [])
        qmod.forward(batch, is_train=False)
        for out in qmod.get_outputs():
            out.wait_to_read()

    sym = get_fp32_residual()
    mod = Module(symbol=sym)
    batch_size = 4
    data_shape = (batch_size, 4, 10, 10)
    label_shape = (batch_size, 10)
    mod.bind(data_shapes=[('data', data_shape)],
             label_shapes=[('softmax_label', label_shape)])
    mod.init_params()
    arg_params, aux_params = mod.get_params()

    # On CPU the FC layer is left unquantized.
    excluded_sym_names = []
    if mx.current_context() == mx.cpu():
        excluded_sym_names += ['fc']

    # Pass 1: no calibration.
    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym,
        arg_params=arg_params,
        aux_params=aux_params,
        excluded_sym_names=excluded_sym_names,
        ctx=mx.current_context(),
        quantized_dtype=qdtype,
        calib_mode='none')
    check_params(arg_params, qarg_params, qsym)
    check_params(aux_params, qaux_params)
    check_qsym_forward(qsym, qarg_params, qaux_params, data_shape,
                       label_shape)

    # Pass 2: naive calibration on random data.
    calib_data = mx.nd.random.uniform(shape=data_shape)
    calib_data = NDArrayIter(data=calib_data)
    calib_data = DummyIter(calib_data)
    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym,
        arg_params=arg_params,
        aux_params=aux_params,
        excluded_sym_names=excluded_sym_names,
        ctx=mx.current_context(),
        quantized_dtype=qdtype,
        calib_mode='naive',
        calib_data=calib_data,
        num_calib_examples=20)
    check_params(arg_params, qarg_params, qsym)
    check_params(aux_params, qaux_params)
    check_qsym_calibrated(qsym)
    check_qsym_qdtype(qsym, qdtype)
    check_qsym_forward(qsym, qarg_params, qaux_params, data_shape,
                       label_shape)
def fit(self, train_data, eval_data=None, eval_metric='acc', validate_metric=None, work_load_list=None, epoch_end_callback=None, batch_end_callback=None, fixed_param_prefix=None, initializer=None, arg_params=None, aux_params=None, allow_missing=False, optimizer=None, optimizer_params=None, begin_epoch=0, num_epoch=None, kvstore='device'):
    """Train ``self.module`` and optionally score it after every epoch.

    The module is bound for training, its parameters and optimizer are
    initialised, and then each epoch iterates ``train_data`` once while
    recording per-iteration timings that are handed to
    ``batch_end_callback`` through ``BatchEndParam``.

    Args:
        train_data: training iterator; ``reset()`` is called on it at the
            end of every epoch.
        eval_data: optional validation iterator scored after each epoch.
        eval_metric: training metric, or a value accepted by
            ``metric.create``.
        validate_metric: metric used for scoring; defaults to
            ``eval_metric`` as passed in.
        work_load_list: not used in this method.
        epoch_end_callback: callable(s) invoked as
            ``callback(epoch, self.symbol, arg_params, aux_params)`` when
            ``kvstore.rank == 0``.
        batch_end_callback: callable(s) invoked with a ``BatchEndParam``
            after every batch.
        fixed_param_prefix: not used in this method.
        initializer, arg_params, aux_params, allow_missing: forwarded to
            ``self.module.init_params``.
        optimizer, optimizer_params: forwarded to
            ``self.module.init_optimizer``.
        begin_epoch: first epoch index.
        num_epoch: end of the epoch range (exclusive).
            NOTE(review): the default ``None`` is passed straight to
            ``range`` — callers apparently must supply a value.
        kvstore: forwarded to ``init_optimizer``.
            NOTE(review): ``kvstore.rank`` is read below, so the string
            default ``'device'`` does not look usable once a callback is
            set — confirm callers pass a KVStore object.
    """
    self.module.bind(data_shapes=self.data_shapes, label_shapes=self.label_shapes, for_training=True)
    self.module.init_params(initializer=initializer, arg_params=arg_params, aux_params=aux_params, allow_missing=allow_missing)
    self.module.init_optimizer(kvstore=kvstore, optimizer=optimizer, optimizer_params=optimizer_params)
    if validate_metric is None:
        validate_metric = eval_metric
    if not isinstance(eval_metric, metric.EvalMetric):
        eval_metric = metric.create(eval_metric)
    # Running iteration counter across all epochs (reported as total_iter).
    temp_count = 0

    # # test model size by saving params of model
    # arg_params, aux_params = self.module.get_params()
    # for callback in _as_list(epoch_end_callback):
    #     callback(0, self.symbol, arg_params, aux_params)
    # raise NotImplementedError

    # training loop
    for epoch in range(begin_epoch, num_epoch):
        # Per-epoch timing accumulators; all are measured from the start
        # of the iteration, so each later one includes the earlier phases.
        train_time = AverageMeter()
        kvstore_sync_time = AverageMeter()
        get_data_time = AverageMeter()
        iter_total_time = AverageMeter()
        tic = time.time()
        eval_metric.reset()
        nbatch = 0
        data_iter = iter(train_data)
        end_of_batch = False
        # The next batch is fetched one step ahead of the one being trained.
        next_data_batch = next(data_iter)
        while not end_of_batch:
            start_time = time.time()
            data_batch = next_data_batch
            self.module.forward(data_batch, is_train=True)
            self.module.backward()
            # ndarray.waitall()
            train_time.update(time.time() - start_time)
            self.module.update()
            # ndarray.waitall()
            kvstore_sync_time.update(time.time() - start_time)
            try:
                next_data_batch = next(data_iter)
            except StopIteration:
                end_of_batch = True
            # ndarray.waitall()
            get_data_time.update(time.time() - start_time)
            if isinstance(data_batch, list):
                self.module.update_metric(eval_metric, [db.label for db in data_batch], pre_sliced=True)
            else:
                self.module.update_metric(eval_metric, data_batch.label)
            # ndarray.waitall()
            iter_total_time.update(time.time() - start_time)
            if batch_end_callback is not None:
                # batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch,
                #                                  eval_metric=eval_metric,
                #                                  locals=locals())
                batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch, eval_metric=eval_metric, locals=locals(), rank=kvstore.rank, total_iter=temp_count, cur_data_time=get_data_time.val, avg_data_time=get_data_time.avg, cur_batch_time=train_time.val, avg_batch_time=train_time.avg, cur_kvstore_sync_time=kvstore_sync_time.val, avg_kvstore_sync_time=kvstore_sync_time.avg, cur_iter_total_time=iter_total_time.val, avg_iter_total_time=iter_total_time.avg)
                for callback in _as_list(batch_end_callback):
                    callback(batch_end_params)
            # NOTE(review): the source's indentation was lost; the counters
            # are assumed to advance once per batch regardless of callbacks.
            nbatch += 1
            temp_count += 1
        for name, val in eval_metric.get_name_value():
            self.logger.info('Epoch[%d] Train-%s=%f', epoch, name, val)
        toc = time.time()
        self.logger.info('Epoch[%d] Time cost=%.3f', epoch, (toc - tic))
        # Read the parameters back and set them again before callbacks run
        # (presumably to synchronise them across devices — confirm).
        arg_params, aux_params = self.module.get_params()
        self.module.set_params(arg_params, aux_params)
        if epoch_end_callback is not None and kvstore.rank == 0:
            for callback in _as_list(epoch_end_callback):
                callback(epoch, self.symbol, arg_params, aux_params)
        if eval_data:
            if self.config.network == 'mobilenet_int8_foldbn':
                # for fold bn to create inference symbol
                total_params_path = "./model/%s-%04d.params" % (self.config.model_prefix, epoch + 1)
                # total_params_path = "./model/mobilenet_flodbn_0904/mobilenet_int8_flodbn_imagenet_retrain_80_pertensor-fold-0100.params"
                # _, arg_params, aux_params = mx.model.load_checkpoint('./model/mobilenet_flodbn_0904/mobilenet_int8_flodbn_imagenet_retrain_80_pertensor-fold', 100)
                import os
                assert os.path.exists(total_params_path), "please provide the correct total_params_path for foldbn eval"
                # NOTE(review): eval() on a config string resolves the
                # network builder by name; only safe with trusted config.
                eval_sym = eval(self.config.network)(num_classes=self.config.num_classes, quant_mod=self.config.quant_mod, delay_quant=self.config.delay_quant, is_weight_perchannel=self.config.is_weight_perchannel, total_params_path=total_params_path, quantize_flag=self.config.quantize_flag)
                # A separate inference module is built from the fold-BN symbol
                # and initialised with the current training parameters.
                eval_module = Module(symbol=eval_sym, data_names=self.data_names, label_names=self.label_names, logger=self.logger, context=self.context, work_load_list=self.work_load_list, fixed_param_names=self.fixed_param_names)
                eval_module.bind(data_shapes=self.data_shapes, label_shapes=self.label_shapes, for_training=False)
                eval_module.init_params(initializer=initializer, arg_params=arg_params, aux_params=aux_params)
                res = eval_module.score(eval_data, validate_metric, score_end_callback=None, batch_end_callback=None, reset=True, epoch=epoch)
            else:
                res = self.module.score(eval_data, validate_metric, score_end_callback=None, batch_end_callback=None, reset=True, epoch=epoch)
            for name, val in res:
                self.logger.info('Epoch[%d] Validation-%s=%f', epoch, name, val)
        # Rewind the training iterator for the next epoch.
        train_data.reset()
def dummy_data(ctx, batch_size=1):
    """Create random stand-in inputs on ``ctx``.

    Returns a two-element list of uniformly sampled NDArrays with shapes
    ``[batch_size, 3, 600, 600]`` and ``[batch_size]``, in that order.
    """
    return [
        mx.nd.random.uniform(shape=shape, ctx=ctx)
        for shape in ([batch_size, 3, 600, 600], [batch_size])
    ]


# Script: apply the Gluon VGG block to a symbolic input, load parameters for
# it, bind the result as an inference Module and save a checkpoint.
data_names = ['data']
label_names = None
data_shapes = [('data', (1, 3, 1000, 600))]
label_shapes = None
data = mx.symbol.Variable(name="data")
GLUON_LAYER = VGGConvBlock(isBin=True, step=4)
GLUON_LAYER.hybridize()
# Calling the block on a Symbol yields the symbolic graph used below.
conv_feat = GLUON_LAYER(data)
# NOTE(review): absolute, machine-specific path — confirm before reuse.
arg_params, aux_params = load_param(
    "/home/skutukov/work/mxnet_fasterrcnn_binary/convert/temp-0000.params",
    ctx=mx.cpu())
check_shape(conv_feat, data_shapes, arg_params, aux_params)
mod = Module(conv_feat, data_names, label_names, context=mx.cpu())
mod.bind(data_shapes, label_shapes, for_training=False)
mod.init_params(arg_params=arg_params, aux_params=aux_params)
# data1 is only referenced by the commented-out forward call below.
data1, _ = dummy_data(ctx=mx.cpu())
# mod.forward(data1)
mod.save_checkpoint('test_vgg', epoch=0)
def demo_net(sym, class_names, args):
    """Run the detector on one image, print the detections and draw them.

    Parameters
    ----------
    sym : Symbol
        Network symbol; expected to produce three outputs
        (rois, scores, bbox_deltas).
    class_names : sequence
        Indexed by the integer class id of each detection.
    args : namespace
        Reads ``gpu``, ``image``, ``img_short_side``, ``img_long_side``,
        ``img_pixel_means``, ``img_pixel_stds``, ``params``,
        ``rcnn_bbox_stds``, ``rcnn_nms_thresh``, ``rcnn_conf_thresh`` and
        ``vis_thresh``.
    """
    # print config
    print('called with args\n{}'.format(pprint.pformat(vars(args))))

    # setup context
    if args.gpu:
        ctx = mx.gpu(int(args.gpu))
    else:
        ctx = mx.cpu(0)

    # load single test
    im_tensor, im_info, im_orig = load_test(args.image,
                                            short=args.img_short_side,
                                            max_size=args.img_long_side,
                                            mean=args.img_pixel_means,
                                            std=args.img_pixel_stds)

    # generate data batch
    data_batch = generate_batch(im_tensor, im_info)

    # load params
    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # produce shape max possible
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)),
                   ('im_info', (1, 3))]
    label_shapes = None

    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)

    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # forward
    start = time.time()
    mod.forward(data_batch)
    outputs = mod.get_outputs()
    # MXNet executes asynchronously: block until every output is computed so
    # the elapsed time below covers the forward pass, not just its scheduling.
    for output in outputs:
        output.wait_to_read()
    rois, scores, bbox_deltas = outputs
    print("time=", time.time() - start)

    rois = rois[:, 1:]  # drop the first column of each roi row
    scores = scores[0]
    bbox_deltas = bbox_deltas[0]
    im_info = im_info[0]

    # decode detection
    det = im_detect(rois,
                    scores,
                    bbox_deltas,
                    im_info,
                    bbox_stds=args.rcnn_bbox_stds,
                    nms_thresh=args.rcnn_nms_thresh,
                    conf_thresh=args.rcnn_conf_thresh)

    # print out; each row unpacks as (cls, conf, x_c, y_c, w, h, theta)
    for [cls, conf, x_c, y_c, w, h, theta] in det:
        if cls > 0 and conf > args.vis_thresh:
            print('class_name=', class_names[int(cls)], 'conf=', conf,
                  [x_c, y_c, w, h, theta])

    # Drawing is unconditional and uses a fixed 0.95 argument, not
    # args.vis_thresh.
    draw_rotate_box_cv(det, class_names, 0.95)
def quantize_model(sym,
                   arg_params,
                   aux_params,
                   data_names=('data', ),
                   label_names=('softmax_label', ),
                   ctx=cpu(),
                   excluded_sym_names=None,
                   calib_mode='entropy',
                   calib_data=None,
                   num_calib_examples=None,
                   calib_layer=None,
                   quantized_dtype='int8',
                   logger=logging):
    """User-level API that turns an FP32 model into a quantized one, with or
    without calibration.

    The backend quantized operators are only enabled for Linux systems; do not
    run inference with the quantized models on Windows for now.
    The quantization follows TensorFlow's approach:
    https://www.tensorflow.org/performance/quantization.
    The calibration borrows the idea of Nvidia's 8-bit Inference with TensorRT:
    http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
    adapted to MXNet.

    Parameters
    ----------
    sym : str or Symbol
        Structure of the FP32 network.
    arg_params : dict
        Maps names to `NDArray`.
    aux_params : dict
        Maps names to `NDArray`.
    data_names : a list of strs
        Data names used to build the Module that runs forward propagation on
        the calibration dataset.
    label_names : a list of strs
        Label names used to build the Module that runs forward propagation on
        the calibration dataset.
    ctx : Context
        Device on which the calibration forward passes run. Only a single
        context is supported.
    excluded_sym_names : list of strings
        Names of the symbols that must not be quantized.
    calib_mode : str
        'none': no calibration; requantization thresholds are computed at
        runtime with min and max operators. Models produced this way are
        normally 10-20% slower at inference than calibrated ones.
        'naive': the min and max of the layer outputs observed on the
        calibration dataset are used directly as thresholds.
        'entropy' (default): thresholds are chosen to minimize the KL
        divergence between the FP32 and quantized layer output distributions
        on the calibration dataset.
    calib_data : DataIter
        Iterator over the calibration dataset.
    num_calib_examples : int or None
        Upper bound on the number of examples used for calibration. When not
        provided, the whole calibration dataset is used.
    calib_layer : function
        Receives a layer output name and returns True or False to decide
        whether that output's statistics are collected. When not provided,
        all layer outputs that need requantization are collected.
    quantized_dtype : str
        Quantized destination type for input data: 'int8', 'uint8' or 'auto'.
        'auto' selects the output type from the calibration result.
        Default is 'int8'.
    logger : Object
        Logging object used to report progress.

    Returns
    -------
    tuple
        (quantized symbol, quantized arg_params, aux_params).
    """
    if excluded_sym_names is None:
        excluded_sym_names = []
    if not isinstance(excluded_sym_names, list):
        raise ValueError(
            'excluded_sym_names must be a list of strings representing'
            ' the names of the symbols that will not be quantized,'
            ' while received type %s' % str(type(excluded_sym_names)))

    logger.info('Quantizing symbol')
    supported_dtypes = ('int8', 'uint8', 'auto')
    if quantized_dtype not in supported_dtypes:
        raise ValueError('unknown quantized_dtype %s received,'
                         ' expected `int8`, `uint8` or `auto`' %
                         quantized_dtype)
    param_names = list(arg_params.keys())
    quantized_sym = _quantize_symbol(sym,
                                     excluded_symbols=excluded_sym_names,
                                     offline_params=param_names,
                                     quantized_dtype=quantized_dtype)

    thresholds = {}
    skip_calibration = calib_mode is None or calib_mode == 'none'
    if not skip_calibration:
        # Validate the calibration inputs before building the FP32 module.
        if not isinstance(ctx, Context):
            raise ValueError(
                'currently only supports single ctx, while received %s' %
                str(ctx))
        if calib_data is None:
            raise ValueError('calib_data must be provided when calib_mode=%s' %
                             calib_mode)
        if not isinstance(calib_data, DataIter):
            raise ValueError(
                'calib_data must be of DataIter type when calib_mode=%s,'
                ' while received type %s' %
                (calib_mode, str(type(calib_data))))

        calib_module = Module(symbol=sym,
                              data_names=data_names,
                              label_names=label_names,
                              context=ctx)
        # Label shapes are only passed when the iterator provides labels.
        has_labels = len(calib_data.provide_label) > 0
        if has_labels:
            calib_module.bind(for_training=False,
                              data_shapes=calib_data.provide_data,
                              label_shapes=calib_data.provide_label)
        else:
            calib_module.bind(for_training=False,
                              data_shapes=calib_data.provide_data)
        calib_module.set_params(arg_params, aux_params)

        if calib_mode == 'entropy':
            layer_outputs, num_examples = _collect_layer_outputs(
                calib_module,
                calib_data,
                include_layer=calib_layer,
                max_num_examples=num_calib_examples,
                logger=logger)
            logger.info(
                'Collected layer outputs from FP32 model using %d examples' %
                num_examples)
            logger.info('Calculating optimal thresholds for quantization')
            thresholds = _get_optimal_thresholds(layer_outputs,
                                                 quantized_dtype,
                                                 logger=logger)
        elif calib_mode == 'naive':
            thresholds, num_examples = _collect_layer_output_min_max(
                calib_module,
                calib_data,
                include_layer=calib_layer,
                max_num_examples=num_calib_examples,
                logger=logger)
            logger.info(
                'Collected layer output min/max values from FP32 model using %d examples'
                % num_examples)
        else:
            raise ValueError('unknown calibration mode %s received,'
                             ' expected `none`, `naive`, or `entropy`' %
                             calib_mode)

        logger.info('Calibrating quantized symbol')
        quantized_sym = _calibrate_quantized_sym(quantized_sym, thresholds)

    logger.info('Quantizing parameters')
    quantized_args = _quantize_params(quantized_sym, arg_params, thresholds)
    return quantized_sym, quantized_args, aux_params
raise ValueError('unsupported model %s' % args.model) data = mx.io.ImageRecordIter(path_imgrec=dataset_path, label_width=1, preprocess_threads=data_nthreads, batch_size=batch_size, data_shape=data_shape, label_name=label_name, rand_crop=False, rand_mirror=False, shuffle=shuffle_dataset, shuffle_chunk_seed=shuffle_seed, seed=shuffle_seed, **mean_args) mod = Module(symbol=convnet_code_sym, label_names=None, context=ctx) mod.bind(for_training=False, data_shapes=data.provide_data) mod.set_params(arg_params, aux_params) num_images = 0 convnet_codes = None # N * 1000 resized_images = None # NCHW labels = None for batch in data: if num_images >= args.max_num_images: break mod.forward(data_batch=batch, is_train=False) fc_output = mod.get_outputs()[0].flatten().copyto(mx.cpu(0)) num_images += batch_size fc_output.wait_to_read() if convnet_codes is None: convnet_codes = fc_output else:
class Solver(object):
    """Training driver around a single ``Module``, with optional teacher
    modules whose outputs are appended to each batch's labels."""

    def __init__(self,
                 symbol,
                 data_names,
                 label_names,
                 data_shapes,
                 label_shapes,
                 logger=logging,
                 context=mx.cpu(),
                 work_load_list=None,
                 fixed_param_names=None):
        """Store the configuration and build the underlying ``Module``.

        When ``logger`` is None the root logger is used and set to INFO.
        All other arguments are stored on the instance and forwarded to
        ``Module``; ``data_shapes`` / ``label_shapes`` are used later by
        ``fit`` when binding.
        """
        self.symbol = symbol
        self.data_names = data_names
        self.label_names = label_names
        self.data_shapes = data_shapes
        self.label_shapes = label_shapes
        self.context = context
        self.work_load_list = work_load_list
        self.fixed_param_names = fixed_param_names
        if logger is None:
            logger = logging.getLogger()
            logger.setLevel(logging.INFO)
        self.logger = logger
        self.module = Module(symbol=self.symbol,
                             data_names=self.data_names,
                             label_names=self.label_names,
                             logger=self.logger,
                             context=self.context,
                             work_load_list=self.work_load_list,
                             fixed_param_names=self.fixed_param_names)

    def fit(self,
            train_data,
            eval_data=None,
            eval_metric='acc',
            validate_metric=None,
            work_load_list=None,
            epoch_end_callback=None,
            batch_end_callback=None,
            fixed_param_prefix=None,
            initializer=None,
            arg_params=None,
            aux_params=None,
            allow_missing=False,
            optimizer=None,
            optimizer_params=None,
            begin_epoch=0,
            num_epoch=None,
            kvstore='device',
            teacher_modules=None):
        """Bind, initialize and train ``self.module`` on ``train_data``.

        Runs epochs ``begin_epoch`` .. ``num_epoch - 1``. ``num_epoch`` has to
        be supplied: it is passed straight to ``range``. After each epoch the
        training metric and elapsed time are logged, the epoch-end callbacks
        run, and, when ``eval_data`` is truthy, the module is scored with
        ``validate_metric`` (defaults to ``eval_metric``).

        ``teacher_modules`` may be a single module, a list of modules, or
        None; anything that is not a ``list`` is wrapped into a one-element
        list.

        ``work_load_list`` and ``fixed_param_prefix`` are accepted but not
        read by this method.
        """
        if type(teacher_modules) is not list:
            teacher_modules = [teacher_modules]
        self.module.bind(data_shapes=self.data_shapes,
                         label_shapes=self.label_shapes,
                         for_training=True)
        self.module.init_params(initializer=initializer,
                                arg_params=arg_params,
                                aux_params=aux_params,
                                allow_missing=allow_missing)
        self.module.init_optimizer(kvstore=kvstore,
                                   optimizer=optimizer,
                                   optimizer_params=optimizer_params)
        if validate_metric is None:
            validate_metric = eval_metric
        if not isinstance(eval_metric, metric.EvalMetric):
            eval_metric = metric.create(eval_metric)
        # training loop
        for epoch in range(begin_epoch, num_epoch):
            tic = time.time()
            eval_metric.reset()
            nbatch = 0
            data_iter = iter(train_data)
            end_of_batch = False
            # The next batch is fetched one step ahead so the end of the
            # iterator is detected before the loop condition is re-checked.
            next_data_batch = next(data_iter)
            while not end_of_batch:
                data_batch = next_data_batch
                # Only the first entry is checked for None (the wrapped
                # default); every teacher then contributes its outputs.
                if teacher_modules[0] is not None:
                    for teacher_module in teacher_modules:
                        teacher_module.forward(data_batch=data_batch,
                                               is_train=True)
                        transfer_label = teacher_module.get_outputs()
                        # Teacher outputs are added onto the batch labels
                        # with `+`, growing them with each teacher.
                        data_batch.label = data_batch.label + transfer_label
                self.module.forward(data_batch, is_train=True)
                self.module.backward()
                self.module.update()
                try:
                    next_data_batch = next(data_iter)
                except StopIteration:
                    end_of_batch = True
                self.module.update_metric(eval_metric, data_batch.label)
                if batch_end_callback is not None:
                    # locals() exposes this method's local names to the
                    # callbacks, so renaming locals here is observable.
                    batch_end_params = BatchEndParam(epoch=epoch,
                                                     nbatch=nbatch,
                                                     eval_metric=eval_metric,
                                                     locals=locals())
                    for callback in _as_list(batch_end_callback):
                        callback(batch_end_params)
                nbatch += 1
            for name, val in eval_metric.get_name_value():
                self.logger.info('Epoch[%d] Train-%s=%f', epoch, name, val)
            toc = time.time()
            self.logger.info('Epoch[%d] Time cost=%.3f', epoch, (toc - tic))
            # Parameters are read from the module and written straight back
            # before being handed to the epoch-end callbacks.
            arg_params, aux_params = self.module.get_params()
            self.module.set_params(arg_params, aux_params)
            if epoch_end_callback is not None:
                for callback in _as_list(epoch_end_callback):
                    callback(epoch, self.symbol, arg_params, aux_params)
            if eval_data:
                res = self.module.score(eval_data,
                                        validate_metric,
                                        score_end_callback=None,
                                        batch_end_callback=None,
                                        reset=True,
                                        epoch=epoch)
                for name, val in res:
                    self.logger.info('Epoch[%d] Validation-%s=%f', epoch, name,
                                     val)
            # Rewind the training iterator for the next epoch.
            train_data.reset()
def demo_net(sym, class_names, args):
    """Bind the detector once, then run it over entries of the VOC2007 test
    list, printing (and optionally visualizing) the detections.

    Parameters
    ----------
    sym : Symbol
        Network symbol; expected to produce three outputs
        (rois, scores, bbox_deltas).
    class_names : sequence
        Indexed by the integer class id of each detection.
    args : namespace
        Reads ``gpu``, ``params``, ``image`` (used as the image directory),
        ``img_short_side``, ``img_long_side``, ``img_pixel_means``,
        ``img_pixel_stds``, ``rcnn_bbox_stds``, ``rcnn_nms_thresh``,
        ``rcnn_conf_thresh``, ``vis_thresh`` and ``vis``.
    """
    # setup context
    if args.gpu:
        ctx = mx.gpu(int(args.gpu))
    else:
        ctx = mx.cpu(0)

    # print config
    print('called with args\n{}'.format(pprint.pformat(vars(args))))

    # load params
    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # produce shape max possible
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)),
                   ('im_info', (1, 3))]
    label_shapes = None

    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)

    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # Read the image id list up front so the file handle is closed even if
    # detection fails part-way through.
    with open(
            "/home/skutukov/datasets/VOCdevkit/VOC2007/ImageSets/Main/test.txt",
            "r") as list_file:
        image_ids = list_file.readlines()

    for image_id in tqdm.tqdm(image_ids):
        path = os.path.join(args.image, str(image_id).strip() + '.jpg')
        # NOTE(review): the path built above is immediately replaced by a
        # hard-coded file and the loop stops after one pass (see `break`
        # below) — looks like debugging leftovers; confirm before removing.
        path = '/home/skutukov/Pictures/demo.jpg'

        # load single test
        im_tensor, im_info, im_orig = load_test(path,
                                                short=args.img_short_side,
                                                max_size=args.img_long_side,
                                                mean=args.img_pixel_means,
                                                std=args.img_pixel_stds,
                                                ctx=ctx)

        # generate data batch
        data_batch = generate_batch(im_tensor, im_info)

        # forward
        mod.forward(data_batch)
        rois, scores, bbox_deltas = mod.get_outputs()
        rois = rois[:, 1:]  # drop the first column of each roi row
        scores = scores[0]
        bbox_deltas = bbox_deltas[0]
        im_info = im_info[0]

        # decode detection
        det = im_detect(rois,
                        scores,
                        bbox_deltas,
                        im_info,
                        bbox_stds=args.rcnn_bbox_stds,
                        nms_thresh=args.rcnn_nms_thresh,
                        conf_thresh=args.rcnn_conf_thresh)

        # print out
        for [cls, conf, x1, y1, x2, y2] in det:
            if cls > 0 and conf > args.vis_thresh:
                print(class_names[int(cls)], conf, [x1, y1, x2, y2])

        # if vis
        if args.vis:
            # The raw list entry (newline included) is forwarded unchanged.
            vis_detection(im_orig,
                          det,
                          class_names,
                          thresh=args.vis_thresh,
                          file=image_id)
        break
def test_net(sym, imdb, args, config):
    """Run detection over every image in ``imdb`` and evaluate the result.

    Parameters
    ----------
    sym : Symbol
        Network symbol; expected to produce three outputs
        (rois, scores, bbox_deltas).
    imdb : dataset object
        Provides ``roidb``, ``num_images``, ``num_classes`` and
        ``evaluate_detections``.
    args : namespace
        Reads ``prefix``, ``gpu``, ``img_short_side``, ``img_long_side``,
        ``params``, ``rcnn_bbox_stds``, ``rcnn_nms_thresh`` and
        ``rcnn_conf_thresh``.
    config : object
        ``config.transform`` supplies ``img_pixel_means`` and
        ``img_pixel_stds``.
    """
    # Also write the log to <prefix>/test.log.
    logger.addHandler(
        logging.FileHandler("{0}/{1}".format(args.prefix, 'test.log')))

    # print config
    logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

    # setup context
    ctx = mx.gpu(args.gpu)

    # load testing data
    test_data = TestLoader(imdb.roidb,
                           batch_size=1,
                           short=args.img_short_side,
                           max_size=args.img_long_side,
                           mean=config.transform['img_pixel_means'],
                           std=config.transform['img_pixel_stds'])

    # load params
    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # produce shape max possible
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)),
                   ('im_info', (1, 3))]
    label_shapes = None

    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)

    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in
    #   (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(imdb.num_images)]
                 for _ in range(imdb.num_classes)]

    # start detection
    with tqdm(total=imdb.num_images) as pbar:
        for i, data_batch in enumerate(test_data):
            # forward
            im_info = data_batch.data[1][0]
            mod.forward(data_batch)
            rois, scores, bbox_deltas = mod.get_outputs()
            rois = rois[:, 1:]  # drop the first column of each roi row
            scores = scores[0]
            bbox_deltas = bbox_deltas[0]

            det = im_detect(rois,
                            scores,
                            bbox_deltas,
                            im_info,
                            bbox_stds=args.rcnn_bbox_stds,
                            nms_thresh=args.rcnn_nms_thresh,
                            conf_thresh=args.rcnn_conf_thresh)

            # Last four columns followed by column 1 of every detection.
            # This does not depend on the class, so build it once per image
            # instead of once per class.
            boxes_with_scores = np.concatenate((det[:, -4:], det[:, [1]]),
                                               axis=-1)
            # Class 0 is skipped; its slots stay empty lists.
            for j in range(1, imdb.num_classes):
                indexes = np.where(det[:, 0] == j)[0]
                all_boxes[j][i] = boxes_with_scores[indexes, :]
            pbar.update(data_batch.data[0].shape[0])

    # evaluate model
    imdb.evaluate_detections(all_boxes)
def check_quantize_model(qdtype):
    """Check ``quantize_model`` for ``qdtype`` on a single-output and a
    multi-output FP32 symbol, first without calibration and then with naive
    calibration."""
    # Bail out on backend/dtype combinations reported as unsupported.
    if is_test_for_native_cpu():
        print(
            'skipped testing quantize_model for native cpu since it is not supported yet'
        )
        return
    if qdtype == 'int8' and is_test_for_mkldnn():
        print(
            'skipped testing quantize_model for mkldnn cpu int8 since it is not supported yet'
        )
        return
    if qdtype == 'uint8' and is_test_for_gpu():
        print(
            'skipped testing quantize_model for gpu uint8 since it is not supported yet'
        )
        return

    def check_params(params, qparams, qsym=None):
        """Compare ``qparams`` with ``params`` (or with ``params`` quantized
        against ``qsym`` when a symbol is given)."""
        if qsym is None:
            expected = params
        else:
            expected = mx.contrib.quant._quantize_params(qsym,
                                                         params,
                                                         th_dict={})
        assert len(expected) == len(qparams)
        for key, value in expected.items():
            assert key in qparams
            assert same(value.asnumpy(), qparams[key].asnumpy())

    def check_qsym_calibrated(qsym):
        """Every requantize node must carry both calibration range attrs."""
        for node_name, node_attrs in qsym.attr_dict().items():
            if 'requantize_' in node_name:
                assert 'min_calib_range' in node_attrs
                assert 'max_calib_range' in node_attrs

    def check_qsym_qdtype(qsym, qdtype):
        """Every quantize node must declare ``qdtype`` as its out_type."""
        for node_name, node_attrs in qsym.attr_dict().items():
            if '_quantize' in node_name:
                assert 'out_type' in node_attrs
                assert node_attrs['out_type'] == qdtype

    sym = get_fp32_sym()
    batch_size = 4
    label_shape = (batch_size, 10)
    data_shape = (batch_size, 4, 10, 10)

    length = batch_size  # specify num of outputs from split op
    msym = get_fp32_sym_with_multiple_outputs(length)
    msym_label_shape = (length, 10)
    msym_data_shape = (length, 4, 4, 10, 10)

    cases = [
        (sym, data_shape, label_shape),
        (msym, msym_data_shape, msym_label_shape),
    ]
    for s, dshape, lshape in cases:
        mod = Module(symbol=s)
        mod.bind(data_shapes=[('data', dshape)],
                 label_shapes=[('softmax_label', lshape)])
        mod.init_params()
        arg_params, aux_params = mod.get_params()

        # Pass 1: no calibration.
        qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
            sym=s,
            arg_params=arg_params,
            aux_params=aux_params,
            ctx=mx.current_context(),
            quantized_dtype=qdtype,
            calib_mode='none')
        check_params(arg_params, qarg_params, qsym)
        check_params(aux_params, qaux_params)

        # Pass 2: naive calibration on random data.
        calib_data = DummyIter(
            NDArrayIter(data=mx.nd.random.uniform(shape=dshape),
                        batch_size=batch_size))
        qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
            sym=s,
            arg_params=arg_params,
            aux_params=aux_params,
            ctx=mx.current_context(),
            quantized_dtype=qdtype,
            calib_mode='naive',
            calib_data=calib_data,
            num_calib_examples=20)
        check_params(arg_params, qarg_params, qsym)
        check_params(aux_params, qaux_params)
        check_qsym_calibrated(qsym)
        check_qsym_qdtype(qsym, qdtype)
def demo_net(sym, class_names, args, result_path):
    """Detect objects in one image and append the boxes to a CSV file.

    Parameters
    ----------
    sym : Symbol
        Network symbol; expected to produce three outputs
        (rois, scores, bbox_deltas).
    class_names : sequence
        Indexed by the integer class id of each detection.
    args : namespace
        Reads ``gpu``, ``image``, ``img_short_side``, ``img_long_side``,
        ``img_pixel_means``, ``img_pixel_stds``, ``params``,
        ``rcnn_bbox_stds``, ``rcnn_nms_thresh``, ``rcnn_conf_thresh``,
        ``vis_thresh`` and ``vis``.
    result_path : path object
        Must offer ``exists()`` and ``open()``; presumably a
        ``pathlib.Path`` — verify against the caller. The file gets a header
        when it is created, and one row
        ``{'name': <image file name>, 'coordinate': 'x_y_w_h;x_y_w_h;...'}``
        per call.
    """
    # print config
    print('called with args\n{}'.format(pprint.pformat(vars(args))))

    # setup context
    if args.gpu:
        ctx = mx.gpu(int(args.gpu))
    else:
        ctx = mx.cpu(0)

    # load single test
    im_tensor, im_info, im_orig = load_test(args.image,
                                            short=args.img_short_side,
                                            max_size=args.img_long_side,
                                            mean=args.img_pixel_means,
                                            std=args.img_pixel_stds)

    # generate data batch
    data_batch = generate_batch(im_tensor, im_info)

    # load params
    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # produce shape max possible
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)),
                   ('im_info', (1, 3))]
    label_shapes = None

    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)

    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # forward
    forward_starts = time.time()
    mod.forward(data_batch)
    rois, scores, bbox_deltas = mod.get_outputs()
    # Block until rois is computed so the timing below is meaningful.
    rois.wait_to_read()
    rois = rois[:, 1:]  # drop the first column of each roi row
    scores = scores[0]
    bbox_deltas = bbox_deltas[0]
    forward_costs = time.time() - forward_starts
    print("forward costs %.4f" % (forward_costs))
    im_info = im_info[0]

    # decode detection
    det = im_detect(rois,
                    scores,
                    bbox_deltas,
                    im_info,
                    bbox_stds=args.rcnn_bbox_stds,
                    nms_thresh=args.rcnn_nms_thresh,
                    conf_thresh=args.rcnn_conf_thresh)

    # Collect the accepted boxes as "x_y_w_h" strings (integer-truncated).
    img_name = Path(args.image).name
    box_fields = []
    for [cls, conf, x1, y1, x2, y2] in det:
        if cls > 0 and conf > args.vis_thresh:
            print(class_names[int(cls)], conf, [x1, y1, x2, y2])
            box_fields.append(
                "%d_%d_%d_%d" %
                (int(x1), int(y1), int(x2 - x1), int(y2 - y1)))

    fieldnames = ['name', 'coordinate']
    append_to_existing = result_path.exists()
    # `with` closes the file even when writing fails; newline='' is what the
    # csv module requires of files handed to a writer.
    with result_path.open("a" if append_to_existing else "w+",
                          newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if not append_to_existing:
            writer.writeheader()
        writer.writerow({
            'name': img_name,
            'coordinate': ';'.join(box_fields)
        })
    print("detect image %s" % img_name)

    # if vis
    if args.vis:
        vis_detection(im_orig,
                      det,
                      class_names,
                      thresh=args.vis_thresh,
                      prefix=args.image)
def check_quantize_model(qdtype):
    """Quantize a one-layer convolution for ``qdtype`` (without and with
    naive calibration) and verify the result can run a forward pass."""
    # Bail out on backend/dtype combinations reported as unsupported.
    if is_test_for_native_cpu():
        print(
            'skipped testing test_quantize_model_with_forward for native cpu since it is not supported yet'
        )
        return
    if qdtype == 'int8' and is_test_for_mkldnn():
        print(
            'skipped testing test_quantize_model_with_forward for mkldnn cpu int8 since it is not supported yet'
        )
        return
    if qdtype == 'uint8' and is_test_for_gpu():
        print(
            'skipped testing test_quantize_model_with_forward for gpu uint8 since it is not supported yet'
        )
        return

    def check_params(params, qparams, qsym=None):
        """Compare ``qparams`` with ``params`` (or with ``params`` quantized
        against ``qsym`` when a symbol is given)."""
        if qsym is None:
            expected = params
        else:
            expected = mx.contrib.quant._quantize_params(qsym,
                                                         params,
                                                         th_dict={})
        assert len(expected) == len(qparams)
        for key, value in expected.items():
            assert key in qparams
            assert same(value.asnumpy(), qparams[key].asnumpy())

    def check_qsym_calibrated(qsym):
        """Every requantize node must carry both calibration range attrs."""
        for node_name, node_attrs in qsym.attr_dict().items():
            if 'requantize_' in node_name:
                assert 'min_calib_range' in node_attrs
                assert 'max_calib_range' in node_attrs

    def check_qsym_qdtype(qsym, qdtype):
        """Every quantize node must declare ``qdtype`` as its out_type."""
        for node_name, node_attrs in qsym.attr_dict().items():
            if '_quantize' in node_name:
                assert 'out_type' in node_attrs
                assert node_attrs['out_type'] == qdtype

    def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape):
        """Bind the quantized symbol and push one random batch through it."""
        qmod = mx.mod.Module(symbol=qsym,
                             label_names=None,
                             context=mx.current_context())
        qmod.bind(for_training=False, data_shapes=[('data', data_shape)])
        qmod.set_params(qarg_params, qaux_params)
        inputs = []
        for _, shape in qmod.data_shapes:
            inputs.append(mx.random.uniform(-1.0, 1.0, shape=shape))
        qmod.forward(mx.io.DataBatch(inputs, []), is_train=False)
        # Force the asynchronous computation to complete.
        for output in qmod.get_outputs():
            output.wait_to_read()

    batch_size = 4
    dshape = (batch_size, 4, 10, 10)
    data = mx.sym.Variable('data')
    sym = mx.sym.Convolution(data, kernel=(1, 1), num_filter=16, name='conv0')

    mod = Module(symbol=sym, label_names=None)
    mod.bind(data_shapes=[('data', dshape)])
    mod.init_params()
    arg_params, aux_params = mod.get_params()
    excluded_sym_names = []

    # Pass 1: no calibration.
    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym,
        arg_params=arg_params,
        aux_params=aux_params,
        excluded_sym_names=excluded_sym_names,
        ctx=mx.current_context(),
        quantized_dtype=qdtype,
        calib_mode='none')
    check_params(arg_params, qarg_params, qsym)
    check_params(aux_params, qaux_params)
    check_qsym_forward(qsym, qarg_params, qaux_params, dshape)

    # Pass 2: naive calibration on random data.
    calib_data = DummyIter(
        NDArrayIter(data=mx.nd.random.uniform(shape=dshape),
                    batch_size=batch_size))
    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym,
        arg_params=arg_params,
        aux_params=aux_params,
        excluded_sym_names=excluded_sym_names,
        ctx=mx.current_context(),
        quantized_dtype=qdtype,
        calib_mode='naive',
        calib_data=calib_data,
        num_calib_examples=20)
    check_params(arg_params, qarg_params, qsym)
    check_params(aux_params, qaux_params)
    check_qsym_calibrated(qsym)
    check_qsym_qdtype(qsym, qdtype)
    check_qsym_forward(qsym, qarg_params, qaux_params, dshape)
def demo_net(sym, class_names, args):
    """Run the mask detector on ``args.image``; print each detection, dump
    the accepted masks as ``mask<index>.png`` and the annotated image as
    ``demo.png``, and optionally visualize the result."""
    # Echo the configuration.
    print('called with args\n{}'.format(pprint.pformat(vars(args))))

    # Pick the device.
    ctx = mx.gpu(int(args.gpu)) if args.gpu else mx.cpu(0)

    # Load the test image and wrap it in a data batch.
    im_tensor, im_info, im_orig = load_test(args.image,
                                            short=args.img_short_side,
                                            max_size=args.img_long_side,
                                            mean=args.img_pixel_means,
                                            std=args.img_pixel_stds)
    data_batch = generate_batch(im_tensor, im_info)

    # Load the trained parameters.
    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # Bind with the largest possible input shape.
    long_side = args.img_long_side
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, long_side, long_side)),
                   ('im_info', (1, 3))]
    label_shapes = None
    check_shape(sym, data_shapes, arg_params, aux_params)

    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # Forward pass; the network yields four outputs here.
    mod.forward(data_batch)
    rois, scores, bbox_deltas, mask_prob = mod.get_outputs()
    rois = rois[:, 1:]  # drop the first column of each roi row
    scores = scores[0]
    bbox_deltas = bbox_deltas[0]
    im_info = im_info[0]

    # Decode detections and their masks.
    det, masks = im_detect(rois,
                           scores,
                           bbox_deltas,
                           mask_prob,
                           im_info,
                           bbox_stds=args.rcnn_bbox_stds,
                           nms_thresh=args.rcnn_nms_thresh,
                           conf_thresh=args.rcnn_conf_thresh)

    im = cv2.imread(args.image)
    print(im.shape)
    print(im_info)

    # Report every detection; draw and save only those above the threshold.
    for index, [cls, conf, x1, y1, x2, y2] in enumerate(det):
        mask = masks[index]
        print(mask.max())
        if not (cls > 0 and conf > args.vis_thresh):
            continue
        top_left = (int(x1), int(y1))
        bottom_right = (int(x2), int(y2))
        print(class_names[int(cls)], conf, [x1, y1, x2, y2])
        print(top_left, bottom_right)
        cv2.rectangle(im, top_left, bottom_right, (255, 0, 0), 10)
        cv2.imwrite("mask{}.png".format(index), np.uint8(mask * 255))
    cv2.imwrite('demo.png', im)

    # Optional visualization.
    if args.vis:
        vis_detection(im_orig, det, class_names, thresh=args.vis_thresh)
def test_net(sym, imdb, args):
    """Run mask detection over every image in ``imdb`` and evaluate it.

    Parameters
    ----------
    sym : Symbol
        Network symbol; expected to produce four outputs
        (rois, scores, bbox_deltas, mask_prob).
    imdb : dataset object
        Provides ``roidb``, ``num_images``, ``num_classes`` and
        ``evaluate_mask``.
    args : namespace
        Reads ``gpu``, ``img_short_side``, ``img_long_side``,
        ``img_pixel_means``, ``img_pixel_stds``, ``params``,
        ``rcnn_bbox_stds``, ``rcnn_nms_thresh`` and ``rcnn_conf_thresh``.
    """
    # print config
    logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

    # setup context
    ctx = mx.gpu(args.gpu)

    # load testing data
    test_data = TestLoader(imdb.roidb,
                           batch_size=1,
                           short=args.img_short_side,
                           max_size=args.img_long_side,
                           mean=args.img_pixel_means,
                           std=args.img_pixel_stds)

    # load params
    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # produce shape max possible
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)),
                   ('im_info', (1, 3))]
    label_shapes = None

    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)

    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in
    #   (x1, y1, x2, y2, score)
    # all_masks / all_rois use the same [cls][image] layout.
    results_list = []
    all_boxes = [[[] for _ in range(imdb.num_images)]
                 for _ in range(imdb.num_classes)]
    all_masks = [[[] for _ in range(imdb.num_images)]
                 for _ in range(imdb.num_classes)]
    all_rois = [[[] for _ in range(imdb.num_images)]
                for _ in range(imdb.num_classes)]

    # start detection
    with tqdm(total=imdb.num_images) as pbar:
        for i, data_batch in enumerate(test_data):
            # forward
            im_info = data_batch.data[1][0]
            mod.forward(data_batch)
            rois, scores, bbox_deltas, mask_prob = mod.get_outputs()
            rois = rois[:, 1:]  # drop the first column of each roi row
            scores = scores[0]
            bbox_deltas = bbox_deltas[0]

            det, masks, rois_out = im_detect(
                rois,
                scores,
                bbox_deltas,
                mask_prob,
                im_info,
                bbox_stds=args.rcnn_bbox_stds,
                nms_thresh=args.rcnn_nms_thresh,
                conf_thresh=args.rcnn_conf_thresh)

            # Last four columns followed by column 1 of every detection.
            # This does not depend on the class, so build it once per image
            # instead of once per class.
            boxes_with_scores = np.concatenate((det[:, -4:], det[:, [1]]),
                                               axis=-1)
            # Class 0 is skipped; its slots stay empty lists.
            for j in range(1, imdb.num_classes):
                indexes = np.where(det[:, 0] == j)[0]
                all_boxes[j][i] = boxes_with_scores[indexes, :]
                all_masks[j][i] = masks[indexes]
                all_rois[j][i] = rois_out[indexes]

            # Per-image view: an empty placeholder for class 0, then one
            # entry per remaining class.
            foreground = range(1, imdb.num_classes)
            boxes_this_image = [[]] + [all_boxes[c][i] for c in foreground]
            masks_this_image = [[]] + [all_masks[c][i] for c in foreground]
            rois_this_image = [[]] + [all_rois[c][i] for c in foreground]

            results_list.append({
                'image': '{}.png'.format(i),
                'im_info': im_info.asnumpy(),
                'boxes': boxes_this_image,
                'masks': masks_this_image,
                'rois': rois_this_image
            })
            pbar.update(data_batch.data[0].shape[0])

    # evaluate model (all_rois is only exposed through results_list)
    results_pack = {
        'all_boxes': all_boxes,
        'all_masks': all_masks,
        'results_list': results_list
    }
    imdb.evaluate_mask(results_pack)