def check_hybrid_static_memory(**kwargs):
    """Compare an imperative ResNet-18 with a hybridized copy of it.

    ``kwargs`` are passed straight to ``hybridize`` on the second network.
    The summed outputs and every gradient whose ``grad_req`` is not
    ``'null'`` must match between the two networks.
    """
    x = mx.nd.random.uniform(shape=(2, 3, 32, 32))
    x.attach_grad()

    def make_net():
        return gluon.model_zoo.vision.get_resnet(
            1, 18, pretrained=True, prefix='net_', ctx=mx.context.current_context())

    net1 = make_net()
    net2 = make_net()
    net2.hybridize(**kwargs)
    # Run each network once before recording gradients.
    net1(x)
    net2(x)

    def test(net, x):
        with mx.autograd.record():
            y = net(x) + net(x)
            y.backward()

        grads = {}
        for name, param in net.collect_params().items():
            if param.grad_req != 'null':
                grads[name] = param.grad()
        return y, grads

    y1, grads1 = test(net1, x)
    y2, grads2 = test(net2, x)

    assert_almost_equal(y1.asnumpy(), y2.asnumpy(), rtol=1e-3, atol=1e-5)
    for key, grad1 in grads1.items():
        assert_almost_equal(grad1.asnumpy(), grads2[key].asnumpy(), rtol=1e-3, atol=1e-5)
def test_activations():
    """Check the Swish, ELU, SELU and PReLU gluon blocks against reference formulas.

    Each activation is evaluated on the same six sample points and compared
    element by element with a hand-written reference.
    """
    point_to_validate = mx.nd.array([-0.1, 0.1] * 3)

    swish = mx.gluon.nn.Swish()
    def swish_test(x):
        return x * mx.nd.sigmoid(x)

    for test_point, ref_point in zip(swish_test(point_to_validate), swish(point_to_validate)):
        assert test_point == ref_point

    elu = mx.gluon.nn.ELU()
    def elu_test(x):
        def elu(x):
            return 1.0 * (mx.nd.exp(x) - 1) if x < 0 else x
        return [elu(x_i) for x_i in x]

    for test_point, ref_point in zip(elu_test(point_to_validate), elu(point_to_validate)):
        assert test_point == ref_point

    selu = mx.gluon.nn.SELU()
    def selu_test(x):
        def selu_ref(x_i):
            scale, alpha = 1.0507009873554804934193349852946, 1.6732632423543772848170429916717
            # SELU(x) = scale * x for x >= 0, and scale * alpha * (exp(x) - 1) otherwise.
            return scale * x_i if x_i >= 0 else scale * alpha * (mx.nd.exp(x_i) - 1)
        return [selu_ref(x_i) for x_i in x]

    # Compare the block against the reference implementation. The original
    # zipped the block's output with itself, which could never fail.
    for test_point, ref_point in zip(selu_test(point_to_validate), selu(point_to_validate)):
        # exp(x) - 1 may differ from the operator's result in the last bits,
        # so compare with a tolerance instead of exact equality.
        assert_almost_equal(test_point.asnumpy(), ref_point.asnumpy(), rtol=1e-5, atol=1e-6)

    prelu = mx.gluon.nn.PReLU()
    prelu.initialize()
    x = point_to_validate.reshape((1, 3, 2))
    assert_almost_equal(prelu(x).asnumpy(), mx.nd.where(x >= 0, x, 0.25 * x).asnumpy())
def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride, global_pool):
    """Run fp32 Pooling and int8 quantized_pooling on the same data and compare.

    Everything executes under ``mx.Context('gpu', 0)``. For ``'max'`` pooling
    the outputs must match; for ``'avg'`` pooling no element may differ by
    more than 2.
    """
    with mx.Context('gpu', 0):
        ctx = mx.current_context()

        # fp32 reference
        data = mx.sym.Variable(name='data', shape=data_shape, dtype='float32')
        pooling_fp32 = mx.sym.Pooling(data=data, kernel=kernel, pad=pad, stride=stride,
                                      pool_type=pool_type, global_pool=global_pool,
                                      cudnn_off=False)
        pooling_fp32.infer_shape(data=data_shape)
        arg_names = pooling_fp32.list_arguments()
        pooling_fp32_exe = pooling_fp32.simple_bind(ctx=ctx, grad_req='null')
        fp32_input = pooling_fp32_exe.arg_dict[arg_names[0]]
        fp32_input[:] = mx.nd.random.uniform(low=-127.0, high=127.0,
                                             shape=data_shape).astype('int32')
        output = pooling_fp32_exe.forward()[0]

        # int8 counterpart, fed with the same values
        qdata = mx.sym.Variable(name='qdata', shape=data_shape, dtype='int8')
        min_data = mx.sym.Variable(name='min_data')
        max_data = mx.sym.Variable(name='max_data')
        quantized_pooling = mx.sym.contrib.quantized_pooling(data=qdata, min_data=min_data,
                                                             max_data=max_data, kernel=kernel,
                                                             pad=pad, stride=stride,
                                                             pool_type=pool_type,
                                                             global_pool=global_pool)
        pooling_int8_exe = quantized_pooling.simple_bind(ctx=ctx, grad_req='null')
        qarg_names = quantized_pooling.list_arguments()
        int8_args = pooling_int8_exe.arg_dict
        int8_args[qarg_names[0]][:] = pooling_fp32_exe.arg_dict[arg_names[0]].astype('int8')
        quantized_range = 127.0
        int8_args[qarg_names[1]][:] = -quantized_range
        int8_args[qarg_names[2]][:] = quantized_range
        qoutput, _, _ = pooling_int8_exe.forward()

        if pool_type == 'max':
            assert_almost_equal(output.asnumpy(), qoutput.asnumpy())
        elif pool_type == 'avg':
            # for avg pooling, fp32 and int8 may be different due to rounding errors
            diff = mx.nd.abs(output - qoutput.astype(output.dtype))
            num_large_diffs = mx.nd.lesser(2, diff).sum().asscalar()
            assert num_large_diffs == 0
def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, no_bias):
    """Run fp32 Convolution and int8 quantized_conv on the same inputs and compare.

    Everything executes under ``mx.Context('gpu', 0)``. Without bias the
    outputs must match; with bias no element may differ by more than 2.
    """
    with mx.Context('gpu', 0):
        ctx = mx.current_context()

        def random_int_values(shape):
            return mx.nd.random.uniform(low=-127.0, high=127.0, shape=shape).astype('int32')

        # run fp32 conv
        data = mx.sym.Variable(name='data', shape=data_shape, dtype='float32')
        conv2d = mx.sym.Convolution(data=data, kernel=kernel, num_filter=num_filter, pad=pad,
                                    stride=stride, no_bias=no_bias, cudnn_off=False,
                                    name='conv2d')
        arg_shapes, _, _ = conv2d.infer_shape(data=data_shape)
        arg_names = conv2d.list_arguments()
        conv_exe_fp32 = conv2d.simple_bind(ctx=ctx, grad_req='null')
        fp32_args = conv_exe_fp32.arg_dict
        fp32_args[arg_names[0]][:] = random_int_values(data_shape)
        fp32_args[arg_names[1]][:] = random_int_values(arg_shapes[1])
        if not no_bias:
            fp32_args[arg_names[2]][:] = random_int_values(arg_shapes[2])
        output = conv_exe_fp32.forward()[0]

        # run quantized conv
        qdata = mx.sym.Variable(name='qdata', shape=data_shape, dtype='int8')
        qweight = mx.sym.Variable(name='qweight', dtype='int8')
        min_data = mx.sym.Variable(name='min_data')
        max_data = mx.sym.Variable(name='max_data')
        min_weight = mx.sym.Variable(name='min_weight')
        max_weight = mx.sym.Variable(name='max_weight')
        quantized_conv2d = mx.sym.contrib.quantized_conv(data=qdata, weight=qweight,
                                                         min_data=min_data, max_data=max_data,
                                                         min_weight=min_weight,
                                                         max_weight=max_weight, kernel=kernel,
                                                         num_filter=num_filter, pad=pad,
                                                         stride=stride, no_bias=no_bias)
        qarg_names = quantized_conv2d.list_arguments()
        type_dict = None if no_bias else {qarg_names[2]: 'int8'}
        conv_exe_int8 = quantized_conv2d.simple_bind(ctx=ctx, type_dict=type_dict,
                                                     grad_req='null')
        int8_args = conv_exe_int8.arg_dict
        int8_args[qarg_names[0]][:] = fp32_args[arg_names[0]].astype('int8')
        int8_args[qarg_names[1]][:] = fp32_args[arg_names[1]].astype('int8')
        quantized_range = 127.0
        if no_bias:
            first_range_idx, num_range_args = 2, 4
        else:
            int8_args[qarg_names[2]][:] = fp32_args[arg_names[2]].astype('int8')
            first_range_idx, num_range_args = 3, 6
        # The range arguments alternate min, max, min, max, ...
        for offset in range(num_range_args):
            bound = -quantized_range if offset % 2 == 0 else quantized_range
            int8_args[qarg_names[first_range_idx + offset]][:] = bound
        qoutput, _, _ = conv_exe_int8.forward()

        if no_bias:
            assert_almost_equal(output.asnumpy(), qoutput.asnumpy())
        else:
            # with adding bias, accuracy loss should not be greater than one
            diff = mx.nd.abs(output - qoutput.astype(output.dtype))
            num_large_diffs = mx.nd.lesser(2, diff).sum().asscalar()
            assert num_large_diffs == 0
def check_rsp_pull(kv, ctxs, sparse_pull, is_same_rowid=False, use_slice=False):
    """Pull key 'e' with ``row_sparse_pull`` and check the rows that come back.

    Requested rows must hold 2 and all other rows 0. When ``sparse_pull`` is
    True, a plain ``pull`` with ``ignore_sparse=False`` is also checked to
    return 2 everywhere.
    """
    count = len(ctxs)
    num_rows = shape[0]
    all_row_ids = np.arange(num_rows)
    vals = [mx.nd.sparse.zeros(shape=shape, ctx=ctx, stype='row_sparse') for ctx in ctxs]

    if is_same_rowid:
        shared_ids = np.random.randint(num_rows, size=num_rows)
        row_ids = [mx.nd.array(shared_ids)] * count
    elif use_slice:
        total_row_ids = mx.nd.array(np.random.randint(num_rows, size=count*num_rows))
        row_ids = [total_row_ids[i*num_rows : (i+1)*num_rows] for i in range(count)]
    else:
        row_ids = [mx.nd.array(np.random.randint(num_rows, size=num_rows))
                   for _ in range(count)]

    # A single output or a shared row-id set is passed unwrapped.
    if len(row_ids) == 1 or is_same_rowid:
        row_ids_to_pull = row_ids[0]
    else:
        row_ids_to_pull = row_ids
    vals_to_pull = vals[0] if len(vals) == 1 else vals

    kv.row_sparse_pull('e', out=vals_to_pull, row_ids=row_ids_to_pull)
    for val, row_id in zip(vals, row_ids):
        retained = val.asnumpy()
        excluded_row_ids = np.setdiff1d(all_row_ids, row_id.asnumpy())
        for row in range(num_rows):
            fill = 0 if row in excluded_row_ids else 2
            expected_val = np.zeros_like(retained[row])
            expected_val += fill
            assert_almost_equal(retained[row], expected_val)

    if sparse_pull is True:
        kv.pull('e', out=vals_to_pull, ignore_sparse=False)
        for val in vals:
            retained = val.asnumpy()
            expected_val = np.zeros_like(retained)
            expected_val[:] = 2
            assert_almost_equal(retained, expected_val)
def check_compr_random(kv, threshold, nworker):
    """Push random gradients and compare the pulled change with a simulated 2-bit quantization."""
    # set a seed so all workers generate same data. knowing this helps
    # calculate expected value after pull
    mx.random.seed(123)
    rnd.seed(123)
    nrepeat = 5
    compr_random_keys_shapes = [('2121', shape),('212221',irregular_shape),('21221', big_shape)]

    # use new keys so residual is 0 for calculation of expected
    for key, key_shape in compr_random_keys_shapes:
        kv.init(key, mx.nd.zeros(key_shape))

    for key, key_shape in compr_random_keys_shapes:
        curr_residual = np.zeros(key_shape)
        for _ in range(nrepeat):
            before = mx.nd.zeros(key_shape)
            kv.pull(key, before)

            grad = mx.nd.array(rnd.rand(key_shape[0], key_shape[1]))
            # creates a copy because push changes grad because of assignment
            grad_cpy = mx.nd.array(grad)
            kv.push(key, grad)

            after = mx.nd.zeros(key_shape)
            kv.pull(key, after)
            diff = after - before

            # compute expected by using simulation of operator
            compr, curr_residual, decompr = compute_expected_2bit_quantization(
                grad_cpy, curr_residual, threshold)
            decompr *= nworker * rate
            assert_almost_equal(diff.asnumpy(), decompr)
def test_req():
    """Check that ``grad_req = 'add'`` doubles the gradient after a second backward pass."""
    data = mx.nd.random.uniform(shape=(1,3,224,224))
    label = mx.nd.random.uniform(shape=(1))
    label[:] = 1
    loss = gluon.loss.SoftmaxCrossEntropyLoss()

    net = nn.HybridSequential()
    net1 = nn.HybridSequential()
    net1.add(nn.Dense(4))
    net2 = nn.HybridSequential()
    net2.add(nn.Dense(3), nn.Dense(2))
    net.add(net1, net2)
    net.initialize()
    net.hybridize()

    params = net.collect_params()
    for param in params.values():
        param.grad_req = 'add'
    params.zero_grad()

    def mean_first_weight_grad():
        return net[0][0].weight.grad().mean().asnumpy()

    with mx.autograd.record():
        pred = net(data)
        l = loss(pred, label)
        l.backward()
        grad = mean_first_weight_grad()
        # run twice to check req = add
        pred = net(data)
        l = loss(pred, label)
        l.backward()

    grad_double = mean_first_weight_grad()
    assert_almost_equal(grad * 2, grad_double)
def test_gnmt_encoder():
    """Run GNMTEncoder over several cell types and layer layouts.

    Checks that outputs past each sequence's valid length are zero, and that
    the output shape and the number of returned states are as expected.
    """
    ctx = mx.Context.default_ctx
    cell_types = ["lstm", "gru", "relu_rnn", "tanh_rnn"]
    layer_layouts = [(2, 1), (3, 0)]
    for cell_type in cell_types:
        for num_layers, num_bi_layers in layer_layouts:
            for use_residual in [False, True]:
                encoder = GNMTEncoder(cell_type=cell_type, num_layers=num_layers,
                                      num_bi_layers=num_bi_layers, hidden_size=8,
                                      dropout=0.0, use_residual=use_residual,
                                      prefix='gnmt_encoder_')
                encoder.initialize(ctx=ctx)
                encoder.hybridize()
                for batch_size in [4]:
                    for seq_length in [5, 10]:
                        inputs_nd = mx.nd.random.normal(
                            0, 1, shape=(batch_size, seq_length, 4), ctx=ctx)
                        random_lengths = np.random.randint(1, seq_length, size=(batch_size,))
                        valid_length_nd = mx.nd.array(random_lengths, ctx=ctx)
                        encoder_outputs, _ = encoder(inputs_nd, valid_length=valid_length_nd)
                        valid_length_npy = valid_length_nd.asnumpy()
                        rnn_output = encoder_outputs[0].asnumpy()
                        for i in range(batch_size):
                            length = valid_length_npy[i]
                            if length < seq_length - 1:
                                padded_out = rnn_output[i, int(length):, :]
                                assert_almost_equal(padded_out, np.zeros_like(padded_out),
                                                    1E-6, 1E-6)
                        assert(encoder_outputs[0].shape == (batch_size, seq_length, 8))
                        assert(len(encoder_outputs[1]) == num_layers)
def pull_init_test(kv):
    """Pull ``gc_init_test_key`` onto every GPU worker and expect all ones."""
    # checks that compression is not applied to init of key
    out = []
    for g in range(nworker):
        out.append(mx.nd.zeros(shapes[0], mx.gpu(g)))
    kv.pull(gc_init_test_key, out=out)
    exp = np.ones_like(out[0].asnumpy())
    for pulled in out:
        assert_almost_equal(pulled.asnumpy(), exp)
def check_with_uniform(uf, arg_shapes, dim=None, npuf=None, rmin=-10, type_list=[np.float32]):
    """check function consistency with uniform random numbers

    ``uf`` is applied to NDArray inputs and compared with ``npuf`` applied to
    the same values as numpy arrays (``uf`` itself when ``npuf`` is None).
    If ``arg_shapes`` is an int, that many arguments share one random shape
    with ``dim`` dimensions.
    """
    if isinstance(arg_shapes, int):
        assert dim
        upper = int(1000**(1.0/dim))
        shape = tuple(np.random.randint(1, upper, size=dim))
        arg_shapes = [shape] * arg_shapes

    reference_fn = uf if npuf is None else npuf
    for dtype in type_list:
        ndarray_arg = []
        numpy_arg = []
        for arg_shape in arg_shapes:
            npy = np.random.uniform(rmin, 10, arg_shape).astype(dtype)
            ndarray_arg.append(mx.nd.array(npy, dtype=dtype))
            numpy_arg.append(npy)

        out1 = uf(*ndarray_arg)
        out2 = reference_fn(*numpy_arg).astype(dtype)

        assert out1.shape == out2.shape
        if isinstance(out1, mx.nd.NDArray):
            out1 = out1.asnumpy()
        # float16 gets a looser relative tolerance.
        if dtype == np.float16:
            assert_almost_equal(out1, out2, rtol=2e-3)
        else:
            assert_almost_equal(out1, out2)
def _check_subgraph_exe1(sym, subgraph_backend, op_names):
    """Partition ``sym`` explicitly, simple_bind both symbols, and compare outputs.

    Both executors receive the same random inputs; the summed absolute
    difference of every output pair must be zero.
    """
    out = SymbolHandle()
    check_call(_LIB.MXBuildSubgraphByOpNames(sym.handle, c_str(subgraph_backend),
                                              mx_uint(len(op_names)),
                                              c_str_array(op_names), ctypes.byref(out)))

    partitioned_sym = Symbol(out)
    assert partitioned_sym.list_inputs() == sym.list_inputs()
    assert partitioned_sym.list_arguments() == sym.list_arguments()
    assert partitioned_sym.list_auxiliary_states() == sym.list_auxiliary_states()

    exe = sym.simple_bind(ctx=mx.current_context(), grad_req='null')
    partitioned_exe = partitioned_sym.simple_bind(ctx=mx.current_context(), grad_req='null')

    for name in sym.list_inputs():
        if name in exe.arg_dict:
            source, target = exe.arg_dict, partitioned_exe.arg_dict
        else:
            assert name in exe.aux_dict
            source, target = exe.aux_dict, partitioned_exe.aux_dict
        source[name][:] = mx.nd.random.uniform(shape=source[name].shape)
        target[name][:] = source[name]

    exe.forward()
    partitioned_exe.forward()

    assert len(exe.outputs) == len(partitioned_exe.outputs)
    zero = np.zeros(shape=(1,))
    for expected, actual in zip(exe.outputs, partitioned_exe.outputs):
        assert_almost_equal((expected - actual).abs().sum().asnumpy(), zero)
def _check_subgraph_exe4(sym, subgraph_backend, op_names):
    """Use env var MXNET_SUBGRAPH_BACKEND=default to trigger graph partitioning in bind
    and compare results of the partitioned sym and the original sym.

    The environment variable and the registered op-name property are always
    removed again, even when binding or the forward pass raises, so a failure
    here cannot leak partitioning state into other tests.
    """
    def get_executor(sym, subgraph_backend=None, op_names=None, original_exec=None):
        if subgraph_backend is None:
            # Reference executor: bind with fresh random arguments.
            arg_shapes, _, aux_shapes = sym.infer_shape()
            arg_array = [mx.nd.random.uniform(shape=shape) for shape in arg_shapes]
            aux_array = [mx.nd.random.uniform(shape=shape) for shape in aux_shapes]
            exe = sym.bind(ctx=mx.current_context(), args=arg_array,
                           aux_states=aux_array, grad_req='null')
            exe.forward()
            return exe

        # Partitioned executor: reuse the reference executor's arrays.
        os.environ['MXNET_SUBGRAPH_BACKEND'] = subgraph_backend
        try:
            check_call(_LIB.MXSetSubgraphPropertyOpNames(c_str(subgraph_backend),
                                                         mx_uint(len(op_names)),
                                                         c_str_array(op_names)))
            try:
                exe = sym.bind(ctx=mx.current_context(),
                               args=original_exec.arg_arrays,
                               aux_states=original_exec.aux_arrays,
                               grad_req='null')
                exe.forward()
            finally:
                check_call(_LIB.MXRemoveSubgraphPropertyOpNames(c_str(subgraph_backend)))
        finally:
            del os.environ['MXNET_SUBGRAPH_BACKEND']
        return exe

    original_exec = get_executor(sym)
    partitioned_exec = get_executor(sym, subgraph_backend, op_names, original_exec)
    outputs1 = original_exec.outputs
    outputs2 = partitioned_exec.outputs
    assert len(outputs1) == len(outputs2)
    for out1, out2 in zip(outputs1, outputs2):
        assert_almost_equal((out1 - out2).abs().sum().asnumpy(), np.zeros(shape=(1,)))
def _check_subgraph_exe2(sym, subgraph_backend, op_names):
    """Use env var MXNET_SUBGRAPH_BACKEND=default to trigger graph partitioning in simple_bind
    and compare results of the partitioned sym and the original sym.

    The environment variable and the registered op-name property are always
    removed again, even when binding or the forward pass raises, so a failure
    here cannot leak partitioning state into other tests.
    """
    def get_executor(sym, subgraph_backend=None, op_names=None, original_exec=None):
        def bind_fill_and_run():
            # Random inputs for the reference executor; otherwise copy the
            # reference executor's inputs so both see the same data.
            exe = sym.simple_bind(ctx=mx.current_context(), grad_req='null')
            for name in sym.list_inputs():
                if name in exe.arg_dict:
                    exe.arg_dict[name][:] = mx.nd.random.uniform(shape=exe.arg_dict[name].shape)\
                        if original_exec is None else original_exec.arg_dict[name]
                else:
                    assert name in exe.aux_dict
                    exe.aux_dict[name][:] = mx.nd.random.uniform(shape=exe.aux_dict[name].shape)\
                        if original_exec is None else original_exec.aux_dict[name]
            exe.forward()
            return exe

        if subgraph_backend is None:
            return bind_fill_and_run()

        os.environ['MXNET_SUBGRAPH_BACKEND'] = subgraph_backend
        try:
            check_call(_LIB.MXSetSubgraphPropertyOpNames(c_str(subgraph_backend),
                                                         mx_uint(len(op_names)),
                                                         c_str_array(op_names)))
            try:
                return bind_fill_and_run()
            finally:
                check_call(_LIB.MXRemoveSubgraphPropertyOpNames(c_str(subgraph_backend)))
        finally:
            del os.environ['MXNET_SUBGRAPH_BACKEND']

    original_exec = get_executor(sym)
    partitioned_exec = get_executor(sym, subgraph_backend, op_names, original_exec)
    outputs1 = original_exec.outputs
    outputs2 = partitioned_exec.outputs
    assert len(outputs1) == len(outputs2)
    for out1, out2 in zip(outputs1, outputs2):
        assert_almost_equal((out1 - out2).abs().sum().asnumpy(), np.zeros(shape=(1,)))
def test_bce_loss_with_pos_weight():
    """Train with SigmoidBinaryCrossEntropyLoss using ``pos_weight``, then compare it with numpy."""
    # Suppose it's a multi-label classification
    N = np.random.randint(5, 30)
    data = mx.nd.random.uniform(-1, 1, shape=(N, 20))
    label = mx.nd.array(np.random.randint(2, size=(N, 5)), dtype='float32')
    pos_weight = mx.nd.repeat(mx.nd.random.uniform(0, 10, shape=(1, 5)), repeats=N, axis=0)
    data_iter = mx.io.NDArrayIter(data, {'label': label, 'pos_w': pos_weight},
                                  batch_size=10, label_name='label')

    output = get_net(5)
    label_sym = mx.symbol.Variable('label')
    pos_w = mx.symbol.Variable('pos_w')
    Loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    loss = mx.sym.make_loss(Loss(output, label_sym, None, pos_w))

    mod = mx.mod.Module(loss, data_names=('data',), label_names=('label', 'pos_w'))
    mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01},
            eval_metric=mx.metric.Loss(), optimizer='adam',
            initializer=mx.init.Xavier(magnitude=2))
    final_loss = mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1]
    assert final_loss < 0.01

    # Test against npy
    data = mx.nd.random.uniform(-5, 5, shape=(N, 5))
    label = mx.nd.array(np.random.randint(2, size=(N, 5)), dtype='float32')
    pos_weight = mx.nd.random.uniform(0, 10, shape=(1, 5))
    mx_bce_loss = Loss(data, label, None, pos_weight).asnumpy()

    prob_npy = 1.0 / (1.0 + np.exp(-data.asnumpy()))
    label_npy = label.asnumpy()
    pos_weight_npy = pos_weight.asnumpy()
    positive_term = - label_npy * np.log(prob_npy)*pos_weight_npy
    negative_term = (1 - label_npy) * np.log(1 - prob_npy)
    npy_bce_loss = (positive_term - negative_term).mean(axis=1)
    assert_almost_equal(mx_bce_loss, npy_bce_loss, rtol=1e-4, atol=1e-5)
def _check_subgraph_exe3(sym, subgraph_backend, op_names):
    """Partition ``sym`` explicitly, bind both symbols to the same arrays, and compare outputs.

    The summed absolute difference of every output pair must be zero.
    """
    out = SymbolHandle()
    check_call(_LIB.MXBuildSubgraphByOpNames(sym.handle, c_str(subgraph_backend),
                                              mx_uint(len(op_names)),
                                              c_str_array(op_names), ctypes.byref(out)))

    partitioned_sym = Symbol(out)
    input_names = sym.list_inputs()
    arg_names = sym.list_arguments()
    aux_names = sym.list_auxiliary_states()
    assert partitioned_sym.list_inputs() == input_names
    assert partitioned_sym.list_arguments() == arg_names
    assert partitioned_sym.list_auxiliary_states() == aux_names

    arg_shapes, _, aux_shapes = sym.infer_shape()
    arg_array = [mx.nd.random.uniform(shape=arg_shape) for arg_shape in arg_shapes]
    aux_array = [mx.nd.random.uniform(shape=aux_shape) for aux_shape in aux_shapes]

    bind_kwargs = dict(args=arg_array, aux_states=aux_array, grad_req='null')
    exe = sym.bind(ctx=mx.current_context(), **bind_kwargs)
    partitioned_exe = partitioned_sym.bind(ctx=mx.current_context(), **bind_kwargs)

    exe.forward()
    partitioned_exe.forward()

    assert len(exe.outputs) == len(partitioned_exe.outputs)
    zero = np.zeros(shape=(1,))
    for expected, actual in zip(exe.outputs, partitioned_exe.outputs):
        assert_almost_equal((expected - actual).abs().sum().asnumpy(), zero)
def test_tensorrt_resnet18_feature_vect():
    """Compare ResNet-18 v2 outputs with and without TensorRT, in fp16 and fp32.

    The TensorRT fp16 setting is put back to its previous value afterwards.
    """
    print("downloading sample input")
    input_data = get_image(url)

    gluon_resnet18 = vision.resnet18_v2(pretrained=True)
    gluon_resnet18.hybridize()
    gluon_resnet18.forward(input_data)
    gluon_resnet18.export(model_file_name)
    sym, arg_params, aux_params = mx.model.load_checkpoint(model_file_name, 0)

    def run_forward(symbol):
        # Bind on GPU, load the checkpoint parameters and run one inference pass.
        executor = symbol.simple_bind(ctx=mx.gpu(), data=batch_shape,
                                      grad_req='null', force_rebind=True)
        executor.copy_params_from(arg_params, aux_params)
        return executor.forward(is_train=False, data=input_data)

    y = run_forward(sym)

    trt_sym = sym.get_backend_symbol('TensorRT')
    mx.contrib.tensorrt.init_tensorrt_params(trt_sym, arg_params, aux_params)
    original_precision_value = mx.contrib.tensorrt.get_use_fp16()
    try:
        mx.contrib.tensorrt.set_use_fp16(True)
        y_trt = run_forward(trt_sym)

        mx.contrib.tensorrt.set_use_fp16(False)
        y_trt_fp32 = run_forward(trt_sym)

        no_trt_output = y[0].asnumpy()[0]
        trt_output = y_trt[0].asnumpy()[0]
        trt_fp32_output = y_trt_fp32[0].asnumpy()[0]
        # fp16 gets a looser tolerance than fp32.
        assert_almost_equal(no_trt_output, trt_output, 1e-1, 1e-2)
        assert_almost_equal(no_trt_output, trt_fp32_output, 1e-4, 1e-4)
    finally:
        mx.contrib.tensorrt.set_use_fp16(original_precision_value)
def softmax_forward(input_data, true_output):
    """Apply softmax over axis 1 on CPU and compare the ``[0][0][0]`` slice with ``true_output``."""
    softmax_sym = mx.sym.Variable('data').softmax(axis=1)
    exec1 = softmax_sym.bind(mx.cpu(), args={'data': input_data})
    outputs = exec1.forward()
    outputs[0].wait_to_read()
    nparr = exec1.outputs[0][0][0][0].asnumpy()
    assert_almost_equal(nparr, true_output, rtol=1e-5, atol=1e-5)
def pull_before_push(kv):
    """Pull every key onto every GPU worker, ``nrepeat`` times, and expect all zeros."""
    for _ in range(nrepeat):
        for j, key in enumerate(keys):
            # Start from ones so that a pull which writes nothing is detected.
            out = [mx.nd.ones(shapes[j], mx.gpu(g)) for g in range(nworker)]
            kv.pull(key, out=out)
            exp = np.zeros_like(out[0].asnumpy())
            for pulled in out:
                assert_almost_equal(pulled.asnumpy(), exp)
def test_bce_equal_ce2():
    """Check that sigmoid BCE on probabilities equals softmax CE on their two-class log-probabilities."""
    N = 100
    loss1 = gluon.loss.SigmoidBCELoss(from_sigmoid=True)
    loss2 = gluon.loss.SoftmaxCELoss(from_logits=True)
    out1 = mx.random.uniform(0.1, 0.9, shape=(N, 1))
    two_class_probs = mx.nd.concat(1-out1, out1, dim=1)
    out2 = mx.nd.log(two_class_probs + 1e-8)
    label = mx.nd.round(mx.random.uniform(0, 1, shape=(N, 1)))
    bce = loss1(out1, label).asnumpy()
    ce = loss2(out2, label).asnumpy()
    assert_almost_equal(bce, ce)
def test_smooth_distribution():
    """Check ``_smooth_distribution`` on an all-zero input and on a single-spike input."""
    smooth = mx.contrib.quant._smooth_distribution

    # An all-zero distribution is expected to raise.
    assert_exception(lambda: smooth(np.zeros((2,)), eps=1e-3), ValueError)

    dirac_delta = np.zeros((5,))
    dirac_delta[2] = 1
    # Expected: every entry gains 1e-3 and the spike then loses 5e-3.
    smooth_dirac_delta = dirac_delta.copy()
    smooth_dirac_delta += 1e-3
    smooth_dirac_delta[2] -= 5e-3
    assert_almost_equal(smooth(dirac_delta, eps=1e-3), smooth_dirac_delta)
def test_normalize():
    """Compare ``transforms.Normalize`` with a manual normalization of the first three leading slices."""
    raw = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
    data_in = transforms.ToTensor()(nd.array(raw, dtype='uint8'))
    normalize = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))
    out_nd = normalize(data_in)

    data_expected = data_in.asnumpy()
    # Apply (value - mean) / std to leading-axis entries 0, 1 and 2.
    data_expected[0] = data_expected[0] / 3.0
    data_expected[1] = (data_expected[1] - 1.0) / 2.0
    data_expected[2] = data_expected[2] - 2.0
    assert_almost_equal(data_expected, out_nd.asnumpy())
def test_logistic_loss_equal_bce():
    """Check that LogisticLoss in both label formats matches SigmoidBCELoss on raw scores."""
    N = 100
    loss_binary = gluon.loss.LogisticLoss(label_format='binary')
    loss_signed = gluon.loss.LogisticLoss(label_format='signed')
    loss_bce = gluon.loss.SigmoidBCELoss(from_sigmoid=False)
    data = mx.random.uniform(-10, 10, shape=(N, 1))
    label = mx.nd.round(mx.random.uniform(0, 1, shape=(N, 1)))

    binary_result = loss_binary(data, label).asnumpy()
    assert_almost_equal(binary_result, loss_bce(data, label).asnumpy())

    # Signed labels are the binary labels mapped from {0, 1} to {-1, 1}.
    signed_result = loss_signed(data, 2 * label - 1).asnumpy()
    assert_almost_equal(signed_result, loss_bce(data, label).asnumpy())
def test_zero_grad():
    """Check that ``zero_grad`` clears the gradient of an Embedding created with ``sparse_grad=True``."""
    data = mx.nd.random.uniform(shape=(3,3))
    net = nn.Embedding(3, 4, sparse_grad=True, prefix='test_zero_grad_')
    net.initialize()
    with mx.autograd.record():
        l = net(data)
        l.backward()

    params = net.collect_params()
    params.zero_grad()
    grad = params['test_zero_grad_weight'].grad()
    assert_almost_equal(grad.asnumpy(), grad.asnumpy() * 0)
def bench_dot(lhs_shape, rhs_shape, lhs_stype, rhs_stype,
              lhs_den, rhs_den, trans_lhs, ctx, num_repeat=10, fw="mxnet", distribution="uniform"):
    """Time sparse versus dense dot for one configuration and print one result row.

    ``fw`` selects the MXNet or the scipy/numpy implementation. The printed
    costs are the values returned by ``measure_cost`` multiplied by 1000, and
    the speedup is dense cost divided by sparse cost.
    """
    set_default_context(ctx)
    assert fw == "mxnet" or fw == "scipy"
    use_mxnet = fw == "mxnet"

    # Set funcs
    if use_mxnet:
        dot_func_sparse, dot_func_dense = mx.nd.sparse.dot, mx.nd.dot
    else:
        dot_func_sparse, dot_func_dense = sp.spmatrix.dot, np.dot

    # Create matrix instances
    lhs_nd = rand_ndarray(lhs_shape, lhs_stype, density=lhs_den, distribution=distribution)
    # only uniform distribution supported for rhs
    rhs_distribution = distribution if rhs_stype == 'csr' else "uniform"
    rhs_nd = rand_ndarray(rhs_shape, rhs_stype, density=rhs_den, distribution=rhs_distribution)

    if use_mxnet:
        lhs_dns = lhs_nd if lhs_stype == 'default' else lhs_nd.tostype('default')
        rhs_dns = rhs_nd if rhs_stype == 'default' else rhs_nd.tostype('default')
        # One warm up run, verify correctness
        out = dot_func_sparse(lhs_nd, rhs_dns, trans_lhs)
        out_expected = dot_func_dense(lhs_dns, rhs_dns, trans_lhs)
        assert_almost_equal(out.asnumpy(), out_expected.asnumpy(), rtol=1e-1, atol=1e-1)
        sparse_cost = measure_cost(num_repeat, False, False, dot_func_sparse,
                                   lhs_nd, rhs_nd, trans_lhs)
        dense_cost = measure_cost(num_repeat, False, False, dot_func_dense,
                                  lhs_dns, rhs_dns, trans_lhs)
    else:
        lhs_dns = lhs_nd.asnumpy()
        rhs_dns = rhs_nd.asnumpy()
        lhs_nd = sp.csr_matrix(lhs_nd.asnumpy())
        rhs_nd = rhs_nd.asnumpy()
        # One warm up run, verify correctness
        lhs_nd_copy = sp.spmatrix.transpose(lhs_nd) if trans_lhs else lhs_nd
        dot_func_sparse(lhs_nd_copy, rhs_dns)
        sparse_cost = measure_cost(num_repeat, trans_lhs, False, dot_func_sparse, lhs_nd, rhs_nd)
        dense_cost = measure_cost(num_repeat, trans_lhs, True, dot_func_dense, lhs_dns, rhs_dns)

    speedup = dense_cost / sparse_cost

    # Print results
    m, k = lhs_shape[0], lhs_shape[1]
    n = rhs_shape[1]
    result_pattern = '{:15.1f} {:15.1f} {:>10} {:8d} {:8d} {:8d} {:13.2f} {:13.2f} {:8.2f}'
    results = result_pattern.format(lhs_den*100, rhs_den*100, str(ctx), m, k, n,
                                    sparse_cost*1000, dense_cost*1000, speedup)
    print(results)
def test_mkldnn_ndarray_slice():
    """Run a CPU Conv2D on all-ones input and read one value through a slice of the output."""
    ctx = mx.cpu()
    net = gluon.nn.HybridSequential()
    with net.name_scope():
        conv = gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None)
        net.add(conv)
    net.collect_params().initialize(ctx=ctx)
    x = mx.nd.array(np.ones([32, 3, 224, 224]), ctx)
    y = net(x)

    # trigger computation on ndarray slice
    first_sample = y[0].asnumpy()
    assert_almost_equal(first_sample[0, 0, 0], 0.3376348)
def test_global_norm_clip_multi_device():
    """Check ``clip_global_norm`` on arrays placed on a GPU and a CPU, with and without ``check_isfinite``."""
    for check_isfinite in [True, False]:
        x1 = mx.nd.ones((3,3), ctx=mx.gpu(0))
        x2 = mx.nd.ones((4,4), ctx=mx.cpu(0))
        norm = gluon.utils.clip_global_norm([x1, x2], 1.0, check_isfinite=check_isfinite)
        # With check_isfinite the result is compared directly; without it,
        # it is read out through asscalar().
        norm_value = norm if check_isfinite else norm.asscalar()
        assert norm_value == 5.0
        assert_almost_equal(x1.asnumpy(), np.ones((3, 3)) / 5)
        assert_almost_equal(x2.asnumpy(), np.ones((4, 4)) / 5)
def test_inference():
    """Compare CPU and GPU inference outputs of several model-zoo networks on one data batch."""
    all_models = ['resnet50_v1', 'vgg19_bn', 'alexnet', #'inceptionv3',
                  'densenet201', 'squeezenet1.0', 'mobilenet0.25']

    batch_size = 10
    download_data()
    for model_name in all_models:
        eprint('testing inference on %s'%model_name)

        if 'inception' not in model_name:
            data_shape = (3, 224, 224)
        else:
            data_shape = (3, 299, 299)
        dataIter = mx.io.ImageRecordIter(
            path_imgrec        = VAL_DATA,
            label_width        = 1,
            preprocess_threads = 1,
            batch_size         = batch_size,
            data_shape         = data_shape,
            label_name         = 'softmax_label',
            rand_crop          = False,
            rand_mirror        = False)
        data_batch = dataIter.next()
        data = data_batch.data[0]
        label = data_batch.label[0]
        gpu_data = data.as_in_context(mx.gpu())
        gpu_label = label.as_in_context(mx.gpu())

        # This is to create a model and run the model once to initialize
        # all parameters.
        cpu_model = get_model(model_name)
        cpu_model.collect_params().initialize(ctx=mx.cpu())
        cpu_model(mx.nd.array(data, ctx=mx.cpu()))
        gpu_model = get_model(model_name)
        gpu_model.collect_params().initialize(ctx=mx.gpu())
        gpu_model(mx.nd.array(data, ctx=mx.gpu()))

        # Force the two models have the same parameters.
        cpu_params = cpu_model.collect_params()
        gpu_params = gpu_model.collect_params()
        for full_name in cpu_params.keys():
            short_name = full_name.replace(cpu_params.prefix, '')
            cpu_param = cpu_params.get(short_name)
            gpu_param = gpu_params.get(short_name)
            gpu_param.set_data(cpu_param.data().as_in_context(mx.gpu()))

        for _ in range(5):
            # Run inference.
            with autograd.record(train_mode=False):
                cpu_out = cpu_model(mx.nd.array(data, ctx=mx.cpu()))
                gpu_out = gpu_model(gpu_data)
            out = cpu_out.asnumpy()
            gpu_np = gpu_out.asnumpy()
            max_val = np.max(np.abs(out))
            gpu_max_val = np.max(np.abs(gpu_np))
            eprint(model_name + ": CPU " + str(max_val) + ", GPU " + str(gpu_max_val))
            # Both outputs are scaled by the CPU maximum before comparing.
            assert_almost_equal(out / max_val, gpu_np / max_val, rtol=1e-3, atol=1e-3)
def test_row_sparse_pull_single_device():
    """Init, push and row_sparse_pull one row-sparse array on a 'device' kvstore; it must be unchanged."""
    kvstore = mx.kv.create('device')
    copy = mx.nd.random_normal(shape=(4,4), ctx=mx.gpu(0))
    grad = copy.tostype("row_sparse")

    key = 0
    kvstore.init(key, grad)
    row_indices = grad.indices
    kvstore.push(key, grad)
    kvstore.row_sparse_pull(key, out=grad, row_ids=row_indices)

    assert_almost_equal(grad.asnumpy(), copy.asnumpy())
def test_mkldnn_sum_inplace_with_cpu_layout():
    """Add a Convolution output to a plain input with ``add_n`` on CPU and check one output value."""
    x_shape = (32, 3, 224, 224)
    y_shape = (32, 32, 222, 222)
    x_npy = np.ones(x_shape)
    y_npy = np.ones(y_shape)

    x = mx.sym.Variable("x")
    y = mx.sym.Variable("y")
    conv = mx.symbol.Convolution(data=x, num_filter=32, kernel=(3, 3))
    z = mx.sym.add_n(conv, y)

    exe = z.simple_bind(ctx=mx.cpu(), x=x_shape, y=y_shape)
    out = exe.forward(is_train=False, x=x_npy, y=y_npy)[0]
    first_sample = out[0].asnumpy()
    assert_almost_equal(first_sample[0, 0, 0], 1.0)
def test_global_norm_clip():
    """Check ``clip_global_norm`` on all-ones arrays, then that a NaN input emits exactly one warning."""
    x1 = mx.nd.ones((3,3))
    x2 = mx.nd.ones((4,4))
    norm = gluon.utils.clip_global_norm([x1, x2], 1.0)
    assert norm == 5.0
    for clipped, shape_ in ((x1, (3,3)), (x2, (4,4))):
        assert_almost_equal(clipped.asnumpy(), np.ones(shape_)/5)

    x3 = mx.nd.array([1.0, 2.0, float('nan')])
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        gluon.utils.clip_global_norm([x1, x3], 2.0)
        assert len(caught) == 1
def test_quantize_float32_to_int8():
    """Quantize random float32 data to int8 and compare dtypes, ranges and values with numpy."""
    shape = rand_shape_nd(4)
    data = rand_ndarray(shape, 'default', dtype='float32')
    min_range = mx.nd.min(data)
    max_range = mx.nd.max(data)
    qdata, min_val, max_val = mx.nd.contrib.quantize(data, min_range, max_range,
                                                     out_type='int8')

    data_np = data.asnumpy()
    min_range = min_range.asscalar()
    max_range = max_range.asscalar()
    # The expected range is symmetric around zero.
    real_range = np.maximum(np.abs(min_range), np.abs(max_range))
    quantized_range = 127.0
    scale = quantized_range / real_range

    assert qdata.dtype == np.int8
    assert min_val.dtype == np.float32
    assert max_val.dtype == np.float32
    assert same(min_val.asscalar(), -real_range)
    assert same(max_val.asscalar(), real_range)

    magnitudes = np.minimum(np.abs(data_np) * scale + 0.5, quantized_range)
    qdata_np = (np.sign(data_np) * magnitudes).astype(np.int8)
    assert_almost_equal(qdata.asnumpy(), qdata_np, atol=1)
def test_random_rotation():
    """Check RandomRotation's argument validation, output shapes and the identity case."""
    # test exceptions for probability input outside of [0,1]
    for bad_proba in (1.1, -0.3):
        assertRaises(ValueError, transforms.RandomRotation, [-10, 10.],
                     rotate_with_proba=bad_proba)

    # test `forward`
    transformer = transforms.RandomRotation([-10, 10.])
    assertRaises(TypeError, transformer, mx.np.ones((3, 30, 60), dtype='uint8'))

    single_image = mx.np.ones((3, 30, 60), dtype='float32')
    single_output = transformer(single_image)
    assert same(single_output.shape, (3, 30, 60))

    batch_image = mx.np.ones((3, 3, 30, 60), dtype='float32')
    batch_output = transformer(batch_image)
    assert same(batch_output.shape, (3, 3, 30, 60))

    # test identity (rotate_with_proba = 0)
    transformer = transforms.RandomRotation([-100., 100.], rotate_with_proba=0.0)
    data = mx.np.random.normal(size=(3, 30, 60))
    rotated = transformer(data)
    assert_almost_equal(data.asnumpy(), rotated.asnumpy())
def test_bce_loss():
    """Train with SigmoidBinaryCrossEntropyLoss, then compare the loss with a numpy reference."""
    N = 20
    data = mx.random.uniform(-1, 1, shape=(N, 20))
    label = mx.nd.array(np.random.randint(2, size=(N,)), dtype='float32')
    data_iter = mx.io.NDArrayIter(data, label, batch_size=10, label_name='label')

    output = get_net(1)
    label_sym = mx.symbol.Variable('label')
    Loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    loss = mx.sym.make_loss(Loss(output, label_sym))

    mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',))
    mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01},
            eval_metric=mx.gluon.metric.Loss(), optimizer='adam',
            initializer=mx.init.Xavier(magnitude=2))
    final_loss = mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1]
    assert final_loss < 0.01

    # Test against npy
    data = mx.random.uniform(-5, 5, shape=(10,))
    label = mx.random.uniform(0, 1, shape=(10,))
    mx_bce_loss = Loss(data, label).asnumpy()

    prob_npy = 1.0 / (1.0 + np.exp(-data.asnumpy()))
    label_npy = label.asnumpy()
    positive_term = - label_npy * np.log(prob_npy)
    negative_term = (1 - label_npy) * np.log(1 - prob_npy)
    npy_bce_loss = positive_term - negative_term
    assert_almost_equal(mx_bce_loss, npy_bce_loss, rtol=1e-4, atol=1e-5)
def test_cosine_loss(hybridize):
    """Compare ``gluon.loss.CosineEmbeddingLoss`` with a reference written with ``mx.np`` ops.

    Parameters
    ----------
    hybridize : bool
        When truthy, the loss block is hybridized before being called.
    """
    # Random sample pairs and a +/-1 label per pair.
    lhs = mx.np.random.randn(3, 2)
    rhs = mx.np.random.randn(3, 2)
    label = mx.np.sign(mx.np.random.randn(lhs.shape[0]))

    # Loss as computed by Gluon.
    cosine_loss = gluon.loss.CosineEmbeddingLoss()
    if hybridize:
        cosine_loss.hybridize()
    loss = cosine_loss(lhs, rhs, label)

    # Reference: cosine similarity per row, then `1 - x` where the label is 1
    # and `relu(x)` otherwise.
    dot = mx.np.sum(lhs * rhs, keepdims=True, axis=1)
    lhs_norm = mx.np.sqrt(mx.np.sum(lhs**2, axis=1, keepdims=True))
    rhs_norm = mx.np.sqrt(mx.np.sum(rhs**2, axis=1, keepdims=True))
    x = dot / (lhs_norm * rhs_norm)
    label_col = mx.npx.reshape(label, (-1, 1))
    per_row = mx.np.where(label_col == 1, 1 - x, mx.npx.relu(x))
    numpy_loss = mx.npx.reshape(per_row, (-1, ))
    assert_almost_equal(loss.asnumpy(), numpy_loss.asnumpy(), rtol=1e-3, atol=1e-5)
def test_export():
    """Export a hybridized ResNet-18 and check two ways of loading it reproduce its output."""
    ctx = mx.context.current_context()
    reference = gluon.model_zoo.vision.resnet18_v1(prefix='resnet', ctx=ctx, pretrained=True)
    reference.hybridize()
    batch = mx.nd.random.normal(shape=(1, 3, 224, 224))
    expected = reference(batch)
    reference.export('gluon')

    # Path 1: load the export (prefix 'gluon', epoch 0) into a Module and run inference.
    module = mx.mod.Module.load('gluon', 0, label_names=None, context=ctx)
    module.bind(data_shapes=[('data', batch.shape)])
    module.forward(mx.io.DataBatch([batch], None), is_train=False)
    (module_out,) = module.get_outputs()
    assert_almost_equal(expected.asnumpy(), module_out.asnumpy())

    # Path 2: load only the exported parameter file into a newly built model.
    reloaded = gluon.model_zoo.vision.resnet18_v1(prefix='resnet', ctx=ctx)
    reloaded.collect_params().load('gluon-0000.params', ctx)
    reloaded_out = reloaded(batch)
    assert_almost_equal(expected.asnumpy(), reloaded_out.asnumpy())
def test_np_get_constant():
    """Add a ``gluon.Constant`` to an input inside a block, with and without hybridization."""
    const_arr = _np.random.uniform(0, 100, size=(10, 10)).astype(_np.float32)

    class Foo(gluon.HybridBlock):
        """Block whose forward pass adds the constant array to its input."""

        def __init__(self):
            super(Foo, self).__init__()
            self.weight = gluon.Constant(const_arr)

        def forward(self, x):
            # Fetch the constant on the same context as the input.
            constant = self.weight.data(x.ctx)
            return x + constant.astype(np.float32)

    x = np.random.uniform(size=const_arr.shape, dtype=const_arr.dtype)
    expected = None
    for hybridize in [False, True]:
        block = Foo()
        if hybridize:
            block.hybridize()
        block.initialize()
        result = block(x)
        expected = x.asnumpy() + const_arr
        assert_almost_equal(result.asnumpy(), expected,
                            atol=1e-5, rtol=1e-4, use_broadcast=False)
def test_gluon_ctc_consistency():
    """Check that CTCLoss input gradients computed on CPU and on GPU agree."""
    loss = mx.gluon.loss.CTCLoss()
    data = mx.nd.arange(0, 4, repeat=40, ctx=mx.gpu(0)).reshape(
        (2, 20, 4)).flip(axis=0)
    label_values = [[2, 1, -1, -1], [3, 2, 2, -1]]
    cpu_label = mx.nd.array(label_values, ctx=mx.cpu(0))
    gpu_label = mx.nd.array(label_values, ctx=mx.gpu(0))

    def input_grad(inputs, labels):
        """Run forward/backward of the loss and return the gradient w.r.t. ``inputs``."""
        inputs.attach_grad()
        with mx.autograd.record():
            out = loss(inputs, labels)
            out.backward()
        return inputs.grad

    cpu_grad = input_grad(data.copy().as_in_context(mx.cpu(0)), cpu_label)
    gpu_grad = input_grad(data.copyto(mx.gpu(0)), gpu_label)
    assert_almost_equal(cpu_grad.asnumpy(), gpu_grad.asnumpy(), atol=1e-3, rtol=1e-3)
def test_global_norm_clip_multi_device():
    """Clip the global norm of all-ones arrays spread over GPU 0 and CPU 0.

    The asserts expect a global norm of 9 and every array scaled to 1/9.
    """
    shapes = [(3, 3), (4, 4), (7, 4), (7, 4)]
    for check_isfinite in [True, False]:
        # Arrays alternate between GPU 0 and CPU 0.
        contexts = [mx.gpu(0), mx.cpu(0), mx.gpu(0), mx.cpu(0)]
        arrays = [mx.nd.ones(shape, ctx=ctx) for shape, ctx in zip(shapes, contexts)]
        norm = gluon.utils.clip_global_norm(arrays, 1.0, check_isfinite=check_isfinite)
        # With check_isfinite the norm is compared directly; otherwise it is
        # first converted with asscalar().
        if check_isfinite:
            assert norm == 9.0
        else:
            assert norm.asscalar() == 9.0
        for clipped, shape in zip(arrays, shapes):
            assert_almost_equal(clipped, np.ones(shape) / 9)
def test_get_optimal_thresholds():
    """Check ``_get_optimal_thresholds`` on uniformly distributed data.

    Given an ndarray with elements following a uniform distribution, the optimal
    threshold for quantizing the ndarray should be either abs(min(nd)) or abs(max(nd)).
    """
    def largest_magnitude(arr):
        """Return max(|min(arr)|, |max(arr)|) as a NumPy array."""
        lowest = mx.nd.abs(mx.nd.min(arr))
        highest = mx.nd.abs(mx.nd.max(arr))
        return mx.nd.maximum(lowest, highest).asnumpy()

    for dtype in ['uint8', 'int8', 'auto']:
        layer = mx.nd.uniform(low=-10.532, high=11.3432, shape=(8, 3, 23, 23),
                              dtype=np.float64)
        nd_dict = {'layer1': layer}
        expected_threshold = largest_magnitude(nd_dict['layer1'])
        th_dict = mx.contrib.quant._get_optimal_thresholds(nd_dict, dtype)
        assert 'layer1' in th_dict
        # Element 1 of the returned entry is the value compared with the expectation.
        found = np.array([th_dict['layer1'][1]])
        assert_almost_equal(found, expected_threshold, rtol=1e-2, atol=1e-4)
def compare_optimizer(opt1, opt2, shape, dtype, w_stype='default', g_stype='default',
                      rtol=1e-4, atol=1e-5, compare_states=True):
    """Run one update step with ``opt1`` and ``opt2`` on equal inputs and compare the results.

    Parameters
    ----------
    opt1, opt2 : optimizers exposing ``create_state_multi_precision`` and
        ``update_multi_precision``.
    shape, dtype : shape and dtype of the generated weight and gradient.
    w_stype, g_stype : storage type of weight / gradient: 'default',
        'row_sparse' or 'csr'. For sparse types ``opt2`` gets the sparse array
        and ``opt1`` a dense copy of it.
    rtol, atol : tolerances for the post-update comparisons.
    compare_states : whether optimizer states are compared before and after the update.

    Raises
    ------
    Exception
        If ``w_stype`` or ``g_stype`` is not one of the supported storage types.
    """
    sparse_stypes = ('row_sparse', 'csr')

    # Weights: sparse weights are generated fully dense (density=1).
    if w_stype == 'default':
        w2 = mx.random.uniform(shape=shape, ctx=default_context(), dtype=dtype)
        w1 = w2.copyto(default_context())
    elif w_stype in sparse_stypes:
        w2 = rand_ndarray(shape, w_stype, density=1, dtype=dtype)
        w1 = w2.copyto(default_context()).tostype('default')
    else:
        raise Exception("type not supported yet")

    # Gradients.
    if g_stype == 'default':
        g2 = mx.random.uniform(shape=shape, ctx=default_context(), dtype=dtype)
        g1 = g2.copyto(default_context())
    elif g_stype in sparse_stypes:
        g2 = rand_ndarray(shape, g_stype, dtype=dtype)
        g1 = g2.copyto(default_context()).tostype('default')
    else:
        raise Exception("type not supported yet")

    state1 = opt1.create_state_multi_precision(0, w1)
    state2 = opt2.create_state_multi_precision(0, w2)
    if compare_states:
        # Freshly created states are compared with the helper's default tolerances.
        compare_ndarray_tuple(state1, state2)

    opt1.update_multi_precision(0, w1, g1, state1)
    opt2.update_multi_precision(0, w2, g2, state2)
    if compare_states:
        compare_ndarray_tuple(state1, state2, rtol=rtol, atol=atol)
    assert_almost_equal(w1.asnumpy(), w2.asnumpy(), rtol=rtol, atol=atol)
def test_np_broadcast_to():
    """Check forward output and input gradient of ``F.np.broadcast_to`` against NumPy."""
    class TestBroadcastTo(HybridBlock):
        """Block that broadcasts its input to a fixed destination shape."""

        def __init__(self, dst_shape):
            super(TestBroadcastTo, self).__init__()
            self._dst_shape = dst_shape

        def hybrid_forward(self, F, x):
            return F.np.broadcast_to(x, self._dst_shape)

    # (source shape, destination shape) pairs, including zero-size dimensions.
    shape_pairs = [
        ((), (1, 2, 4, 5)),
        ((1, ), (4, 5, 6)),
        ((1, 0), (2, 4, 0)),
        ((1, 1), (2, 4, 0)),
        ((4, 1), (1, 2, 3, 4, 5)),
        ((4, 1), (1, 0, 3, 4, 5)),
    ]
    for src_shape, dst_shape in shape_pairs:
        for hybridize in [True, False]:
            block = TestBroadcastTo(dst_shape)
            if hybridize:
                block.hybridize()

            src = _np.random.uniform(size=src_shape).astype(np.float32)
            expected_ret = _np.broadcast_to(src, dst_shape)
            src_mx = np.array(src, dtype=src.dtype)
            src_mx.attach_grad()
            with mx.autograd.record():
                ret = block(src_mx)
            assert_almost_equal(ret.asnumpy(), expected_ret, rtol=1e-5, atol=1e-6,
                                use_broadcast=False)

            # Expected gradient: ones of the output shape collapsed to the source shape.
            ret.backward()
            expected_grad = collapse_sum_like(_np.ones_like(expected_ret), src_shape)
            assert_almost_equal(src_mx.grad.asnumpy(), expected_grad, rtol=1e-5, atol=1e-6,
                                use_broadcast=False)
def test_subgraph_exe2(sym, subgraph_backend, op_names):
    """Use env var MXNET_SUBGRAPH_BACKEND=default to trigger graph partitioning in _simple_bind
    and compare results of the partitioned sym and the original sym.

    Parameters
    ----------
    sym : 3-tuple whose first element is the symbol under test; the other two
        elements are ignored here.
    subgraph_backend : str
        Backend name used both for the environment variable and for the
        op-name property calls.
    op_names : list of str
        Operator names registered for the backend while the partitioned
        executor is built and run.
    """
    def get_executor(sym, subgraph_backend=None, op_names=None, original_exec=None):
        """Bind ``sym``, fill its inputs and run forward.

        Inputs are filled with uniform random data, or copied from
        ``original_exec`` when one is given so both executors see equal data.
        ``subgraph_backend`` and ``op_names`` are accepted but not used.
        """
        exe = sym._simple_bind(ctx=mx.current_context(), grad_req='null')
        for name in sym.list_inputs():
            if name in exe.arg_dict:
                dst = exe.arg_dict[name]
                src = None if original_exec is None else original_exec.arg_dict[name]
            else:
                assert name in exe.aux_dict
                dst = exe.aux_dict[name]
                src = None if original_exec is None else original_exec.aux_dict[name]
            dst[:] = mx.nd.random.uniform(shape=dst.shape) if src is None else src
        exe.forward()
        return exe

    sym, _, _ = sym
    original_exec = get_executor(sym)
    with environment('MXNET_SUBGRAPH_BACKEND', subgraph_backend):
        check_call(
            _LIB.MXSetSubgraphPropertyOpNames(c_str(subgraph_backend),
                                              mx_uint(len(op_names)),
                                              c_str_array(op_names)))
        try:
            partitioned_exec = get_executor(sym, subgraph_backend, op_names, original_exec)
        finally:
            # Always unregister the op names, even when binding or forward
            # fails, so the setting cannot leak into later tests.
            check_call(
                _LIB.MXRemoveSubgraphPropertyOpNames(c_str(subgraph_backend)))

    outputs1 = original_exec.outputs
    outputs2 = partitioned_exec.outputs
    assert len(outputs1) == len(outputs2)
    for out1, out2 in zip(outputs1, outputs2):
        # The summed absolute difference must be zero.
        assert_almost_equal((out1 - out2).abs().sum().asnumpy(),
                            onp.zeros(shape=(1, )))
def verify(batch_size):
    """Compare batched ``Fold`` outputs with per-sample network outputs.

    The first 100 samples are read from the module-level ``l_sentences``,
    ``r_sentences``, ``l_trees`` and ``r_trees``. Every ``batch_size`` samples
    (and once more at the end) the folded batch is evaluated and compared with
    the concatenation of ``net`` applied to each sample; on a mismatch,
    diagnostics are printed before the assertion.
    """
    print('verifying batch size: ', batch_size)
    fold = Fold()
    num_samples = 100
    inputs = []
    fold_preds = []
    for i in range(num_samples):
        # get next sample
        l_sent, r_sent = l_sentences[i], r_sentences[i]
        l_tree, r_tree = l_trees[i], r_trees[i]
        inputs.append((l_sent, r_sent, l_tree, r_tree))
        fold_preds.append(net.fold_encode(fold, l_sent, r_sent, l_tree, r_tree))

        # Keep accumulating until a batch is full or the data is exhausted.
        if (i + 1) % batch_size != 0 and (i + 1) != num_samples:
            continue

        fold_outs = fold([fold_preds])[0]
        direct_outs = [net(ls, rs, lt, rt) for ls, rs, lt, rt in inputs]
        outs = mx.nd.concat(*direct_outs, dim=0)
        if not almost_equal(fold_outs.asnumpy(), outs.asnumpy()):
            # Dump the current and previous sample to help debugging.
            print(fold_preds)
            print('l_sents: ', l_sent, l_sentences[i - 1])
            print('r_sents: ', r_sent, r_sentences[i - 1])
            report = (str(l_tree), str_tree(l_tree),
                      str(r_tree), str_tree(r_tree),
                      str(l_trees[i - 1]), str_tree(l_trees[i - 1]),
                      str(r_trees[i - 1]), str_tree(r_trees[i - 1]),
                      str(fold))
            print('\n'.join(report))
        assert_almost_equal(fold_outs.asnumpy(), outs.asnumpy())

        # Start a new batch.
        fold_preds = []
        inputs = []
        fold.reset()
def test_instance_norm():
    """Compare ``mx.symbol.InstanceNorm`` on a large input with a NumPy reference.

    The input shape is ``(LARGE_X, 1, SMALL_Y)`` and normalization statistics
    are taken over the last axis.
    """
    dtype = np.float32
    forward_check_eps = 1E-3
    axis = -1
    eps = 1E-5
    in_shape = (LARGE_X, 1, SMALL_Y)
    ctx = mx.cpu()

    # Implementation of instance normalization using numpy
    def npy_instance_norm(data, gamma, beta, axis, eps=1E-5):
        """Normalize ``data`` along ``axis`` and apply the ``gamma``/``beta`` affine transform."""
        if axis < 0:
            axis += data.ndim
        mean = data.mean(axis=axis, keepdims=True).astype(dtype)
        var = data.var(axis=axis, keepdims=True).astype(dtype)
        std = np.sqrt(var + dtype(eps)).astype(dtype)
        return gamma * (data - mean) / std + beta

    data = np.random.normal(0, 1, in_shape).astype(dtype)
    gamma = np.random.normal(0, 1, (1,)).astype(dtype)
    beta = np.random.normal(0, 1, (1,)).astype(dtype)

    data_s = mx.symbol.Variable('data')
    gamma_s = mx.symbol.Variable('gamma')
    beta_s = mx.symbol.Variable('beta')
    out_s = mx.symbol.InstanceNorm(data=data_s, gamma=gamma_s, beta=beta_s, eps=eps)
    exe = out_s.simple_bind(ctx, data=in_shape)
    exe.arg_dict['data'][:] = data
    exe.arg_dict['gamma'][:] = gamma
    exe.arg_dict['beta'][:] = beta
    out_nd = exe.forward()[0]

    # Calls implementation of instance norm in numpy and compares the output
    out = npy_instance_norm(data, gamma, beta, axis, eps)
    assert_almost_equal(out, out_nd.asnumpy(), forward_check_eps, forward_check_eps)
def test_subgraph_exe1(sym, subgraph_backend, op_names):
    """Use the partitioned sym to _simple_bind an executor and compare the outputs
    with those of the original executor.

    ``sym`` is a 3-tuple whose first element is the symbol under test.
    """
    sym, _, _ = sym
    handle = SymbolHandle()
    check_call(
        _LIB.MXBuildSubgraphByOpNames(sym.handle, c_str(subgraph_backend),
                                      mx_uint(len(op_names)),
                                      c_str_array(op_names),
                                      ctypes.byref(handle)))
    partitioned_sym = Symbol(handle)

    # Partitioning must not change the symbol's external interface.
    assert partitioned_sym.list_inputs() == sym.list_inputs()
    assert partitioned_sym.list_arguments() == sym.list_arguments()
    assert partitioned_sym.list_auxiliary_states() == sym.list_auxiliary_states()

    exe = sym._simple_bind(ctx=mx.current_context(), grad_req='null')
    partitioned_exe = partitioned_sym._simple_bind(ctx=mx.current_context(),
                                                   grad_req='null')

    # Fill every input with random data and mirror it into the partitioned executor.
    for name in sym.list_inputs():
        if name in exe.arg_dict:
            src, dst = exe.arg_dict, partitioned_exe.arg_dict
        else:
            assert name in exe.aux_dict
            src, dst = exe.aux_dict, partitioned_exe.aux_dict
        src[name][:] = mx.nd.random.uniform(shape=src[name].shape)
        dst[name][:] = src[name]

    exe.forward()
    partitioned_exe.forward()
    assert len(exe.outputs) == len(partitioned_exe.outputs)
    for ref_out, part_out in zip(exe.outputs, partitioned_exe.outputs):
        assert_almost_equal((ref_out - part_out).abs().sum().asnumpy(),
                            onp.zeros(shape=(1, )))
def test_predictor():
    """Export a small dense network and check ``Predictor`` reproduces its outputs.

    Covers an initial forward pass and a forward pass after reshaping the
    predictor to a different batch size.
    """
    prefix = 'test_predictor_simple_dense'
    symbol_file = "%s-symbol.json" % prefix
    param_file = "%s-0000.params" % prefix

    # two inputs with different batch sizes
    input1 = np.random.uniform(size=(1, 3))
    input2 = np.random.uniform(size=(3, 3))

    # define a simple model
    block = gluon.nn.HybridSequential()
    block.add(gluon.nn.Dense(7))
    block.add(gluon.nn.Dense(3))
    block.hybridize()
    block.initialize()
    out1 = block.forward(nd.array(input1))
    out2 = block.forward(nd.array(input2))
    block.export(prefix)

    # Read the exported files with context managers so the handles are closed
    # deterministically instead of being left to the garbage collector.
    with open(symbol_file, "r") as symbol_fh:
        symbol_json = symbol_fh.read()
    with open(param_file, "rb") as param_fh:
        param_bytes = param_fh.read()

    # create a predictor
    predictor = Predictor(symbol_json, param_bytes, {'data': input1.shape})

    # forward and get output
    predictor.forward(data=input1)
    predictor_out1 = predictor.get_output(0)
    assert_almost_equal(out1.asnumpy(), predictor_out1, rtol=1e-5, atol=1e-6)

    # reshape
    predictor.reshape({'data': input2.shape})
    predictor.forward(data=input2)
    predictor_out2 = predictor.get_output(0)
    assert_almost_equal(out2.asnumpy(), predictor_out2, rtol=1e-5, atol=1e-6)

    # destroy the predictor
    del predictor
def test_transformer_encoder():
    """Check ``TransformerEncoder`` output shapes, padding masking and attention weights.

    Runs over 1-2 layers, with/without attention output and with/without
    residual connections, for sequence lengths 5 and 10.
    """
    ctx = mx.current_context()
    units = 16
    num_heads = 8
    for num_layers in range(1, 3):
        for output_attention in [True, False]:
            for use_residual in [False, True]:
                encoder = TransformerEncoder(num_layers=num_layers, max_length=10,
                                             units=units, hidden_size=32,
                                             num_heads=num_heads, dropout=0.0,
                                             use_residual=use_residual,
                                             output_attention=output_attention,
                                             prefix='transformer_encoder_')
                encoder.initialize(ctx=ctx)
                encoder.hybridize()
                for batch_size in [4]:
                    for seq_length in [5, 10]:
                        inputs_nd = mx.nd.random.normal(
                            0, 1, shape=(batch_size, seq_length, units), ctx=ctx)
                        valid_length_nd = mx.nd.array(
                            np.random.randint(1, seq_length, size=(batch_size,)), ctx=ctx)
                        encoder_outputs, additional_outputs = encoder(
                            inputs_nd, valid_length=valid_length_nd)
                        valid_length_npy = valid_length_nd.asnumpy()
                        encoder_outputs = encoder_outputs.asnumpy()

                        # Positions past the valid length must be zero in the output.
                        for i in range(batch_size):
                            if valid_length_npy[i] < seq_length - 1:
                                start = int(valid_length_npy[i])
                                padded_out = encoder_outputs[i, start:, :]
                                assert_almost_equal(padded_out, np.zeros_like(padded_out),
                                                    1E-6, 1E-6)
                        assert encoder_outputs.shape == (batch_size, seq_length, units)

                        if not output_attention:
                            assert len(additional_outputs) == 0
                            continue

                        assert len(additional_outputs) == num_layers
                        attention_out = additional_outputs[0][0].asnumpy()
                        assert attention_out.shape == (batch_size, num_heads,
                                                       seq_length, seq_length)
                        for i in range(batch_size):
                            mem_v_len = int(valid_length_npy[i])
                            # No attention weight may fall on padded positions.
                            if mem_v_len < seq_length - 1:
                                assert (attention_out[i, :, :, mem_v_len:] == 0).all()
                            # Attention weights over the last axis must sum to one.
                            if mem_v_len > 0:
                                assert_almost_equal(attention_out[i, :, :, :].sum(axis=-1),
                                                    np.ones(attention_out.shape[1:3]))
def check_fusion(sym, data_shape, attrs_op):
    """Check that the MKLDNN subgraph backend fuses ``sym`` and preserves its outputs.

    Parameters
    ----------
    sym : symbol to partition and execute.
    data_shape : input shape forwarded to ``check_quantize``.
    attrs_op : attribute names that must equal 'true' on every fused
        ``sg_mkldnn_conv`` node.
    """
    sym_sg = sym.get_backend_symbol("MKLDNN")
    # The partitioned graph must contain at least one fused conv node.
    assert 'sg_mkldnn_conv' in ''.join(sym_sg.get_internals().list_outputs())
    for node_name, attrs in sym_sg.attr_dict().items():
        if 'sg_mkldnn_conv' in node_name:
            for attr_op in attrs_op:
                assert attrs[attr_op] == 'true'

    arg_shapes, _, aux_shapes = sym.infer_shape()
    arg_array = [
        mx.nd.random.uniform(-1, 1, shape=shape) for shape in arg_shapes
    ]
    aux_array = [mx.nd.random.uniform(shape=shape) for shape in aux_shapes]

    # Reference run, before MXNET_SUBGRAPH_BACKEND is set by this function.
    exe = sym.bind(ctx=mx.current_context(),
                   args=arg_array,
                   aux_states=aux_array,
                   grad_req='null')
    exe.forward()

    # Run with the backend selected through the environment. The previous
    # value is restored afterwards, even on failure, so the setting neither
    # leaks into nor is erased for the rest of the process.
    env_key = 'MXNET_SUBGRAPH_BACKEND'
    previous = os.environ.get(env_key)
    os.environ[env_key] = 'MKLDNN'
    try:
        exe_sg = sym.bind(ctx=mx.current_context(),
                          args=arg_array,
                          aux_states=aux_array,
                          grad_req='null')
        exe_sg.forward()
    finally:
        if previous is None:
            os.environ.pop(env_key, None)
        else:
            os.environ[env_key] = previous

    for ref_out, fused_out in zip(exe.outputs, exe_sg.outputs):
        assert_almost_equal(ref_out.asnumpy(), fused_out.asnumpy(),
                            rtol=1e-3, atol=1e-3)

    # fp32 to int8
    for out_type in ('uint8', 'int8', 'auto'):
        check_quantize(sym, data_shape, out_type)
        check_quantize(sym, data_shape, out_type, gluon_forward=True)
def test_elemwise_add():
    """Check ``elemwise_add`` forward against NumPy and its gradient numerically."""
    # Forward check on dense inputs of a random 4-d shape.
    lhs_sym = mx.sym.Variable("a")
    rhs_sym = mx.sym.Variable("b")
    dshape = rand_shape_nd(4)
    a_shape = tuple(dshape)
    b_shape = tuple(dshape)
    add_sym = mx.sym.elemwise_add(lhs_sym, rhs_sym)
    a = np.random.uniform(-1, 1, a_shape)
    b = np.random.uniform(-1, 1, b_shape)
    exe = add_sym._simple_bind(ctx=mx.cpu(), a=a_shape, b=b_shape)
    outputs = exe.forward(is_train=False, a=a, b=b)
    expected = np.add(a, b)
    assert_almost_equal(outputs[0].asnumpy(), expected, rtol=1e-6, atol=1e-6)

    def check_elemwise_add_training(stype):
        """Numerically check the gradient for one storage type at several densities."""
        data_shape = rand_shape_nd(4)
        for density in [1.0, 0.5, 0.0]:
            sym = mx.sym.elemwise_add(mx.sym.Variable('a'), mx.sym.Variable('b'))
            lhs = rand_ndarray(shape=data_shape, stype=stype, density=density)
            rhs = rand_ndarray(shape=data_shape, stype=stype, density=density)
            check_numeric_gradient(sym, [lhs, rhs], numeric_eps=1e-3, rtol=1e-3, atol=5e-3)

    for stype in ['row_sparse', 'default']:
        check_elemwise_add_training(stype)
def test_subgraph_backend_gluon(sym, subgraph_backend, op_names, tmp_path):
    """Call hybridize() to partition the graph, and then compare results of the partitioned
    sym and the original sym. Here do an inference before hybridizing with the subgraph_backend
    which means we'll pass shapes/types.

    Parameters
    ----------
    sym : 3-tuple of (symbol, input names, input shapes).
    subgraph_backend : str
        Backend passed to ``optimize_for`` and to the op-name property calls.
    op_names : list of str
        Operator names registered for the backend during partitioning.
    tmp_path : path-like directory the exported model is written to.
    """
    # create Gluon block for given symbol
    inputs = [mx.sym.var(i, dtype=mx_real_t) for i in sym[1]]
    sym_block = nn.SymbolBlock(sym[0], inputs)
    sym_block.initialize(ctx=mx.current_context())
    x = [
        mx.nd.random.uniform(shape=s, ctx=mx.current_context())
        for s in sym[2]
    ]

    # hybridize and export to get baseline
    sym_block.hybridize()
    outputs1 = sym_block(*x)
    sym_filename, params_filename = sym_block.export(
        str(tmp_path / 'sym-block'))

    # load model and partition
    sym_block = nn.SymbolBlock.imports(sym_filename,
                                       sym[1],
                                       params_filename,
                                       ctx=mx.current_context())
    check_call(
        _LIB.MXSetSubgraphPropertyOpNamesV2(c_str(subgraph_backend),
                                            mx_uint(len(op_names)),
                                            c_str_array(op_names)))
    try:
        sym_block.optimize_for(*x, backend=subgraph_backend)
        outputs2 = sym_block(*x)
    finally:
        # Always unregister the op names, even if partitioning or inference
        # fails, so the setting cannot leak into later tests.
        check_call(
            _LIB.MXRemoveSubgraphPropertyOpNamesV2(c_str(subgraph_backend)))

    # compare outputs
    assert len(outputs1) == len(outputs2)
    for out1, out2 in zip(outputs1, outputs2):
        assert_almost_equal((out1 - out2).abs().sum().asnumpy(),
                            onp.zeros(shape=(1, )))
def test_subgraph_exe3(sym, subgraph_backend, op_names):
    """Use the partitioned sym to bind an executor and compare the outputs
    with those of the original executor.

    ``sym`` is a 3-tuple whose first element is the symbol under test.
    """
    sym, _, _ = sym
    handle = SymbolHandle()
    check_call(
        _LIB.MXBuildSubgraphByOpNames(sym.handle, c_str(subgraph_backend),
                                      mx_uint(len(op_names)),
                                      c_str_array(op_names),
                                      ctypes.byref(handle)))
    partitioned_sym = Symbol(handle)

    # Partitioning must not change the symbol's external interface.
    assert partitioned_sym.list_inputs() == sym.list_inputs()
    assert partitioned_sym.list_arguments() == sym.list_arguments()
    assert partitioned_sym.list_auxiliary_states() == sym.list_auxiliary_states()

    # Both executors are bound to the very same argument/aux arrays.
    arg_shapes, _, aux_shapes = sym.infer_shape()
    arg_array = [mx.nd.random.uniform(shape=shape) for shape in arg_shapes]
    aux_array = [mx.nd.random.uniform(shape=shape) for shape in aux_shapes]
    exe = sym._bind(ctx=mx.current_context(),
                    args=arg_array,
                    aux_states=aux_array,
                    grad_req='null')
    partitioned_exe = partitioned_sym._bind(ctx=mx.current_context(),
                                            args=arg_array,
                                            aux_states=aux_array,
                                            grad_req='null')
    exe.forward()
    partitioned_exe.forward()

    assert len(exe.outputs) == len(partitioned_exe.outputs)
    for ref_out, part_out in zip(exe.outputs, partitioned_exe.outputs):
        assert_almost_equal((ref_out - part_out).abs().sum().asnumpy(),
                            onp.zeros(shape=(1, )))
def test_mkldnn_engine_threading():
    """Run a conv net once directly and once while iterating a Gluon DataLoader.

    NOTE(review): a second function with this same name is defined later in
    the file and would shadow this one at import time -- confirm which is intended.
    """
    net = gluon.nn.HybridSequential()
    with net.name_scope():
        net.add(gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None))
    net.collect_params().initialize(ctx=mx.cpu())

    class Dummy(gluon.data.Dataset):
        """Two-item dataset; only used to make the DataLoader yield a batch."""

        def __len__(self):
            return 2

        def __getitem__(self, key):
            return key, np.ones((3, 224, 224)), np.ones((10, ))

    loader = gluon.data.DataLoader(Dummy(), batch_size=2, num_workers=1)
    input_shape = (32, 3, 32, 32)

    # trigger mkldnn execution thread
    y = net(mx.nd.array(np.ones(input_shape))).asnumpy()

    # Iterating the Gluon dataloader is meant to trigger a different
    # execution thread; only the first batch is needed.
    for _ in loader:
        y = net(mx.nd.array(np.ones(input_shape))).asnumpy()
        # output should be 0.016711406 (non-mkldnn mode output)
        assert_almost_equal(y[0, 0, 0, 0], 0.016711406)
        break
def test_mkldnn_engine_threading():
    """Trigger the mkldnn engine from different threads of execution.

    A simple model computation is kicked off first; a Gluon data iterator is
    then used to trigger a different thread context, and the model is
    executed again from inside that iteration.
    """
    import mxnet as mx
    from mxnet import gluon, nd

    net = gluon.nn.HybridSequential()
    with net.name_scope():
        net.add(gluon.nn.Conv2D(channels=32, kernel_size=3, activation=None))
    net.collect_params().initialize(ctx=mx.cpu())

    class Dummy(gluon.data.Dataset):
        """Two-item dataset; only used to make the DataLoader yield a batch."""

        def __len__(self):
            return 2

        def __getitem__(self, key):
            return key, np.ones((3, 224, 224)), np.ones((10, ))

    loader = gluon.data.DataLoader(Dummy(), batch_size=2, num_workers=1)
    input_shape = (32, 3, 32, 32)

    # trigger mkldnn execution thread
    y = net(nd.array(np.ones(input_shape))).asnumpy()

    # Iterating the Gluon dataloader is meant to trigger a different
    # execution thread; only the first batch is needed.
    for _ in loader:
        y = net(nd.array(np.ones(input_shape))).asnumpy()
        # output should have 0.3376348
        assert_almost_equal(y[0, 0, 0, 0], 0.3376348)
        break
def test_seg_broadcast_binary():
    """Compare ``seg_broadcast_add`` / ``seg_broadcast_mul`` with NumPy references on CPU and GPU."""
    # (batch_size, seg_num, nnz) problem sizes.
    problem_sizes = [(1, 5, 10), (10, 50, 100), (4, 1000, 10000)]
    for ctx in [mx.cpu(), mx.gpu()]:
        # (NumPy reference, mx.nd operator, short name used for logging)
        operators = [
            (npy_seg_broadcast_add, nd.contrib.seg_broadcast_add, 'add'),
            (npy_seg_broadcast_mul, nd.contrib.seg_broadcast_mul, 'mul'),
        ]
        for np_func, nd_func, name in operators:
            for batch_size, seg_num, nnz in problem_sizes:
                lhs_npy = np.random.normal(0, 1, (batch_size, nnz))
                rhs_npy = np.random.normal(0, 1, (batch_size, seg_num))
                indptr_npy = rand_indptr(seg_num, nnz)

                # Ground truth from the NumPy reference.
                print('broadcast_' + name)
                gt_npy = np_func(lhs_npy, rhs_npy, indptr_npy)

                # Same computation through mx.nd on the current context.
                ret_nd = nd_func(lhs=nd.array(lhs_npy, dtype=np.float32, ctx=ctx),
                                 rhs=nd.array(rhs_npy, dtype=np.float32, ctx=ctx),
                                 indptr=nd.array(indptr_npy, dtype=np.int32, ctx=ctx))
                assert_almost_equal(ret_nd.asnumpy(), gt_npy, rtol=1E-4, atol=1E-4)
def test_subgraph_backend_gluon_ext2(tmpdir):
    """Compare a small dense network before and after partitioning with the 'default' backend.

    Parameters are saved under ``tmpdir``, reloaded into a fresh network, and
    the network is hybridized with ``FullyConnected`` registered as the op to
    partition.
    """
    class Net(gluon.HybridBlock):
        """Three-layer MLP with ReLU after the first two layers."""

        def __init__(self, **kwargs):
            super(Net, self).__init__(**kwargs)
            with self.name_scope():
                self.fc1 = nn.Dense(256)
                self.fc2 = nn.Dense(128)
                self.fc3 = nn.Dense(2)

        def hybrid_forward(self, F, x):
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            return self.fc3(x)

    # regular inference
    x = nd.random.normal(shape=(1, 512), ctx=mx.current_context())
    net = Net()
    net.collect_params().initialize(ctx=mx.current_context())
    outputs1 = net(x)

    param_path = os.path.join(str(tmpdir), 'test_subgraph_backend_gluon_ext2.params')
    net.save_parameters(param_path)

    # after partitioning
    net = Net()
    net.load_parameters(param_path, ctx=mx.current_context())
    subgraph_backend = 'default'
    op_names = ['FullyConnected']
    check_call(_LIB.MXSetSubgraphPropertyOpNamesV2(c_str(subgraph_backend),
                                                   mx_uint(len(op_names)),
                                                   c_str_array(op_names)))
    try:
        net.hybridize(backend=subgraph_backend)
        outputs2 = net(x)
    finally:
        # Always unregister the op names, even if hybridization or inference
        # fails, so the setting cannot leak into later tests.
        check_call(_LIB.MXRemoveSubgraphPropertyOpNamesV2(c_str(subgraph_backend)))

    # compare outputs
    assert len(outputs1) == len(outputs2)
    for out1, out2 in zip(outputs1, outputs2):
        assert_almost_equal((out1 - out2).abs().sum().asnumpy(), np.zeros(shape=(1,)))
def test_np_loss_ndarray():
    """Check L1, L2 and softmax cross-entropy Gluon losses on small fixed inputs.

    Ported from test_loss.test_loss_ndarray.
    """
    output = np.array([1, 2, 3, 4])
    label = np.array([1, 3, 5, 7])
    weighting = np.array([0.5, 1, 0.5, 1])

    # (loss block, optional sample weight, expected sum of the loss)
    scalar_cases = [
        (gluon.loss.L1Loss(), None, 6.),
        (gluon.loss.L1Loss(weight=0.5), None, 3.),
        (gluon.loss.L1Loss(), weighting, 5.),
        (gluon.loss.L2Loss(), None, 7.),
        (gluon.loss.L2Loss(weight=0.25), None, 1.75),
        (gluon.loss.L2Loss(), weighting, 6),
    ]
    for loss, sample_weight, expected in scalar_cases:
        if sample_weight is None:
            values = loss(output, label)
        else:
            values = loss(output, label, sample_weight)
        assert float(np.sum(values)) == expected

    # Softmax cross-entropy, without and with per-sample weights.
    output = np.array([[0, 2], [1, 4]])
    label = np.array([0, 1])
    weighting = np.array([[0.5], [1.0]])
    loss = gluon.loss.SoftmaxCrossEntropyLoss()

    unweighted = loss(output, label).asnumpy()
    assert_almost_equal(unweighted, _np.array([2.12692809, 0.04858733]),
                        use_broadcast=False, rtol=1e-3)
    weighted = loss(output, label, weighting).asnumpy()
    assert_almost_equal(weighted, _np.array([1.06346405, 0.04858733]),
                        use_broadcast=False, rtol=1e-3)
def test_softmax_cross_entropy():
    """Compare ``mx.nd.softmax_cross_entropy`` on a large all-ones input with NumPy.

    The MXNet inputs and operator are set explicitly to float64; NumPy works
    in double precision by default.
    """
    batch_size = SMALL_Y
    num_labels = LARGE_X
    input_data = mx.nd.ones((batch_size, num_labels), dtype="float64")
    input_label = mx.nd.zeros((batch_size, ), dtype="float64")

    # Identical logits give a uniform softmax of 1/num_labels per row
    # (softmax_cross_entropy uses its default axis; it would be 1/batch_size
    # for a softmax over axis 0).
    true_softmax = np.full((batch_size, num_labels), (1 / num_labels))

    # Every label is class 0, so the one-hot matrix selects the first column.
    np_one_hot_label = np.zeros((batch_size, num_labels))
    np_one_hot_label[:, 0] = 1

    negative_log_prob = -np.log(true_softmax)
    true_softmax_cross_entropy = np.sum(negative_log_prob * np_one_hot_label)
    mx_softmax_cross_entropy = mx.nd.softmax_cross_entropy(input_data, input_label,
                                                           dtype="float64")
    assert_almost_equal(mx_softmax_cross_entropy.asnumpy(),
                        true_softmax_cross_entropy, rtol=1e-3, atol=1e-5)
def check_amp_fuse(net, data_example, expected_sym=None, quantized_nodes=[], rtol=0.05):
    """Convert ``net`` to low precision with AMP and compare it with the fp32 reference.

    Parameters
    ----------
    net : block to hybridize, optimize and convert.
    data_example : example inputs; unpacked when calling the networks.
    expected_sym : optional symbol the converted graph's structure is compared with.
    quantized_nodes : names excluded from AMP conversion and forwarded to
        ``check_amp_with_quantization``. The default list is only read and
        forwarded here, never mutated by this function.
    rtol : relative tolerance for the output comparison (atol is fixed at 1.0).
    """
    net.hybridize()
    out_ref = net(*data_example)

    net.optimize_for(data_example, backend=SG_PASS_NAME)  # amp pass works only on oneDNN nodes
    lp_net = amp.convert_hybrid_block(net, data_example, target_dtype=AMP_DTYPE,
                                      excluded_sym_names=quantized_nodes,
                                      cast_params_offline=True,
                                      device=mx.current_context())
    lp_net.optimize_for(data_example, backend=AMP_SG_PASS_NAME)
    out_lp_net = lp_net(*data_example)

    # check outputs
    # Each result is wrapped based on its OWN type. Testing `out_ref` for both
    # would never wrap a single low-precision output (out_ref is already a
    # list by then), and zip() would iterate over that output's first axis.
    if not isinstance(out_ref, list):
        out_ref = [out_ref]
    if not isinstance(out_lp_net, list):
        out_lp_net = [out_lp_net]
    for ref_out, lp_out in zip(out_ref, out_lp_net):
        assert_almost_equal(ref_out, lp_out, rtol=rtol, atol=1.0)

    # check graph
    if expected_sym is not None:
        lp_symnet = lp_net.export(None, remove_amp_cast=False)[0]
        same_graph_structure(lp_symnet, expected_sym, True)

    # check amp with quantization
    check_amp_with_quantization(net, data_example, quantized_nodes)
def _check_subgraph_exe8(sym, subgraph_backend, op_names):
    """Call optimize_for to infer shapes, types and dtypes followed by graph partitioning,
    then bind and compare results of the partitioned sym and the original sym.

    Parameters
    ----------
    sym : symbol under test.
    subgraph_backend : str
        Backend passed to ``optimize_for`` and to the op-name property calls.
    op_names : list of str
        Operator names registered for the backend during partitioning.
    """
    # bind
    arg_shapes, _, aux_shapes = sym.infer_shape()
    arg_array = [mx.nd.random.uniform(shape=shape) for shape in arg_shapes]
    aux_array = [mx.nd.random.uniform(shape=shape) for shape in aux_shapes]
    exe1 = sym.bind(ctx=mx.current_context(),
                    args=arg_array,
                    aux_states=aux_array,
                    grad_req='null')
    exe1.forward()

    # infer shape/type before partition before bind
    check_call(
        _LIB.MXSetSubgraphPropertyOpNamesV2(c_str(subgraph_backend),
                                            mx_uint(len(op_names)),
                                            c_str_array(op_names)))
    try:
        part_sym = sym.optimize_for(subgraph_backend, arg_array)
    finally:
        # Always unregister the op names, even if partitioning fails, so the
        # setting cannot leak into later tests.
        check_call(
            _LIB.MXRemoveSubgraphPropertyOpNamesV2(c_str(subgraph_backend)))

    exe2 = part_sym.bind(ctx=mx.current_context(),
                         args=arg_array,
                         aux_states=aux_array,
                         grad_req='null')
    exe2.forward()

    # compare outputs
    outputs1 = exe1.outputs
    outputs2 = exe2.outputs
    assert len(outputs1) == len(outputs2)
    for out1, out2 in zip(outputs1, outputs2):
        assert_almost_equal((out1 - out2).abs().sum().asnumpy(),
                            np.zeros(shape=(1, )))
def test_sparse_aggregator():
    """Aggregate sparse ndarrays on multiple devices.

    Row-sparse values living on four CPU contexts are pushed to and pulled
    back from the kvstore, first for a single key and then for a list of keys.
    Relies on the module-level ``shape``, ``keys`` and ``init_kv``.
    """
    stype = 'row_sparse'
    kv = init_kv(stype)

    # devices
    num_devs = 4
    devs = [mx.Context('cpu', i) for i in range(num_devs)]

    def dense_sum(arrays):
        """Sum the dense NumPy views of ``arrays`` into a zero array of ``shape``."""
        total = np.zeros(shape)
        for arr in arrays:
            total += arr.asnumpy()
        return total

    # single key
    vals = [rand_ndarray(shape, stype).copyto(dev) for dev in devs]
    expected_sum = dense_sum(vals)
    kv.push(3, vals)
    kv.pull(3, out=vals)
    # Summing the pulled copies must give num_devs times the pushed total.
    assert_almost_equal(dense_sum(vals), expected_sum * num_devs)

    # list of keys: the same per-device list object is repeated for every key
    per_device = [rand_ndarray(shape, stype).copyto(dev) for dev in devs]
    vals = [per_device] * len(keys)
    expected_sum = dense_sum(vals[0])
    kv.push(keys, vals)
    kv.pull(keys, out=vals)
    for pulled in vals:
        assert_almost_equal(dense_sum(pulled), expected_sum * num_devs)