def check_binary_op_result(shape1, shape2, op, dtype=None):
    """Check a binary element-wise operator against its reference result.

    Parameters
    ----------
    shape1, shape2 : tuple or None
        Shape of the corresponding operand.  ``None`` makes that operand a
        random Python scalar instead of an array.
    op
        Operator identifier, forwarded unchanged to ``get_np_ret`` and
        ``TestBinaryElementWiseOp``.
    dtype : optional
        Forwarded to ``rand_ndarray`` for array operands.

    Every operand is generated as ``abs(x) + 1``.  The comparison runs twice:
    once with the block hybridized and once without.

    NOTE(review): when both shapes are ``None`` no scalar is selected and the
    array/array path calls ``.as_np_ndarray()`` on a plain float -- callers are
    presumably expected to pass at least one real shape; confirm.
    """

    def _make_operand(shape):
        # Returns the (mxnet-side, numpy-side) pair for one operand.
        if shape is None:
            value = abs(_np.random.uniform()) + 1
            return value, value
        value = rand_ndarray(shape, dtype=dtype).abs() + 1
        return value, value.asnumpy()

    mx_input1, np_input1 = _make_operand(shape1)
    mx_input2, np_input2 = _make_operand(shape2)

    # Exactly one array operand means the other one is baked into the block
    # as a scalar; ``reverse`` records that the scalar is the left operand.
    is_array1 = isinstance(mx_input1, mx.nd.NDArray)
    is_array2 = isinstance(mx_input2, mx.nd.NDArray)
    scalar = None
    reverse = False
    if is_array1 and not is_array2:
        scalar = mx_input2
    elif is_array2 and not is_array1:
        scalar = mx_input1
        reverse = True

    np_out = get_np_ret(np_input1, np_input2, op)
    for hybridize in [True, False]:
        if scalar is None:
            get_mx_ret = TestBinaryElementWiseOp(op)
        else:
            get_mx_ret = TestBinaryElementWiseOp(op, scalar=scalar,
                                                 reverse=reverse)
        if hybridize:
            get_mx_ret.hybridize()

        if scalar is None:
            mx_out = get_mx_ret(mx_input1.as_np_ndarray(),
                                mx_input2.as_np_ndarray())
        elif reverse:
            mx_out = get_mx_ret(mx_input2.as_np_ndarray())
        else:
            mx_out = get_mx_ret(mx_input1.as_np_ndarray())

        # The exact type is checked on purpose (``np`` is the MXNet numpy
        # namespace here, ``_np`` the reference one).
        assert type(mx_out) == np.ndarray
        assert np_out.shape == mx_out.shape
        assert_almost_equal(mx_out.asnumpy(), np_out, atol=1e-6, rtol=1e-5)
def check_elemwise_add_training(stype):
    """Run a numeric gradient check of ``elemwise_add`` for storage type ``stype``.

    One random 4-d shape is drawn up front; the check is then repeated with
    operand densities 1.0, 0.5 and 0.0.
    """
    data_shape = rand_shape_nd(4)
    for density in (1.0, 0.5, 0.0):
        lhs_var = mx.sym.Variable('a')
        rhs_var = mx.sym.Variable('b')
        add_sym = mx.sym.elemwise_add(lhs_var, rhs_var)

        # Both operands share shape, storage type and density.
        operands = [
            rand_ndarray(shape=data_shape, stype=stype, density=density)
            for _ in range(2)
        ]
        check_numeric_gradient(add_sym, operands, numeric_eps=1e-3,
                               rtol=1e-3, atol=5e-3)
def test_np_dot():
    """Compare ``np.dot`` with the reference ``_np.dot`` and check its gradient.

    The second half verifies that incompatible shapes raise
    ``mx.base.MXNetError``.
    """
    valid_shape_pairs = [
        ((3, 0), (0, 4)),
        ((3, ), (3, )),            # Case 1
        ((3, 4), (4, 5)),          # Case 2
        ((), ()),                  # Case 3
        ((3, 4, 5), ()),           # Case 3.5.1
        ((), (3, 4, 5)),           # Case 3.5.2
        ((3, 4, 5), (5, )),        # Case 4
        ((3, 4, 5), (5, 2)),       # Case 5
        ((5, ), (5, 2)),
        ((3, 5, 4), (5, 4, 3)),
        ((3, 4), (5, 4, 3)),
        ((4, ), (5, 4, 3)),
    ]

    eps = 1e-3

    for shape_a, shape_b in valid_shape_pairs:
        # Entries closer to zero than ``eps`` are replaced by ``2 * eps``.
        np_a = _np.random.uniform(-1.0, 1.0, shape_a)
        np_a[abs(np_a) < eps] = 2 * eps
        np_b = _np.random.uniform(-1.0, 1.0, shape_b)
        np_b[abs(np_b) < eps] = 2 * eps
        a = mx.nd.array(np_a)
        b = mx.nd.array(np_b)

        expected = _np.dot(np_a, np_b)
        actual = np.dot(a.as_np_ndarray(), b.as_np_ndarray())
        assert actual.shape == expected.shape
        assert_almost_equal(expected, actual.asnumpy(), rtol=1e-5, atol=1e-5)

        sym_a = mx.sym.Variable("a")
        sym_b = mx.sym.Variable("b")
        dot_sym = mx.sym.np.dot(sym_a.as_np_ndarray(),
                                sym_b.as_np_ndarray()).as_nd_ndarray()
        # The numeric gradient is only checked for non-scalar, non-empty inputs.
        has_rank = len(shape_a) > 0 and len(shape_b) > 0
        if has_rank and _np.prod(shape_a) > 0 and _np.prod(shape_b) > 0:
            check_numeric_gradient(dot_sym, {"a": a, "b": b},
                                   numeric_eps=eps, rtol=1e-2, atol=1e-3)

    def _operand(shape):
        # A zero-length shape yields a scalar array, otherwise a random one.
        if len(shape) == 0:
            return mx.nd.array(random.random())
        return rand_ndarray(shape)

    bad_shapes = [((4, 5), (2, 3)), ((3, 4, 5), (6, ))]

    for shape_a, shape_b in bad_shapes:
        a = _operand(shape_a)
        b = _operand(shape_b)
        try:
            np.dot(a.as_np_ndarray(), b.as_np_ndarray())
        except mx.base.MXNetError:
            pass
        else:
            # Reaching this point means the mismatch went undetected.
            assert False
def bench_dot(lhs_shape, rhs_shape, lhs_stype, rhs_stype,
              lhs_den, rhs_den, trans_lhs, ctx, num_repeat=10,
              fw="mxnet", distribution="uniform"):
    """Time a sparse dot against a dense dot and print one result row.

    ``fw`` selects the backend (``"mxnet"`` or ``"scipy"``).  The printed row
    holds both densities as percentages, the context, the m/k/n dimensions,
    both costs multiplied by 1000 and the dense/sparse cost ratio.
    """
    set_default_context(ctx)
    assert fw in ("mxnet", "scipy")
    use_mxnet = fw == "mxnet"

    # Set funcs
    if use_mxnet:
        dot_func_sparse = mx.nd.sparse.dot
        dot_func_dense = mx.nd.dot
    else:
        dot_func_sparse = sp.spmatrix.dot
        dot_func_dense = np.dot

    # Create matrix instances
    lhs_nd = rand_ndarray(lhs_shape, lhs_stype, density=lhs_den,
                          distribution=distribution)
    # only uniform distribution supported for rhs
    rhs_distribution = distribution if rhs_stype == 'csr' else "uniform"
    rhs_nd = rand_ndarray(rhs_shape, rhs_stype, density=rhs_den,
                          distribution=rhs_distribution)

    if use_mxnet:
        lhs_dns = lhs_nd
        if lhs_stype != 'default':
            lhs_dns = lhs_nd.tostype('default')
        rhs_dns = rhs_nd
        if rhs_stype != 'default':
            rhs_dns = rhs_nd.tostype('default')

        # One warm up run, verify correctness
        out = dot_func_sparse(lhs_nd, rhs_dns, trans_lhs)
        out_expected = dot_func_dense(lhs_dns, rhs_dns, trans_lhs)
        assert_almost_equal(out.asnumpy(), out_expected.asnumpy(),
                            rtol=1e-1, atol=1e-1)

        sparse_cost = measure_cost(num_repeat, False, False,
                                   dot_func_sparse, lhs_nd, rhs_nd, trans_lhs)
        dense_cost = measure_cost(num_repeat, False, False,
                                  dot_func_dense, lhs_dns, rhs_dns, trans_lhs)
    else:
        lhs_dns = lhs_nd.asnumpy()
        rhs_dns = rhs_nd.asnumpy()
        lhs_nd = sp.csr_matrix(lhs_nd.asnumpy())
        rhs_nd = rhs_nd.asnumpy()

        # One warm up run, verify correctness
        if trans_lhs:
            lhs_nd_copy = sp.spmatrix.transpose(lhs_nd)
        else:
            lhs_nd_copy = lhs_nd
        out = dot_func_sparse(lhs_nd_copy, rhs_dns)

        sparse_cost = measure_cost(num_repeat, trans_lhs, False,
                                   dot_func_sparse, lhs_nd, rhs_nd)
        dense_cost = measure_cost(num_repeat, trans_lhs, True,
                                  dot_func_dense, lhs_dns, rhs_dns)

    speedup = dense_cost / sparse_cost

    # Print results
    m, k = lhs_shape[0], lhs_shape[1]
    n = rhs_shape[1]
    result_pattern = '{:15.1f} {:15.1f} {:>10} {:8d} {:8d} {:8d} {:13.2f} {:13.2f} {:8.2f}'
    print(result_pattern.format(lhs_den*100, rhs_den*100, str(ctx), m, k, n,
                                sparse_cost*1000, dense_cost*1000, speedup))
def bench_dot(lhs_row_dim, lhs_col_dim, rhs_col_dim, density,
              rhs_density, dot_func, trans_lhs, lhs_stype,
              rhs_stype, only_storage, distribution="uniform"):
    """Benchmark storage alone, or storage plus one dot.

    The left operand is always created.  Unless ``only_storage`` is truthy, a
    right operand is created as well and ``dot_func`` is applied to the pair.
    """
    lhs_shape = (lhs_row_dim, lhs_col_dim)
    lhs_nd = rand_ndarray(lhs_shape, lhs_stype, density,
                          distribution=distribution)
    if not only_storage:
        rhs_shape = (lhs_col_dim, rhs_col_dim)
        rhs_nd = rand_ndarray(rhs_shape, rhs_stype, density=rhs_density,
                              distribution=distribution)
        out = dot_func(lhs_nd, rhs_nd, trans_lhs)
    # Wait for all queued work before returning.
    mx.nd.waitall()
def check_fullyconnected_training(stype):
    """Run a numeric gradient check of bias-free ``FullyConnected``.

    Data and weight shapes are random 2-d shapes; the weight's second
    dimension is forced to match the data's second dimension.  The check is
    repeated for densities 1.0, 0.5 and 0.0 with storage type ``stype``.
    """
    data_shape = rand_shape_nd(2)
    num_hidden = rand_shape_nd(2)[0]
    weight_shape = (num_hidden, data_shape[1])

    for density in (1.0, 0.5, 0.0):
        data_nd = rand_ndarray(shape=data_shape, stype=stype, density=density)
        weight_nd = rand_ndarray(shape=weight_shape, stype=stype,
                                 density=density)
        data_var = mx.sym.Variable("data")
        weight_var = mx.sym.Variable("weight")
        fc_sym = mx.sym.FullyConnected(data=data_var, weight=weight_var,
                                       num_hidden=weight_shape[0],
                                       no_bias=True)
        check_numeric_gradient(fc_sym, [data_nd, weight_nd],
                               numeric_eps=1e-3, rtol=1e-3, atol=5e-3)
def bench_dot(lhs_shape, rhs_shape, lhs_stype, rhs_stype,
              lhs_den, rhs_den, trans_lhs, ctx, num_repeat=10,
              fw="mxnet", distribution="uniform"):
    """Measure sparse versus dense dot cost for one configuration and print it.

    With ``fw="mxnet"`` both timings go through ``mx.nd.dot`` (on the original
    and on the densified operands); with ``fw="scipy"`` the sparse timing uses
    ``sp.spmatrix.dot`` and the dense one ``np.dot``.
    """
    set_default_context(ctx)
    assert fw in ("mxnet", "scipy")
    is_mxnet = fw == "mxnet"

    # Set funcs
    dot_func_sparse = mx.nd.dot if is_mxnet else sp.spmatrix.dot
    dot_func_dense = mx.nd.dot if is_mxnet else np.dot

    # Create matrix instances
    lhs_nd = rand_ndarray(lhs_shape, lhs_stype, density=lhs_den,
                          distribution=distribution)
    # only uniform distribution supported for rhs
    rhs_nd = rand_ndarray(rhs_shape, rhs_stype, density=rhs_den,
                          distribution="uniform")

    if not is_mxnet:
        lhs_dns = lhs_nd.asnumpy()
        rhs_dns = rhs_nd.asnumpy()
        lhs_nd = sp.csr_matrix(lhs_nd.asnumpy())
        rhs_nd = rhs_nd.asnumpy()
        # One warm up run, verify correctness
        warmup_lhs = sp.spmatrix.transpose(lhs_nd) if trans_lhs else lhs_nd
        out = dot_func_sparse(warmup_lhs, rhs_dns)
        sparse_cost = measure_cost(num_repeat, trans_lhs, False,
                                   dot_func_sparse, lhs_nd, rhs_nd)
        dense_cost = measure_cost(num_repeat, trans_lhs, True,
                                  dot_func_dense, lhs_dns, rhs_dns)
    else:
        if lhs_stype == 'default':
            lhs_dns = lhs_nd
        else:
            lhs_dns = lhs_nd.tostype('default')
        if rhs_stype == 'default':
            rhs_dns = rhs_nd
        else:
            rhs_dns = rhs_nd.tostype('default')
        # One warm up run, verify correctness
        out = dot_func_sparse(lhs_nd, rhs_dns, trans_lhs)
        out_expected = dot_func_dense(lhs_dns, rhs_dns, trans_lhs)
        assert_almost_equal(out.asnumpy(), out_expected.asnumpy(),
                            rtol=1e-1, atol=1e-1)
        sparse_cost = measure_cost(num_repeat, False, False,
                                   dot_func_sparse, lhs_nd, rhs_nd, trans_lhs)
        dense_cost = measure_cost(num_repeat, False, False,
                                  dot_func_dense, lhs_dns, rhs_dns, trans_lhs)

    speedup = dense_cost / sparse_cost

    # Print results
    m = lhs_shape[0]
    k = lhs_shape[1]
    n = rhs_shape[1]
    result_pattern = '{:15.1f} {:15.1f} {:>10} {:8d} {:8d} {:8d} {:13.2f} {:13.2f} {:8.2f}'
    row = result_pattern.format(lhs_den*100, rhs_den*100, str(ctx), m, k, n,
                                sparse_cost*1000, dense_cost*1000, speedup)
    print(row)
def compare_optimizer(opt1, opt2, shape, dtype, w_stype='default',
                      g_stype='default', rtol=1e-4, atol=1e-5,
                      compare_states=True):
    """Compare opt1 and opt2.

    ``shape`` is either one shape (single-tensor comparison, honouring
    ``w_stype``/``g_stype``) or a list of shapes.  For a list, ``opt1`` is
    updated one tensor at a time and ``opt2`` receives all tensors in one
    call.  States are compared only when ``compare_states`` is true; the
    updated weights are always compared.
    """
    if isinstance(shape, list):
        # test multi-tensor: Opt1 single-tensor reference, Opt2 multi-tensor
        from copy import deepcopy
        ntensors = len(shape)
        weights, grads = [], []
        for s in shape:
            weights.append(mx.random.uniform(shape=s, ctx=default_context(),
                                             dtype=dtype))
            grads.append(mx.random.uniform(shape=s, ctx=default_context(),
                                           dtype=dtype))
        w1 = tuple(weights)
        w2 = deepcopy(w1)
        g1 = tuple(grads)
        g2 = deepcopy(g1)

        state2 = [opt2.create_state_multi_precision(0, w2[idx])
                  for idx in range(ntensors)]
        opt2.update_multi_precision(list(range(ntensors)), w2, g2, state2)

        for idx in range(ntensors):
            state1 = opt1.create_state_multi_precision(idx, w1[idx])
            opt1.update_multi_precision(idx, w1[idx], g1[idx], state1)
            if compare_states:
                compare_ndarray_tuple(state1, state2[idx], rtol, atol)
            assert_almost_equal(w1[idx].asnumpy(), w2[idx].asnumpy(),
                                rtol=rtol, atol=atol)
        return

    sparse_stypes = ('row_sparse', 'csr')

    # Weights: opt2 keeps the requested storage type, opt1 gets a dense copy.
    if w_stype == 'default':
        w2 = mx.random.uniform(shape=shape, ctx=default_context(), dtype=dtype)
        w1 = w2.copyto(default_context())
    elif w_stype in sparse_stypes:
        w2 = rand_ndarray(shape, w_stype, density=1, dtype=dtype)
        w1 = w2.copyto(default_context()).tostype('default')
    else:
        raise Exception("type not supported yet")

    # Gradients follow the same scheme.
    if g_stype == 'default':
        g2 = mx.random.uniform(shape=shape, ctx=default_context(), dtype=dtype)
        g1 = g2.copyto(default_context())
    elif g_stype in sparse_stypes:
        g2 = rand_ndarray(shape, g_stype, dtype=dtype)
        g1 = g2.copyto(default_context()).tostype('default')
    else:
        raise Exception("type not supported yet")

    state1 = opt1.create_state_multi_precision(0, w1)
    state2 = opt2.create_state_multi_precision(0, w2)
    if compare_states:
        compare_ndarray_tuple(state1, state2)

    opt1.update_multi_precision(0, w1, g1, state1)
    opt2.update_multi_precision(0, w2, g2, state2)
    if compare_states:
        compare_ndarray_tuple(state1, state2, rtol=rtol, atol=atol)
    assert_almost_equal(w1.asnumpy(), w2.asnumpy(), rtol=rtol, atol=atol)
def check_sparse_aggregator(sparse_pull):
    """Push row_sparse values from four CPU contexts and verify the pulled sums.

    ``sparse_pull`` selects ``row_sparse_pull`` (requesting every row) over a
    plain ``pull`` with ``ignore_sparse=False``.  Both a single string key and
    the list of string keys are exercised.
    """
    stype = 'row_sparse'
    kv = init_kv_with_str(stype)

    def _dense_total(arrays):
        # Accumulate the dense form of every array into a zero buffer.
        total = np.zeros(shape)
        for arr in arrays:
            total += arr.asnumpy()
        return total

    # devices
    num_devs = 4
    devs = [mx.Context('cpu', idx) for idx in range(num_devs)]

    # single
    vals = [rand_ndarray(shape, stype).copyto(dev) for dev in devs]
    expected_sum = _dense_total(vals)

    # prepare row_ids
    kv.push('a', vals)
    if sparse_pull:
        all_rows = mx.nd.array(np.arange(shape[0]))
        kv.row_sparse_pull('a', out=vals, row_ids=[all_rows] * len(vals))
    else:
        kv.pull('a', out=vals, ignore_sparse=False)
    # Each device ends up with the aggregate, hence the factor num_devs.
    assert_almost_equal(_dense_total(vals), expected_sum * num_devs)

    # list
    per_key_vals = [rand_ndarray(shape, stype).copyto(dev) for dev in devs]
    vals = [per_key_vals] * len(keys)
    expected_sum = _dense_total(vals[0])

    kv.push(str_keys, vals)
    if sparse_pull:
        kv.row_sparse_pull(str_keys, out=vals,
                           row_ids=[[all_rows] * num_devs] * len(vals))
    else:
        kv.pull(str_keys, out=vals, ignore_sparse=False)
    for key_vals in vals:
        assert_almost_equal(_dense_total(key_vals), expected_sum * num_devs)
def run_benchmark(mini_path):
    """Run benchmarks.

    Iterates over the data loaded from ``mini_path`` and, for every batch,
    measures ``mx.nd.sparse.dot`` on the batch as loaded and ``mx.nd.dot`` on
    its dense conversion.  Returns ``(average sparse cost, average dense
    cost)`` over all batches.
    """
    data_shape = (feature_dim, )
    train_iter = _get_iter(mini_path, data_shape, batch_size)

    rows = batch_size if transpose else feature_dim
    weight_shape = (rows, output_dim)
    if rsp:
        weight = rand_ndarray(weight_shape, "row_sparse", density=0.05,
                              distribution="uniform")
    else:
        weight = mx.nd.random.uniform(low=0, high=1, shape=weight_shape)

    sparse_total = 0.
    dense_total = 0.
    num_batches = 0
    for _ in train_iter:
        csr_data = train_iter.getdata()
        dns_data = csr_data.tostype('default')
        sparse_total += measure_cost(num_repeat, False, False,
                                     mx.nd.sparse.dot, csr_data, weight,
                                     transpose_a=transpose)
        dense_total += measure_cost(num_repeat, False, False,
                                    mx.nd.dot, dns_data, weight,
                                    transpose_a=transpose)
        num_batches += 1

    return (sparse_total / num_batches, dense_total / num_batches)
def run_benchmark(mini_path):
    """Run benchmarks.

    For every batch produced by the iterator over ``mini_path`` the cost of
    ``mx.nd.dot`` is measured twice: on the batch as loaded and on its dense
    conversion.  The per-batch averages are returned as
    ``(sparse, dense)``.
    """
    train_iter = _get_iter(mini_path, (feature_dim, ), batch_size)

    if transpose:
        weight_row_dim = batch_size
    else:
        weight_row_dim = feature_dim
    weight_shape = (weight_row_dim, output_dim)

    if not rsp:
        weight = mx.nd.random_uniform(low=0, high=1, shape=weight_shape)
    else:
        weight = rand_ndarray(weight_shape, "row_sparse", density=0.05,
                              distribution="uniform")

    totals = {"sparse": 0., "dense": 0.}
    batches_seen = 0
    for _ in train_iter:
        csr_batch = train_iter.getdata()
        dense_batch = csr_batch.tostype('default')
        totals["sparse"] += measure_cost(num_repeat, False, False, mx.nd.dot,
                                         csr_batch, weight,
                                         transpose_a=transpose)
        totals["dense"] += measure_cost(num_repeat, False, False, mx.nd.dot,
                                        dense_batch, weight,
                                        transpose_a=transpose)
        batches_seen += 1

    averages = {name: total / batches_seen for name, total in totals.items()}
    return (averages["sparse"], averages["dense"])
def test_np_transpose():
    """Check forward and backward of ``transpose`` against the ``_np`` reference.

    Covers hybridized and non-hybridized blocks, int32 and float32 inputs,
    ranks 0 through 6 (zero-size dimensions allowed), and the axes ``None``,
    identity and a random permutation (``()`` for rank 0).
    """

    def np_transpose_grad(out_shape, dtype, axes=None):
        # Gradient of an all-ones head gradient: transpose it back with the
        # inverse permutation (argsort of the forward axes).
        ograd = _np.ones(out_shape, dtype=dtype)
        if axes is None or axes == ():
            return _np.transpose(ograd, axes)
        inverse_axes = tuple(_np.argsort(_np.array(list(axes))))
        return _np.transpose(ograd, inverse_axes)

    class TestTranspose(HybridBlock):
        def __init__(self, axes=None):
            super(TestTranspose, self).__init__()
            self.axes = axes

        def hybrid_forward(self, F, a):
            return F.np.transpose(a, self.axes)

    tolerances = dict(rtol=1e-3, atol=1e-5, use_broadcast=False)

    for hybridize in [True, False]:
        for dtype in [_np.int32, _np.float32]:
            for ndim in range(7):
                shape = rand_shape_nd(ndim, dim=5, allow_zero_size=True)

                candidates = [None]
                if ndim == 0:
                    candidates.append(())
                else:
                    order = list(range(ndim))
                    candidates.append(tuple(order))
                    random.shuffle(order)
                    candidates.append(tuple(order))

                for axes in candidates:
                    test_trans = TestTranspose(axes)
                    if hybridize:
                        test_trans.hybridize()
                    x = rand_ndarray(shape).as_np_ndarray()
                    x = x.astype(dtype)
                    x.attach_grad()

                    np_out = _np.transpose(x.asnumpy(), axes)
                    with mx.autograd.record():
                        mx_out = test_trans(x)
                    assert mx_out.shape == np_out.shape
                    assert_almost_equal(mx_out.asnumpy(), np_out, **tolerances)

                    mx_out.backward()
                    np_backward = np_transpose_grad(np_out.shape, dtype, axes)
                    assert_almost_equal(x.grad.asnumpy(), np_backward,
                                        **tolerances)

                    # Imperative call, outside of any block.
                    mx_out = np.transpose(x, axes)
                    np_out = _np.transpose(x.asnumpy(), axes)
                    assert_almost_equal(mx_out.asnumpy(), np_out, **tolerances)
def test_lstmp():
    """Compare a projected ``LSTM`` layer with an unrolled ``LSTMPCell`` on GPU 0.

    Both are loaded with the same random weights; outputs and parameter
    gradients must agree within ``rtol``/``atol``.  Afterwards several
    projected LSTM configurations go through ``check_rnn_layer_forward`` and
    the layer's parameters are saved to and reloaded from ``gpu_tmp.params``.
    """
    hidden_size = 3
    projection_size = 2
    rtol = atol = 1e-2
    batch_size = 7
    seq_len = 11
    input_size = 5
    device = mx.gpu(0)
    lstm_input = mx.np.random.uniform(
        size=(seq_len, batch_size, input_size), device=device)

    gate_rows = hidden_size * 4
    shapes = {'i2h_weight': (gate_rows, input_size),
              'h2h_weight': (gate_rows, projection_size),
              'i2h_bias': (gate_rows,),
              'h2h_bias': (gate_rows,),
              'h2r_weight': (projection_size, hidden_size)}
    weights = {}
    for name, shp in shapes.items():
        weights[name] = rand_ndarray(shp).as_np_ndarray()

    lstm_layer = gluon.rnn.LSTM(hidden_size, projection_size=projection_size,
                                input_size=input_size)
    lstm_cell = gluon.rnn.LSTMPCell(hidden_size=hidden_size,
                                    projection_size=projection_size,
                                    input_size=input_size)
    lstm_layer.initialize(device=device)
    lstm_cell.initialize(device=device)
    layer_params = lstm_layer.collect_params()
    cell_params = lstm_cell.collect_params()

    # Flattened order: all weights (i2h, h2h, h2r) first, then the biases
    # (i2h, h2h); there is no h2r bias.
    flat_order = ['i2h_weight', 'h2h_weight', 'h2r_weight',
                  'i2h_bias', 'h2h_bias']
    params = (weights[name].reshape(-1) for name in flat_order)
    net_params_concat = mx.np.concatenate(params)
    layer_params['rnn_param'].set_data(net_params_concat)
    for name, value in weights.items():
        cell_params[name].set_data(value)

    with autograd.record():
        layer_output = lstm_layer(lstm_input.copy())
        cell_output = lstm_cell.unroll(seq_len, lstm_input.copy(),
                                       layout='TNC', merge_outputs=True)[0]

    assert_almost_equal(layer_output, cell_output, rtol=rtol, atol=atol)
    layer_output.backward()
    cell_output.backward()

    layer_params_split = split_rnn_params(
        layer_params['rnn_param'].grad(), 'lstm', 1, input_size, hidden_size,
        False, projection_size=projection_size)
    for name in weights:
        layer_grad = layer_params_split['l0_' + name]
        cell_grad = cell_params[name].grad()
        print('checking gradient for {}'.format('lstm0_l0_' + name))
        assert_almost_equal(layer_grad, cell_grad, rtol=rtol, atol=atol)

    check_rnn_layer_forward(
        gluon.rnn.LSTM(10, 2, projection_size=5),
        mx.np.ones((8, 3, 20)),
        device=device)
    check_rnn_layer_forward(
        gluon.rnn.LSTM(10, 2, projection_size=5, bidirectional=True),
        mx.np.ones((8, 3, 20)),
        [mx.np.ones((4, 3, 5)), mx.np.ones((4, 3, 10))],
        device=device)
    check_rnn_layer_forward(
        gluon.rnn.LSTM(10, 2, dropout=0.5, projection_size=5),
        mx.np.ones((8, 3, 20)),
        run_only=True, device=device)
    check_rnn_layer_forward(
        gluon.rnn.LSTM(10, 2, bidirectional=True, dropout=0.5,
                       projection_size=5),
        mx.np.ones((8, 3, 20)),
        [mx.np.ones((4, 3, 5)), mx.np.ones((4, 3, 10))],
        run_only=True, device=device)

    lstm_layer.save_parameters('gpu_tmp.params')
    lstm_layer.load_parameters('gpu_tmp.params')
def check_sparse_aggregator(sparse_pull):
    """Aggregate row_sparse arrays pushed from several CPU contexts.

    After a push from ``num_devs`` devices, every device pulls the value
    back, so the sum over the pulled arrays must equal ``num_devs`` times the
    sum over the pushed ones.  ``sparse_pull`` chooses between
    ``row_sparse_pull`` and ``pull(..., ignore_sparse=False)``.
    """
    stype = 'row_sparse'
    kv = init_kv_with_str(stype)

    # devices
    num_devs = 4
    devs = []
    for dev_id in range(num_devs):
        devs.append(mx.Context('cpu', dev_id))

    # single
    vals = [rand_ndarray(shape, stype).copyto(dev) for dev in devs]
    expected_sum = np.zeros(shape)
    for pushed in vals:
        expected_sum += pushed.asnumpy()

    # prepare row_ids
    kv.push('a', vals)
    if not sparse_pull:
        kv.pull('a', out=vals, ignore_sparse=False)
    else:
        all_rows = mx.nd.array(np.arange(shape[0]))
        kv.row_sparse_pull('a', out=vals, row_ids=[all_rows] * len(vals))

    result_sum = np.zeros(shape)
    for pulled in vals:
        result_sum += pulled.asnumpy()
    assert_almost_equal(result_sum, expected_sum * num_devs)

    # list
    vals = [[rand_ndarray(shape, stype).copyto(dev) for dev in devs]] * len(keys)
    expected_sum = np.zeros(shape)
    for pushed in vals[0]:
        expected_sum += pushed.asnumpy()

    kv.push(str_keys, vals)
    if not sparse_pull:
        kv.pull(str_keys, out=vals, ignore_sparse=False)
    else:
        row_ids = [[all_rows] * num_devs] * len(vals)
        kv.row_sparse_pull(str_keys, out=vals, row_ids=row_ids)

    for per_key in vals:
        result_sum = np.zeros(shape)
        for pulled in per_key:
            result_sum += pulled.asnumpy()
        assert_almost_equal(result_sum, expected_sum * num_devs)
def test_quadratic_backward(self):
    """Build the ``quadratic_v2`` symbol for random coefficients and shapes.

    NOTE(review): as visible here the body only creates the inputs and the
    symbol; no gradient is computed or asserted -- confirm whether the rest of
    the check is missing.
    """
    # Three independent random coefficients, drawn in the order a, b, c.
    a, b, c = (np.random.random_sample() for _ in range(3))

    for ndim in range(1, 6):
        shape = tu.rand_shape_nd(ndim, 5)
        data_nd = tu.rand_ndarray(shape=shape, stype='default')
        data_np = data_nd.asnumpy()
        data = mx.sym.Variable('data')
        quad_sym = mx.sym.contrib.quadratic_v2(data=data, a=a, b=b, c=c)
def test_lstmp():
    """Check that a projected ``LSTM`` layer matches an unrolled ``LSTMPCell``.

    Runs on GPU 0.  Identical random weights are loaded into both; forward
    outputs and per-parameter gradients are compared.  Several projected LSTM
    configurations are then passed to ``check_rnn_layer_forward`` and the
    layer parameters are saved to and reloaded from ``gpu_tmp.params``.
    """
    hidden_size, projection_size = 3, 2
    rtol, atol = 1e-2, 1e-2
    batch_size, seq_len = 7, 11
    input_size = 5
    ctx = mx.gpu(0)
    lstm_input = mx.nd.uniform(shape=(seq_len, batch_size, input_size),
                               ctx=ctx)

    shapes = {
        'i2h_weight': (hidden_size * 4, input_size),
        'h2h_weight': (hidden_size * 4, projection_size),
        'i2h_bias': (hidden_size * 4,),
        'h2h_bias': (hidden_size * 4,),
        'h2r_weight': (projection_size, hidden_size),
    }
    weights = {name: rand_ndarray(shp) for name, shp in shapes.items()}

    lstm_layer = gluon.rnn.LSTM(hidden_size, projection_size=projection_size,
                                input_size=input_size, prefix='lstm0_')
    lstm_cell = gluon.contrib.rnn.LSTMPCell(hidden_size=hidden_size,
                                            projection_size=projection_size,
                                            input_size=input_size,
                                            prefix='lstm0_l0_')
    lstm_layer.initialize(ctx=ctx)
    lstm_cell.initialize(ctx=ctx)
    layer_params = lstm_layer.collect_params()
    cell_params = lstm_cell.collect_params()

    # Layer and cell use the same fully qualified parameter names.
    for name, value in weights.items():
        full_name = 'lstm0_l0_' + name
        layer_params[full_name].set_data(value.copy())
        cell_params[full_name].set_data(value.copy())

    with autograd.record():
        layer_output = lstm_layer(lstm_input.copy())
        cell_output = lstm_cell.unroll(seq_len, lstm_input.copy(),
                                       layout='TNC', merge_outputs=True)[0]

    assert_almost_equal(layer_output, cell_output, rtol=rtol, atol=atol)
    layer_output.backward()
    cell_output.backward()

    for name in weights:
        full_name = 'lstm0_l0_' + name
        layer_grad = layer_params[full_name].grad()
        cell_grad = cell_params[full_name].grad()
        print('checking gradient for {}'.format(full_name))
        assert_almost_equal(layer_grad, cell_grad, rtol=rtol, atol=atol)

    check_rnn_layer_forward(
        gluon.rnn.LSTM(10, 2, projection_size=5),
        mx.nd.ones((8, 3, 20)),
        ctx=ctx)
    check_rnn_layer_forward(
        gluon.rnn.LSTM(10, 2, projection_size=5, bidirectional=True),
        mx.nd.ones((8, 3, 20)),
        [mx.nd.ones((4, 3, 5)), mx.nd.ones((4, 3, 10))],
        ctx=ctx)
    check_rnn_layer_forward(
        gluon.rnn.LSTM(10, 2, dropout=0.5, projection_size=5),
        mx.nd.ones((8, 3, 20)),
        run_only=True, ctx=ctx)
    check_rnn_layer_forward(
        gluon.rnn.LSTM(10, 2, bidirectional=True, dropout=0.5,
                       projection_size=5),
        mx.nd.ones((8, 3, 20)),
        [mx.nd.ones((4, 3, 5)), mx.nd.ones((4, 3, 10))],
        run_only=True, ctx=ctx)

    lstm_layer.save_parameters('gpu_tmp.params')
    lstm_layer.load_parameters('gpu_tmp.params')
def test_lstmp():
    """Compare a projected ``LSTM`` layer against an unrolled ``LSTMPCell``.

    Executed on GPU 0 with shared random weights.  Outputs and gradients are
    converted with ``asnumpy()`` before comparison.  Finishes with forward
    checks of several projected LSTM configurations.
    """
    hidden_size = 3
    projection_size = 2
    rtol = 1e-2
    atol = 1e-2
    batch_size = 7
    seq_len = 11
    input_size = 5
    ctx = mx.gpu(0)
    lstm_input = mx.nd.uniform(shape=(seq_len, batch_size, input_size),
                               ctx=ctx)

    four_h = hidden_size * 4
    shapes = {'i2h_weight': (four_h, input_size),
              'h2h_weight': (four_h, projection_size),
              'i2h_bias': (four_h,),
              'h2h_bias': (four_h,),
              'h2r_weight': (projection_size, hidden_size)}
    weights = {}
    for key, shp in shapes.items():
        weights[key] = rand_ndarray(shp)

    lstm_layer = gluon.rnn.LSTM(hidden_size, projection_size=projection_size,
                                input_size=input_size, prefix='lstm0_')
    lstm_cell = gluon.contrib.rnn.LSTMPCell(hidden_size=hidden_size,
                                            projection_size=projection_size,
                                            input_size=input_size,
                                            prefix='lstm0_l0_')
    lstm_layer.initialize(ctx=ctx)
    lstm_cell.initialize(ctx=ctx)
    layer_params = lstm_layer.collect_params()
    cell_params = lstm_cell.collect_params()

    prefix = 'lstm0_l0_'
    for key, value in weights.items():
        layer_params[prefix + key].set_data(value.copy())
        cell_params[prefix + key].set_data(value.copy())

    with autograd.record():
        layer_output = lstm_layer(lstm_input.copy())
        unrolled = lstm_cell.unroll(seq_len, lstm_input.copy(),
                                    layout='TNC', merge_outputs=True)
        cell_output = unrolled[0]

    assert_almost_equal(layer_output.asnumpy(), cell_output.asnumpy(),
                        rtol=rtol, atol=atol)
    layer_output.backward()
    cell_output.backward()

    for key in weights:
        layer_grad = layer_params[prefix + key].grad()
        cell_grad = cell_params[prefix + key].grad()
        print('checking gradient for {}'.format('lstm0_l0_' + key))
        assert_almost_equal(layer_grad.asnumpy(), cell_grad.asnumpy(),
                            rtol=rtol, atol=atol)

    check_rnn_layer_forward(
        gluon.rnn.LSTM(10, 2, projection_size=5),
        mx.nd.ones((8, 3, 20)), ctx=ctx)
    check_rnn_layer_forward(
        gluon.rnn.LSTM(10, 2, projection_size=5, bidirectional=True),
        mx.nd.ones((8, 3, 20)),
        [mx.nd.ones((4, 3, 5)), mx.nd.ones((4, 3, 10))], ctx=ctx)

    check_rnn_layer_forward(
        gluon.rnn.LSTM(10, 2, dropout=0.5, projection_size=5),
        mx.nd.ones((8, 3, 20)), run_only=True, ctx=ctx)
    check_rnn_layer_forward(
        gluon.rnn.LSTM(10, 2, bidirectional=True, dropout=0.5,
                       projection_size=5),
        mx.nd.ones((8, 3, 20)),
        [mx.nd.ones((4, 3, 5)), mx.nd.ones((4, 3, 10))],
        run_only=True, ctx=ctx)
def test_sparse_aggregator():
    """Aggregate sparse ndarrays on multiple devices.

    Values are pushed from four CPU contexts and read back with
    ``row_sparse_pull`` requesting every row, first for the single key ``'a'``
    and then for the list of string keys.
    """
    stype = 'row_sparse'
    kv = init_kv_with_str(stype)

    # devices
    num_devs = 4
    devs = [mx.Context('cpu', dev_id) for dev_id in range(num_devs)]

    # single
    vals = [rand_ndarray(shape, stype).copyto(devs[dev_id])
            for dev_id in range(num_devs)]
    expected_sum = np.zeros(shape)
    for arr in vals:
        np.add(expected_sum, arr.asnumpy(), out=expected_sum)

    # prepare row_ids
    all_rows = mx.nd.array(np.arange(shape[0]), dtype='int64')
    kv.push('a', vals)
    kv.row_sparse_pull('a', out=vals, row_ids=[all_rows] * len(vals))

    result_sum = np.zeros(shape)
    for arr in vals:
        np.add(result_sum, arr.asnumpy(), out=result_sum)
    # Every device receives the aggregate, so the total is scaled by num_devs.
    assert_almost_equal(result_sum, expected_sum * num_devs)

    # list
    shared_vals = [rand_ndarray(shape, stype).copyto(devs[dev_id])
                   for dev_id in range(num_devs)]
    vals = [shared_vals] * len(keys)
    expected_sum = np.zeros(shape)
    for arr in vals[0]:
        np.add(expected_sum, arr.asnumpy(), out=expected_sum)

    kv.push(str_keys, vals)
    kv.row_sparse_pull(str_keys, out=vals,
                       row_ids=[[all_rows] * num_devs] * len(vals))
    for key_vals in vals:
        result_sum = np.zeros(shape)
        for arr in key_vals:
            np.add(result_sum, arr.asnumpy(), out=result_sum)
        assert_almost_equal(result_sum, expected_sum * num_devs)
def test_depthtospace():
    """Compare ``mx.nd.depth_to_space`` with a reshape/transpose reference.

    The input has shape ``(LARGE_X, 8, 4, 2)`` and the block size is 2.
    """

    def numpy_depth_to_space(x, blocksize):
        # Split the channel axis into (block, block, c / block^2), then
        # interleave the two block axes with height and width.
        b, c, h, w = x.shape[:4]
        out_channels = c // (blocksize**2)
        blocks = np.reshape(
            x, [b, blocksize, blocksize, out_channels, h, w])
        blocks = np.transpose(blocks, [0, 3, 4, 1, 5, 2])
        return np.reshape(
            blocks, [b, out_channels, h * blocksize, w * blocksize])

    shape_inp = (LARGE_X, 8, 4, 2)
    data = rand_ndarray(shape_inp, 'default')
    expected = numpy_depth_to_space(data.asnumpy(), 2)
    output = mx.nd.depth_to_space(data, 2)
    assert_almost_equal(output.asnumpy(), expected, atol=1e-3, rtol=1e-3)
def test_spacetodepth():
    """Compare ``mx.nd.space_to_depth`` with a reshape/transpose reference.

    The input has shape ``(LARGE_X, 2, 8, 4)`` and the block size is 2.
    """

    def numpy_space_to_depth(x, blocksize):
        # Split height and width into (size / block, block) pairs, then move
        # both block axes in front of the channel axis.
        b, c, h, w = x.shape[:4]
        out_h = h // blocksize
        out_w = w // blocksize
        tiles = np.reshape(x, [b, c, out_h, blocksize, out_w, blocksize])
        tiles = np.transpose(tiles, [0, 3, 5, 1, 2, 4])
        return np.reshape(tiles, [b, c * (blocksize**2), out_h, out_w])

    shape_inp = (LARGE_X, 2, 8, 4)
    data = rand_ndarray(shape_inp, 'default')
    reference = numpy_space_to_depth(data.asnumpy(), 2)
    result = mx.nd.space_to_depth(data, 2)
    assert_almost_equal(result.asnumpy(), reference, atol=1e-3, rtol=1e-3)
def compare_optimizer(opt1, opt2, shape, dtype, w_stype='default',
                      g_stype='default', rtol=1e-4, atol=1e-5,
                      compare_states=True):
    """Compare opt1 and opt2.

    ``opt2`` works on weight and gradient in the requested storage types;
    ``opt1`` works on dense copies of the same data.  After one update the
    weights (and, if ``compare_states`` is true, the states) must match.

    Raises ``Exception`` for a storage type other than ``'default'``,
    ``'row_sparse'`` or ``'csr'``.
    """
    sparse_kinds = ('row_sparse', 'csr')

    if w_stype == 'default':
        w2 = mx.random.uniform(shape=shape, ctx=default_context(), dtype=dtype)
        w1 = w2.copyto(default_context())
    elif w_stype in sparse_kinds:
        w2 = rand_ndarray(shape, w_stype, density=1, dtype=dtype)
        w1 = w2.copyto(default_context()).tostype('default')
    else:
        raise Exception("type not supported yet")

    if g_stype == 'default':
        g2 = mx.random.uniform(shape=shape, ctx=default_context(), dtype=dtype)
        g1 = g2.copyto(default_context())
    elif g_stype in sparse_kinds:
        g2 = rand_ndarray(shape, g_stype, dtype=dtype)
        g1 = g2.copyto(default_context()).tostype('default')
    else:
        raise Exception("type not supported yet")

    state1 = opt1.create_state_multi_precision(0, w1)
    state2 = opt2.create_state_multi_precision(0, w2)
    # Freshly created states are compared first, without custom tolerances.
    if compare_states:
        compare_ndarray_tuple(state1, state2)

    opt1.update_multi_precision(0, w1, g1, state1)
    opt2.update_multi_precision(0, w2, g2, state2)
    if compare_states:
        compare_ndarray_tuple(state1, state2, rtol=rtol, atol=atol)
    assert_almost_equal(w1.asnumpy(), w2.asnumpy(), rtol=rtol, atol=atol)
def test_quadratic_forward(self):
    """Check ``quadratic_v2`` forward output against ``a*x**2 + b*x + c``.

    Coefficients are random; inputs are random dense arrays of rank 1 to 5.
    """

    def quadratic(x, a, b, c):
        return a * x**2 + b * x + c

    a = np.random.random_sample()
    b = np.random.random_sample()
    c = np.random.random_sample()

    for rank in range(1, 6):
        shape = tu.rand_shape_nd(rank, 5)
        data = tu.rand_ndarray(shape=shape, stype='default')
        expected = quadratic(data.asnumpy(), a, b, c)
        result = mx.nd.contrib.quadratic_v2(data=data, a=a, b=b, c=c)
        tu.assert_almost_equal(result.asnumpy(), expected)
def test_sparse_aggregator():
    """Aggregate sparse ndarrays on multiple devices.

    Values are pushed from four CPU contexts and pulled back with a plain
    ``pull``, first under the integer key 3 and then under the list ``keys``.
    """
    stype = 'row_sparse'
    kv = init_kv(stype)

    def dense_sum(arrays):
        # Sum of the dense forms, accumulated in a zero buffer of ``shape``.
        acc = np.zeros(shape)
        for item in arrays:
            acc += item.asnumpy()
        return acc

    def make_vals():
        return [rand_ndarray(shape, stype).copyto(dev) for dev in devs]

    # devices
    num_devs = 4
    devs = [mx.Context('cpu', i) for i in range(num_devs)]

    # single
    vals = make_vals()
    expected_sum = dense_sum(vals)

    kv.push(3, vals)
    kv.pull(3, out=vals)
    assert_almost_equal(dense_sum(vals), expected_sum * num_devs)

    # list
    vals = [make_vals()] * len(keys)
    expected_sum = dense_sum(vals[0])

    kv.push(keys, vals)
    kv.pull(keys, out=vals)
    for group in vals:
        assert_almost_equal(dense_sum(group), expected_sum * num_devs)
def test_np_reshape():
    """Check forward and backward of ``reshape`` against the ``_np`` reference.

    Each (source, target) shape pair is run through a block, hybridized and
    not, and then through the imperative ``np.reshape``.
    """

    class TestReshape(HybridBlock):
        def __init__(self, newshape):
            super(TestReshape, self).__init__()
            self._newshape = newshape

        def hybrid_forward(self, F, a):
            return F.np.reshape(a, self._newshape)

    shape_pairs = [
        ((2, 6), (6, 2)),
        ((2, 6), (3, 4)),
        ((1, 0), (0, )),
        ((0, 0), (0, )),
        ((), (1, 1, 1)),
    ]
    check_kwargs = dict(rtol=1e-3, atol=1e-5, use_broadcast=False)

    for hybridize in [True, False]:
        for src_shape, dst_shape in shape_pairs:
            print(src_shape, dst_shape)
            test_reshape = TestReshape(dst_shape)
            if hybridize:
                test_reshape.hybridize()

            x = rand_ndarray(src_shape).as_np_ndarray()
            x.attach_grad()
            np_out = _np.reshape(x.asnumpy(), dst_shape)
            with mx.autograd.record():
                mx_out = test_reshape(x)
            assert mx_out.shape == np_out.shape
            assert_almost_equal(mx_out.asnumpy(), np_out, **check_kwargs)

            # The expected gradient is all ones in the input's shape.
            mx_out.backward()
            np_backward = _np.ones(src_shape)
            assert_almost_equal(x.grad.asnumpy(), np_backward, **check_kwargs)

            mx_out = np.reshape(x, dst_shape)
            np_out = _np.reshape(x.asnumpy(), dst_shape)
            assert_almost_equal(mx_out.asnumpy(), np_out, **check_kwargs)
def test_quantize_float32_to_int8():
    """Check ``contrib.quantize`` (float32 to int8) against a numpy reference.

    The reference scales by ``127 / max(|min|, |max|)``, adds 0.5 to the
    magnitude, clips it at 127, restores the sign and casts to int8.  The
    quantized data must match the reference exactly.
    """
    shape = rand_shape_nd(4)
    data = rand_ndarray(shape, 'default', dtype='float32')
    min_nd = mx.nd.min(data)
    max_nd = mx.nd.max(data)
    qdata, min_val, max_val = mx.nd.contrib.quantize(
        data, min_nd, max_nd, out_type='int8')

    data_np = data.asnumpy()
    min_range = min_nd.asscalar()
    max_range = max_nd.asscalar()
    real_range = np.maximum(np.abs(min_range), np.abs(max_range))
    quantized_range = 127.0
    scale = quantized_range / real_range

    assert qdata.dtype == np.int8
    assert min_val.dtype == np.float32
    assert max_val.dtype == np.float32
    # The reported range is symmetric around zero.
    assert same(min_val.asscalar(), -real_range)
    assert same(max_val.asscalar(), real_range)

    magnitude = np.minimum(np.abs(data_np) * scale + 0.5, quantized_range)
    qdata_np = (np.sign(data_np) * magnitude).astype(np.int8)
    assert same(qdata.asnumpy(), qdata_np)
def check_regression(symbol, forward, shape):
    """Check the forward output of a regression operator.

    Parameters
    ----------
    symbol : callable
        Called as ``symbol(data=..., label=...)`` with two symbolic variables
        to build the operator under test.
    forward : callable
        Reference implementation; receives the input data as a numpy array
        and returns the expected output.
    shape : tuple
        Shape used for both the data and the label.

    The executor is bound on ``mx.cpu(0)``.  A forward pass in training mode
    and a backward pass are run; only the forward output is compared.
    """
    # init executor
    data_s = mx.symbol.Variable('data')
    label_s = mx.symbol.Variable('label')
    out_s = symbol(data=data_s, label=label_s)
    exe = out_s.simple_bind(ctx=mx.cpu(0), data=shape, label=shape)

    arg_map = dict(zip(out_s.list_arguments(), exe.arg_arrays))

    # init data
    # Sample from [-1, 1).  The previous bounds (-1, -1) describe a zero-width
    # interval, so the operator was only ever exercised on a constant input.
    data = mx.random.uniform(-1, 1, shape)
    arg_map["data"][:] = data
    atol = 1e-5
    density = 0.5
    stype = 'default'
    label = arg_map["label"]
    label[:] = rand_ndarray(shape, stype, density=density)

    exe.forward(is_train=True)
    exe.backward()
    np_out = forward(data.asnumpy())
    assert_almost_equal(exe.outputs[0].asnumpy(), np_out, atol=atol)
def test_quantize_float32_to_int8():
    """Check ``contrib.quantize`` (float32 to int8) against a numpy reference.

    Dtypes and the reported symmetric range are checked exactly; the
    quantized values are compared with an absolute tolerance of 1.
    """
    data = rand_ndarray(rand_shape_nd(4), 'default', dtype='float32')
    min_range = mx.nd.min(data)
    max_range = mx.nd.max(data)
    quantized = mx.nd.contrib.quantize(data, min_range, max_range,
                                       out_type='int8')
    qdata, min_val, max_val = quantized

    data_np = data.asnumpy()
    lo = min_range.asscalar()
    hi = max_range.asscalar()
    real_range = np.maximum(np.abs(lo), np.abs(hi))
    quantized_range = 127.0
    scale = quantized_range / real_range

    assert qdata.dtype == np.int8
    assert min_val.dtype == np.float32
    assert max_val.dtype == np.float32
    assert same(min_val.asscalar(), -real_range)
    assert same(max_val.asscalar(), real_range)

    # Reference: scale the magnitude, add 0.5, clip at 127, restore the sign.
    clipped = np.minimum(np.abs(data_np) * scale + 0.5, quantized_range)
    qdata_np = (np.sign(data_np) * clipped).astype(np.int8)
    assert_almost_equal(qdata.asnumpy(), qdata_np, atol = 1)
def test_np_tensordot():
    """Compare ``tensordot`` forward and backward results against NumPy.

    For every (a_shape, b_shape, axes) case, both dtypes, and both hybridized
    and non-hybridized execution, the test checks the forward output against
    ``_np.tensordot``, the gradients against ``tensordot_backward``, the
    imperative ``np.tensordot`` call, and (for non-empty inputs) the numeric
    gradient of the symbolic operator.
    """

    class TestTensordot(HybridBlock):
        """Block that applies ``F.np.tensordot`` with a fixed ``axes``."""

        def __init__(self, axes):
            super(TestTensordot, self).__init__()
            self._axes = axes

        def hybrid_forward(self, F, a, b):
            return F.np.tensordot(a, b, self._axes)

    def tensordot_backward(a, b, axes=2):
        """Return ``[grad_a, grad_b]`` for an all-ones output gradient.

        ``a`` and ``b`` are NumPy arrays; ``axes`` is either a scalar count of
        trailing/leading axes to contract or a pair of axis sequences.
        Raises ``ValueError`` for zero-dim inputs or malformed ``axes``.
        """
        if (a.ndim < 1) or (b.ndim < 1):
            raise ValueError('An input is zero-dim')

        if _np.isscalar(axes):
            a_axes_summed = [i + a.ndim - axes for i in range(axes)]
            b_axes_summed = list(range(axes))
        else:
            if len(axes) != 2:
                raise ValueError('Axes must consist of two arrays.')
            a_axes_summed, b_axes_summed = axes
            if _np.isscalar(a_axes_summed):
                a_axes_summed = a_axes_summed,
            if _np.isscalar(b_axes_summed):
                b_axes_summed = b_axes_summed,
            # Normalize negative axes into NEW lists. `axes` is the very object
            # stored in `tensor_shapes` and in the block under test, so writing
            # into it would turn negative axes positive for every later
            # iteration; building lists also accepts the tuples made above.
            a_axes_summed = [(axis + a.ndim) % a.ndim for axis in a_axes_summed]
            b_axes_summed = [(axis + b.ndim) % b.ndim for axis in b_axes_summed]

        if len(a_axes_summed) != len(b_axes_summed):
            raise ValueError('Axes length mismatch')

        # Permutations that move the contracted axes last (a) / first (b).
        a_axes_remained = [i for i in range(a.ndim) if i not in a_axes_summed]
        a_axes = a_axes_remained + a_axes_summed
        b_axes_remained = [i for i in range(b.ndim) if i not in b_axes_summed]
        b_axes = b_axes_summed + b_axes_remained

        # Sizes of the flattened 2-D operands: a -> (ad1, ad2), b -> (bd1, bd2).
        ad1 = _np.prod([a.shape[i] for i in a_axes_remained]) if len(a_axes_remained) > 0 else 1
        ad2 = _np.prod([a.shape[i] for i in a_axes_summed]) if len(a_axes_summed) > 0 else 1
        bd1 = _np.prod([b.shape[i] for i in b_axes_summed]) if len(b_axes_summed) > 0 else 1
        bd2 = _np.prod([b.shape[i] for i in b_axes_remained]) if len(b_axes_remained) > 0 else 1

        out_grad = _np.ones((ad1, bd2))

        new_a = _np.transpose(a, a_axes)
        new_a_shape = new_a.shape[:]
        new_a = new_a.reshape((ad1, ad2))
        new_b = _np.transpose(b, b_axes)
        new_b_shape = new_b.shape[:]
        new_b = new_b.reshape((bd1, bd2))

        # Inverse permutations, used to restore the original axis order.
        reverse_a_axes = [0] * len(a_axes)
        for position, axis in enumerate(a_axes):
            reverse_a_axes[axis] = position
        reverse_b_axes = [0] * len(b_axes)
        for position, axis in enumerate(b_axes):
            reverse_b_axes[axis] = position

        grad_b = _np.dot(new_a.T, out_grad).reshape(new_b_shape)
        grad_b = _np.transpose(grad_b, reverse_b_axes)
        grad_a = _np.dot(out_grad, new_b.T).reshape(new_a_shape)
        grad_a = _np.transpose(grad_a, reverse_a_axes)
        return [grad_a, grad_b]

    # test non zero size input
    tensor_shapes = [
        ((3, 5), (5, 4), 1),  # (a_shape, b_shape, axes)
        ((3, ), (3, ), 1),
        ((3, 4, 5, 3, 2), (5, 3, 2, 1, 2), 3),
        ((3, 5, 4, 3, 2), (2, 3, 5, 1, 2), [[1, 3, 4], [2, 1, 0]]),
        ((3, 5, 4), (5, 4, 3), [[1, 0, 2], [0, 2, 1]]),
        ((3, 5, 4), (5, 3, 4), [[2, 0], [-1, -2]]),
        ((2, 2), (2, 2), 2),
        ((3, 5, 4), (5, ), [[-2], [0]]),
        ((3, 5, 4), (5, ), [[1], [0]]),
        ((2, ), (2, 3), 1),
        ((3, ), (3, ), 0),
        ((2, ), (2, 3), 0),
        ((3, 5, 4), (5, ), 0),
        ((2, 3, 4), (4, 3, 2), [[], []]),
        ((3, 0), (0, 5), 1),
        ((3, 0), (0, 4), [[1], [0]]),
        ((0, 3), (3, 5), 1),
        ((0, 3), (5, 0), [[0], [1]])
    ]

    for hybridize in [True, False]:
        for a_shape, b_shape, axes in tensor_shapes:
            for dtype in [_np.float32, _np.float64]:
                test_tensordot = TestTensordot(axes)
                if hybridize:
                    test_tensordot.hybridize()
                a = rand_ndarray(shape=a_shape, dtype=dtype).as_np_ndarray()
                b = rand_ndarray(shape=b_shape, dtype=dtype).as_np_ndarray()
                a.attach_grad()
                b.attach_grad()

                np_out = _np.tensordot(a.asnumpy(), b.asnumpy(), axes)
                with mx.autograd.record():
                    mx_out = test_tensordot(a, b)
                assert mx_out.shape == np_out.shape
                assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
                mx_out.backward()
                np_backward = tensordot_backward(a.asnumpy(), b.asnumpy(), axes)
                assert_almost_equal(a.grad.asnumpy(), np_backward[0], rtol=1e-3, atol=1e-5)
                assert_almost_equal(b.grad.asnumpy(), np_backward[1], rtol=1e-3, atol=1e-5)

                # Test imperative once again
                mx_out = np.tensordot(a, b, axes)
                np_out = _np.tensordot(a.asnumpy(), b.asnumpy(), axes)
                assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)

                # test numeric gradient
                if (_np.prod(a_shape) > 0 and _np.prod(b_shape) > 0):
                    a_sym = mx.sym.Variable("a").as_np_ndarray()
                    b_sym = mx.sym.Variable("b").as_np_ndarray()
                    mx_sym = mx.sym.np.tensordot(a_sym, b_sym, axes).as_nd_ndarray()
                    check_numeric_gradient(
                        mx_sym, [a.as_nd_ndarray(), b.as_nd_ndarray()],
                        rtol=1e-1, atol=1e-1, dtype=dtype)
def check_binary_op_result(shape1, shape2, op, dtype=None):
    """Check forward (and, when available, backward) results of a binary op.

    Parameters
    ----------
    shape1, shape2
        Shape of the corresponding operand, or ``None`` to use a random
        scalar instead of an array. Arrays are built as ``|x| + 1`` and have
        gradients attached.
    op
        Operator identifier forwarded to ``TestBinaryElementWiseOp``,
        ``get_np_ret`` and ``_get_grad_func``.
    dtype
        Optional dtype forwarded to ``rand_ndarray``.

    The op is run hybridized and non-hybridized. If ``_get_grad_func``
    returns ``None`` only the forward result is verified.
    """
    if shape1 is not None:
        mx_input1 = (rand_ndarray(shape1, dtype=dtype).abs() + 1).as_np_ndarray()
        mx_input1.attach_grad()
        np_input1 = mx_input1.asnumpy()
    else:
        mx_input1 = abs(_np.random.uniform()) + 1
        np_input1 = mx_input1

    if shape2 is not None:
        mx_input2 = (rand_ndarray(shape2, dtype=dtype).abs() + 1).as_np_ndarray()
        mx_input2.attach_grad()
        np_input2 = mx_input2.asnumpy()
    else:
        mx_input2 = abs(_np.random.uniform()) + 1
        np_input2 = mx_input2

    # Exactly one array operand means the other is passed to the block as a
    # scalar; `reverse` records that the scalar is the left-hand operand.
    first_is_array = isinstance(mx_input1, mx.nd.NDArray)
    second_is_array = isinstance(mx_input2, mx.nd.NDArray)
    scalar = None
    reverse = False
    if first_is_array and not second_is_array:
        scalar = mx_input2
    elif second_is_array and not first_is_array:
        scalar = mx_input1
        reverse = True

    grad_func = _get_grad_func(op, scalar, reverse)
    np_out = get_np_ret(np_input1, np_input2, op)
    ograd = _np.ones_like(np_out)

    def run_block(block, *inputs):
        # Record and back-propagate only when there is a gradient to verify.
        if grad_func is None:
            return block(*inputs)
        with mx.autograd.record():
            result = block(*inputs)
        result.backward()
        return result

    for hybridize in [True, False]:
        if scalar is None:
            np_block = TestBinaryElementWiseOp(op)
            classic_block = TestBinaryElementWiseOp(op)
            if hybridize:
                np_block.hybridize()
                classic_block.hybridize()
            mx_out = run_block(np_block, mx_input1, mx_input2)
        else:
            scalar_block = TestBinaryElementWiseOp(op, scalar=scalar, reverse=reverse)
            if hybridize:
                scalar_block.hybridize()
            mx_input = mx_input2 if reverse else mx_input1
            mx_out = run_block(scalar_block, mx_input)

        assert type(mx_out) == np.ndarray
        if op in logic_ops:
            assert np_out.dtype == mx_out.dtype
        assert_almost_equal(mx_out.asnumpy(), np_out, atol=1e-6, rtol=1e-5,
                            use_broadcast=False)

        # check grad
        if grad_func is None:
            continue
        if scalar is None:
            x1_grad_expected, x2_grad_expected = grad_func(ograd, np_input1, np_input2, np_out)
            assert_almost_equal(mx_input1.grad.asnumpy(), x1_grad_expected,
                                atol=1e-5, rtol=1e-3, use_broadcast=False)
            assert_almost_equal(mx_input2.grad.asnumpy(), x2_grad_expected,
                                atol=1e-5, rtol=1e-3, use_broadcast=False)
        else:
            x_grad_expected = grad_func(ograd, np_input1, np_input2, np_out)
            assert_almost_equal(mx_input.grad.asnumpy(), x_grad_expected,
                                atol=1e-5, rtol=1e-3, use_broadcast=False)