def symbolic(exec_backward=True):
    x = mx.sym.Variable('x')
    y = mx.sym.Variable('y')
    z = mx.sym.Variable('z')
    x_shape = (2, 2)
    z_shape = (3, 2)
    inputs = [x, y]
    out = mx.symbol.ElementWiseSum(*inputs, name="esum")
    out = mx.sym.dot(z, out)
    out2 = mx.sym.random.normal(0, -1, x_shape, ctx=default_context())
    out = mx.sym.dot(out, out2)
    out = mx.sym.make_loss(out)
    arr = {'x': mx.nd.random.normal(0, 1, x_shape, ctx=default_context()),
           'y': mx.nd.random.normal(0, 1, x_shape, ctx=default_context()),
           'z': mx.nd.random.normal(0, 1, z_shape, ctx=default_context())}
    arr_grad = {'x': mx.nd.empty(x_shape),
                'y': mx.nd.empty(x_shape),
                'z': mx.nd.empty(z_shape)}
    exec1 = out.bind(ctx=default_context(), args=arr, args_grad=arr_grad)
    outputs = exec1.forward()
    if exec_backward:
        exec1.backward()
        exec1.grad_arrays[0].asnumpy()
    else:
        outputs[0].asnumpy()
def symbolic(exec_backward=True, waitall=True):
    x = mx.sym.Variable('x')
    y = mx.sym.Variable('y')
    z = mx.sym.Variable('z')
    x_shape = (2, 2)
    z_shape = (3, 2)
    inputs = [x, y]
    out = mx.symbol.ElementWiseSum(*inputs, name="esum")
    out = mx.sym.dot(z, out)
    out2 = mx.sym.random.normal(0, -1, x_shape, ctx=default_context())
    out = mx.sym.dot(out, out2)
    out = mx.sym.make_loss(out)
    arr = {'x': mx.nd.random.normal(0, 1, x_shape, ctx=default_context()),
           'y': mx.nd.random.normal(0, 1, x_shape, ctx=default_context()),
           'z': mx.nd.random.normal(0, 1, z_shape, ctx=default_context())}
    arr_grad = {'x': mx.nd.empty(x_shape),
                'y': mx.nd.empty(x_shape),
                'z': mx.nd.empty(z_shape)}
    exec1 = out.bind(ctx=default_context(), args=arr, args_grad=arr_grad)
    outputs = exec1.forward()
    if exec_backward:
        exec1.backward()
        if waitall:
            mx.nd.waitall()
        else:
            exec1.grad_arrays[0].asnumpy()
    else:
        if waitall:
            mx.nd.waitall()
        else:
            outputs[0].asnumpy()
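# Hedged driver sketch for the `symbolic` variants above. The negative scale
# passed to mx.sym.random.normal is invalid, so MXNetError should surface only
# at a synchronization point; the wrapper name below is illustrative.
def test_exc_symbolic():
    for exec_backward in [True, False]:
        caught = False
        try:
            symbolic(exec_backward=exec_backward)
        except MXNetError:
            caught = True
        assert caught, "No exception thrown"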
def gluon(exec_wait=True):
    model = nn.Sequential()
    model.add(nn.Dense(128, activation='tanh', in_units=10, flatten=False))
    model.add(nn.Dropout(1))
    model.add(nn.Dense(64, activation='tanh', in_units=256),
              nn.Dense(32, in_units=64))
    x = mx.sym.var('data')
    y = model(x)
    model.collect_params().initialize(ctx=[default_context()])
    z = model(mx.nd.random.normal(10, -10, (32, 2, 10), ctx=default_context()))
    if exec_wait:
        z.wait_to_read()
def test_exc_multiple_waits():
    caught = False
    try:
        a = mx.nd.random.normal(0, -1, (2, 2)).copyto(default_context())
        a.wait_to_read()
    except MXNetError:
        caught = True
    assert caught, "No exception thrown"
    caught = False  # reset so the second wait is checked independently
    try:
        b = mx.nd.random.normal(0, -1, (2, 2)).copyto(default_context())
        b.wait_to_read()
    except MXNetError:
        caught = True
    assert caught, "No exception thrown"
def test_norm(ctx=default_context()):
    np_arr = np.random.uniform(size=(3, 3, 3, 3))
    mx_arr = mx.nd.array(np_arr, ctx=ctx)
    arr1 = np.linalg.norm(np_arr, keepdims=False)
    arr2 = mx.nd.norm(mx_arr, keepdims=False)
    print(arr1)
    print(arr2.asnumpy())
    mx.test_utils.assert_almost_equal(arr1, arr2.asnumpy()[0])
    for i in range(4):
        arr1 = np.linalg.norm(np_arr, axis=i, keepdims=False)
        arr2 = mx.nd.norm(mx_arr, axis=i, keepdims=False)
        assert arr1.shape == arr2.shape
        mx.test_utils.assert_almost_equal(arr1, arr2.asnumpy())
        arr1 = np.linalg.norm(np_arr, axis=i, keepdims=True)
        arr2 = mx.nd.norm(mx_arr, axis=i, keepdims=True)
        assert arr1.shape == arr2.shape
        mx.test_utils.assert_almost_equal(arr1, arr2.asnumpy())
        if i < 3:
            arr1 = np.linalg.norm(np_arr, axis=(i, i + 1), keepdims=False)
            arr2 = mx.nd.norm(mx_arr, axis=(i, i + 1), keepdims=False)
            assert arr1.shape == arr2.shape
            mx.test_utils.assert_almost_equal(arr1, arr2.asnumpy())
            arr1 = np.linalg.norm(np_arr, axis=(i, i + 1), keepdims=True)
            arr2 = mx.nd.norm(mx_arr, axis=(i, i + 1), keepdims=True)
            assert arr1.shape == arr2.shape
            mx.test_utils.assert_almost_equal(arr1, arr2.asnumpy())
def check_ste(net_type_str, w_init, hybridize, in_data, ctx=None):
    ctx = ctx or default_context()
    net = eval(net_type_str)(w_init=w_init)
    if hybridize:
        net.hybridize()
    # Init
    net.collect_params().initialize(mx.init.Constant([w_init]), ctx=ctx)
    # Test
    in_data = in_data.as_in_context(ctx)
    with mx.autograd.record():
        out = net(in_data)
        assert all(out == net.expected_output(in_data, w_init)), \
            net_type_str + " output is " + str(out) + ", but expected " + \
            str(net.expected_output(in_data, w_init))
    out.backward()
    assert all(net.w.grad() == net.expected_grads(in_data, w_init)), \
        net_type_str + " w grads are " + str(net.w.grad()) + \
        " but expected " + str(net.expected_grads(in_data, w_init))
    # Second identical pass: gradients should not accumulate across
    # recorded forward/backward runs.
    with mx.autograd.record():
        out = net(in_data)
        assert all(out == net.expected_output(in_data, w_init)), \
            net_type_str + " output is " + str(out) + ", but expected " + \
            str(net.expected_output(in_data, w_init))
    out.backward()
    assert all(net.w.grad() == net.expected_grads(in_data, w_init)), \
        net_type_str + " w grads are " + str(net.w.grad()) + \
        " but expected " + str(net.expected_grads(in_data, w_init))
def mutable_var_check(waitall=False):
    a, b = mx.nd.random_normal(0, -1, (2, 2)).copyto(default_context())
    a = mx.nd.dot(a, a)
    if waitall:
        mx.nd.waitall()
    else:
        a.asnumpy()
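# Hedged driver sketch: `a` holds a failed async result and the dot() that
# consumes it should rethrow at either sync point. The wrapper name is
# illustrative.
def test_exc_mutable_var_fail():
    for waitall in [False, True]:
        caught = False
        try:
            mutable_var_check(waitall=waitall)
        except MXNetError:
            caught = True
        assert caught, "No exception thrown"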
def testSoftmaxOutput():
    x = mx.sym.Variable('x')
    label = mx.sym.Variable('label')
    x_nd = mx.nd.ones((LARGE_X, SMALL_Y))
    grad_x = mx.nd.zeros((LARGE_X, SMALL_Y))
    label_nd = mx.nd.ones((LARGE_X))
    sym = mx.sym.SoftmaxOutput(data=x, label=label, ignore_label=0,
                               use_ignore=False)
    ex = sym.bind(ctx=default_context(),
                  args={'x': x_nd, 'label': label_nd},
                  args_grad={'x': grad_x})
    ex.forward(is_train=True)
    softmax_out = ex.outputs[0][0].asnumpy()
    expected_softmax_out = (1 / SMALL_Y) * mx.nd.ones((SMALL_Y)).asnumpy()
    assert np.isclose(softmax_out, expected_softmax_out).all()
    ex.backward(is_train=True)
    grad_out = ex.grad_arrays[0][0].asnumpy()
    k = int(label_nd[0].asscalar())
    expected_grad_out = np.zeros((SMALL_Y,))
    expected_grad_out[k] = -1
    assert np.isclose(grad_out - softmax_out, expected_grad_out).all()
def test_dropout():
    shape = (10, 10)
    x = mx.sym.var('data')
    y = mx.sym.Dropout(x, p=1, cudnn_off=True)
    exe = y.simple_bind(ctx=default_context(), data=shape)
    exe.arg_arrays[0][:] = 1
    out = exe.forward(is_train=True)
    out[0].wait_to_read()
def test_dropout():
    shape = (LARGE_X, SMALL_Y)
    x = mx.sym.var('data')
    y = mx.sym.Dropout(x, p=1, cudnn_off=True)
    exe = y.simple_bind(ctx=default_context(), data=shape)
    exe.arg_arrays[0][:] = 1
    out = exe.forward(is_train=True)
    assert out[0].shape == shape
def test_multiple_waitalls():
    caught = False
    try:
        a = mx.nd.random.normal(0, -1, (2, 2)).copyto(default_context())
        mx.nd.waitall()
    except MXNetError:
        caught = True
    assert caught, "No exception thrown"
    mx.nd.waitall()
def test_exc_post_fail():
    caught = False
    try:
        a, b = mx.nd.random_normal(0, -1, (2, 2)).copyto(default_context())
        a.asnumpy()
    except MXNetError:
        caught = True
    assert caught, "No exception thrown"
    b.asnumpy()
def check_fluent_regular(func, kwargs, shape=(5, 17, 1), equal_nan=False):
    with mx.name.NameManager():
        data = mx.nd.random_uniform(shape=shape, ctx=default_context())
        regular = getattr(mx.ndarray, func)(data, **kwargs)
        fluent = getattr(data, func)(**kwargs)
        if isinstance(regular, list):
            for r, f in zip(regular, fluent):
                assert almost_equal(r.asnumpy(), f.asnumpy(),
                                    equal_nan=equal_nan)
        else:
            assert almost_equal(regular.asnumpy(), fluent.asnumpy(),
                                equal_nan=equal_nan)
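# Hedged usage sketch for check_fluent_regular: a few representative fluent
# methods. The particular ops and kwargs are illustrative, not the full set a
# fluent-API suite would cover.
check_fluent_regular('flatten', {})
check_fluent_regular('clip', {'a_min': 0.25, 'a_max': 0.75})
check_fluent_regular('split', {'num_outputs': 17, 'axis': 1})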
def compare_optimizer(opt1, opt2, shape, dtype, w_stype='default',
                      g_stype='default', rtol=1e-4, atol=1e-5,
                      compare_states=True):
    """Compare opt1 and opt2."""
    if not isinstance(shape, list):
        if w_stype == 'default':
            w2 = mx.random.uniform(shape=shape, ctx=default_context(), dtype=dtype)
            w1 = w2.copyto(default_context())
        elif w_stype == 'row_sparse' or w_stype == 'csr':
            w2 = rand_ndarray(shape, w_stype, density=1, dtype=dtype)
            w1 = w2.copyto(default_context()).tostype('default')
        else:
            raise Exception("type not supported yet")
        if g_stype == 'default':
            g2 = mx.random.uniform(shape=shape, ctx=default_context(), dtype=dtype)
            g1 = g2.copyto(default_context())
        elif g_stype == 'row_sparse' or g_stype == 'csr':
            g2 = rand_ndarray(shape, g_stype, dtype=dtype)
            g1 = g2.copyto(default_context()).tostype('default')
        else:
            raise Exception("type not supported yet")

        state1 = opt1.create_state_multi_precision(0, w1)
        state2 = opt2.create_state_multi_precision(0, w2)
        if compare_states:
            compare_ndarray_tuple(state1, state2)

        opt1.update_multi_precision(0, w1, g1, state1)
        opt2.update_multi_precision(0, w2, g2, state2)
        if compare_states:
            compare_ndarray_tuple(state1, state2, rtol=rtol, atol=atol)
        assert_almost_equal(w1.asnumpy(), w2.asnumpy(), rtol=rtol, atol=atol)
    else:
        # test multi-tensor: opt1 is the single-tensor reference, opt2 multi-tensor
        from copy import deepcopy
        ntensors = len(shape)
        w1, g1 = [], []
        for s in shape:
            w1.append(mx.random.uniform(shape=s, ctx=default_context(), dtype=dtype))
            g1.append(mx.random.uniform(shape=s, ctx=default_context(), dtype=dtype))
        w1 = tuple(w1)
        w2 = deepcopy(w1)
        g1 = tuple(g1)
        g2 = deepcopy(g1)
        state2 = [opt2.create_state_multi_precision(0, w2[i])
                  for i in range(ntensors)]
        opt2.update_multi_precision(list(range(ntensors)), w2, g2, state2)
        for i in range(ntensors):
            state1 = opt1.create_state_multi_precision(i, w1[i])
            opt1.update_multi_precision(i, w1[i], g1[i], state1)
            if compare_states:
                compare_ndarray_tuple(state1, state2[i], rtol, atol)
            assert_almost_equal(w1[i].asnumpy(), w2[i].asnumpy(),
                                rtol=rtol, atol=atol)
def post_fail(waitall=False):
    caught = False
    try:
        a, b = mx.nd.random_normal(0, -1, (2, 2)).copyto(default_context())
        if waitall:
            mx.nd.waitall()
        else:
            a.asnumpy()
    except MXNetError:
        caught = True
    assert caught, "No exception thrown"
    b.asnumpy()
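# Hedged driver sketch exercising both sync paths of post_fail; the wrapper
# name is illustrative.
def test_exc_post_fail_sync_paths():
    post_fail(waitall=False)
    post_fail(waitall=True)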
def run_training_iteration(data):
    output = net(data)

net = gluon.nn.HybridSequential()
net.add(gluon.nn.Dense(10))
ctx = default_context()
net.initialize(mx.init.Xavier(), ctx=ctx)
data = mx.nd.ones((3, 4))
mx.profiler.set_state("run")
run_training_iteration(data)
mx.nd.waitall()
mx.profiler.set_state("stop")
def multiple_waits(waitall=False):
    # Test calling a failed op followed by wait_to_read or waitall twice.
    # The intention is to test rethrow for multiple wait_to_reads and
    # waitalls for vars with exceptions in the same scope.
    caught = False
    try:
        a = mx.nd.random.normal(0, -1, (2, 2)).copyto(default_context())
        if waitall:
            mx.nd.waitall()
        else:
            a.wait_to_read()
    except MXNetError:
        caught = True
    assert caught, "No exception thrown, exception should be rethrown with wait_to_read/waitall"
    caught = False  # reset so the second wait is verified independently
    try:
        b = mx.nd.random.normal(0, -1, (2, 2)).copyto(default_context())
        if waitall:
            mx.nd.waitall()
        else:
            b.wait_to_read()
    except MXNetError:
        caught = True
    assert caught, "No exception thrown, exception should be rethrown with wait_to_read/waitall"
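# Hedged driver sketch covering both sync modes of multiple_waits; the
# wrapper name is illustrative.
def test_exc_multiple_waits_rethrow():
    multiple_waits(waitall=False)
    multiple_waits(waitall=True)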
def test_exc_profiler():
    def run_training_iteration(data):
        output = net(data)

    net = gluon.nn.HybridSequential()
    with net.name_scope():
        net.add(gluon.nn.Dense(10))
    ctx = default_context()
    net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
    data = mx.nd.ones((3, 4))
    mx.profiler.set_state("run")
    run_training_iteration(data)
    mx.nd.waitall()
    mx.profiler.set_state("stop")
def _get_symbolic_result(out_grads, n_steps):

    def _copy_args_dict(name_list):
        return {name: args[name].copy() for name in name_list}

    def _zeros_like_dict(name_list):
        return {name: mx.nd.zeros_like(args[name]) for name in name_list}

    free_syms = _create_vars(len(free_var_shapes), "FreeVar")
    loop_syms = _create_vars(len(loop_var_shapes), "LoopVar")
    outputs, final_loop_syms = mx.sym.contrib.while_loop(
        cond=lambda *_loop_vars: cond(_loop_vars, free_syms),
        func=lambda *_loop_vars: func(_loop_vars, free_syms),
        loop_vars=loop_syms,
        max_iterations=max_iterations,
    )
    if n_steps == 0:
        outputs = []
    else:
        outputs = [x.slice_axis(axis=0, begin=0, end=n_steps) for x in outputs]
    loop_result_sym = [x * 2 for x in outputs] + [x * 3 for x in final_loop_syms]
    loop_result_sym = mx.sym.Group(loop_result_sym)

    loop_var_start = int(is_for)
    args_names = ["FreeVar" + str(i) for i, _ in enumerate(free_var_shapes)] \
               + ["LoopVar" + str(i) for i, _ in enumerate(loop_var_shapes)
                  if i >= loop_var_start]
    args_grad = None if not is_train else _zeros_like_dict(x for x in args_names)
    executor = loop_result_sym.bind(
        ctx=default_context(),
        args=_copy_args_dict(loop_result_sym.list_inputs()),
        args_grad=args_grad,
    )
    loop_result_nd = executor.forward(is_train=is_train)
    grads = []
    if is_train:
        executor.backward(out_grads=out_grads)
        grads = [executor.grad_dict.get("FreeVar" + str(i), None)
                 for i, _ in enumerate(free_var_shapes)] \
              + [executor.grad_dict.get("LoopVar" + str(i), None)
                 for i, _ in enumerate(loop_var_shapes) if i >= loop_var_start]
    return _to_numpy_list(loop_result_nd), _to_numpy_list(grads)
def check_row_sparse_pull(kv, count, ctx=default_context()):
    num_rows = shape[0]
    vals = []
    row_ids = []
    all_row_ids = np.arange(num_rows)
    for i in range(count):
        vals.append(mx.nd.zeros(shape, ctx=ctx).tostype('row_sparse'))
        row_id = np.random.randint(num_rows, size=num_rows)
        row_ids.append(mx.nd.array(row_id, dtype='int64'))
    row_ids_to_pull = row_ids[0] if len(row_ids) == 1 else row_ids
    vals_to_pull = vals[0] if len(vals) == 1 else vals

    kv.row_sparse_pull('e', out=vals_to_pull, row_ids=row_ids_to_pull)
    for val, row_id in zip(vals, row_ids):
        retained = val.asnumpy()
        excluded_row_ids = np.setdiff1d(all_row_ids, row_id.asnumpy())
        for row in range(num_rows):
            expected_val = np.zeros_like(retained[row])
            expected_val += 0 if row in excluded_row_ids else 1
            assert_almost_equal(retained[row], expected_val)
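# Hedged usage sketch for check_row_sparse_pull. It assumes `shape` is the
# enclosing-scope variable the checker reads, and that key 'e' was initialized
# with ones so every pulled row compares equal to 1.
shape = (8, 4)
kv = mx.kv.create('local')
kv.init('e', mx.nd.ones(shape).tostype('row_sparse'))
check_row_sparse_pull(kv, count=1)
check_row_sparse_pull(kv, count=4)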
def _get_sym_result(is_train, args, args_grad, out_grad):
    args = {k: v.copy() for k, v in args.items()}
    args_grad = {k: v.copy() for k, v in args_grad.items()}
    i, j, x_sum, sc = [
        mx.sym.var("i"),
        mx.sym.var("j"),
        mx.sym.var("x_sum"),
        mx.sym.var("sc"),
    ]
    result_sym = mx.sym.Group(make_loop(i, j, x_sum, sc))
    executor = result_sym.bind(
        ctx=default_context(),
        args=args,
        args_grad=args_grad,
    )
    results = executor.forward(is_train=is_train)
    if not is_train:
        return _to_np_list(results), []
    executor.backward(out_grads=out_grad)
    grads = [executor.grad_dict["x_sum"], executor.grad_dict["sc"]]
    return _to_np_list(results), _to_np_list(grads)
def compare_optimizer(opt1, opt2, shape, dtype, w_stype='default',
                      g_stype='default', rtol=1e-4, atol=1e-5,
                      compare_states=True):
    """Compare opt1 and opt2."""
    if w_stype == 'default':
        w2 = mx.random.uniform(shape=shape, ctx=default_context(), dtype=dtype)
        w1 = w2.copyto(default_context())
    elif w_stype == 'row_sparse' or w_stype == 'csr':
        w2 = rand_ndarray(shape, w_stype, density=1, dtype=dtype)
        w1 = w2.copyto(default_context()).tostype('default')
    else:
        raise Exception("type not supported yet")
    if g_stype == 'default':
        g2 = mx.random.uniform(shape=shape, ctx=default_context(), dtype=dtype)
        g1 = g2.copyto(default_context())
    elif g_stype == 'row_sparse' or g_stype == 'csr':
        g2 = rand_ndarray(shape, g_stype, dtype=dtype)
        g1 = g2.copyto(default_context()).tostype('default')
    else:
        raise Exception("type not supported yet")

    state1 = opt1.create_state_multi_precision(0, w1)
    state2 = opt2.create_state_multi_precision(0, w2)
    if compare_states:
        compare_ndarray_tuple(state1, state2)

    opt1.update_multi_precision(0, w1, g1, state1)
    opt2.update_multi_precision(0, w2, g2, state2)
    if compare_states:
        compare_ndarray_tuple(state1, state2, rtol=rtol, atol=atol)
    assert_almost_equal(w1.asnumpy(), w2.asnumpy(), rtol=rtol, atol=atol)
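# Hedged usage sketch for compare_optimizer: two identically configured SGD
# instances form the smallest self-consistency check; real tests pass a
# reference and an optimized implementation.
opt1 = mx.optimizer.SGD(learning_rate=0.1, momentum=0.9)
opt2 = mx.optimizer.SGD(learning_rate=0.1, momentum=0.9)
compare_optimizer(opt1, opt2, (3, 4, 5), np.float32)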
def mutable_var_check():
    a, b = mx.nd.random_normal(0, -1, (2, 2)).copyto(default_context())
    a = mx.nd.dot(a, a)
    a.asnumpy()
def check_unroll(cell_type, num_states, layout):
    batch_size = 20
    input_size = 50
    hidden_size = 30
    seq_len = 10
    if layout == 'TNC':
        rnn_data = mx.nd.normal(loc=0, scale=1, shape=(seq_len, batch_size, input_size))
    elif layout == 'NTC':
        rnn_data = mx.nd.normal(loc=0, scale=1, shape=(batch_size, seq_len, input_size))
    else:
        print("Wrong layout")
        return
    valid_length = mx.nd.round(mx.nd.random.uniform(low=1, high=10, shape=(batch_size)))
    state_shape = (batch_size, hidden_size)
    states = [mx.nd.normal(loc=0, scale=1, shape=state_shape) for i in range(num_states)]

    cell = cell_type(hidden_size, prefix='rnn_')
    cell.initialize(ctx=default_context())
    if layout == 'TNC':
        cell(rnn_data[0], states)
    else:
        cell(rnn_data[:, 0, :], states)
    params1 = cell.collect_params()
    orig_params1 = copy.deepcopy(params1)

    trainer = gluon.Trainer(params1, 'sgd', {'learning_rate': 0.03})
    with mx.autograd.record():
        res1, states1 = cell.unroll(seq_len, rnn_data, states,
                                    valid_length=valid_length,
                                    layout=layout, merge_outputs=True)
    res1.backward()
    trainer.step(batch_size)

    configs = [
        lambda layer: None,
        lambda layer: layer.hybridize(),
        lambda layer: layer.hybridize({'inline_limit': 0}),
        lambda layer: layer.hybridize({'static_alloc': True}),
        lambda layer: layer.hybridize({'static_alloc': True, 'static_shape': True})
    ]
    # We can't pass None to a hybrid block, but it accepts an empty list,
    # so we use an empty list to represent valid_length if it's None.
    if valid_length is None:
        valid_length = []
    for config in configs:
        layer = TestRNNLayer(cell_type, hidden_size, layout)
        layer.initialize(ctx=default_context())
        config(layer)
        res2, states2 = layer(rnn_data, states, valid_length)
        params2 = layer.collect_params()
        for key, val in orig_params1.items():
            params2[key].set_data(copy.deepcopy(val.data()))

        trainer = gluon.Trainer(params2, 'sgd', {'learning_rate': 0.03})
        with mx.autograd.record():
            res2, states2 = layer(rnn_data, states, valid_length)
        assert_almost_equal(res1.asnumpy(), res2.asnumpy(), rtol=0.001, atol=0.0001)
        assert len(states1) == len(states2)
        for i in range(len(states1)):
            assert_almost_equal(states1[i].asnumpy(), states2[i].asnumpy(),
                                rtol=0.001, atol=0.0001)
        res2.backward()
        trainer.step(batch_size)

        for key, val in params1.items():
            weight1 = val.data()
            weight2 = params2[key].data()
            assert_almost_equal(weight1.asnumpy(), weight2.asnumpy(),
                                rtol=0.001, atol=0.0001)
                        atol=1e-3)

    # with propagating shapes/types
    mysym3 = sym.optimize_for("myProp", arg_array)
    exe3 = mysym3.bind(ctx=mx.cpu(), args=args)
    out3 = exe3.forward()
    # check that result matches one executed by MXNet
    assert_almost_equal(out[0].asnumpy(), out3[0].asnumpy(), rtol=1e-3, atol=1e-3)

@unittest.skipIf(check_platform(), "not all machine types supported")
@unittest.skipIf(is_cd_run(), "continuous delivery run - ignoring test")
@unittest.skipIf(default_context().device_type == 'cpu',
                 "ignoring custom_op_gpu test on cpu run")
def test_custom_op_gpu():
    # possible places to find library file
    if os.name == 'posix':
        lib = 'libcustomop_gpu_lib.so'
        if os.path.exists(lib):
            fname = lib
        elif os.path.exists('build/' + lib):
            fname = 'build/' + lib
        else:
            raise MXNetError("library %s not found " % lib)
    elif os.name == 'nt':
        lib = 'libcustomop_gpu_lib.dll'
        if os.path.exists('windows_package\\lib\\' + lib):
            fname = 'windows_package\\lib\\' + lib
def test_order(ctx=default_context()):
    def gt_topk(dat, axis, ret_typ, k, is_ascend):
        if ret_typ == "indices":
            if is_ascend:
                indices = np.arange(k)
            else:
                indices = np.arange(-1, -k - 1, -1)
            ret = np.take(dat.argsort(axis=axis), axis=axis, indices=indices, mode='wrap')
        elif ret_typ == "value":
            if is_ascend:
                indices = np.arange(k)
            else:
                indices = np.arange(-1, -k - 1, -1)
            ret = np.take(np.sort(dat, axis=axis), axis=axis, indices=indices, mode='wrap')
        else:
            assert dat.shape == (5, 5, 5, 5)
            assert axis is None or axis == 1
            ret = np.zeros(dat.shape)
            if is_ascend:
                indices = np.arange(k)
            else:
                indices = np.arange(-1, -k - 1, -1)
            gt_argsort = np.take(dat.argsort(axis=axis), axis=axis, indices=indices, mode='wrap')
            if axis is None:
                ret.ravel()[gt_argsort] = 1
            else:
                for i in range(5):
                    for j in range(5):
                        for k in range(5):
                            ret[i, gt_argsort[i, :, j, k], j, k] = 1
        return ret

    a_npy = np.random.normal(size=(5, 5, 5, 5))
    a_nd = mx.nd.array(a_npy, ctx=ctx)

    # test for ret_typ=indices
    nd_ret_topk = mx.nd.topk(a_nd, axis=1, ret_typ="indices", k=3, is_ascend=True).asnumpy()
    gt = gt_topk(a_npy, axis=1, ret_typ="indices", k=3, is_ascend=True)
    assert_almost_equal(nd_ret_topk, gt)
    nd_ret_topk = mx.nd.topk(a_nd, axis=3, ret_typ="indices", k=2, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=3, ret_typ="indices", k=2, is_ascend=False)
    assert_almost_equal(nd_ret_topk, gt)
    nd_ret_topk = mx.nd.topk(a_nd, axis=None, ret_typ="indices", k=21, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=None, ret_typ="indices", k=21, is_ascend=False)
    assert_almost_equal(nd_ret_topk, gt)

    # test for ret_typ=value
    nd_ret_topk = mx.nd.topk(a_nd, axis=1, ret_typ="value", k=3, is_ascend=True).asnumpy()
    gt = gt_topk(a_npy, axis=1, ret_typ="value", k=3, is_ascend=True)
    assert_almost_equal(nd_ret_topk, gt)
    nd_ret_topk = mx.nd.topk(a_nd, axis=3, ret_typ="value", k=2, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=3, ret_typ="value", k=2, is_ascend=False)
    assert_almost_equal(nd_ret_topk, gt)
    nd_ret_topk = mx.nd.topk(a_nd, axis=None, ret_typ="value", k=21, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=None, ret_typ="value", k=21, is_ascend=False)
    assert_almost_equal(nd_ret_topk, gt)

    # test for ret_typ=mask
    nd_ret_topk = mx.nd.topk(a_nd, axis=1, ret_typ="mask", k=3, is_ascend=True).asnumpy()
    gt = gt_topk(a_npy, axis=1, ret_typ="mask", k=3, is_ascend=True)
    assert_almost_equal(nd_ret_topk, gt)
    nd_ret_topk = mx.nd.topk(a_nd, axis=1, ret_typ="mask", k=2, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=1, ret_typ="mask", k=2, is_ascend=False)
    assert_almost_equal(nd_ret_topk, gt)
    nd_ret_topk = mx.nd.topk(a_nd, axis=None, ret_typ="mask", k=21, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=None, ret_typ="mask", k=21, is_ascend=False)
    assert_almost_equal(nd_ret_topk, gt)

    # test for ret_typ=both
    nd_ret_topk_val, nd_ret_topk_ind = mx.nd.topk(a_nd, axis=1, ret_typ="both", k=3, is_ascend=True)
    nd_ret_topk_val = nd_ret_topk_val.asnumpy()
    nd_ret_topk_ind = nd_ret_topk_ind.asnumpy()
    gt_val = gt_topk(a_npy, axis=1, ret_typ="value", k=3, is_ascend=True)
    gt_ind = gt_topk(a_npy, axis=1, ret_typ="indices", k=3, is_ascend=True)
    assert_almost_equal(nd_ret_topk_val, gt_val)
    assert_almost_equal(nd_ret_topk_ind, gt_ind)

    # test for sort
    nd_ret_sort = mx.nd.sort(a_nd, axis=1, is_ascend=True).asnumpy()
    gt = gt_topk(a_npy, axis=1, ret_typ="value", k=5, is_ascend=True)
    assert_almost_equal(nd_ret_sort, gt)
    nd_ret_sort = mx.nd.sort(a_nd, axis=None, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=None, ret_typ="value", k=5*5*5*5, is_ascend=False)
    assert_almost_equal(nd_ret_sort, gt)

    # test for argsort
    nd_ret_argsort = mx.nd.argsort(a_nd, axis=3, is_ascend=True).asnumpy()
    gt = gt_topk(a_npy, axis=3, ret_typ="indices", k=5, is_ascend=True)
    assert_almost_equal(nd_ret_argsort, gt)
    nd_ret_argsort = mx.nd.argsort(a_nd, axis=None, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=None, ret_typ="indices", k=5*5*5*5, is_ascend=False)
    assert_almost_equal(nd_ret_argsort, gt)

    # test topk with a big shape
    a = mx.nd.arange(0, 54686454, step=1, repeat=1)
    assert_almost_equal(a.topk(k=54686454).asnumpy(), a.asnumpy()[::-1])
def test_while_loop_rnn():
    def _array(shape):
        return mx.nd.random.uniform(-1.0, 1.0, shape=shape)

    cell_types = [mx.rnn.LSTMCell]
    num_params = [2]

    batch_size = 2
    hidden_dim = 3
    input_dim = 4
    seq_len = 3

    for cell, n_param in zip(cell_types, num_params):
        # using while_loop
        params = mx.rnn.RNNParams()
        data = mx.sym.var("data")
        iter_i = mx.sym.var("i")

        def _cond(*states):
            i = states[0]
            return i < seq_len

        def _func(*states):
            i = states[0]
            states = states[1:]
            in_ = data.take(i).squeeze(axis=0)
            rnn = cell(hidden_dim, prefix='', params=params)
            next_hidden, next_states = rnn(in_, states)
            return [next_hidden], [i + 1] + list(next_states)

        states = [mx.sym.var("s_" + str(i)) for i in range(n_param)]
        result = mx.sym.contrib.while_loop(
            cond=_cond,
            func=_func,
            loop_vars=[iter_i] + states,
            max_iterations=seq_len)
        result = mx.sym.Group(result[0] + result[1][1:])
        arg_shapes, _, _ = result.infer_shape(
            data=(seq_len, batch_size, input_dim),
            s_0=(batch_size, hidden_dim),
        )
        rnn_inputs = result.list_inputs()
        args = {name: _array(arg_shapes[i])
                for i, name in enumerate(rnn_inputs) if name != "i"}
        args["i"] = mx.nd.zeros([1])
        args_grad = {name: _array(arg_shapes[i])
                     for i, name in enumerate(rnn_inputs)}
        e_1 = result.bind(
            ctx=default_context(),
            args={name: array.copy() for name, array in args.items()},
            args_grad={name: array.copy()
                       for name, array in args_grad.items() if name != "i"},
        )
        # using unrolled rnn
        rnn = cell(hidden_dim, prefix='')
        unroll_outs = []
        for inputs in mx.sym.split(data, num_outputs=seq_len, axis=0, squeeze_axis=True):
            h, states = rnn(inputs, states)
            unroll_outs.append(mx.sym.expand_dims(h, axis=0))
        unroll_outs = _as_list(mx.sym.concat(*unroll_outs, dim=0))
        unroll_outs.extend(states)
        result = mx.sym.Group(unroll_outs)
        e_2 = result.bind(
            ctx=default_context(),
            args={name: array.copy()
                  for name, array in args.items() if name != "i"},
            args_grad={name: array.copy()
                       for name, array in args_grad.items() if name != "i"},
        )
        for case_id in range(100):
            out_grads = [_array(arr.shape) for arr in e_1.outputs]
            args = {name: array.copy() for name, array in args.items()}
            e_1.forward(is_train=True, **args)
            e_1.backward(out_grads)
            args = {name: array.copy()
                    for name, array in args.items() if name != "i"}
            e_2.forward(is_train=True, **args)
            e_2.backward(out_grads)
            assert len(e_1.outputs) == len(e_2.outputs)
            for x, y in zip(e_1.outputs, e_2.outputs):
                x = x.asnumpy()
                y = y.asnumpy()
                assert_almost_equal(x, y, rtol=1e-4, atol=1e-4)
            grad_keys = list(e_2.grad_dict.keys())
            e_1_grad = [e_1.grad_dict[x] for x in grad_keys]
            e_2_grad = [e_2.grad_dict[x] for x in grad_keys]
            for x, y in zip(e_1_grad, e_2_grad):
                x = x.asnumpy()
                y = y.asnumpy()
                assert_almost_equal(x, y, rtol=1e-4, atol=1e-4)
def test_order():
    ctx = default_context()
    dat_size = 5

    def gt_topk(dat, axis, ret_typ, k, is_ascend):
        if ret_typ == "indices":
            if is_ascend:
                indices = np.arange(k)
            else:
                indices = np.arange(-1, -k - 1, -1)
            ret = np.take(dat.argsort(axis=axis), axis=axis, indices=indices, mode='wrap')
        elif ret_typ == "value":
            if is_ascend:
                indices = np.arange(k)
            else:
                indices = np.arange(-1, -k - 1, -1)
            ret = np.take(np.sort(dat, axis=axis), axis=axis, indices=indices, mode='wrap')
        else:
            assert dat.shape == (dat_size, dat_size, dat_size, dat_size)
            assert axis is None or axis == 1
            ret = np.zeros(dat.shape)
            if is_ascend:
                indices = np.arange(k)
            else:
                indices = np.arange(-1, -k - 1, -1)
            gt_argsort = np.take(dat.argsort(axis=axis), axis=axis, indices=indices, mode='wrap')
            if axis is None:
                ret.ravel()[gt_argsort] = 1
            else:
                for i in range(dat_size):
                    for j in range(dat_size):
                        for k in range(dat_size):
                            ret[i, gt_argsort[i, :, j, k], j, k] = 1
        return ret

    # Produce input data for the tests, including ensuring unique values if desired.
    # Numpy's argsort does not consistently return lowest-index-first for matching
    # values, making it hard to generate a numpy 'golden copy' to compare against
    # the mxnet operator. The 'mask' function is particularly hard to test given that
    # equal values might span the 'k' boundary. Issue exposed with seed 1405838964.
    def get_values(ensure_unique):
        while True:
            data = np.float32(np.random.normal(size=(dat_size, dat_size, dat_size, dat_size)))
            if not ensure_unique:
                return data
            num_unique_values = len(set(data.flatten()))
            if data.size == num_unique_values:
                return data

    a_npy = get_values(ensure_unique=True)
    a_nd = mx.nd.array(a_npy, ctx=ctx)

    # test for ret_typ=indices
    nd_ret_topk = mx.nd.topk(a_nd, axis=1, ret_typ="indices", k=3, is_ascend=True).asnumpy()
    gt = gt_topk(a_npy, axis=1, ret_typ="indices", k=3, is_ascend=True)
    assert_almost_equal(nd_ret_topk, gt)
    nd_ret_topk = mx.nd.topk(a_nd, axis=3, ret_typ="indices", k=2, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=3, ret_typ="indices", k=2, is_ascend=False)
    assert_almost_equal(nd_ret_topk, gt)
    nd_ret_topk = mx.nd.topk(a_nd, axis=None, ret_typ="indices", k=21, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=None, ret_typ="indices", k=21, is_ascend=False)
    assert_almost_equal(nd_ret_topk, gt)

    # test for ret_typ=value
    nd_ret_topk = mx.nd.topk(a_nd, axis=1, ret_typ="value", k=3, is_ascend=True).asnumpy()
    gt = gt_topk(a_npy, axis=1, ret_typ="value", k=3, is_ascend=True)
    assert_almost_equal(nd_ret_topk, gt)
    nd_ret_topk = mx.nd.topk(a_nd, axis=3, ret_typ="value", k=2, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=3, ret_typ="value", k=2, is_ascend=False)
    assert_almost_equal(nd_ret_topk, gt)
    nd_ret_topk = mx.nd.topk(a_nd, axis=None, ret_typ="value", k=21, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=None, ret_typ="value", k=21, is_ascend=False)
    assert_almost_equal(nd_ret_topk, gt)

    # test for ret_typ=mask
    nd_ret_topk = mx.nd.topk(a_nd, axis=1, ret_typ="mask", k=3, is_ascend=True).asnumpy()
    gt = gt_topk(a_npy, axis=1, ret_typ="mask", k=3, is_ascend=True)
    assert_almost_equal(nd_ret_topk, gt)
    nd_ret_topk = mx.nd.topk(a_nd, axis=1, ret_typ="mask", k=2, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=1, ret_typ="mask", k=2, is_ascend=False)
    assert_almost_equal(nd_ret_topk, gt)
    nd_ret_topk = mx.nd.topk(a_nd, axis=None, ret_typ="mask", k=21, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=None, ret_typ="mask", k=21, is_ascend=False)
    assert_almost_equal(nd_ret_topk, gt)

    # test for ret_typ=both
    nd_ret_topk_val, nd_ret_topk_ind = mx.nd.topk(a_nd, axis=1, ret_typ="both", k=3, is_ascend=True)
    nd_ret_topk_val = nd_ret_topk_val.asnumpy()
    nd_ret_topk_ind = nd_ret_topk_ind.asnumpy()
    gt_val = gt_topk(a_npy, axis=1, ret_typ="value", k=3, is_ascend=True)
    gt_ind = gt_topk(a_npy, axis=1, ret_typ="indices", k=3, is_ascend=True)
    assert_almost_equal(nd_ret_topk_val, gt_val)
    assert_almost_equal(nd_ret_topk_ind, gt_ind)

    # test for sort
    nd_ret_sort = mx.nd.sort(a_nd, axis=1, is_ascend=True).asnumpy()
    gt = gt_topk(a_npy, axis=1, ret_typ="value", k=dat_size, is_ascend=True)
    assert_almost_equal(nd_ret_sort, gt)
    nd_ret_sort = mx.nd.sort(a_nd, axis=None, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=None, ret_typ="value",
                 k=dat_size*dat_size*dat_size*dat_size, is_ascend=False)
    assert_almost_equal(nd_ret_sort, gt)

    # test for argsort
    nd_ret_argsort = mx.nd.argsort(a_nd, axis=3, is_ascend=True).asnumpy()
    gt = gt_topk(a_npy, axis=3, ret_typ="indices", k=dat_size, is_ascend=True)
    assert_almost_equal(nd_ret_argsort, gt)
    nd_ret_argsort = mx.nd.argsort(a_nd, axis=None, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=None, ret_typ="indices",
                 k=dat_size*dat_size*dat_size*dat_size, is_ascend=False)
    assert_almost_equal(nd_ret_argsort, gt)

    # test topk with a big shape
    a = mx.nd.arange(0, 54686454, step=1, repeat=1)
    assert_almost_equal(a.topk(k=54686454).asnumpy(), a.asnumpy()[::-1])

    # Repeat those tests that don't involve indices. These should pass even with
    # duplicated input data values (over many repeated runs with different random
    # seeds, this will be tested).
    a_npy = get_values(ensure_unique=False)
    a_nd = mx.nd.array(a_npy, ctx=ctx)

    # test for ret_typ=value
    nd_ret_topk = mx.nd.topk(a_nd, axis=1, ret_typ="value", k=3, is_ascend=True).asnumpy()
    gt = gt_topk(a_npy, axis=1, ret_typ="value", k=3, is_ascend=True)
    assert_almost_equal(nd_ret_topk, gt)
    nd_ret_topk = mx.nd.topk(a_nd, axis=3, ret_typ="value", k=2, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=3, ret_typ="value", k=2, is_ascend=False)
    assert_almost_equal(nd_ret_topk, gt)
    nd_ret_topk = mx.nd.topk(a_nd, axis=None, ret_typ="value", k=21, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=None, ret_typ="value", k=21, is_ascend=False)
    assert_almost_equal(nd_ret_topk, gt)

    # test for sort
    nd_ret_sort = mx.nd.sort(a_nd, axis=1, is_ascend=True).asnumpy()
    gt = gt_topk(a_npy, axis=1, ret_typ="value", k=dat_size, is_ascend=True)
    assert_almost_equal(nd_ret_sort, gt)
    nd_ret_sort = mx.nd.sort(a_nd, axis=None, is_ascend=False).asnumpy()
    gt = gt_topk(a_npy, axis=None, ret_typ="value",
                 k=dat_size*dat_size*dat_size*dat_size, is_ascend=False)
    assert_almost_equal(nd_ret_sort, gt)
def test_unroll(cell_type, num_states, layout):
    class RNNLayer(gluon.HybridBlock):
        def __init__(self, cell_type, hidden_size, layout):
            super(RNNLayer, self).__init__()
            self.cell = cell_type(hidden_size)
            self.layout = layout

        def forward(self, inputs, states, valid_length):
            if isinstance(valid_length, list) and len(valid_length) == 0:
                valid_length = None
            return gluon.rnn.rnn_cell.dynamic_unroll(self.cell, inputs, states,
                                                     valid_length=valid_length,
                                                     layout=self.layout)

        def infer_shape(self, x, *args):
            self.cell.infer_shape(0, x, False)

    batch_size = 20
    input_size = 50
    hidden_size = 30
    seq_len = 10
    ctx = default_context()
    if layout == 'TNC':
        rnn_data = mx.np.random.normal(loc=0, scale=1,
                                       size=(seq_len, batch_size, input_size), ctx=ctx)
    elif layout == 'NTC':
        rnn_data = mx.np.random.normal(loc=0, scale=1,
                                       size=(batch_size, seq_len, input_size), ctx=ctx)
    else:
        print("Wrong layout")
        return
    valid_length = mx.np.round(mx.np.random.uniform(low=1, high=10,
                                                    size=(batch_size), ctx=ctx))
    state_shape = (batch_size, hidden_size)
    states = [mx.np.random.normal(loc=0, scale=1, size=state_shape, ctx=ctx)
              for i in range(num_states)]

    cell = cell_type(hidden_size)
    if layout == 'TNC':
        cell.infer_shape(0, rnn_data[0], False)
        cell.initialize(ctx=default_context())
        cell(rnn_data[0], states)
    else:
        cell.infer_shape(0, rnn_data[:, 0, :], False)
        cell.initialize(ctx=default_context())
        cell(rnn_data[:, 0, :], states)
    params1 = cell.collect_params()
    orig_params1 = copy.deepcopy(params1)

    trainer = gluon.Trainer(params1, 'sgd', {'learning_rate': 0.03})
    with mx.autograd.record():
        res1, states1 = cell.unroll(seq_len, rnn_data, states,
                                    valid_length=valid_length,
                                    layout=layout, merge_outputs=True)
    res1.backward()
    trainer.step(batch_size)

    configs = [
        lambda layer: None,
        lambda layer: layer.hybridize(),
        lambda layer: layer.hybridize({'inline_limit': 0}),
        lambda layer: layer.hybridize({'static_alloc': True}),
        lambda layer: layer.hybridize({'static_alloc': True, 'static_shape': True})
    ]
    # We can't pass None to a hybrid block, but it accepts an empty list,
    # so we use an empty list to represent valid_length if it's None.
    if valid_length is None:
        valid_length = []
    for config in configs:
        layer = RNNLayer(cell_type, hidden_size, layout)
        layer.infer_shape(rnn_data)
        layer.initialize(ctx=default_context())
        config(layer)
        res2, states2 = layer(rnn_data, states, valid_length)
        params2 = layer.collect_params()
        for key, val in orig_params1.items():
            params2['cell.' + key].set_data(copy.deepcopy(val.data()))

        trainer = gluon.Trainer(params2, 'sgd', {'learning_rate': 0.03})
        with mx.autograd.record():
            res2, states2 = layer(rnn_data, states, valid_length)
        assert_almost_equal(res1, res2, rtol=0.001, atol=0.0001)
        assert len(states1) == len(states2)
        for i in range(len(states1)):
            assert_almost_equal(states1[i], states2[i], rtol=0.001, atol=0.0001)
        res2.backward()
        trainer.step(batch_size)

        for key, val in params1.items():
            weight1 = val.data()
            weight2 = params2['cell.' + key].data()
            # The subgraph created from npx.foreach in deferred compute is a
            # little different from the legacy foreach operator.
            assert_almost_equal(weight1, weight2, rtol=0.1, atol=0.1)
def check_unroll(cell_type, num_states, layout):
    batch_size = 20
    input_size = 50
    hidden_size = 30
    seq_len = 10
    if layout == 'TNC':
        rnn_data = mx.nd.normal(loc=0, scale=1, shape=(seq_len, batch_size, input_size))
    elif layout == 'NTC':
        rnn_data = mx.nd.normal(loc=0, scale=1, shape=(batch_size, seq_len, input_size))
    else:
        print("Wrong layout")
        return
    valid_length = mx.nd.round(mx.nd.random.uniform(low=1, high=10, shape=(batch_size)))
    state_shape = (batch_size, hidden_size)
    states = [mx.nd.normal(loc=0, scale=1, shape=state_shape) for i in range(num_states)]

    cell = cell_type(hidden_size, prefix='rnn_')
    cell.initialize(ctx=default_context())
    if layout == 'TNC':
        cell(rnn_data[0], states)
    else:
        cell(rnn_data[:, 0, :], states)
    params1 = cell.collect_params()
    orig_params1 = copy.deepcopy(params1)

    trainer = gluon.Trainer(params1, 'sgd', {'learning_rate': 0.03})
    with mx.autograd.record():
        res1, states1 = cell.unroll(seq_len, rnn_data, states,
                                    valid_length=valid_length,
                                    layout=layout, merge_outputs=True)
    res1.backward()
    trainer.step(batch_size)

    configs = [
        lambda layer: None,
        lambda layer: layer.hybridize(),
        lambda layer: layer.hybridize({'inline_limit': 0}),
        lambda layer: layer.hybridize({'static_alloc': True}),
        lambda layer: layer.hybridize({'static_alloc': True, 'static_shape': True})
    ]
    # We can't pass None to a hybrid block, but it accepts an empty list,
    # so we use an empty list to represent valid_length if it's None.
    if valid_length is None:
        valid_length = []
    for config in configs:
        layer = RNNLayer(cell_type, hidden_size, layout)
        layer.initialize(ctx=default_context())
        config(layer)
        res2, states2 = layer(rnn_data, states, valid_length)
        params2 = layer.collect_params()
        for key, val in orig_params1.items():
            params2[key].set_data(copy.deepcopy(val.data()))

        trainer = gluon.Trainer(params2, 'sgd', {'learning_rate': 0.03})
        with mx.autograd.record():
            res2, states2 = layer(rnn_data, states, valid_length)
        assert_almost_equal(res1, res2, rtol=0.001, atol=0.0001)
        assert len(states1) == len(states2)
        for i in range(len(states1)):
            assert_almost_equal(states1[i], states2[i], rtol=0.001, atol=0.0001)
        res2.backward()
        trainer.step(batch_size)

        for key, val in params1.items():
            weight1 = val.data()
            weight2 = params2[key].data()
            assert_almost_equal(weight1, weight2, rtol=0.001, atol=0.0001)