def test_layer_norm():
    A = np.ones((2, INT_OVERFLOW))
    A.attach_grad()
    with mx.autograd.record():
        B = npx.layer_norm(A, gamma=np.ones((2)), beta=np.zeros((2)), axis=0)
    assert B.shape == (2, INT_OVERFLOW)
    assert B[0][0] == 0
    B.backward()
    assert A.grad.shape == (2, INT_OVERFLOW)
    assert A.grad[0][0] == 0
def get_conv_data_mxnet(oc, ic, n, k, p, s):
    mpx.random.seed(0)
    data = mp.random.normal(size=(1, ic, n, n))
    weight = mp.random.normal(size=(oc, ic, k, k))
    bias = mp.zeros((oc, ))
    on = conv_out_size(n, k, p, s)
    out = mp.empty((1, oc, on, on))
    # Wait until the data are generated so that later benchmarking is accurate
    mpx.waitall()
    return data, weight, bias, out
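# Possible usage sketch (not part of the original source): generate inputs for
# benchmarking a 3x3 convolution with 64 input/output channels on a 64x64 feature map,
# padding 1 and stride 1. `conv_out_size` is assumed to be defined elsewhere, e.g. as
# (n - k + 2 * p) // s + 1.
data, weight, bias, out = get_conv_data_mxnet(oc=64, ic=64, n=64, k=3, p=1, s=1)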
def test_reshape_like():
    A = np.ones((INT_OVERFLOW, 2))
    A.attach_grad()
    with mx.autograd.record():
        B = npx.reshape_like(A, np.zeros((2, INT_OVERFLOW)))
    assert B.shape == (2, INT_OVERFLOW)
    assert B[0][0] == 1
    B.backward()
    assert A.grad.shape == (INT_OVERFLOW, 2)
    assert A.grad[0][0] == 1
def test_one_hot():
    A = np.zeros((INT_OVERFLOW))
    A.attach_grad()
    with mx.autograd.record():
        B = npx.one_hot(A, 2)
    assert B.shape == (INT_OVERFLOW, 2)
    assert B[0][0] == 1
    B.backward()
    assert A.grad.shape == (INT_OVERFLOW, )
    assert A.grad[0] == 0
def test_arange_like():
    A = np.zeros((INT_OVERFLOW, 2))
    A.attach_grad()
    with mx.autograd.record():
        B = npx.arange_like(A)
    assert B.shape == (INT_OVERFLOW, 2)
    assert B[100][0] == 200
    B.backward()
    assert A.grad.shape == (INT_OVERFLOW, 2)
    assert A.grad[0][0] == 0
def test_sign():
    inp = np.zeros((INT_OVERFLOW, 2))
    inp[-1, -1], inp[-2, -1] = 2, -2
    inp.attach_grad()
    with mx.autograd.record():
        out = np.sign(inp)
        out.backward()
    assert out.shape == inp.shape
    assert out[0, 0] == 0 and out[-1, -1] == 1 and out[-2, -1] == -1
    assert inp.grad.shape == inp.shape
    assert inp.grad[-1, -1] == 0
def encoder(en_in, num_classes, c_len, ctx):
    vector = np.zeros((en_in.shape[0], c_len * num_classes), ctx=ctx)
    div_arr = []
    for k in range(c_len - 1, -1, -1):
        div_arr.append(10**k)
    for i, src in enumerate(en_in):
        for j, d in enumerate(div_arr):
            ss = (src / d).astype(np.int32)
            src -= ss * d
            vector[i, j * num_classes + ss] = 1
    return vector
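# Hypothetical usage sketch (not part of the original source): encode each input number
# as c_len decimal digits, each digit one-hot over num_classes=10 slots. Assumes `np`
# and `mx` refer to mxnet.numpy and mxnet as in the surrounding code; the concrete
# values (42 and 7, c_len=2) are only for illustration.
batch = np.array([[42], [7]], ctx=mx.cpu())
encoded = encoder(batch, num_classes=10, c_len=2, ctx=mx.cpu())
# encoded has shape (2, 20): digits [4, 2] and [0, 7] as concatenated one-hot vectors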
def test_sigmoid():
    A = np.zeros((INT_OVERFLOW, 2))
    A.attach_grad()
    with mx.autograd.record():
        B = npx.sigmoid(A)
    assert B.shape == (INT_OVERFLOW, 2)
    assert B[0][0] == 0.5
    B.backward()
    assert A.grad.shape == (INT_OVERFLOW, 2)
    assert_almost_equal(A.grad[0][0], np.array([0.25]),
                        rtol=1e-3, atol=1e-5)
def test_argmax():
    A = np.zeros((INT_OVERFLOW, 2))
    A[10][1] = 1
    A.attach_grad()
    with mx.autograd.record():
        B = np.argmax(A)
    # argmax over the flattened array: index 10 * 2 + 1 = 21
    assert B == 21
    B.backward()
    assert A.grad.shape == (INT_OVERFLOW, 2)
    assert A.grad[0][0] == 0
def n_dim_array():
    x = np.arange(12)
    print(x, type(x))
    x = x.reshape(-1, 3)
    print("x {} of type '{}' with shape '{}'".format(x, type(x), x.shape))
    y = np.zeros((2, 3, 4))
    print("y {} with shape {}".format(y, y.shape))
    z = np.random.normal(10, 1, size=(3, 4))
    print("z {} with shape {}".format(z, z.shape))
    a = np.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
    print("a {} with shape {}".format(a, a.shape))
def test_np_minimum():
    # TODO(junwu): Add more test cases
    x1, x2 = mx.sym.var('x1').as_np_ndarray(), mx.sym.var('x2').as_np_ndarray()
    ret = mx.sym.np.minimum(x1, x2)
    assert type(ret) == mx.sym.np._Symbol

    def check_minimum(x1, x2):
        mx_out = np.minimum(x1, x2)
        if isinstance(x1, np.ndarray) or isinstance(x2, np.ndarray):
            assert type(mx_out) == np.ndarray
        np_out = _np.minimum(x1.asnumpy() if isinstance(x1, np.ndarray) else x1,
                             x2.asnumpy() if isinstance(x2, np.ndarray) else x2)
        assert same(mx_out.asnumpy() if isinstance(mx_out, np.ndarray) else mx_out, np_out)

    check_minimum(np.zeros((2, 1)), np.ones((5, 1, 4)))
    check_minimum(np.zeros((2, 0)), np.ones((5, 1, 1)))
    check_minimum(np.zeros(()), np.ones((5, 1, 4)))
def test_nonzero():
    A = np.zeros((2, INT_OVERFLOW))
    A[0][0] = 1
    A.attach_grad()
    with mx.autograd.record():
        B = npx.nonzero(A)
    assert B.shape == (1, 2)
    assert B[0][0] == 0
    B.backward()
    assert A.grad.shape == (2, INT_OVERFLOW)
    assert A.grad[0][0] == 0
def test_invert():
    inp = np.zeros((2, INT_OVERFLOW), dtype='uint8')
    inp[-1, -1] = 1
    inp.attach_grad()
    with mx.autograd.record():
        out = np.invert(inp)
        out.backward()
    assert out.shape == inp.shape
    assert out[0, 0] == 255 and out[-1, -1] == 254
    assert inp.grad.shape == inp.shape
    assert inp.grad[-1, -1] == 0
def _params_init(self, ctx):
    w_ih = self._w_normal_init(self.i_shape, ctx=ctx)
    w_ir = self._w_normal_init(self.i_shape, ctx=ctx)
    w_iu = self._w_normal_init(self.i_shape, ctx=ctx)
    w_hh = self._w_normal_init(self.h_shape, ctx=ctx)
    w_hr = self._w_normal_init(self.h_shape, ctx=ctx)
    w_hu = self._w_normal_init(self.h_shape, ctx=ctx)
    b_h = mxnp.zeros(shape=self.num_hiddens, dtype=mxnp.float32, ctx=ctx)
    b_r = mxnp.zeros(shape=self.num_hiddens, dtype=mxnp.float32, ctx=ctx)
    b_u = mxnp.zeros(shape=self.num_hiddens, dtype=mxnp.float32, ctx=ctx)
    w_ho = self._w_normal_init(self.o_shape, ctx=ctx)
    b_o = mxnp.zeros(shape=self.num_outputs, dtype=mxnp.float32, ctx=ctx)
    params = [w_ih, w_ir, w_iu, w_hh, w_hr, w_hu, b_h, b_r, b_u, w_ho, b_o]
    for param in params:
        param.attach_grad()
    return params
def test_rint():
    inp = np.zeros((INT_OVERFLOW, 2))
    inp[0, 0], inp[-1, -1] = 2.1, 2.9
    inp.attach_grad()
    with mx.autograd.record():
        out = np.rint(inp)
        out.backward()
    assert out.shape == inp.shape
    assert out[0, 0] == 2 and out[-1, -1] == 3
    assert inp.grad.shape == inp.shape
    assert inp.grad[-1, -1] == 0
def test_flipud():
    inp = np.zeros((2, 1, INT_OVERFLOW))
    inp[0, 0, 0] = 2
    inp.attach_grad()
    with mx.autograd.record():
        out = np.flipud(inp)
        out.backward()
    assert out.shape == inp.shape
    assert out[1, 0, 0] == 2
    assert inp.grad.shape == inp.shape
    assert inp.grad[0, 0, 0] == 1
def test_ctc_loss():
    def test_ctc_loss_size_check(A, label):
        assertRaises(ValueError, npx.ctc_loss, A, label)

    L_SEQ, L_ALP, L_LAB, BAT = 2**10, 2**20, 2**6, 2
    A = np.zeros((L_SEQ, BAT, L_ALP))
    label = np.random.randint(0, L_ALP, (BAT, L_LAB))
    # test for expected exception
    test_ctc_loss_size_check(A, label)
    # now we shrink the size a little bit and test for an allowed case
    L_ALP = 2**20 - 1
    A = np.zeros((L_SEQ, BAT, L_ALP))
    label = np.random.randint(0, L_ALP, (BAT, L_LAB))
    A.attach_grad()
    with mx.autograd.record():
        B = npx.ctc_loss(A, label)
    assert B.shape == (BAT, )
    assert type(B[0]).__name__ == 'ndarray'
    B.backward()
    assert A.grad.shape == (L_SEQ, BAT, L_ALP)
    assert type(A[0]).__name__ == 'ndarray'
def test_abs():
    # abs, absolute, and fabs are the same thing
    inp = np.zeros((INT_OVERFLOW, 2))
    inp[-1, -1] = -1
    inp.attach_grad()
    with mx.autograd.record():
        out = np.abs(inp)
        out.backward()
    assert out.shape == (INT_OVERFLOW, 2)
    assert out[-1, -1] == 1
    assert inp.grad.shape == (INT_OVERFLOW, 2)
    assert inp.grad[-1, -1] == -1
def test_index_update():
    A = np.zeros((2, INT_OVERFLOW))
    ind = np.array([[0, 0], [0, 1]], dtype='int32')
    val = np.array([100, 200])
    A.attach_grad()
    with mx.autograd.record():
        B = npx.index_update(A, ind, val)
    assert B.shape == (2, INT_OVERFLOW)
    assert B[0][0] == 100 and B[0][1] == 200
    B.backward()
    assert A.grad.shape == (2, INT_OVERFLOW)
    assert A.grad[0][0] == 0
def bilinear_kernel(in_channels, out_channels, kernel_size):
    factor = (kernel_size + 1) // 2
    if kernel_size % 2 == 1:
        center = factor - 1
    else:
        center = factor - 0.5
    og = (np.arange(kernel_size).reshape(-1, 1),
          np.arange(kernel_size).reshape(1, -1))
    filt = (1 - np.abs(og[0] - center) / factor) * \
           (1 - np.abs(og[1] - center) / factor)
    weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size))
    weight[range(in_channels), range(out_channels), :, :] = filt
    return np.array(weight)
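# Possible usage sketch (not part of the original source): use the kernel to initialize
# a transposed convolution so that it performs 2x bilinear upsampling. Assumes
# `from mxnet.gluon import nn` and `from mxnet import init` are available.
conv_trans = nn.Conv2DTranspose(3, kernel_size=4, padding=1, strides=2)
conv_trans.initialize(init.Constant(bilinear_kernel(3, 3, 4)))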
def test_flip():
    inp = np.zeros((2, INT_OVERFLOW))
    inp[0, 0] = 2
    inp.attach_grad()
    with mx.autograd.record():
        out = np.flip(inp, axis=0)
        out.backward()
    assert out.shape == inp.shape
    assert out[1, 0] == 2
    assert inp.grad.shape == inp.shape
    assert inp.grad[0, 0] == 1
    out2 = np.flip(inp, axis=1)
    assert out2[0, -1] == 2
def test_atleast_xd_family():
    def batch_check(x, funcs, shapes):
        for f, s in zip(funcs, shapes):
            x.attach_grad()
            with mx.autograd.record():
                y = f(x)
            assert y.shape == s
            y.backward()
            assert x.grad.shape == (INT_OVERFLOW, )
            assert x.grad[0] == 0

    A = np.zeros((INT_OVERFLOW))
    batch_check(A, [np.atleast_1d, np.atleast_2d, np.atleast_3d],
                [(INT_OVERFLOW, ), (1, INT_OVERFLOW), (1, INT_OVERFLOW, 1)])
def _add_workload_inner():
    OpArgMngr.add_workload('inner', np.zeros(shape=(1, 80), dtype=np.float64),
                           np.zeros(shape=(1, 80), dtype=np.float64))
    for dt in [np.float32, np.float64]:
        # OpArgMngr.add_workload('inner', np.array(3, dtype=dt)[()], np.array([1, 2], dtype=dt))
        # OpArgMngr.add_workload('inner', np.array([1, 2], dtype=dt), np.array(3, dtype=dt)[()])
        A = np.array([[1, 2], [3, 4]], dtype=dt)
        B = np.array([[1, 3], [2, 4]], dtype=dt)
        C = np.array([1, 1], dtype=dt)
        OpArgMngr.add_workload('inner', A.T, C)
        OpArgMngr.add_workload('inner', C, A.T)
        OpArgMngr.add_workload('inner', B, C)
        OpArgMngr.add_workload('inner', C, B)
        OpArgMngr.add_workload('inner', A, B)
        OpArgMngr.add_workload('inner', A, A)
        OpArgMngr.add_workload('inner', A, A.copy())
        a = np.arange(5).astype(dt)
        b = a[::-1]
        OpArgMngr.add_workload('inner', b, a)
        a = np.arange(24).reshape(2, 3, 4).astype(dt)
        b = np.arange(24, 48).reshape(2, 3, 4).astype(dt)
        OpArgMngr.add_workload('inner', a, b)
        OpArgMngr.add_workload('inner', b, a)
def test_nonzero():
    A = np.zeros((2, INT_OVERFLOW))
    A[0, 1] = 1
    A[0, -2] = 1
    A.attach_grad()
    with mx.autograd.record():
        B = npx.nonzero(A)
    assert B.shape == (2, 2)
    assert B[0, 0] == 0 and B[0, 1] == 1
    assert B[1, 0] == 0 and B[1, 1] == int(INT_OVERFLOW - 2)
    B.backward()
    assert A.grad.shape == (2, INT_OVERFLOW)
    assert A.grad[0][0] == 0
def init_state_from_encoder(self,
                            encoder_outputs: np.ndarray,
                            encoder_valid_length: Optional[np.ndarray] = None,
                            target_embed: Optional[np.ndarray] = None) -> List[np.ndarray]:
    """
    Returns the initial states given encoder output. States for teacher-forced training
    are encoder outputs and a valid length mask for encoder outputs. At inference, this
    method returns the following state tuple: valid length bias, step state,
    [projected encoder attention keys, projected encoder attention values] * num_layers,
    [autoregressive state dummies] * num_layers.

    :param encoder_outputs: Encoder outputs. Shape: (batch, source_length, encoder_dim).
    :param encoder_valid_length: Valid lengths of encoder outputs. Shape: (batch,).
    :param target_embed: Target-side embedding layer output. Shape: (batch, target_length, target_embedding_dim).
    :return: Initial states.
    """
    if target_embed is None:
        # Inference: initial step = 0. Shape: (batch_size, 1)
        steps = np.expand_dims(np.zeros_like(encoder_valid_length), axis=1)
    else:
        # Training: steps up to target length. Shape: (1, target_length)
        steps = np.expand_dims(npx.arange_like(target_embed, axis=1), axis=0)

    if self.inference_only:
        # Encoder projection caching, therefore we don't pass the encoder_outputs
        states = [steps, encoder_valid_length]
        for layer in self.layers:
            enc_att_kv = layer.enc_attention.ff_kv(encoder_outputs)
            states.append(np.transpose(enc_att_kv, axes=(1, 0, 2)))
    else:
        # NO encoder projection caching
        states = [steps,
                  np.transpose(encoder_outputs, axes=(1, 0, 2)),
                  encoder_valid_length]

    _batch_size = encoder_outputs.shape[0]
    _ctx = encoder_outputs.ctx
    _dtype = encoder_outputs.dtype
    dummy_autoregr_states = [np.zeros(layer.get_states_shape(_batch_size), ctx=_ctx, dtype=_dtype)
                             for layer in self.layers
                             for _ in range(layer.num_state_tensors)]

    states += dummy_autoregr_states
    return states
def load_data_ml100k(data, num_users, num_items, feedback='explicit'):
    users, items, scores = [], [], []
    inter = np.zeros((num_items, num_users)) if feedback == 'explicit' else {}
    for line in data.itertuples():
        user_index, item_index = int(line[1] - 1), int(line[2] - 1)
        score = int(line[3]) if feedback == 'explicit' else 1
        users.append(user_index)
        items.append(item_index)
        scores.append(score)
        if feedback == 'implicit':
            inter.setdefault(user_index, []).append(item_index)
        else:
            inter[item_index, user_index] = score
    return users, items, scores, inter
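# Possible usage sketch (not part of the original source): `ratings_df` is assumed to be
# a pandas DataFrame with columns (user_id, item_id, rating, timestamp), e.g. the
# MovieLens 100K `u.data` file, which has 943 users and 1682 items.
import pandas as pd

ratings_df = pd.read_csv('ml-100k/u.data', sep='\t',
                         names=['user_id', 'item_id', 'rating', 'timestamp'])
users, items, scores, inter = load_data_ml100k(
    ratings_df, num_users=943, num_items=1682, feedback='explicit')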
def test_boolean_catch_exception():
    # adapted from numpy's test_indexing.py
    arr = np.ones((5, 4, 3))

    index = np.array([True], dtype=np.bool_)
    assert_exception(arr.__getitem__, IndexError, index)

    index = np.array([False] * 6, dtype=np.bool_)
    assert_exception(arr.__getitem__, IndexError, index)

    index = np.zeros((4, 4), dtype=bool)
    assert_exception(arr.__getitem__, IndexError, index)
    assert_exception(arr.__getitem__, TypeError, (slice(None), index))
def test_batch_norm():
    A = np.ones((2, INT_OVERFLOW))
    gamma = np.ones((2))
    beta = np.zeros((2))
    mov_mean = np.ones((2))
    mov_var = np.ones((2))
    A.attach_grad()
    with mx.autograd.record():
        B = npx.batch_norm(A, gamma, beta, mov_mean, mov_var)
    assert B.shape == (2, INT_OVERFLOW)
    assert B[0][0] == 0
    B.backward()
    assert A.grad.shape == (2, INT_OVERFLOW)
    assert A.grad[0][0] == 0
def test_fmin():
    inp1 = np.ones((INT_OVERFLOW, 2))
    inp1[-1, -1] = -1
    inp2 = np.zeros((INT_OVERFLOW, 1))
    inp1.attach_grad()
    inp2.attach_grad()
    with mx.autograd.record():
        out = np.fmin(inp1, inp2)
        out.backward()
    assert out.shape == inp1.shape
    assert out[-1, -1] == -1
    assert inp1.grad.shape == inp1.shape
    assert inp1.grad[-1, -1] == 1 and inp1.grad[0, 0] == 0
    assert inp2.grad.shape == inp2.shape
    assert inp2.grad[-1] == 1 and inp2.grad[0] == 2
def test_subtract():
    A = np.zeros((INT_OVERFLOW, 2))
    B = np.ones((INT_OVERFLOW, 2))
    A[-1, -1] = 3
    A.attach_grad()
    B.attach_grad()
    with mx.autograd.record():
        C = np.subtract(A, B)
        C.backward()
    assert C.shape == (INT_OVERFLOW, 2)
    assert C[0, 0] == -1 and C[-1][-1] == 2
    assert A.grad.shape == (INT_OVERFLOW, 2)
    assert A.grad[0][0] == 1
    assert B.grad.shape == (INT_OVERFLOW, 2)
    assert B.grad[0][0] == -1