def _test_resize_with_diff_type(dtype):
    # test normal case
    data_in = nd.random.uniform(0, 255, (300, 200, 3)).astype(dtype)
    out_nd = transforms.Resize(200)(data_in)
    data_expected = mx.image.imresize(data_in, 200, 200, 1)
    assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy())
    # test 4D input
    data_batch_in = nd.random.uniform(0, 255, (3, 300, 200, 3)).astype(dtype)
    out_batch_nd = transforms.Resize(200)(data_batch_in)
    for i in range(len(out_batch_nd)):
        assert_almost_equal(mx.image.imresize(data_batch_in[i], 200, 200, 1).asnumpy(),
                            out_batch_nd[i].asnumpy())
    # test interp = 2
    out_nd = transforms.Resize(200, interpolation=2)(data_in)
    data_expected = mx.image.imresize(data_in, 200, 200, 2)
    assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy())
    # test height not equal to width
    out_nd = transforms.Resize((200, 100))(data_in)
    data_expected = mx.image.imresize(data_in, 200, 100, 1)
    assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy())
    # test keep_ratio
    out_nd = transforms.Resize(150, keep_ratio=True)(data_in)
    data_expected = mx.image.imresize(data_in, 150, 225, 1)
    assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy())
    # test size below zero
    invalid_transform = transforms.Resize(-150, keep_ratio=True)
    assertRaises(MXNetError, invalid_transform, data_in)
    # test size tuple with more than 2 elements
    invalid_transform = transforms.Resize((100, 100, 100), keep_ratio=True)
    assertRaises(MXNetError, invalid_transform, data_in)
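# All of these tests lean on an assertRaises(expected_exception, func, *args,
# **kwargs) helper from the MXNet test suite's shared utilities. A minimal
# standalone equivalent, for running individual snippets in isolation (a
# sketch, not the suite's exact implementation):
def assertRaises(expected_exception, func, *args, **kwargs):
    try:
        func(*args, **kwargs)
    except expected_exception:
        pass  # the expected failure path
    else:
        raise AssertionError('%r did not raise %s'
                             % (func, expected_exception.__name__))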
def test_normalize():
    # 3D Input
    data_in_3d = nd.random.uniform(0, 1, (3, 300, 300))
    out_nd_3d = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))(data_in_3d)
    data_expected_3d = data_in_3d.asnumpy()
    data_expected_3d[0] = data_expected_3d[0] / 3.0
    data_expected_3d[1] = (data_expected_3d[1] - 1.0) / 2.0
    data_expected_3d[2] = data_expected_3d[2] - 2.0
    assert_almost_equal(data_expected_3d, out_nd_3d.asnumpy())

    # 4D Input
    data_in_4d = nd.random.uniform(0, 1, (2, 3, 300, 300))
    out_nd_4d = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))(data_in_4d)
    data_expected_4d = data_in_4d.asnumpy()
    data_expected_4d[:, 0] = data_expected_4d[:, 0] / 3.0
    data_expected_4d[:, 1] = (data_expected_4d[:, 1] - 1.0) / 2.0
    data_expected_4d[:, 2] = data_expected_4d[:, 2] - 2.0
    assert_almost_equal(data_expected_4d, out_nd_4d.asnumpy())

    # Invalid Input - neither 3D nor 4D
    invalid_data_in = nd.random.uniform(0, 1, (5, 5, 3, 300, 300))
    normalize_transformer = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))
    assertRaises(MXNetError, normalize_transformer, invalid_data_in)

    # Invalid Input - channel count neither 1 nor 3
    invalid_data_in = nd.random.uniform(0, 1, (5, 4, 300, 300))
    normalize_transformer = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))
    assertRaises(MXNetError, normalize_transformer, invalid_data_in)
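# A NumPy reference for the channel-wise arithmetic checked above, assuming
# CHW layout: Normalize computes (x - mean[c]) / std[c] for each channel c.
# The helper np_normalize below is hypothetical, shown only to make the
# expected math explicit.
import numpy as np

def np_normalize(x, mean, std):
    # x: (C, H, W); mean and std broadcast per channel over H and W
    mean = np.asarray(mean, dtype=x.dtype).reshape(-1, 1, 1)
    std = np.asarray(std, dtype=x.dtype).reshape(-1, 1, 1)
    return (x - mean) / std

x = np.random.uniform(0, 1, (3, 4, 4)).astype(np.float32)
ref = np_normalize(x, mean=(0, 1, 2), std=(3, 2, 1))
assert ref.shape == (3, 4, 4)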
def test_to_tensor():
    # 3D Input
    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert_almost_equal(out_nd.asnumpy(),
                        np.transpose(data_in.astype(dtype=np.float32) / 255.0, (2, 0, 1)))

    # 4D Input
    data_in_4d = nd.random.uniform(0, 1, (2, 3, 300, 300))
    out_nd_4d = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))(data_in_4d)
    data_expected_4d = data_in_4d.asnumpy()
    data_expected_4d[:, 0] = data_expected_4d[:, 0] / 3.0
    data_expected_4d[:, 1] = (data_expected_4d[:, 1] - 1.0) / 2.0
    data_expected_4d[:, 2] = data_expected_4d[:, 2] - 2.0
    assert_almost_equal(data_expected_4d, out_nd_4d.asnumpy())

    # Default normalize values, i.e. mean=0, std=1
    data_in_3d_def = nd.random.uniform(0, 1, (3, 300, 300))
    out_nd_3d_def = transforms.Normalize()(data_in_3d_def)
    data_expected_3d_def = data_in_3d_def.asnumpy()
    assert_almost_equal(data_expected_3d_def, out_nd_3d_def.asnumpy())

    # Invalid Input - neither 3D nor 4D
    invalid_data_in = nd.random.uniform(0, 1, (5, 5, 3, 300, 300))
    normalize_transformer = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))
    assertRaises(MXNetError, normalize_transformer, invalid_data_in)

    # Invalid Input - channel count neither 1 nor 3
    invalid_data_in = nd.random.uniform(0, 1, (5, 4, 300, 300))
    normalize_transformer = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))
    assertRaises(MXNetError, normalize_transformer, invalid_data_in)
def check_invalid_rsp_pull_list(kv, key):
    dns_val = [mx.nd.ones(shape) * 2] * len(key)
    assertRaises(MXNetError, kv.row_sparse_pull, key, out=dns_val,
                 row_ids=[mx.nd.array([1])] * len(key))
def test_to_tensor():
    # 3D Input
    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert_almost_equal(out_nd.asnumpy(),
                        np.transpose(data_in.astype(dtype=np.float32) / 255.0, (2, 0, 1)))

    # 4D Input
    data_in = np.random.uniform(0, 255, (5, 300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert_almost_equal(out_nd.asnumpy(),
                        np.transpose(data_in.astype(dtype=np.float32) / 255.0, (0, 3, 1, 2)))

    # Invalid Input
    invalid_data_in = nd.random.uniform(0, 255, (5, 5, 300, 300, 3)).astype(dtype=np.uint8)
    transformer = transforms.ToTensor()
    assertRaises(MXNetError, transformer, invalid_data_in)

    # Bounds (0 -> 0, 255 -> 1)
    data_in = np.zeros((10, 20, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert same(out_nd.asnumpy(),
                np.transpose(np.zeros(data_in.shape, dtype=np.float32), (2, 0, 1)))

    data_in = np.full((10, 20, 3), 255).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert same(out_nd.asnumpy(),
                np.transpose(np.ones(data_in.shape, dtype=np.float32), (2, 0, 1)))
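# The ToTensor contract these cases pin down, in NumPy terms: an HWC uint8
# image in [0, 255] becomes a CHW float32 tensor in [0, 1] (batched input goes
# NHWC -> NCHW). np_to_tensor is a hypothetical reference helper, not an
# mxnet API.
import numpy as np

def np_to_tensor(img):
    # img: (H, W, C) uint8 -> (C, H, W) float32 scaled to [0, 1]
    return np.transpose(img.astype(np.float32) / 255.0, (2, 0, 1))

img = np.full((2, 3, 3), 255, dtype=np.uint8)
out = np_to_tensor(img)
assert out.shape == (3, 2, 3) and (out == 1.0).all()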
def check_invalid_rsp_pull_single(kv, key):
    dns_val = mx.nd.ones(shape) * 2
    assertRaises(MXNetError, kv.row_sparse_pull, key, out=dns_val,
                 row_ids=mx.nd.array([1]))
def test_NDArrayIter_csr():
    # creating toy data
    num_rows = rnd.randint(5, 15)
    num_cols = rnd.randint(1, 20)
    batch_size = rnd.randint(1, num_rows)
    shape = (num_rows, num_cols)
    csr, _ = rand_sparse_ndarray(shape, 'csr')
    dns = csr.asnumpy()

    # CSRNDArray with last_batch_handle not equal to 'discard' will throw NotImplementedError
    assertRaises(NotImplementedError, mx.io.NDArrayIter, {'data': csr}, dns, batch_size,
                 last_batch_handle='pad')

    # CSRNDArray with shuffle
    csr_iter = iter(mx.io.NDArrayIter({'csr_data': csr, 'dns_data': dns}, dns, batch_size,
                                      shuffle=True, last_batch_handle='discard'))
    num_batch = 0
    for batch in csr_iter:
        num_batch += 1
    assert num_batch == num_rows // batch_size

    # make iterators
    csr_iter = iter(mx.io.NDArrayIter(csr, csr, batch_size, last_batch_handle='discard'))
    begin = 0
    for batch in csr_iter:
        expected = np.zeros((batch_size, num_cols))
        end = begin + batch_size
        expected[:num_rows - begin] = dns[begin:end]
        if end > num_rows:
            expected[num_rows - begin:] = dns[0:end - num_rows]
        assert_almost_equal(batch.data[0].asnumpy(), expected)
        begin += batch_size
def test_rotate():
    transformer = transforms.Rotate(10.)
    assertRaises(TypeError, transformer, mx.np.ones((3, 30, 60), dtype='uint8'))
    single_image = mx.np.ones((3, 30, 60), dtype='float32')
    single_output = transformer(single_image)
    assert same(single_output.shape, (3, 30, 60))
    batch_image = mx.np.ones((3, 3, 30, 60), dtype='float32')
    batch_output = transformer(batch_image)
    assert same(batch_output.shape, (3, 3, 30, 60))

    input_image = mx.np.array([[[0., 0., 0.],
                                [0., 0., 1.],
                                [0., 0., 0.]]])
    rotation_angles_expected_outs = [
        (90., mx.np.array([[[0., 1., 0.],
                            [0., 0., 0.],
                            [0., 0., 0.]]])),
        (180., mx.np.array([[[0., 0., 0.],
                             [1., 0., 0.],
                             [0., 0., 0.]]])),
        (270., mx.np.array([[[0., 0., 0.],
                             [0., 0., 0.],
                             [0., 1., 0.]]])),
        (360., mx.np.array([[[0., 0., 0.],
                             [0., 0., 1.],
                             [0., 0., 0.]]])),
    ]
    for rot_angle, expected_result in rotation_angles_expected_outs:
        transformer = transforms.Rotate(rot_angle)
        ans = transformer(input_image)
        assert_almost_equal(ans.asnumpy(), expected_result.asnumpy(), atol=1e-6)
def test_custom_embed():
    embed_root = 'embeddings'
    embed_name = 'my_embed'
    elem_delim = '\t'
    pretrain_file = 'my_pretrain_file.txt'

    _mk_my_pretrain_file(os.path.join(embed_root, embed_name), elem_delim, pretrain_file)

    pretrain_file_path = os.path.join(embed_root, embed_name, pretrain_file)
    my_embed = text.embedding.CustomEmbedding(pretrain_file_path, elem_delim)

    assert len(my_embed) == 3
    assert my_embed.vec_len == 5
    assert my_embed.token_to_idx['a'] == 1
    assert my_embed.idx_to_token[1] == 'a'

    first_vec = my_embed.idx_to_vec[0]
    assert_almost_equal(first_vec.asnumpy(), np.array([0, 0, 0, 0, 0]))

    unk_vec = my_embed.get_vecs_by_tokens('A')
    assert_almost_equal(unk_vec.asnumpy(), np.array([0, 0, 0, 0, 0]))

    a_vec = my_embed.get_vecs_by_tokens('A', lower_case_backup=True)
    assert_almost_equal(a_vec.asnumpy(), np.array([0.1, 0.2, 0.3, 0.4, 0.5]))

    unk_vecs = my_embed.get_vecs_by_tokens(['<unk$unk@unk>', '<unk$unk@unk>'])
    assert_almost_equal(unk_vecs.asnumpy(),
                        np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]))

    # Test loaded unknown vectors.
    pretrain_file2 = 'my_pretrain_file2.txt'
    _mk_my_pretrain_file3(os.path.join(embed_root, embed_name), elem_delim, pretrain_file2)
    pretrain_file_path = os.path.join(embed_root, embed_name, pretrain_file2)
    my_embed2 = text.embedding.CustomEmbedding(pretrain_file_path, elem_delim,
                                               init_unknown_vec=nd.ones,
                                               unknown_token='<unk>')
    unk_vec2 = my_embed2.get_vecs_by_tokens('<unk>')
    assert_almost_equal(unk_vec2.asnumpy(), np.array([1, 1, 1, 1, 1]))
    unk_vec2 = my_embed2.get_vecs_by_tokens('<unk$unk@unk>')
    assert_almost_equal(unk_vec2.asnumpy(), np.array([1, 1, 1, 1, 1]))

    my_embed3 = text.embedding.CustomEmbedding(pretrain_file_path, elem_delim,
                                               init_unknown_vec=nd.ones,
                                               unknown_token='<unk1>')
    unk_vec3 = my_embed3.get_vecs_by_tokens('<unk1>')
    assert_almost_equal(unk_vec3.asnumpy(), np.array([1.1, 1.2, 1.3, 1.4, 1.5]))
    unk_vec3 = my_embed3.get_vecs_by_tokens('<unk$unk@unk>')
    assert_almost_equal(unk_vec3.asnumpy(), np.array([1.1, 1.2, 1.3, 1.4, 1.5]))

    # Test error handling.
    invalid_pretrain_file = 'invalid_pretrain_file.txt'
    _mk_my_invalid_pretrain_file(os.path.join(embed_root, embed_name), elem_delim,
                                 invalid_pretrain_file)
    pretrain_file_path = os.path.join(embed_root, embed_name, invalid_pretrain_file)
    assertRaises(AssertionError, text.embedding.CustomEmbedding, pretrain_file_path,
                 elem_delim)

    invalid_pretrain_file2 = 'invalid_pretrain_file2.txt'
    _mk_my_invalid_pretrain_file2(os.path.join(embed_root, embed_name), elem_delim,
                                  invalid_pretrain_file2)
    pretrain_file_path = os.path.join(embed_root, embed_name, invalid_pretrain_file2)
    assertRaises(AssertionError, text.embedding.CustomEmbedding, pretrain_file_path,
                 elem_delim)
def test_resize_gpu():
    # Test with normal case 3D input, float type
    data_in_3d = mx.np.random.uniform(0, 255, (300, 300, 3))
    out_nd_3d = transforms.Resize((100, 100))(data_in_3d)
    data_in_4d_nchw = mx.np.moveaxis(mx.np.expand_dims(data_in_3d, axis=0), 3, 1)
    data_expected_3d = (mx.np.moveaxis(
        nd.contrib.BilinearResize2D(data_in_4d_nchw.as_nd_ndarray(), height=100,
                                    width=100, align_corners=False), 1, 3))[0]
    assert_almost_equal(out_nd_3d.asnumpy(), data_expected_3d.asnumpy())

    # Test with normal case 4D input, float type
    data_in_4d = mx.np.random.uniform(0, 255, (2, 300, 300, 3))
    out_nd_4d = transforms.Resize((100, 100))(data_in_4d)
    data_in_4d_nchw = mx.np.moveaxis(data_in_4d, 3, 1)
    data_expected_4d = mx.np.moveaxis(
        nd.contrib.BilinearResize2D(data_in_4d_nchw.as_nd_ndarray(), height=100,
                                    width=100, align_corners=False), 1, 3)
    assert_almost_equal(out_nd_4d.asnumpy(), data_expected_4d.asnumpy())

    # Test invalid interp
    data_in_3d = mx.np.random.uniform(0, 255, (300, 300, 3))
    invalid_transform = transforms.Resize(-150, keep_ratio=False, interpolation=2)
    assertRaises(MXNetError, invalid_transform, data_in_3d)

    # Credited to Hang Zhang
    def py_bilinear_resize_nhwc(x, outputHeight, outputWidth):
        batch, inputHeight, inputWidth, channel = x.shape
        if outputHeight == inputHeight and outputWidth == inputWidth:
            return x
        y = np.empty([batch, outputHeight, outputWidth, channel]).astype('uint8')
        rheight = 1.0 * (inputHeight - 1) / (outputHeight - 1) if outputHeight > 1 else 0.0
        rwidth = 1.0 * (inputWidth - 1) / (outputWidth - 1) if outputWidth > 1 else 0.0
        for h2 in range(outputHeight):
            h1r = 1.0 * h2 * rheight
            h1 = int(np.floor(h1r))
            h1lambda = h1r - h1
            h1p = 1 if h1 < (inputHeight - 1) else 0
            for w2 in range(outputWidth):
                w1r = 1.0 * w2 * rwidth
                w1 = int(np.floor(w1r))
                w1lambda = w1r - w1
                w1p = 1 if w1 < (inputWidth - 1) else 0
                for b in range(batch):
                    for c in range(channel):
                        y[b][h2][w2][c] = (
                            (1 - h1lambda) * ((1 - w1lambda) * x[b][h1][w1][c] +
                                              w1lambda * x[b][h1][w1 + w1p][c]) +
                            h1lambda * ((1 - w1lambda) * x[b][h1 + h1p][w1][c] +
                                        w1lambda * x[b][h1 + h1p][w1 + w1p][c]))
        return y
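# To make the interpolation weights in py_bilinear_resize_nhwc concrete: each
# output pixel blends its four nearest input neighbours with weights
# (1-hl)(1-wl), (1-hl)wl, hl(1-wl) and hl*wl, where hl and wl are the
# fractional parts of the source coordinate. A scalar sketch with a
# hypothetical helper bilinear_sample:
import numpy as np

def bilinear_sample(x, h1r, w1r):
    # x: 2D array; (h1r, w1r): real-valued source coordinate
    h1, w1 = int(np.floor(h1r)), int(np.floor(w1r))
    hl, wl = h1r - h1, w1r - w1
    h2, w2 = min(h1 + 1, x.shape[0] - 1), min(w1 + 1, x.shape[1] - 1)
    return ((1 - hl) * ((1 - wl) * x[h1, w1] + wl * x[h1, w2]) +
            hl * ((1 - wl) * x[h2, w1] + wl * x[h2, w2]))

x = np.arange(4.0).reshape(2, 2)            # [[0, 1], [2, 3]]
assert bilinear_sample(x, 0.5, 0.5) == 1.5  # the average of all four corners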
def test_sparse_nd_exception():
    """ test that invalid sparse operations throw an exception """
    a = mx.nd.ones((2, 2))
    assertRaises(mx.base.MXNetError, mx.nd.sparse.retain, a, invalid_arg="garbage_value")
    assertRaises(ValueError, mx.nd.sparse.csr_matrix, a, shape=(3, 2))
    assertRaises(ValueError, mx.nd.sparse.csr_matrix, (2, 2), shape=(3, 2))
    assertRaises(ValueError, mx.nd.sparse.row_sparse_array, (2, 2), shape=(3, 2))
    assertRaises(ValueError, mx.nd.sparse.zeros, "invalid_stype", (2, 2))
def test_module_bind():
    sym = mx.sym.Variable('data')
    sym = mx.sym.Activation(data=sym, act_type='relu', __layout__='TNC')

    mod = mx.mod.Module(sym, ('data',), None, context=[mx.cpu(0), mx.cpu(1)])
    assertRaises(TypeError, mod.bind, data_shapes=[('data', mx.nd.array([10, 10]))])
    assert mod.binded == False

    mod.bind(data_shapes=[('data', (10, 10))])
    assert mod.binded == True
def test_module_bind():
    x = mx.sym.Variable("data")
    net = mx.sym.FullyConnected(x, num_hidden=1)

    mod = SVRGModule(symbol=net, data_names=['data'], label_names=None, update_freq=2)
    assertRaises(TypeError, mod.bind, data_shapes=['data', mx.nd.zeros(shape=(2, 1))])

    mod.bind(data_shapes=[('data', (2, 1))])
    assert mod.binded == True
    assert mod._mod_aux.binded == True
def test_resize():
    # Test with normal case 3D input, float type
    data_in_3d = nd.random.uniform(0, 255, (300, 300, 3))
    out_nd_3d = transforms.Resize((100, 100))(data_in_3d)
    data_in_4d_nchw = nd.moveaxis(nd.expand_dims(data_in_3d, axis=0), 3, 1)
    data_expected_3d = (nd.moveaxis(
        nd.contrib.BilinearResize2D(data_in_4d_nchw, 100, 100), 1, 3))[0]
    assert_almost_equal(out_nd_3d.asnumpy(), data_expected_3d.asnumpy())

    # Test with normal case 4D input, float type
    data_in_4d = nd.random.uniform(0, 255, (2, 300, 300, 3))
    out_nd_4d = transforms.Resize((100, 100))(data_in_4d)
    data_in_4d_nchw = nd.moveaxis(data_in_4d, 3, 1)
    data_expected_4d = nd.moveaxis(
        nd.contrib.BilinearResize2D(data_in_4d_nchw, 100, 100), 1, 3)
    assert_almost_equal(out_nd_4d.asnumpy(), data_expected_4d.asnumpy())

    # Test invalid interp
    data_in_3d = nd.random.uniform(0, 255, (300, 300, 3))
    invalid_transform = transforms.Resize(-150, keep_ratio=False, interpolation=2)
    assertRaises(MXNetError, invalid_transform, data_in_3d)

    # Credited to Hang Zhang
    def py_bilinear_resize_nhwc(x, outputHeight, outputWidth):
        batch, inputHeight, inputWidth, channel = x.shape
        if outputHeight == inputHeight and outputWidth == inputWidth:
            return x
        y = np.empty([batch, outputHeight, outputWidth, channel]).astype('uint8')
        rheight = 1.0 * (inputHeight - 1) / (outputHeight - 1) if outputHeight > 1 else 0.0
        rwidth = 1.0 * (inputWidth - 1) / (outputWidth - 1) if outputWidth > 1 else 0.0
        for h2 in range(outputHeight):
            h1r = 1.0 * h2 * rheight
            h1 = int(np.floor(h1r))
            h1lambda = h1r - h1
            h1p = 1 if h1 < (inputHeight - 1) else 0
            for w2 in range(outputWidth):
                w1r = 1.0 * w2 * rwidth
                w1 = int(np.floor(w1r))
                w1lambda = w1r - w1
                w1p = 1 if w1 < (inputWidth - 1) else 0
                for b in range(batch):
                    for c in range(channel):
                        y[b][h2][w2][c] = (
                            (1 - h1lambda) * ((1 - w1lambda) * x[b][h1][w1][c] +
                                              w1lambda * x[b][h1][w1 + w1p][c]) +
                            h1lambda * ((1 - w1lambda) * x[b][h1 + h1p][w1][c] +
                                        w1lambda * x[b][h1 + h1p][w1 + w1p][c]))
        return y

    # Test with normal case 3D input, uint8 type
    data_in_4d = nd.random.uniform(0, 255, (1, 300, 300, 3)).astype('uint8')
    out_nd_3d = transforms.Resize((100, 100))(data_in_4d[0])
    assert_almost_equal(out_nd_3d.asnumpy(),
                        py_bilinear_resize_nhwc(data_in_4d.asnumpy(), 100, 100)[0],
                        atol=1.0)

    # Test with normal case 4D input, uint8 type
    data_in_4d = nd.random.uniform(0, 255, (2, 300, 300, 3)).astype('uint8')
    out_nd_4d = transforms.Resize((100, 100))(data_in_4d)
    assert_almost_equal(out_nd_4d.asnumpy(),
                        py_bilinear_resize_nhwc(data_in_4d.asnumpy(), 100, 100),
                        atol=1.0)
def test_get_and_pretrain_file_names():
    assert len(text.embedding.get_pretrained_file_names(embedding_name='fasttext')) == 327
    assert len(text.embedding.get_pretrained_file_names(embedding_name='glove')) == 10

    reg = text.embedding.get_pretrained_file_names(embedding_name=None)
    assert len(reg['glove']) == 10
    assert len(reg['fasttext']) == 327

    assertRaises(KeyError, text.embedding.get_pretrained_file_names, 'unknown$$')
def test_NDArrayIter_csr():
    # creating toy data
    num_rows = rnd.randint(5, 15)
    num_cols = rnd.randint(1, 20)
    batch_size = rnd.randint(1, num_rows)
    shape = (num_rows, num_cols)
    csr, _ = rand_sparse_ndarray(shape, 'csr')
    dns = csr.asnumpy()

    # CSRNDArray or scipy.sparse.csr_matrix with last_batch_handle not equal to
    # 'discard' will throw NotImplementedError
    assertRaises(NotImplementedError, mx.io.NDArrayIter, {'data': csr}, dns, batch_size)
    try:
        import scipy.sparse as spsp
        train_data = spsp.csr_matrix(dns)
        assertRaises(NotImplementedError, mx.io.NDArrayIter, {'data': train_data}, dns,
                     batch_size)
        # scipy.sparse.csr_matrix with shuffle
        csr_iter = iter(mx.io.NDArrayIter({'data': train_data}, dns, batch_size,
                                          shuffle=True, last_batch_handle='discard'))
        csr_iter_empty_list = iter(mx.io.NDArrayIter({'data': train_data}, [], batch_size,
                                                     shuffle=True,
                                                     last_batch_handle='discard'))
        csr_iter_None = iter(mx.io.NDArrayIter({'data': train_data}, None, batch_size,
                                               shuffle=True, last_batch_handle='discard'))
        _test_NDArrayIter_csr(csr_iter, csr_iter_empty_list, csr_iter_None,
                              num_rows, batch_size)
    except ImportError:
        pass

    # CSRNDArray with shuffle
    csr_iter = iter(mx.io.NDArrayIter({'csr_data': csr, 'dns_data': dns}, dns, batch_size,
                                      shuffle=True, last_batch_handle='discard'))
    csr_iter_empty_list = iter(mx.io.NDArrayIter({'csr_data': csr, 'dns_data': dns}, [],
                                                 batch_size, shuffle=True,
                                                 last_batch_handle='discard'))
    csr_iter_None = iter(mx.io.NDArrayIter({'csr_data': csr, 'dns_data': dns}, None,
                                           batch_size, shuffle=True,
                                           last_batch_handle='discard'))
    _test_NDArrayIter_csr(csr_iter, csr_iter_empty_list, csr_iter_None,
                          num_rows, batch_size)

    # make iterators
    csr_iter = iter(mx.io.NDArrayIter(csr, csr, batch_size, last_batch_handle='discard'))
    begin = 0
    for batch in csr_iter:
        expected = np.zeros((batch_size, num_cols))
        end = begin + batch_size
        expected[:num_rows - begin] = dns[begin:end]
        if end > num_rows:
            expected[num_rows - begin:] = dns[0:end - num_rows]
        assert_almost_equal(batch.data[0].asnumpy(), expected)
        begin += batch_size
def _test_crop_resize_with_diff_type(dtype):
    # test normal case
    data_in = nd.arange(60).reshape((5, 4, 3)).astype(dtype)
    out_nd = transforms.CropResize(0, 0, 3, 2)(data_in)
    out_np = out_nd.asnumpy()
    assert out_np.sum() == 180
    assert (out_np[0:2, 1, 1].flatten() == [4, 16]).all()

    # test 4D input
    data_batch_in = nd.arange(180).reshape((2, 6, 5, 3)).astype(dtype)
    out_batch_nd = transforms.CropResize(1, 2, 3, 4)(data_batch_in)
    out_batch_np = out_batch_nd.asnumpy()
    assert out_batch_np.sum() == 7524
    assert (out_batch_np[0:2, 0:4, 1, 1].flatten() ==
            [37, 52, 67, 82, 127, 142, 157, 172]).all()

    # test normal case with resize
    data_in = nd.random.uniform(0, 255, (300, 200, 3)).astype(dtype)
    out_nd = transforms.CropResize(0, 0, 100, 50, (25, 25), 2)(data_in)
    data_expected = image.imresize(nd.slice(data_in, (0, 0, 0), (50, 100, 3)), 25, 25, 2)
    assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy())

    # test 4D input with resize
    data_batch_in = nd.random.uniform(0, 255, (3, 300, 200, 3)).astype(dtype)
    out_batch_nd = transforms.CropResize(0, 0, 100, 50, (25, 25), 2)(data_batch_in)
    for i in range(len(out_batch_nd)):
        assert_almost_equal(
            image.imresize(nd.slice(data_batch_in[i], (0, 0, 0), (50, 100, 3)),
                           25, 25, 2).asnumpy(),
            out_batch_nd[i].asnumpy())

    # test that resize height and width must be greater than 0
    transformer = transforms.CropResize(0, 0, 100, 50, (-25, 25), 2)
    assertRaises(MXNetError, transformer, data_in)

    # test that crop height and width must be greater than 0
    transformer = transforms.CropResize(0, 0, -100, -50)
    assertRaises(MXNetError, transformer, data_in)

    # test that the cropped area cannot be bigger than the input data
    transformer = transforms.CropResize(150, 200, 200, 500)
    assertRaises(MXNetError, transformer, data_in)
    assertRaises(MXNetError, transformer, data_batch_in)
def _test_crop_resize_with_diff_type(dtype):
    # test normal case
    data_in = nd.arange(60).reshape((5, 4, 3)).astype(dtype)
    out_nd = transforms.CropResize(0, 0, 3, 2)(data_in)
    out_np = out_nd.asnumpy()
    assert out_np.sum() == 180
    assert (out_np[0:2, 1, 1].flatten() == [4, 16]).all()

    # test 4D input
    data_batch_in = nd.arange(180).reshape((2, 6, 5, 3)).astype(dtype)
    out_batch_nd = transforms.CropResize(1, 2, 3, 4)(data_batch_in)
    out_batch_np = out_batch_nd.asnumpy()
    assert out_batch_np.sum() == 7524
    assert (out_batch_np[0:2, 0:4, 1, 1].flatten() ==
            [37, 52, 67, 82, 127, 142, 157, 172]).all()

    # test normal case with resize
    data_in = nd.random.uniform(0, 255, (300, 200, 3)).astype(dtype)
    out_nd = transforms.CropResize(0, 0, 100, 50, (25, 25), 1)(data_in)
    data_expected = transforms.Resize(size=25, interpolation=1)(
        nd.slice(data_in, (0, 0, 0), (50, 100, 3)))
    assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy())

    # test 4D input with resize
    data_batch_in = nd.random.uniform(0, 255, (3, 300, 200, 3)).astype(dtype)
    out_batch_nd = transforms.CropResize(0, 0, 100, 50, (25, 25), 1)(data_batch_in)
    for i in range(len(out_batch_nd)):
        actual = transforms.Resize(size=25, interpolation=1)(
            nd.slice(data_batch_in[i], (0, 0, 0), (50, 100, 3))).asnumpy()
        expected = out_batch_nd[i].asnumpy()
        assert_almost_equal(expected, actual)

    # test that resize height and width must be greater than 0
    transformer = transforms.CropResize(0, 0, 100, 50, (-25, 25), 1)
    assertRaises(MXNetError, transformer, data_in)

    # test that crop height and width must be greater than 0
    transformer = transforms.CropResize(0, 0, -100, -50)
    assertRaises(MXNetError, transformer, data_in)

    # test that the cropped area cannot be bigger than the input data
    transformer = transforms.CropResize(150, 200, 200, 500)
    assertRaises(MXNetError, transformer, data_in)
    assertRaises(MXNetError, transformer, data_batch_in)
def test_module_set_params():
    # data iter
    data = mx.nd.array([[0.05, .10]])
    label = mx.nd.array([[.01, 0.99]])
    train_data = mx.io.NDArrayIter(data, label, batch_size=1)

    # symbols
    x = mx.symbol.Variable('data')
    x = mx.symbol.FullyConnected(name='fc_0', data=x, num_hidden=2)
    x = mx.symbol.Activation(name="act_0", data=x, act_type='sigmoid')
    x = mx.symbol.FullyConnected(name='fc_1', data=x, num_hidden=2)
    x = mx.symbol.Activation(name="act_1", data=x, act_type='sigmoid')
    x = mx.symbol.LinearRegressionOutput(data=x, name='softmax', grad_scale=2)

    # create module
    mod = mx.mod.Module(x, context=[mx.cpu()])
    mod.bind(train_data.provide_data, label_shapes=train_data.provide_label,
             for_training=True)

    arg_params_correct = {'fc_0_weight': mx.nd.array([[.15, .20], [.25, .30]]),
                          'fc_0_bias': mx.nd.array([.35, .35]),
                          'fc_1_weight': mx.nd.array([[.40, .45], [.50, .55]]),
                          'fc_1_bias': mx.nd.array([.60, .60])}

    arg_params_missing = {'fc_0_weight': mx.nd.array([[.15, .20], [.25, .30]]),
                          'fc_0_bias': mx.nd.array([.35, .35]),
                          'fc_1_weight': mx.nd.array([[.40, .45], [.50, .55]])}

    arg_params_extra = {'fc_0_weight': mx.nd.array([[.15, .20], [.25, .30]]),
                        'fc_0_bias': mx.nd.array([.35, .35]),
                        'fc_1_weight': mx.nd.array([[.40, .45], [.50, .55]]),
                        'fc_1_bias': mx.nd.array([.60, .60]),
                        'fc_2_weight': mx.nd.array([.60, .60])}

    arg_params_missing_extra = {'fc_2_weight': mx.nd.array([.60, .60])}

    # test regular set_params
    mod.set_params(force_init=True, arg_params=arg_params_correct, aux_params={})

    # test allow missing
    mod.set_params(force_init=True, arg_params=arg_params_missing, aux_params={},
                   allow_missing=True)
    assertRaises(RuntimeError, mod.set_params, force_init=True,
                 arg_params=arg_params_missing, aux_params={}, allow_missing=False)

    # test allow extra
    mod.set_params(force_init=True, arg_params=arg_params_extra, aux_params={},
                   allow_missing=True, allow_extra=True)
    assertRaises(ValueError, mod.set_params, force_init=True,
                 arg_params=arg_params_extra, aux_params={}, allow_missing=True,
                 allow_extra=False)

    # test missing + extra; disallowing missing throws a runtime error
    assertRaises(RuntimeError, mod.set_params, force_init=True,
                 arg_params=arg_params_missing_extra, aux_params={},
                 allow_missing=False, allow_extra=False)

    # test missing + extra; disallowing extra throws a value error
    assertRaises(ValueError, mod.set_params, force_init=True,
                 arg_params=arg_params_missing_extra, aux_params={},
                 allow_missing=True, allow_extra=False)
def check_invalid_key_types_single(kv, key):
    dns_val = mx.nd.ones(shape) * 2
    rsp_val = dns_val.tostype('row_sparse')
    assertRaises(MXNetError, kv.init, key, dns_val)
    assertRaises(MXNetError, kv.push, key, dns_val)
    assertRaises(MXNetError, kv.pull, key, dns_val)
    assertRaises(MXNetError, kv.row_sparse_pull, key, rsp_val,
                 row_ids=mx.nd.array([1]))
def check_invalid_key_types_list(kv, key):
    dns_val = [mx.nd.ones(shape) * 2] * len(key)
    rsp_val = [val.tostype('row_sparse') for val in dns_val]
    assertRaises(MXNetError, kv.init, key, dns_val)
    assertRaises(MXNetError, kv.push, key, dns_val)
    assertRaises(MXNetError, kv.pull, key, dns_val)
    assertRaises(MXNetError, kv.row_sparse_pull, key, rsp_val,
                 row_ids=[mx.nd.array([1])] * len(key))
def test_to_tensor():
    # 3D Input
    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert_almost_equal(out_nd.asnumpy(),
                        np.transpose(data_in.astype(dtype=np.float32) / 255.0, (2, 0, 1)))

    # 4D Input
    data_in = np.random.uniform(0, 255, (5, 300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert_almost_equal(out_nd.asnumpy(),
                        np.transpose(data_in.astype(dtype=np.float32) / 255.0, (0, 3, 1, 2)))

    # Invalid Input
    invalid_data_in = nd.random.uniform(0, 255, (5, 5, 300, 300, 3)).astype(dtype=np.uint8)
    transformer = transforms.ToTensor()
    assertRaises(MXNetError, transformer, invalid_data_in)
def test_image_iter_exception():
    def check_cifar10_exception():
        get_cifar10()
        dataiter = mx.io.ImageRecordIter(path_imgrec="data/cifar/train.rec",
                                         mean_img="data/cifar/cifar10_mean.bin",
                                         rand_crop=False,
                                         rand_mirror=False,
                                         shuffle=False,
                                         data_shape=(5, 28, 28),
                                         batch_size=100,
                                         preprocess_threads=4,
                                         prefetch_buffer=1)
        labelcount = [0 for i in range(10)]
        batchcount = 0
        for batch in dataiter:
            pass
    assertRaises(MXNetError, check_cifar10_exception)
def test_indices_to_tokens():
    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])

    vocab = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=1,
                                  unknown_token='<unknown>', reserved_tokens=None)
    i1 = vocab.to_tokens(1)
    assert i1 == 'c'

    i2 = vocab.to_tokens([1])
    assert i2 == ['c']

    i3 = vocab.to_tokens([0, 0])
    assert i3 == ['<unknown>', '<unknown>']

    i4 = vocab.to_tokens([3, 0, 3, 2])
    assert i4 == ['a', '<unknown>', 'a', 'b']

    assertRaises(ValueError, vocab.to_tokens, 100)
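# The to_tokens contract in plain Python: indices map through an idx_to_token
# list, a scalar in gives a scalar out, and any out-of-range index is a
# ValueError. The to_tokens function below is a toy sketch, not the mxnet
# implementation.
def to_tokens(idx_to_token, indices):
    single = not isinstance(indices, (list, tuple))
    idx = [indices] if single else list(indices)
    if any(i >= len(idx_to_token) for i in idx):
        raise ValueError('token index out of range')
    tokens = [idx_to_token[i] for i in idx]
    return tokens[0] if single else tokens

assert to_tokens(['<unknown>', 'c', 'b', 'a'], 1) == 'c'
assert to_tokens(['<unknown>', 'c', 'b', 'a'], [0, 2]) == ['<unknown>', 'b']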
def test_indices_to_tokens():
    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])

    indexer = text.indexer.TokenIndexer(counter, most_freq_count=None, min_freq=1,
                                        unknown_token='<unknown>', reserved_tokens=None)
    i1 = indexer.to_tokens(1)
    assert i1 == 'c'

    i2 = indexer.to_tokens([1])
    assert i2 == ['c']

    i3 = indexer.to_tokens([0, 0])
    assert i3 == ['<unknown>', '<unknown>']

    i4 = indexer.to_tokens([3, 0, 3, 2])
    assert i4 == ['a', '<unknown>', 'a', 'b']

    assertRaises(ValueError, indexer.to_tokens, 100)
def test_get_embedding_names_and_pretrain_files():
    assert len(TokenEmbedding.get_embedding_and_pretrained_file_names(
        embedding_name='fasttext')) == 294
    assert len(TokenEmbedding.get_embedding_and_pretrained_file_names(
        embedding_name='glove')) == 10

    reg = TokenEmbedding.get_embedding_and_pretrained_file_names(embedding_name=None)
    assert len(reg['glove']) == 10
    assert len(reg['fasttext']) == 294

    assertRaises(KeyError, TokenEmbedding.get_embedding_and_pretrained_file_names,
                 'unknown$$')
def test_sparse_parameter():
    p = gluon.Parameter('weight', shape=(10, 10), stype='row_sparse',
                        grad_stype='row_sparse')
    p.initialize(init='xavier', ctx=[mx.cpu(0), mx.cpu(1)])
    row_id = mx.nd.arange(0, 10, ctx=mx.cpu(1))
    assert len(p.list_grad()) == 2
    # getting row_sparse data without a trainer throws an exception
    assertRaises(RuntimeError, p.list_row_sparse_data, row_id)
    trainer = mx.gluon.Trainer([p], 'sgd')
    assert len(p.list_row_sparse_data(row_id)) == 2
    weight = p.row_sparse_data(row_id)
    assert weight.context == mx.cpu(1)
    assert weight.shape == (10, 10)
    assert weight.stype == 'row_sparse'
    assert p.var().name == 'weight'
    assert p.var().attr('__storage_type__') == str(_STORAGE_TYPE_STR_TO_ID['row_sparse'])
    assert p.grad(mx.cpu(0)).stype == 'row_sparse'

    p.reset_ctx(ctx=[mx.cpu(1), mx.cpu(2)])
    assert p.list_ctx() == [mx.cpu(1), mx.cpu(2)]
def test_parameter_invalid_access():
    # cannot call data on row_sparse parameters
    p0 = gluon.Parameter('weight', shape=(10, 10), stype='row_sparse',
                         grad_stype='row_sparse')
    p0.initialize(init='xavier', ctx=[mx.cpu(0), mx.cpu(1)])
    assertRaises(RuntimeError, p0.data)
    assertRaises(RuntimeError, p0.list_data)
    row_id = mx.nd.arange(0, 10)
    # cannot call row_sparse_data on dense parameters
    p1 = gluon.Parameter('weight', shape=(10, 10))
    p1.initialize(init='xavier', ctx=[mx.cpu(0), mx.cpu(1)])
    assertRaises(RuntimeError, p1.row_sparse_data, row_id.copyto(mx.cpu(0)))
    assertRaises(RuntimeError, p1.list_row_sparse_data, row_id)
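# The dense/sparse split these two tests enforce, in one place: data() is only
# valid for dense ('default' stype) parameters, while row_sparse_data(row_id)
# is only valid for 'row_sparse' ones. A minimal sketch of the dense side,
# assuming the same gluon/mx imports as above:
p = gluon.Parameter('w', shape=(4, 4))   # dense by default
p.initialize(ctx=mx.cpu(0))
assert p.data().shape == (4, 4)          # fine for a dense parameter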
def _test_NDArrayIter_csr(csr_iter, csr_iter_empty_list, csr_iter_None, num_rows,
                          batch_size):
    num_batch = 0
    for _, batch_empty_list, batch_empty_None in zip(csr_iter, csr_iter_empty_list,
                                                     csr_iter_None):
        assert not batch_empty_list.label, 'label is not empty list'
        assert not batch_empty_None.label, 'label is not empty list'
        num_batch += 1
    assert num_batch == num_rows // batch_size
    assertRaises(StopIteration, csr_iter.next)
    assertRaises(StopIteration, csr_iter_empty_list.next)
    assertRaises(StopIteration, csr_iter_None.next)
def test_buffer_load():
    nrepeat = 10
    with TemporaryDirectory(prefix='test_buffer_load_') as tmpdir:
        for repeat in range(nrepeat):
            # test load_buffer as list
            data = []
            for i in range(10):
                data.append(random_ndarray(np.random.randint(1, 5)))
            fname = os.path.join(tmpdir, 'list_{0}.param'.format(repeat))
            mx.nd.save(fname, data)
            with open(fname, 'rb') as dfile:
                buf_data = dfile.read()
                data2 = mx.nd.load_frombuffer(buf_data)
                assert len(data) == len(data2)
                for x, y in zip(data, data2):
                    assert np.sum(x.asnumpy() != y.asnumpy()) == 0
                # test garbage values
                assertRaises(mx.base.MXNetError, mx.nd.load_frombuffer, buf_data[:-10])

            # test load_buffer as dict
            dmap = {'ndarray xx %s' % i: x for i, x in enumerate(data)}
            fname = os.path.join(tmpdir, 'dict_{0}.param'.format(repeat))
            mx.nd.save(fname, dmap)
            with open(fname, 'rb') as dfile:
                buf_dmap = dfile.read()
                dmap2 = mx.nd.load_frombuffer(buf_dmap)
                assert len(dmap2) == len(dmap)
                for k, x in dmap.items():
                    y = dmap2[k]
                    assert np.sum(x.asnumpy() != y.asnumpy()) == 0
                # test garbage values
                assertRaises(mx.base.MXNetError, mx.nd.load_frombuffer, buf_dmap[:-10])

            # we expect the single ndarray to be converted into a list containing the ndarray
            single_ndarray = data[0]
            fname = os.path.join(tmpdir, 'single_{0}.param'.format(repeat))
            mx.nd.save(fname, single_ndarray)
            with open(fname, 'rb') as dfile:
                buf_single_ndarray = dfile.read()
                single_ndarray_loaded = mx.nd.load_frombuffer(buf_single_ndarray)
                assert len(single_ndarray_loaded) == 1
                single_ndarray_loaded = single_ndarray_loaded[0]
                assert np.sum(single_ndarray.asnumpy() !=
                              single_ndarray_loaded.asnumpy()) == 0
                # test garbage values
                assertRaises(mx.base.MXNetError, mx.nd.load_frombuffer,
                             buf_single_ndarray[:-10])
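# The round trip above in miniature: mx.nd.save writes a file whose raw bytes
# mx.nd.load_frombuffer accepts directly. A sketch assuming a writable current
# directory; the file name tmp_single.param is arbitrary:
arr = mx.nd.ones((2, 2))
mx.nd.save('tmp_single.param', [arr])
with open('tmp_single.param', 'rb') as f:
    loaded = mx.nd.load_frombuffer(f.read())
assert (loaded[0].asnumpy() == 1).all()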
def test_random_rotation():
    # test exceptions for probability input outside of [0, 1]
    assertRaises(ValueError, transforms.RandomRotation, [-10, 10.],
                 rotate_with_proba=1.1)
    assertRaises(ValueError, transforms.RandomRotation, [-10, 10.],
                 rotate_with_proba=-0.3)
    # test `forward`
    transformer = transforms.RandomRotation([-10, 10.])
    assertRaises(TypeError, transformer, mx.nd.ones((3, 30, 60), dtype='uint8'))
    single_image = mx.nd.ones((3, 30, 60), dtype='float32')
    single_output = transformer(single_image)
    assert same(single_output.shape, (3, 30, 60))
    batch_image = mx.nd.ones((3, 3, 30, 60), dtype='float32')
    batch_output = transformer(batch_image)
    assert same(batch_output.shape, (3, 3, 30, 60))
    # test identity (rotate_with_proba = 0)
    transformer = transforms.RandomRotation([-100., 100.], rotate_with_proba=0.0)
    data = mx.nd.random_normal(shape=(3, 30, 60))
    assert_almost_equal(data, transformer(data))
def test_composite_embedding_with_one_embedding():
    embed_root = 'embeddings'
    embed_name = 'my_embed'
    elem_delim = '\t'
    pretrain_file = 'my_pretrain_file1.txt'

    _mk_my_pretrain_file(os.path.join(embed_root, embed_name), elem_delim, pretrain_file)

    pretrain_file_path = os.path.join(embed_root, embed_name, pretrain_file)
    my_embed = text.embedding.CustomEmbedding(pretrain_file_path, elem_delim,
                                              init_unknown_vec=nd.ones)

    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])

    v1 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=1,
                               unknown_token='<unk>', reserved_tokens=['<pad>'])
    ce1 = text.embedding.CompositeEmbedding(v1, my_embed)

    assert ce1.token_to_idx == {'<unk>': 0, '<pad>': 1, 'c': 2, 'b': 3, 'a': 4,
                                'some_word$': 5}
    assert ce1.idx_to_token == ['<unk>', '<pad>', 'c', 'b', 'a', 'some_word$']

    assert_almost_equal(ce1.idx_to_vec.asnumpy(),
                        np.array([[1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [0.6, 0.7, 0.8, 0.9, 1],
                                  [0.1, 0.2, 0.3, 0.4, 0.5],
                                  [1, 1, 1, 1, 1]]))

    assert ce1.vec_len == 5
    assert ce1.reserved_tokens == ['<pad>']

    assert_almost_equal(ce1.get_vecs_by_tokens('c').asnumpy(),
                        np.array([1, 1, 1, 1, 1]))
    assert_almost_equal(ce1.get_vecs_by_tokens(['c']).asnumpy(),
                        np.array([[1, 1, 1, 1, 1]]))
    assert_almost_equal(ce1.get_vecs_by_tokens(['a', 'not_exist']).asnumpy(),
                        np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
                                  [1, 1, 1, 1, 1]]))
    assert_almost_equal(ce1.get_vecs_by_tokens(['a', 'b']).asnumpy(),
                        np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
                                  [0.6, 0.7, 0.8, 0.9, 1]]))
    assert_almost_equal(ce1.get_vecs_by_tokens(['A', 'b']).asnumpy(),
                        np.array([[1, 1, 1, 1, 1],
                                  [0.6, 0.7, 0.8, 0.9, 1]]))
    assert_almost_equal(ce1.get_vecs_by_tokens(['A', 'b'], lower_case_backup=True).asnumpy(),
                        np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
                                  [0.6, 0.7, 0.8, 0.9, 1]]))

    ce1.update_token_vectors(['a', 'b'],
                             nd.array([[2, 2, 2, 2, 2],
                                       [3, 3, 3, 3, 3]]))
    assert_almost_equal(ce1.idx_to_vec.asnumpy(),
                        np.array([[1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [3, 3, 3, 3, 3],
                                  [2, 2, 2, 2, 2],
                                  [1, 1, 1, 1, 1]]))

    assertRaises(ValueError, ce1.update_token_vectors, 'unknown$$$',
                 nd.array([0, 0, 0, 0, 0]))
    assertRaises(AssertionError, ce1.update_token_vectors, '<unk>',
                 nd.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]))
    assertRaises(AssertionError, ce1.update_token_vectors, '<unk>', nd.array([0]))

    ce1.update_token_vectors(['<unk>'], nd.array([0, 0, 0, 0, 0]))
    assert_almost_equal(ce1.idx_to_vec.asnumpy(),
                        np.array([[0, 0, 0, 0, 0],
                                  [1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [3, 3, 3, 3, 3],
                                  [2, 2, 2, 2, 2],
                                  [1, 1, 1, 1, 1]]))
    ce1.update_token_vectors(['<unk>'], nd.array([[10, 10, 10, 10, 10]]))
    assert_almost_equal(ce1.idx_to_vec.asnumpy(),
                        np.array([[10, 10, 10, 10, 10],
                                  [1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [3, 3, 3, 3, 3],
                                  [2, 2, 2, 2, 2],
                                  [1, 1, 1, 1, 1]]))
    ce1.update_token_vectors('<unk>', nd.array([0, 0, 0, 0, 0]))
    assert_almost_equal(ce1.idx_to_vec.asnumpy(),
                        np.array([[0, 0, 0, 0, 0],
                                  [1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [3, 3, 3, 3, 3],
                                  [2, 2, 2, 2, 2],
                                  [1, 1, 1, 1, 1]]))
    ce1.update_token_vectors('<unk>', nd.array([[10, 10, 10, 10, 10]]))
    assert_almost_equal(ce1.idx_to_vec.asnumpy(),
                        np.array([[10, 10, 10, 10, 10],
                                  [1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [3, 3, 3, 3, 3],
                                  [2, 2, 2, 2, 2],
                                  [1, 1, 1, 1, 1]]))
def test_LibSVMIter():

    def check_libSVMIter_synthetic():
        cwd = os.getcwd()
        data_path = os.path.join(cwd, 'data.t')
        label_path = os.path.join(cwd, 'label.t')
        with open(data_path, 'w') as fout:
            fout.write('1.0 0:0.5 2:1.2\n')
            fout.write('-2.0\n')
            fout.write('-3.0 0:0.6 1:2.4 2:1.2\n')
            fout.write('4 2:-1.2\n')

        with open(label_path, 'w') as fout:
            fout.write('1.0\n')
            fout.write('-2.0 0:0.125\n')
            fout.write('-3.0 2:1.2\n')
            fout.write('4 1:1.0 2:-1.2\n')

        data_dir = os.path.join(cwd, 'data')
        data_train = mx.io.LibSVMIter(data_libsvm=data_path, label_libsvm=label_path,
                                      data_shape=(3, ), label_shape=(3, ), batch_size=3)

        first = mx.nd.array([[0.5, 0., 1.2], [0., 0., 0.], [0.6, 2.4, 1.2]])
        second = mx.nd.array([[0., 0., -1.2], [0.5, 0., 1.2], [0., 0., 0.]])
        i = 0
        for batch in iter(data_train):
            expected = first.asnumpy() if i == 0 else second.asnumpy()
            data = data_train.getdata()
            data.check_format(True)
            assert_almost_equal(data.asnumpy(), expected)
            i += 1

    def check_libSVMIter_news_data():
        news_metadata = {
            'name': 'news20.t',
            'origin_name': 'news20.t.bz2',
            'url': "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/news20.t.bz2",
            'feature_dim': 62060 + 1,
            'num_classes': 20,
            'num_examples': 3993,
        }
        batch_size = 33
        num_examples = news_metadata['num_examples']
        data_dir = os.path.join(os.getcwd(), 'data')
        get_bz2_data(data_dir, news_metadata['name'], news_metadata['url'],
                     news_metadata['origin_name'])
        path = os.path.join(data_dir, news_metadata['name'])
        data_train = mx.io.LibSVMIter(data_libsvm=path,
                                      data_shape=(news_metadata['feature_dim'],),
                                      batch_size=batch_size)
        for epoch in range(2):
            num_batches = 0
            for batch in data_train:
                # check the range of the labels
                data = batch.data[0]
                label = batch.label[0]
                data.check_format(True)
                assert np.sum(label.asnumpy() > 20) == 0
                assert np.sum(label.asnumpy() <= 0) == 0
                num_batches += 1
            expected_num_batches = num_examples / batch_size
            assert num_batches == int(expected_num_batches), num_batches
            data_train.reset()

    def check_libSVMIter_exception():
        cwd = os.getcwd()
        data_path = os.path.join(cwd, 'data.t')
        label_path = os.path.join(cwd, 'label.t')
        with open(data_path, 'w') as fout:
            fout.write('1.0 0:0.5 2:1.2\n')
            fout.write('-2.0\n')
            # the line below has a negative index and should throw an exception
            fout.write('-3.0 -1:0.6 1:2.4 2:1.2\n')
            fout.write('4 2:-1.2\n')

        with open(label_path, 'w') as fout:
            fout.write('1.0\n')
            fout.write('-2.0 0:0.125\n')
            fout.write('-3.0 2:1.2\n')
            fout.write('4 1:1.0 2:-1.2\n')
        data_dir = os.path.join(cwd, 'data')
        data_train = mx.io.LibSVMIter(data_libsvm=data_path, label_libsvm=label_path,
                                      data_shape=(3, ), label_shape=(3, ), batch_size=3)
        for batch in iter(data_train):
            data_train.get_data().asnumpy()

    check_libSVMIter_synthetic()
    check_libSVMIter_news_data()
    assertRaises(MXNetError, check_libSVMIter_exception)
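# The libsvm lines written above follow "label idx:value idx:value ..." with
# zero-based, ascending indices; omitted indices are implicit zeros. A tiny
# dense parser sketch (parse_libsvm_line is hypothetical, for illustration
# only):
def parse_libsvm_line(line, dim):
    parts = line.split()
    label = float(parts[0])
    row = [0.0] * dim
    for tok in parts[1:]:
        idx, val = tok.split(':')
        row[int(idx)] = float(val)
    return label, row

assert parse_libsvm_line('1.0 0:0.5 2:1.2', 3) == (1.0, [0.5, 0.0, 1.2])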
def test_symbol_bool():
    x = mx.symbol.Variable('x')
    assertRaises(NotImplementedForSymbol, bool, x)
def test_sparse_nd_check_format():
    """ test check_format for sparse ndarray """
    shape = rand_shape_2d()
    stypes = ["csr", "row_sparse"]
    for stype in stypes:
        arr, _ = rand_sparse_ndarray(shape, stype)
        arr.check_format()
        arr = mx.nd.sparse.zeros(stype, shape)
        arr.check_format()

    # CSR format index pointer array should be less than the number of rows
    shape = (3, 4)
    data_list = [7, 8, 9]
    indices_list = [0, 2, 1]
    indptr_list = [0, 5, 2, 3]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)

    # CSR format indices should be in ascending order per row
    indices_list = [2, 1, 1]
    indptr_list = [0, 2, 2, 3]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)

    # CSR format indptr should end with a value equal to the size of indices
    indices_list = [1, 2, 1]
    indptr_list = [0, 2, 2, 4]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)

    # CSR format indices should not be negative
    indices_list = [0, 2, 1]
    indptr_list = [0, -2, 2, 3]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)

    # Row Sparse format indices should be less than the number of rows
    shape = (3, 2)
    data_list = [[1, 2], [3, 4]]
    indices_list = [1, 4]
    a = mx.nd.sparse.row_sparse_array((data_list, indices_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)

    # Row Sparse format indices should be in ascending order
    indices_list = [1, 0]
    a = mx.nd.sparse.row_sparse_array((data_list, indices_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)

    # Row Sparse format indices should not be negative
    indices_list = [1, -2]
    a = mx.nd.sparse.row_sparse_array((data_list, indices_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
def test_sparse_nd_check_format():
    """ test check_format for sparse ndarray """
    shape = rand_shape_2d()
    stypes = ["csr", "row_sparse"]
    for stype in stypes:
        arr, _ = rand_sparse_ndarray(shape, stype)
        arr.check_format()
        arr = mx.nd.sparse.zeros(stype, shape)
        arr.check_format()

    # CSR format index pointer array should be less than the number of rows
    shape = (3, 4)
    data_list = [7, 8, 9]
    indices_list = [0, 2, 1]
    indptr_list = [0, 5, 2, 3]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)

    # CSR format indices should be in ascending order per row
    indices_list = [2, 1, 1]
    indptr_list = [0, 2, 2, 3]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)

    # CSR format indptr should end with a value equal to the size of indices
    indices_list = [1, 2, 1]
    indptr_list = [0, 2, 2, 4]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)

    # CSR format indices should not be negative
    indices_list = [0, 2, 1]
    indptr_list = [0, -2, 2, 3]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)

    # CSR format should be 2-dimensional
    a = mx.nd.array([1, 2, 3])
    assertRaises(ValueError, a.tostype, 'csr')
    a = mx.nd.array([[[1, 2, 3]]])
    assertRaises(ValueError, a.tostype, 'csr')

    # Row Sparse format indices should be less than the number of rows
    shape = (3, 2)
    data_list = [[1, 2], [3, 4]]
    indices_list = [1, 4]
    a = mx.nd.sparse.row_sparse_array((data_list, indices_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)

    # Row Sparse format indices should be in ascending order
    indices_list = [1, 0]
    a = mx.nd.sparse.row_sparse_array((data_list, indices_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)

    # Row Sparse format indices should not be negative
    indices_list = [1, -2]
    a = mx.nd.sparse.row_sparse_array((data_list, indices_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
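# For contrast with the malformed layouts above, a well-formed CSR triple for
# the same (3, 4) shape: indptr is non-decreasing, starts at 0, ends at
# len(data), and indices ascend within each row (a sketch using the same
# mx.nd.sparse API as the tests):
data_list = [7, 8, 9]
indices_list = [0, 2, 1]
indptr_list = [0, 2, 2, 3]   # row 0 holds two values, row 1 none, row 2 one
a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list), shape=(3, 4))
a.check_format()             # passes: every invariant above holds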
def test_vocabulary():
    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])

    v1 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=1,
                               unknown_token='<unk>', reserved_tokens=None)
    assert len(v1) == 5
    assert v1.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2, 'a': 3, 'some_word$': 4}
    assert v1.idx_to_token[1] == 'c'
    assert v1.unknown_token == '<unk>'
    assert v1.reserved_tokens is None

    v2 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=2,
                               unknown_token='<unk>', reserved_tokens=None)
    assert len(v2) == 3
    assert v2.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2}
    assert v2.idx_to_token[1] == 'c'
    assert v2.unknown_token == '<unk>'
    assert v2.reserved_tokens is None

    v3 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=100,
                               unknown_token='<unk>', reserved_tokens=None)
    assert len(v3) == 1
    assert v3.token_to_idx == {'<unk>': 0}
    assert v3.idx_to_token[0] == '<unk>'
    assert v3.unknown_token == '<unk>'
    assert v3.reserved_tokens is None

    v4 = text.vocab.Vocabulary(counter, most_freq_count=2, min_freq=1,
                               unknown_token='<unk>', reserved_tokens=None)
    assert len(v4) == 3
    assert v4.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2}
    assert v4.idx_to_token[1] == 'c'
    assert v4.unknown_token == '<unk>'
    assert v4.reserved_tokens is None

    v5 = text.vocab.Vocabulary(counter, most_freq_count=3, min_freq=1,
                               unknown_token='<unk>', reserved_tokens=None)
    assert len(v5) == 4
    assert v5.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2, 'a': 3}
    assert v5.idx_to_token[1] == 'c'
    assert v5.unknown_token == '<unk>'
    assert v5.reserved_tokens is None

    v6 = text.vocab.Vocabulary(counter, most_freq_count=100, min_freq=1,
                               unknown_token='<unk>', reserved_tokens=None)
    assert len(v6) == 5
    assert v6.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2, 'a': 3, 'some_word$': 4}
    assert v6.idx_to_token[1] == 'c'
    assert v6.unknown_token == '<unk>'
    assert v6.reserved_tokens is None

    v7 = text.vocab.Vocabulary(counter, most_freq_count=1, min_freq=2,
                               unknown_token='<unk>', reserved_tokens=None)
    assert len(v7) == 2
    assert v7.token_to_idx == {'<unk>': 0, 'c': 1}
    assert v7.idx_to_token[1] == 'c'
    assert v7.unknown_token == '<unk>'
    assert v7.reserved_tokens is None

    assertRaises(AssertionError, text.vocab.Vocabulary, counter, most_freq_count=None,
                 min_freq=0, unknown_token='<unknown>', reserved_tokens=['b'])
    assertRaises(AssertionError, text.vocab.Vocabulary, counter, most_freq_count=None,
                 min_freq=1, unknown_token='<unknown>', reserved_tokens=['b', 'b'])
    assertRaises(AssertionError, text.vocab.Vocabulary, counter, most_freq_count=None,
                 min_freq=1, unknown_token='<unknown>', reserved_tokens=['b', '<unknown>'])

    v8 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=1,
                               unknown_token='<unknown>', reserved_tokens=['b'])
    assert len(v8) == 5
    assert v8.token_to_idx == {'<unknown>': 0, 'b': 1, 'c': 2, 'a': 3, 'some_word$': 4}
    assert v8.idx_to_token[1] == 'b'
    assert v8.unknown_token == '<unknown>'
    assert v8.reserved_tokens == ['b']

    v9 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=2,
                               unknown_token='<unk>', reserved_tokens=['b', 'a'])
    assert len(v9) == 4
    assert v9.token_to_idx == {'<unk>': 0, 'b': 1, 'a': 2, 'c': 3}
    assert v9.idx_to_token[1] == 'b'
    assert v9.unknown_token == '<unk>'
    assert v9.reserved_tokens == ['b', 'a']

    v10 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=100,
                                unknown_token='<unk>', reserved_tokens=['b', 'c'])
    assert len(v10) == 3
    assert v10.token_to_idx == {'<unk>': 0, 'b': 1, 'c': 2}
    assert v10.idx_to_token[1] == 'b'
    assert v10.unknown_token == '<unk>'
    assert v10.reserved_tokens == ['b', 'c']

    v11 = text.vocab.Vocabulary(counter, most_freq_count=1, min_freq=2,
                                unknown_token='<unk>', reserved_tokens=['<pad>', 'b'])
    assert len(v11) == 4
    assert v11.token_to_idx == {'<unk>': 0, '<pad>': 1, 'b': 2, 'c': 3}
    assert v11.idx_to_token[1] == '<pad>'
    assert v11.unknown_token == '<unk>'
    assert v11.reserved_tokens == ['<pad>', 'b']

    v12 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=2,
                                unknown_token='b', reserved_tokens=['<pad>'])
    assert len(v12) == 3
    assert v12.token_to_idx == {'b': 0, '<pad>': 1, 'c': 2}
    assert v12.idx_to_token[1] == '<pad>'
    assert v12.unknown_token == 'b'
    assert v12.reserved_tokens == ['<pad>']

    v13 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=2,
                                unknown_token='a', reserved_tokens=['<pad>'])
    assert len(v13) == 4
    assert v13.token_to_idx == {'a': 0, '<pad>': 1, 'c': 2, 'b': 3}
    assert v13.idx_to_token[1] == '<pad>'
    assert v13.unknown_token == 'a'
    assert v13.reserved_tokens == ['<pad>']

    counter_tuple = Counter([('a', 'a'), ('b', 'b'), ('b', 'b'), ('c', 'c'),
                             ('c', 'c'), ('c', 'c'), ('some_word$', 'some_word$')])

    v14 = text.vocab.Vocabulary(counter_tuple, most_freq_count=None, min_freq=1,
                                unknown_token=('<unk>', '<unk>'), reserved_tokens=None)
    assert len(v14) == 5
    assert v14.token_to_idx == {('<unk>', '<unk>'): 0, ('c', 'c'): 1, ('b', 'b'): 2,
                                ('a', 'a'): 3, ('some_word$', 'some_word$'): 4}
    assert v14.idx_to_token[1] == ('c', 'c')
    assert v14.unknown_token == ('<unk>', '<unk>')
    assert v14.reserved_tokens is None
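# The keep-rules these cases pin down, in plain Python: tokens are ranked by
# frequency, most_freq_count caps how many ranked tokens survive, min_freq
# drops rare ones, and the unknown token always sits at index 0. keep_tokens
# below is a toy sketch of the filtering only, not the mxnet implementation:
from collections import Counter

def keep_tokens(counter, most_freq_count=None, min_freq=1):
    ranked = sorted(counter.items(), key=lambda kv: (-kv[1], kv[0]))
    if most_freq_count is not None:
        ranked = ranked[:most_freq_count]
    return [tok for tok, freq in ranked if freq >= min_freq]

c = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])
assert keep_tokens(c, most_freq_count=2) == ['c', 'b']   # matches v4 above
assert keep_tokens(c, min_freq=2) == ['c', 'b']          # matches v2 above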