def _test_resize_with_diff_type(dtype):
     # test normal case
     data_in = nd.random.uniform(0, 255, (300, 200, 3)).astype(dtype)
     out_nd = transforms.Resize(200)(data_in)
     data_expected = mx.image.imresize(data_in, 200, 200, 1)
     assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy())
     # test 4D input
     data_bath_in = nd.random.uniform(0, 255, (3, 300, 200, 3)).astype(dtype)
     out_batch_nd = transforms.Resize(200)(data_bath_in)
     for i in range(len(out_batch_nd)):
         assert_almost_equal(mx.image.imresize(data_bath_in[i], 200, 200, 1).asnumpy(),
             out_batch_nd[i].asnumpy())
     # test interp = 2
     out_nd = transforms.Resize(200, interpolation=2)(data_in)
     data_expected = mx.image.imresize(data_in, 200, 200, 2)
     assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy())
     # test height not equal to width
     out_nd = transforms.Resize((200, 100))(data_in)
     data_expected = mx.image.imresize(data_in, 200, 100, 1)
     assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy())
     # test keep_ratio
     out_nd = transforms.Resize(150, keep_ratio=True)(data_in)
     data_expected = mx.image.imresize(data_in, 150, 225, 1)
     assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy())
     # test size below zero
     invalid_transform = transforms.Resize(-150, keep_ratio=True)
     assertRaises(MXNetError, invalid_transform, data_in)
     # test size tuple with more than 2 elements
     invalid_transform = transforms.Resize((100, 100, 100), keep_ratio=True)
     assertRaises(MXNetError, invalid_transform, data_in)
def test_normalize():
    # 3D Input
    data_in_3d = nd.random.uniform(0, 1, (3, 300, 300))
    out_nd_3d = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))(data_in_3d)
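    # Normalize computes (x - mean[c]) / std[c] per channel; with the channel
    # axis first, build the expected result by hand for mean=(0, 1, 2) and
    # std=(3, 2, 1).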
    data_expected_3d = data_in_3d.asnumpy()
    data_expected_3d[0] = data_expected_3d[0] / 3.0
    data_expected_3d[1] = (data_expected_3d[1] - 1.0) / 2.0
    data_expected_3d[2] = data_expected_3d[2] - 2.0
    assert_almost_equal(data_expected_3d, out_nd_3d.asnumpy())

    # 4D Input
    data_in_4d = nd.random.uniform(0, 1, (2, 3, 300, 300))
    out_nd_4d = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))(data_in_4d)
    data_expected_4d = data_in_4d.asnumpy()
    data_expected_4d[0, 0] = data_expected_4d[0, 0] / 3.0
    data_expected_4d[0, 1] = (data_expected_4d[0, 1] - 1.0) / 2.0
    data_expected_4d[0, 2] = data_expected_4d[0, 2] - 2.0
    data_expected_4d[1, 0] = data_expected_4d[1, 0] / 3.0
    data_expected_4d[1, 1] = (data_expected_4d[1, 1] - 1.0) / 2.0
    data_expected_4d[1, 2] = data_expected_4d[1, 2] - 2.0
    assert_almost_equal(data_expected_4d, out_nd_4d.asnumpy())

    # Invalid Input - Neither 3D nor 4D input
    invalid_data_in = nd.random.uniform(0, 1, (5, 5, 3, 300, 300))
    normalize_transformer = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))
    assertRaises(MXNetError, normalize_transformer, invalid_data_in)

    # Invalid Input - Channel count neither 1 nor 3
    invalid_data_in = nd.random.uniform(0, 1, (5, 4, 300, 300))
    normalize_transformer = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))
    assertRaises(MXNetError, normalize_transformer, invalid_data_in)
def test_to_tensor():
    # 3D Input
    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert_almost_equal(
        out_nd.asnumpy(),
        np.transpose(data_in.astype(dtype=np.float32) / 255.0, (2, 0, 1)))

    # 4D Input
    data_in_4d = nd.random.uniform(0, 1, (2, 3, 300, 300))
    out_nd_4d = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))(data_in_4d)
    data_expected_4d = data_in_4d.asnumpy()
    data_expected_4d[0, 0] = data_expected_4d[0, 0] / 3.0
    data_expected_4d[0, 1] = (data_expected_4d[0, 1] - 1.0) / 2.0
    data_expected_4d[0, 2] = data_expected_4d[0, 2] - 2.0
    data_expected_4d[1, 0] = data_expected_4d[1, 0] / 3.0
    data_expected_4d[1, 1] = (data_expected_4d[1, 1] - 1.0) / 2.0
    data_expected_4d[1, 2] = data_expected_4d[1, 2] - 2.0
    assert_almost_equal(data_expected_4d, out_nd_4d.asnumpy())

    # Default normalize values i.e., mean=0, std=1
    data_in_3d_def = nd.random.uniform(0, 1, (3, 300, 300))
    out_nd_3d_def = transforms.Normalize()(data_in_3d_def)
    data_expected_3d_def = data_in_3d_def.asnumpy()
    assert_almost_equal(data_expected_3d_def, out_nd_3d_def.asnumpy())

    # Invalid Input - Neither 3D nor 4D input
    invalid_data_in = nd.random.uniform(0, 1, (5, 5, 3, 300, 300))
    normalize_transformer = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))
    assertRaises(MXNetError, normalize_transformer, invalid_data_in)

    # Invalid Input - Channel count neither 1 nor 3
    invalid_data_in = nd.random.uniform(0, 1, (5, 4, 300, 300))
    normalize_transformer = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))
    assertRaises(MXNetError, normalize_transformer, invalid_data_in)
 def check_invalid_rsp_pull_list(kv, key):
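     # row_sparse_pull requires row_sparse output arrays; pulling into dense
     # outputs is invalid and should raise an MXNetError.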
     dns_val = [mx.nd.ones(shape) * 2] * len(key)
     assertRaises(MXNetError,
                  kv.row_sparse_pull,
                  key,
                  out=dns_val,
                  row_ids=[mx.nd.array([1])] * len(key))
def test_to_tensor():
    # 3D Input
    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert_almost_equal(
        out_nd.asnumpy(),
        np.transpose(data_in.astype(dtype=np.float32) / 255.0, (2, 0, 1)))

    # 4D Input
    data_in = np.random.uniform(0, 255,
                                (5, 300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert_almost_equal(
        out_nd.asnumpy(),
        np.transpose(data_in.astype(dtype=np.float32) / 255.0, (0, 3, 1, 2)))

    # Invalid Input
    invalid_data_in = nd.random.uniform(
        0, 255, (5, 5, 300, 300, 3)).astype(dtype=np.uint8)
    transformer = transforms.ToTensor()
    assertRaises(MXNetError, transformer, invalid_data_in)

    # Bounds (0->0, 255->1)
    data_in = np.zeros((10, 20, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert same(
        out_nd.asnumpy(),
        np.transpose(np.zeros(data_in.shape, dtype=np.float32), (2, 0, 1)))

    data_in = np.full((10, 20, 3), 255).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert same(
        out_nd.asnumpy(),
        np.transpose(np.ones(data_in.shape, dtype=np.float32), (2, 0, 1)))
 def check_invalid_rsp_pull_single(kv, key):
     dns_val = mx.nd.ones(shape) * 2
     assertRaises(MXNetError,
                  kv.row_sparse_pull,
                  key,
                  out=dns_val,
                  row_ids=mx.nd.array([1]))
def test_NDArrayIter_csr():
    # creating toy data
    num_rows = rnd.randint(5, 15)
    num_cols = rnd.randint(1, 20)
    batch_size = rnd.randint(1, num_rows)
    shape = (num_rows, num_cols)
    csr, _ = rand_sparse_ndarray(shape, 'csr')
    dns = csr.asnumpy()

    # CSRNDArray with last_batch_handle not equal to 'discard' will throw NotImplementedError
    assertRaises(NotImplementedError, mx.io.NDArrayIter, {'data': csr}, dns, batch_size,
                 last_batch_handle='pad')

    # CSRNDArray with shuffle
    csr_iter = iter(mx.io.NDArrayIter({'csr_data': csr, 'dns_data': dns}, dns, batch_size,
                    shuffle=True, last_batch_handle='discard'))
    num_batch = 0
    for batch in csr_iter:
        num_batch += 1

    assert(num_batch == num_rows // batch_size)

    # make iterators
    csr_iter = iter(mx.io.NDArrayIter(csr, csr, batch_size, last_batch_handle='discard'))
    begin = 0
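    # last_batch_handle='discard' drops the incomplete final batch, so end
    # never exceeds num_rows here and the wrap-around branch below is not hit.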
    for batch in csr_iter:
        expected = np.zeros((batch_size, num_cols))
        end = begin + batch_size
        expected[:num_rows - begin] = dns[begin:end]
        if end > num_rows:
            expected[num_rows - begin:] = dns[0:end - num_rows]
        assert_almost_equal(batch.data[0].asnumpy(), expected)
        begin += batch_size
def test_rotate():
    transformer = transforms.Rotate(10.)
    assertRaises(TypeError, transformer, mx.np.ones((3, 30, 60),
                                                    dtype='uint8'))
    single_image = mx.np.ones((3, 30, 60), dtype='float32')
    single_output = transformer(single_image)
    assert same(single_output.shape, (3, 30, 60))
    batch_image = mx.np.ones((3, 3, 30, 60), dtype='float32')
    batch_output = transformer(batch_image)
    assert same(batch_output.shape, (3, 3, 30, 60))

    input_image = np.array([[[0., 0., 0.], [0., 0., 1.], [0., 0., 0.]]])
    rotation_angles_expected_outs = [
        (90., np.array([[[0., 1., 0.], [0., 0., 0.], [0., 0., 0.]]])),
        (180., np.array([[[0., 0., 0.], [1., 0., 0.], [0., 0., 0.]]])),
        (270., np.array([[[0., 0., 0.], [0., 0., 0.], [0., 1., 0.]]])),
        (360., np.array([[[0., 0., 0.], [0., 0., 1.], [0., 0., 0.]]])),
    ]
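    # each pair above gives an angle and where the single hot pixel (middle of
    # the right column) should land after a counter-clockwise rotation.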
    for rot_angle, expected_result in rotation_angles_expected_outs:
        transformer = transforms.Rotate(rot_angle)
        ans = transformer(input_image)
        print(type(ans), ans, type(expected_result), expected_result)
        assert_almost_equal(ans.asnumpy(),
                            expected_result.asnumpy(),
                            atol=1e-6)
def test_custom_embed():
    embed_root = 'embeddings'
    embed_name = 'my_embed'
    elem_delim = '\t'
    pretrain_file = 'my_pretrain_file.txt'

    _mk_my_pretrain_file(os.path.join(embed_root, embed_name), elem_delim, pretrain_file)

    pretrain_file_path = os.path.join(embed_root, embed_name, pretrain_file)

    my_embed = text.embedding.CustomEmbedding(pretrain_file_path, elem_delim)

    assert len(my_embed) == 3
    assert my_embed.vec_len == 5
    assert my_embed.token_to_idx['a'] == 1
    assert my_embed.idx_to_token[1] == 'a'

    first_vec = my_embed.idx_to_vec[0]
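    # index 0 is reserved for the unknown token, whose vector defaults to
    # zeros when init_unknown_vec is not specified.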
    assert_almost_equal(first_vec.asnumpy(), np.array([0, 0, 0, 0, 0]))

    unk_vec = my_embed.get_vecs_by_tokens('A')
    assert_almost_equal(unk_vec.asnumpy(), np.array([0, 0, 0, 0, 0]))

    a_vec = my_embed.get_vecs_by_tokens('A', lower_case_backup=True)
    assert_almost_equal(a_vec.asnumpy(), np.array([0.1, 0.2, 0.3, 0.4, 0.5]))

    unk_vecs = my_embed.get_vecs_by_tokens(['<unk$unk@unk>', '<unk$unk@unk>'])
    assert_almost_equal(unk_vecs.asnumpy(), np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]))

    # Test loaded unknown vectors.
    pretrain_file2 = 'my_pretrain_file2.txt'
    _mk_my_pretrain_file3(os.path.join(embed_root, embed_name), elem_delim, pretrain_file2)
    pretrain_file_path = os.path.join(embed_root, embed_name, pretrain_file2)
    my_embed2 = text.embedding.CustomEmbedding(pretrain_file_path, elem_delim,
                                               init_unknown_vec=nd.ones, unknown_token='<unk>')
    unk_vec2 = my_embed2.get_vecs_by_tokens('<unk>')
    assert_almost_equal(unk_vec2.asnumpy(), np.array([1, 1, 1, 1, 1]))
    unk_vec2 = my_embed2.get_vecs_by_tokens('<unk$unk@unk>')
    assert_almost_equal(unk_vec2.asnumpy(), np.array([1, 1, 1, 1, 1]))

    my_embed3 = text.embedding.CustomEmbedding(pretrain_file_path, elem_delim,
                                               init_unknown_vec=nd.ones, unknown_token='<unk1>')
    unk_vec3 = my_embed3.get_vecs_by_tokens('<unk1>')
    assert_almost_equal(unk_vec3.asnumpy(), np.array([1.1, 1.2, 1.3, 1.4, 1.5]))
    unk_vec3 = my_embed3.get_vecs_by_tokens('<unk$unk@unk>')
    assert_almost_equal(unk_vec3.asnumpy(), np.array([1.1, 1.2, 1.3, 1.4, 1.5]))

    # Test error handling.
    invalid_pretrain_file = 'invalid_pretrain_file.txt'
    _mk_my_invalid_pretrain_file(os.path.join(embed_root, embed_name), elem_delim,
                                 invalid_pretrain_file)
    pretrain_file_path = os.path.join(embed_root, embed_name, invalid_pretrain_file)
    assertRaises(AssertionError, text.embedding.CustomEmbedding, pretrain_file_path, elem_delim)

    invalid_pretrain_file2 = 'invalid_pretrain_file2.txt'
    _mk_my_invalid_pretrain_file2(os.path.join(embed_root, embed_name), elem_delim,
                                  invalid_pretrain_file2)
    pretrain_file_path = os.path.join(embed_root, embed_name, invalid_pretrain_file2)
    assertRaises(AssertionError, text.embedding.CustomEmbedding, pretrain_file_path, elem_delim)
def test_resize_gpu():
    # Test with normal case 3D input float type
    data_in_3d = mx.np.random.uniform(0, 255, (300, 300, 3))
    out_nd_3d = transforms.Resize((100, 100))(data_in_3d)
    data_in_4d_nchw = mx.np.moveaxis(mx.np.expand_dims(data_in_3d, axis=0), 3,
                                     1)
    data_expected_3d = (mx.np.moveaxis(
        nd.contrib.BilinearResize2D(data_in_4d_nchw.as_nd_ndarray(),
                                    height=100,
                                    width=100,
                                    align_corners=False), 1, 3))[0]
    assert_almost_equal(out_nd_3d.asnumpy(), data_expected_3d.asnumpy())

    # Test with normal case 4D input float type
    data_in_4d = mx.np.random.uniform(0, 255, (2, 300, 300, 3))
    out_nd_4d = transforms.Resize((100, 100))(data_in_4d)
    data_in_4d_nchw = mx.np.moveaxis(data_in_4d, 3, 1)
    data_expected_4d = mx.np.moveaxis(
        nd.contrib.BilinearResize2D(data_in_4d_nchw.as_nd_ndarray(),
                                    height=100,
                                    width=100,
                                    align_corners=False), 1, 3)
    assert_almost_equal(out_nd_4d.asnumpy(), data_expected_4d.asnumpy())

    # Test invalid interp
    data_in_3d = mx.np.random.uniform(0, 255, (300, 300, 3))
    invalid_transform = transforms.Resize(-150,
                                          keep_ratio=False,
                                          interpolation=2)
    assertRaises(MXNetError, invalid_transform, data_in_3d)

    # Credited to Hang Zhang
    def py_bilinear_resize_nhwc(x, outputHeight, outputWidth):
        batch, inputHeight, inputWidth, channel = x.shape
        if outputHeight == inputHeight and outputWidth == inputWidth:
            return x
        y = np.empty([batch, outputHeight, outputWidth,
                      channel]).astype('uint8')
        rheight = 1.0 * (inputHeight - 1) / (outputHeight -
                                             1) if outputHeight > 1 else 0.0
        rwidth = 1.0 * (inputWidth - 1) / (outputWidth -
                                           1) if outputWidth > 1 else 0.0
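        # rheight/rwidth map each output row/column index back to a source
        # coordinate in the input (h1r = h2 * rheight, w1r = w2 * rwidth below).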
        for h2 in range(outputHeight):
            h1r = 1.0 * h2 * rheight
            h1 = int(np.floor(h1r))
            h1lambda = h1r - h1
            h1p = 1 if h1 < (inputHeight - 1) else 0
            for w2 in range(outputWidth):
                w1r = 1.0 * w2 * rwidth
                w1 = int(np.floor(w1r))
                w1lambda = w1r - w1
                w1p = 1 if w1 < (inputWidth - 1) else 0
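                # each output pixel blends the 2x2 input neighbourhood around
                # (h1, w1); h1lambda/w1lambda are the fractional offsets used
                # as bilinear weights.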
                for b in range(batch):
                    for c in range(channel):
                        y[b][h2][w2][c] = (1-h1lambda)*((1-w1lambda)*x[b][h1][w1][c] + \
                            w1lambda*x[b][h1][w1+w1p][c]) + \
                            h1lambda*((1-w1lambda)*x[b][h1+h1p][w1][c] + \
                            w1lambda*x[b][h1+h1p][w1+w1p][c])
        return y
def test_sparse_nd_exception():
    """ test invalid sparse operator will throw a exception """
    a = mx.nd.ones((2,2))
    assertRaises(mx.base.MXNetError, mx.nd.sparse.retain, a, invalid_arg="garbage_value")
    assertRaises(ValueError, mx.nd.sparse.csr_matrix, a, shape=(3,2))
    assertRaises(ValueError, mx.nd.sparse.csr_matrix, (2,2), shape=(3,2))
    assertRaises(ValueError, mx.nd.sparse.row_sparse_array, (2,2), shape=(3,2))
    assertRaises(ValueError, mx.nd.sparse.zeros, "invalid_stype", (2,2))
def test_module_bind():
    sym = mx.sym.Variable('data')
    sym = mx.sym.Activation(data=sym, act_type='relu', __layout__='TNC')

    mod = mx.mod.Module(sym, ('data',), None, context=[mx.cpu(0), mx.cpu(1)])
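    # data_shapes expects (name, shape) pairs; passing an NDArray as the shape
    # should raise a TypeError and leave the module unbound.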
    assertRaises(TypeError, mod.bind, data_shapes=[('data', mx.nd.array([10,10]))])
    assert mod.binded == False

    mod.bind(data_shapes=[('data', (10,10))])
    assert mod.binded == True
def test_module_bind():
    x = mx.sym.Variable("data")
    net = mx.sym.FullyConnected(x, num_hidden=1)

    mod = SVRGModule(symbol=net, data_names=['data'], label_names=None, update_freq=2)
    assertRaises(TypeError, mod.bind, data_shapes=['data', mx.nd.zeros(shape=(2, 1))])

    mod.bind(data_shapes=[('data', (2, 1))])
    assert mod.binded == True
    assert mod._mod_aux.binded == True
def test_resize():
    # Test with normal case 3D input float type
    data_in_3d = nd.random.uniform(0, 255, (300, 300, 3))
    out_nd_3d = transforms.Resize((100, 100))(data_in_3d)
    data_in_4d_nchw = nd.moveaxis(nd.expand_dims(data_in_3d, axis=0), 3, 1)
    data_expected_3d = (nd.moveaxis(nd.contrib.BilinearResize2D(data_in_4d_nchw, 100, 100), 1, 3))[0]
    assert_almost_equal(out_nd_3d.asnumpy(), data_expected_3d.asnumpy())

    # Test with normal case 4D input float type
    data_in_4d = nd.random.uniform(0, 255, (2, 300, 300, 3))
    out_nd_4d = transforms.Resize((100, 100))(data_in_4d)
    data_in_4d_nchw = nd.moveaxis(data_in_4d, 3, 1)
    data_expected_4d = nd.moveaxis(nd.contrib.BilinearResize2D(data_in_4d_nchw, 100, 100), 1, 3)
    assert_almost_equal(out_nd_4d.asnumpy(), data_expected_4d.asnumpy())

    # Test invalid interp
    data_in_3d = nd.random.uniform(0, 255, (300, 300, 3))
    invalid_transform = transforms.Resize(-150, keep_ratio=False, interpolation=2)
    assertRaises(MXNetError, invalid_transform, data_in_3d)

    # Credited to Hang Zhang
    def py_bilinear_resize_nhwc(x, outputHeight, outputWidth):
        batch, inputHeight, inputWidth, channel = x.shape
        if outputHeight == inputHeight and outputWidth == inputWidth:
            return x
        y = np.empty([batch, outputHeight, outputWidth, channel]).astype('uint8')
        rheight = 1.0 * (inputHeight - 1) / (outputHeight - 1) if outputHeight > 1 else 0.0
        rwidth = 1.0 * (inputWidth - 1) / (outputWidth - 1) if outputWidth > 1 else 0.0
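        # rheight/rwidth map each output row/column index back to a source
        # coordinate in the input (h1r = h2 * rheight, w1r = w2 * rwidth below).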
        for h2 in range(outputHeight):
            h1r = 1.0 * h2 * rheight
            h1 = int(np.floor(h1r))
            h1lambda = h1r - h1
            h1p = 1 if h1 < (inputHeight - 1) else 0
            for w2 in range(outputWidth):
                w1r = 1.0 * w2 * rwidth
                w1 = int(np.floor(w1r))
                w1lambda = w1r - w1
                w1p = 1 if w1 < (inputWidth - 1) else 0
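                # each output pixel blends the 2x2 input neighbourhood around
                # (h1, w1); h1lambda/w1lambda are the fractional offsets used
                # as bilinear weights.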
                for b in range(batch):
                    for c in range(channel):
                        y[b][h2][w2][c] = (1-h1lambda)*((1-w1lambda)*x[b][h1][w1][c] + \
                            w1lambda*x[b][h1][w1+w1p][c]) + \
                            h1lambda*((1-w1lambda)*x[b][h1+h1p][w1][c] + \
                            w1lambda*x[b][h1+h1p][w1+w1p][c])
        return y

    # Test with normal case 3D input uint8 type
    data_in_4d = nd.random.uniform(0, 255, (1, 300, 300, 3)).astype('uint8')
    out_nd_3d = transforms.Resize((100, 100))(data_in_4d[0])
    assert_almost_equal(out_nd_3d.asnumpy(), py_bilinear_resize_nhwc(data_in_4d.asnumpy(), 100, 100)[0], atol=1.0)

    # Test with normal case 4D input uint8 type
    data_in_4d = nd.random.uniform(0, 255, (2, 300, 300, 3)).astype('uint8')
    out_nd_4d = transforms.Resize((100, 100))(data_in_4d)
    assert_almost_equal(out_nd_4d.asnumpy(), py_bilinear_resize_nhwc(data_in_4d.asnumpy(), 100, 100), atol=1.0)
def test_get_and_pretrain_file_names():
    assert len(text.embedding.get_pretrained_file_names(
        embedding_name='fasttext')) == 327

    assert len(text.embedding.get_pretrained_file_names(embedding_name='glove')) == 10

    reg = text.embedding.get_pretrained_file_names(embedding_name=None)

    assert len(reg['glove']) == 10
    assert len(reg['fasttext']) == 327

    assertRaises(KeyError, text.embedding.get_pretrained_file_names, 'unknown$$')
def test_NDArrayIter_csr():
    # creating toy data
    num_rows = rnd.randint(5, 15)
    num_cols = rnd.randint(1, 20)
    batch_size = rnd.randint(1, num_rows)
    shape = (num_rows, num_cols)
    csr, _ = rand_sparse_ndarray(shape, 'csr')
    dns = csr.asnumpy()

    # CSRNDArray or scipy.sparse.csr_matrix with last_batch_handle not equal to 'discard' will throw NotImplementedError
    assertRaises(NotImplementedError, mx.io.NDArrayIter,
                 {'data': csr}, dns, batch_size)
    try:
        import scipy.sparse as spsp
        train_data = spsp.csr_matrix(dns)
        assertRaises(NotImplementedError, mx.io.NDArrayIter,
                     {'data': train_data}, dns, batch_size)
    except ImportError:
        pass

    # scipy.sparse.csr_matrix with shuffle
    csr_iter = iter(mx.io.NDArrayIter({'data': train_data}, dns, batch_size,
                                      shuffle=True, last_batch_handle='discard'))
    csr_iter_empty_list = iter(mx.io.NDArrayIter({'data': train_data}, [], batch_size,
                                      shuffle=True, last_batch_handle='discard'))
    csr_iter_None = iter(mx.io.NDArrayIter({'data': train_data}, None, batch_size,
                                      shuffle=True, last_batch_handle='discard'))
    _test_NDArrayIter_csr(csr_iter, csr_iter_empty_list,
                          csr_iter_None, num_rows, batch_size)

    # CSRNDArray with shuffle
    csr_iter = iter(mx.io.NDArrayIter({'csr_data': csr, 'dns_data': dns}, dns, batch_size,
                                      shuffle=True, last_batch_handle='discard'))
    csr_iter_empty_list = iter(mx.io.NDArrayIter({'csr_data': csr, 'dns_data': dns}, [], batch_size,
                                      shuffle=True, last_batch_handle='discard'))
    csr_iter_None = iter(mx.io.NDArrayIter({'csr_data': csr, 'dns_data': dns}, None, batch_size,
                                      shuffle=True, last_batch_handle='discard'))
    _test_NDArrayIter_csr(csr_iter, csr_iter_empty_list,
                          csr_iter_None, num_rows, batch_size)

    # make iterators
    csr_iter = iter(mx.io.NDArrayIter(
        csr, csr, batch_size, last_batch_handle='discard'))
    begin = 0
    for batch in csr_iter:
        expected = np.zeros((batch_size, num_cols))
        end = begin + batch_size
        expected[:num_rows - begin] = dns[begin:end]
        if end > num_rows:
            expected[num_rows - begin:] = dns[0:end - num_rows]
        assert_almost_equal(batch.data[0].asnumpy(), expected)
        begin += batch_size
 def _test_crop_resize_with_diff_type(dtype):
     # test normal case
     data_in = nd.arange(60).reshape((5, 4, 3)).astype(dtype)
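     # CropResize takes (x, y, width, height[, size, interpolation]): crop a
     # width x height patch whose top-left corner is at (x, y), then optionally
     # resize it.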
     out_nd = transforms.CropResize(0, 0, 3, 2)(data_in)
     out_np = out_nd.asnumpy()
     assert(out_np.sum() == 180)
     assert((out_np[0:2,1,1].flatten() == [4, 16]).all())
     # test 4D input
     data_bath_in = nd.arange(180).reshape((2, 6, 5, 3)).astype(dtype)
     out_batch_nd = transforms.CropResize(1, 2, 3, 4)(data_bath_in)
     out_batch_np = out_batch_nd.asnumpy()
     assert(out_batch_np.sum() == 7524)
     assert((out_batch_np[0:2,0:4,1,1].flatten() == [37,  52,  67,  82, 127, 142, 157, 172]).all())
     # test normal case with resize
     data_in = nd.random.uniform(0, 255, (300, 200, 3)).astype(dtype)
     out_nd = transforms.CropResize(0, 0, 100, 50, (25, 25), 2)(data_in)
     data_expected = image.imresize(nd.slice(data_in, (0, 0, 0), (50, 100 , 3)), 25, 25, 2)
     assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy())
     # test 4D input with resize
     data_bath_in = nd.random.uniform(0, 255, (3, 300, 200, 3)).astype(dtype)
     out_batch_nd = transforms.CropResize(0, 0, 100, 50, (25, 25), 2)(data_bath_in)
     for i in range(len(out_batch_nd)):
         assert_almost_equal(image.imresize(nd.slice(data_bath_in[i], (0, 0, 0), (50, 100, 3)), 25, 25, 2).asnumpy(),
             out_batch_nd[i].asnumpy())
     # test that resize height and width must be greater than 0
     transformer = transforms.CropResize(0, 0, 100, 50, (-25, 25), 2)
     assertRaises(MXNetError, transformer, data_in)
     # test that crop height and width must be greater than 0
     transformer = transforms.CropResize(0, 0, -100, -50)
     assertRaises(MXNetError, transformer, data_in)
     # test cropped area is bigger than input data
     transformer = transforms.CropResize(150, 200, 200, 500)
     assertRaises(MXNetError, transformer, data_in)
     assertRaises(MXNetError, transformer, data_bath_in)
 def _test_crop_resize_with_diff_type(dtype):
     # test normal case
     data_in = nd.arange(60).reshape((5, 4, 3)).astype(dtype)
     out_nd = transforms.CropResize(0, 0, 3, 2)(data_in)
     out_np = out_nd.asnumpy()
     assert(out_np.sum() == 180)
     assert((out_np[0:2,1,1].flatten() == [4, 16]).all())
     # test 4D input
     data_bath_in = nd.arange(180).reshape((2, 6, 5, 3)).astype(dtype)
     out_batch_nd = transforms.CropResize(1, 2, 3, 4)(data_bath_in)
     out_batch_np = out_batch_nd.asnumpy()
     assert(out_batch_np.sum() == 7524)
     assert((out_batch_np[0:2,0:4,1,1].flatten() == [37,  52,  67,  82, 127, 142, 157, 172]).all())
     # test normal case with resize
     data_in = nd.random.uniform(0, 255, (300, 200, 3)).astype(dtype)
     out_nd = transforms.CropResize(0, 0, 100, 50, (25, 25), 1)(data_in)
     data_expected = transforms.Resize(size=25, interpolation=1)(nd.slice(data_in, (0, 0, 0), (50, 100, 3)))
     assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy())
     # test 4D input with resize
     data_bath_in = nd.random.uniform(0, 255, (3, 300, 200, 3)).astype(dtype)
     out_batch_nd = transforms.CropResize(0, 0, 100, 50, (25, 25), 1)(data_bath_in)
     for i in range(len(out_batch_nd)):
         actual = transforms.Resize(size=25, interpolation=1)(nd.slice(data_bath_in[i], (0, 0, 0), (50, 100, 3))).asnumpy()
         expected = out_batch_nd[i].asnumpy()
         assert_almost_equal(expected, actual)
     # test that resize height and width must be greater than 0
     transformer = transforms.CropResize(0, 0, 100, 50, (-25, 25), 1)
     assertRaises(MXNetError, transformer, data_in)
     # test that crop height and width must be greater than 0
     transformer = transforms.CropResize(0, 0, -100, -50)
     assertRaises(MXNetError, transformer, data_in)
     # test cropped area is bigger than input data
     transformer = transforms.CropResize(150, 200, 200, 500)
     assertRaises(MXNetError, transformer, data_in)
     assertRaises(MXNetError, transformer, data_bath_in)
def test_module_set_params():
    # data iter
    data = mx.nd.array([[0.05, .10]])
    label = mx.nd.array([[.01, 0.99]])
    train_data = mx.io.NDArrayIter(data, label, batch_size=1)

    # symbols
    x = mx.symbol.Variable('data')
    x = mx.symbol.FullyConnected(name='fc_0', data=x, num_hidden=2)
    x = mx.symbol.Activation(name="act_0", data=x, act_type='sigmoid')
    x = mx.symbol.FullyConnected(name='fc_1', data=x, num_hidden=2)
    x = mx.symbol.Activation(name="act_1", data=x, act_type='sigmoid')
    x = mx.symbol.LinearRegressionOutput(data=x, name='softmax', grad_scale=2)

    # create module
    mod = mx.mod.Module(x, context=[mx.cpu()])
    mod.bind(train_data.provide_data, label_shapes=train_data.provide_label,
             for_training=True)

    arg_params_correct = {'fc_0_weight': mx.nd.array([[.15, .20], [.25, .30]]),
                  'fc_0_bias'  : mx.nd.array([.35, .35]),
                  'fc_1_weight': mx.nd.array([[.40, .45], [.50, .55]]),
                  'fc_1_bias'  : mx.nd.array([.60, .60])}

    arg_params_missing = {'fc_0_weight': mx.nd.array([[.15, .20], [.25, .30]]),
                  'fc_0_bias'  : mx.nd.array([.35, .35]),
                  'fc_1_weight': mx.nd.array([[.40, .45], [.50, .55]])}

    arg_params_extra = {'fc_0_weight': mx.nd.array([[.15, .20], [.25, .30]]),
                  'fc_0_bias'  : mx.nd.array([.35, .35]),
                  'fc_1_weight': mx.nd.array([[.40, .45], [.50, .55]]),
                  'fc_1_bias'  : mx.nd.array([.60, .60]),
                  'fc_2_weight': mx.nd.array([.60, .60])}

    arg_params_missing_extra = {'fc_2_weight': mx.nd.array([.60, .60])}

    # test regular set_params
    mod.set_params(force_init=True, arg_params=arg_params_correct, aux_params={})

    # test allow missing
    mod.set_params(force_init=True, arg_params=arg_params_missing, aux_params={}, allow_missing=True)
    assertRaises(RuntimeError, mod.set_params,
                 force_init=True, arg_params=arg_params_missing,
                 aux_params={}, allow_missing=False)

    # test allow extra
    mod.set_params(force_init=True, arg_params=arg_params_extra, aux_params={}, allow_missing=True, allow_extra=True)
    assertRaises(ValueError, mod.set_params,
                 force_init=True, arg_params=arg_params_extra,
                 aux_params={}, allow_missing=True, allow_extra=False)

    # test missing + extra params with allow_missing=False: missing params raise a RuntimeError
    assertRaises(RuntimeError, mod.set_params,
                 force_init=True, arg_params=arg_params_missing_extra,
                 aux_params={}, allow_missing=False, allow_extra=False)

    # test missing + extra params with allow_missing=True but allow_extra=False: extra params raise a ValueError
    assertRaises(ValueError, mod.set_params,
                 force_init=True, arg_params=arg_params_missing_extra,
                 aux_params={}, allow_missing=True, allow_extra=False)
 def check_invalid_key_types_single(kv, key):
     dns_val = mx.nd.ones(shape) * 2
     rsp_val = dns_val.tostype('row_sparse')
     assertRaises(MXNetError, kv.init, key, dns_val)
     assertRaises(MXNetError, kv.push, key, dns_val)
     assertRaises(MXNetError, kv.pull, key, dns_val)
     assertRaises(MXNetError, kv.row_sparse_pull, key, rsp_val,
                  row_ids=mx.nd.array([1]))
 def check_invalid_key_types_list(kv, key):
     dns_val = [mx.nd.ones(shape) * 2] * len(key)
     rsp_val = [val.tostype('row_sparse') for val in dns_val]
     assertRaises(MXNetError, kv.init, key, dns_val)
     assertRaises(MXNetError, kv.push, key, dns_val)
     assertRaises(MXNetError, kv.pull, key, dns_val)
     assertRaises(MXNetError, kv.row_sparse_pull, key, rsp_val,
                      row_ids=[mx.nd.array([1])] * len(key))
def test_get_and_pretrain_file_names():
    assert len(
        text.embedding.get_pretrained_file_names(
            embedding_name='fasttext')) == 327

    assert len(
        text.embedding.get_pretrained_file_names(embedding_name='glove')) == 10

    reg = text.embedding.get_pretrained_file_names(embedding_name=None)

    assert len(reg['glove']) == 10
    assert len(reg['fasttext']) == 327

    assertRaises(KeyError, text.embedding.get_pretrained_file_names,
                 'unknown$$')
def test_to_tensor():
    # 3D Input
    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert_almost_equal(out_nd.asnumpy(), np.transpose(
        data_in.astype(dtype=np.float32) / 255.0, (2, 0, 1)))

    # 4D Input
    data_in = np.random.uniform(0, 255, (5, 300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert_almost_equal(out_nd.asnumpy(), np.transpose(
                        data_in.astype(dtype=np.float32) / 255.0, (0, 3, 1, 2)))

    # Invalid Input
    invalid_data_in = nd.random.uniform(0, 255, (5, 5, 300, 300, 3)).astype(dtype=np.uint8)
    transformer = transforms.ToTensor()
    assertRaises(MXNetError, transformer, invalid_data_in)
def test_image_iter_exception():
    def check_cifar10_exception():
        get_cifar10()
        dataiter = mx.io.ImageRecordIter(
            path_imgrec="data/cifar/train.rec",
            mean_img="data/cifar/cifar10_mean.bin",
            rand_crop=False,
            rand_mirror=False,
            shuffle=False,
            data_shape=(5, 28, 28),
            batch_size=100,
            preprocess_threads=4,
            prefetch_buffer=1)
        labelcount = [0 for i in range(10)]
        batchcount = 0
        for batch in dataiter:
            pass
    assertRaises(MXNetError, check_cifar10_exception)
def test_indices_to_tokens():
    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])

    vocab = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=1,
                                  unknown_token='<unknown>', reserved_tokens=None)
    i1 = vocab.to_tokens(1)
    assert i1 == 'c'

    i2 = vocab.to_tokens([1])
    assert i2 == ['c']

    i3 = vocab.to_tokens([0, 0])
    assert i3 == ['<unknown>', '<unknown>']

    i4 = vocab.to_tokens([3, 0, 3, 2])
    assert i4 == ['a', '<unknown>', 'a', 'b']

    assertRaises(ValueError, vocab.to_tokens, 100)
def test_indices_to_tokens():
    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])

    indexer = text.indexer.TokenIndexer(counter, most_freq_count=None, min_freq=1,
                                        unknown_token='<unknown>', reserved_tokens=None)
    i1 = indexer.to_tokens(1)
    assert i1 == 'c'

    i2 = indexer.to_tokens([1])
    assert i2 == ['c']

    i3 = indexer.to_tokens([0, 0])
    assert i3 == ['<unknown>', '<unknown>']

    i4 = indexer.to_tokens([3, 0, 3, 2])
    assert i4 == ['a', '<unknown>', 'a', 'b']

    assertRaises(ValueError, indexer.to_tokens, 100)
def test_get_embedding_names_and_pretrain_files():
    assert len(
        TokenEmbedding.get_embedding_and_pretrained_file_names(
            embedding_name='fasttext')) == 294

    assert len(
        TokenEmbedding.get_embedding_and_pretrained_file_names(
            embedding_name='glove')) == 10

    reg = TokenEmbedding.get_embedding_and_pretrained_file_names(
        embedding_name=None)

    assert len(reg['glove']) == 10
    assert len(reg['fasttext']) == 294

    assertRaises(KeyError,
                 TokenEmbedding.get_embedding_and_pretrained_file_names,
                 'unknown$$')
def test_sparse_parameter():
    p = gluon.Parameter('weight', shape=(10, 10), stype='row_sparse', grad_stype='row_sparse')
    p.initialize(init='xavier', ctx=[mx.cpu(0), mx.cpu(1)])
    row_id = mx.nd.arange(0, 10, ctx=mx.cpu(1))
    assert len(p.list_grad()) == 2
    # getting row_sparse data without trainer throws an exception
    assertRaises(RuntimeError, p.list_row_sparse_data, row_id)
    trainer = mx.gluon.Trainer([p], 'sgd')
    assert len(p.list_row_sparse_data(row_id)) == 2
    weight = p.row_sparse_data(row_id)
    assert weight.context == mx.cpu(1)
    assert weight.shape == (10, 10)
    assert weight.stype == 'row_sparse'
    assert p.var().name == 'weight'
    assert p.var().attr('__storage_type__') == str(_STORAGE_TYPE_STR_TO_ID['row_sparse'])
    assert p.grad(mx.cpu(0)).stype == 'row_sparse'

    p.reset_ctx(ctx=[mx.cpu(1), mx.cpu(2)])
    assert p.list_ctx() == [mx.cpu(1), mx.cpu(2)]
def test_parameter_invalid_access():
    # cannot call data on row_sparse parameters
    p0 = gluon.Parameter('weight', shape=(10, 10), stype='row_sparse', grad_stype='row_sparse')
    p0.initialize(init='xavier', ctx=[mx.cpu(0), mx.cpu(1)])
    assertRaises(RuntimeError, p0.data)
    assertRaises(RuntimeError, p0.list_data)
    row_id = mx.nd.arange(0, 10)
    # cannot call row_sparse_data on dense parameters
    p1 = gluon.Parameter('weight', shape=(10, 10))
    p1.initialize(init='xavier', ctx=[mx.cpu(0), mx.cpu(1)])
    assertRaises(RuntimeError, p1.row_sparse_data, row_id.copyto(mx.cpu(0)))
    assertRaises(RuntimeError, p1.list_row_sparse_data, row_id)
def test_indices_to_tokens():
    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])

    vocab = text.vocab.Vocabulary(counter,
                                  most_freq_count=None,
                                  min_freq=1,
                                  unknown_token='<unknown>',
                                  reserved_tokens=None)
    i1 = vocab.to_tokens(1)
    assert i1 == 'c'

    i2 = vocab.to_tokens([1])
    assert i2 == ['c']

    i3 = vocab.to_tokens([0, 0])
    assert i3 == ['<unknown>', '<unknown>']

    i4 = vocab.to_tokens([3, 0, 3, 2])
    assert i4 == ['a', '<unknown>', 'a', 'b']

    assertRaises(ValueError, vocab.to_tokens, 100)
def _test_NDArrayIter_csr(csr_iter, csr_iter_empty_list, csr_iter_None, num_rows, batch_size):
    num_batch = 0
    for _, batch_empty_list, batch_empty_None in zip(csr_iter, csr_iter_empty_list, csr_iter_None):
        assert not batch_empty_list.label, 'label is not empty list'
        assert not batch_empty_None.label, 'label is not empty list'
        num_batch += 1

    assert(num_batch == num_rows // batch_size)
    assertRaises(StopIteration, csr_iter.next)
    assertRaises(StopIteration, csr_iter_empty_list.next)
    assertRaises(StopIteration, csr_iter_None.next)
def test_buffer_load():
    nrepeat = 10
    with TemporaryDirectory(prefix='test_buffer_load_') as tmpdir:
        for repeat in range(nrepeat):
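            # load_frombuffer deserializes the same format written by
            # mx.nd.save, but from an in-memory bytes object rather than a file.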
            # test load_buffer as list
            data = []
            for i in range(10):
                data.append(random_ndarray(np.random.randint(1, 5)))
            fname = os.path.join(tmpdir, 'list_{0}.param'.format(repeat))
            mx.nd.save(fname, data)
            with open(fname, 'rb') as dfile:
                buf_data = dfile.read()
                data2 = mx.nd.load_frombuffer(buf_data)
                assert len(data) == len(data2)
                for x, y in zip(data, data2):
                    assert np.sum(x.asnumpy() != y.asnumpy()) == 0
                # test garbage values
                assertRaises(mx.base.MXNetError, mx.nd.load_frombuffer,
                             buf_data[:-10])
            # test load_buffer as dict
            dmap = {'ndarray xx %s' % i: x for i, x in enumerate(data)}
            fname = os.path.join(tmpdir, 'dict_{0}.param'.format(repeat))
            mx.nd.save(fname, dmap)
            with open(fname, 'rb') as dfile:
                buf_dmap = dfile.read()
                dmap2 = mx.nd.load_frombuffer(buf_dmap)
                assert len(dmap2) == len(dmap)
                for k, x in dmap.items():
                    y = dmap2[k]
                    assert np.sum(x.asnumpy() != y.asnumpy()) == 0
                # test garbage values
                assertRaises(mx.base.MXNetError, mx.nd.load_frombuffer,
                             buf_dmap[:-10])

            # we expect the single ndarray to be converted into a list containing the ndarray
            single_ndarray = data[0]
            fname = os.path.join(tmpdir, 'single_{0}.param'.format(repeat))
            mx.nd.save(fname, single_ndarray)
            with open(fname, 'rb') as dfile:
                buf_single_ndarray = dfile.read()
                single_ndarray_loaded = mx.nd.load_frombuffer(
                    buf_single_ndarray)
                assert len(single_ndarray_loaded) == 1
                single_ndarray_loaded = single_ndarray_loaded[0]
                assert np.sum(single_ndarray.asnumpy() !=
                              single_ndarray_loaded.asnumpy()) == 0
                # test garbage values
                assertRaises(mx.base.MXNetError, mx.nd.load_frombuffer,
                             buf_single_ndarray[:-10])
def test_random_rotation():
    # test exceptions for probability input outside of [0,1]
    assertRaises(ValueError, transforms.RandomRotation, [-10, 10.], rotate_with_proba=1.1)
    assertRaises(ValueError, transforms.RandomRotation, [-10, 10.], rotate_with_proba=-0.3)
    # test `forward`
    transformer = transforms.RandomRotation([-10, 10.])
    assertRaises(TypeError, transformer, mx.nd.ones((3, 30, 60), dtype='uint8'))
    single_image = mx.nd.ones((3, 30, 60), dtype='float32')
    single_output = transformer(single_image)
    assert same(single_output.shape, (3, 30, 60))
    batch_image = mx.nd.ones((3, 3, 30, 60), dtype='float32')
    batch_output = transformer(batch_image)
    assert same(batch_output.shape, (3, 3, 30, 60))
    # test identity (rotate_with_proba = 0)
    transformer = transforms.RandomRotation([-100., 100.], rotate_with_proba=0.0)
    data = mx.nd.random_normal(shape=(3, 30, 60))
    assert_almost_equal(data, transformer(data))
def test_buffer_load():
    nrepeat = 10
    with TemporaryDirectory(prefix='test_buffer_load_') as tmpdir:
        for repeat in range(nrepeat):
            # test load_buffer as list
            data = []
            for i in range(10):
                data.append(random_ndarray(np.random.randint(1, 5)))
            fname = os.path.join(tmpdir, 'list_{0}.param'.format(repeat))
            mx.nd.save(fname, data)
            with open(fname, 'rb') as dfile:
                buf_data = dfile.read()
                data2 = mx.nd.load_frombuffer(buf_data)
                assert len(data) == len(data2)
                for x, y in zip(data, data2):
                    assert np.sum(x.asnumpy() != y.asnumpy()) == 0
                # test garbage values
                assertRaises(mx.base.MXNetError,  mx.nd.load_frombuffer, buf_data[:-10])
            # test load_buffer as dict
            dmap = {'ndarray xx %s' % i : x for i, x in enumerate(data)}
            fname = os.path.join(tmpdir, 'dict_{0}.param'.format(repeat))
            mx.nd.save(fname, dmap)
            with open(fname, 'rb') as dfile:
                buf_dmap = dfile.read()
                dmap2 = mx.nd.load_frombuffer(buf_dmap)
                assert len(dmap2) == len(dmap)
                for k, x in dmap.items():
                    y = dmap2[k]
                    assert np.sum(x.asnumpy() != y.asnumpy()) == 0
                # test garbage values
                assertRaises(mx.base.MXNetError,  mx.nd.load_frombuffer, buf_dmap[:-10])

            # we expect the single ndarray to be converted into a list containing the ndarray
            single_ndarray = data[0]
            fname = os.path.join(tmpdir, 'single_{0}.param'.format(repeat))
            mx.nd.save(fname, single_ndarray)
            with open(fname, 'rb') as dfile:
                buf_single_ndarray = dfile.read()
                single_ndarray_loaded = mx.nd.load_frombuffer(buf_single_ndarray)
                assert len(single_ndarray_loaded) == 1
                single_ndarray_loaded = single_ndarray_loaded[0]
                assert np.sum(single_ndarray.asnumpy() != single_ndarray_loaded.asnumpy()) == 0
                # test garbage values
                assertRaises(mx.base.MXNetError,  mx.nd.load_frombuffer, buf_single_ndarray[:-10])
def test_composite_embedding_with_one_embedding():
    embed_root = 'embeddings'
    embed_name = 'my_embed'
    elem_delim = '\t'
    pretrain_file = 'my_pretrain_file1.txt'

    _mk_my_pretrain_file(os.path.join(embed_root, embed_name), elem_delim, pretrain_file)

    pretrain_file_path = os.path.join(embed_root, embed_name, pretrain_file)

    my_embed = text.embedding.CustomEmbedding(pretrain_file_path, elem_delim,
                                              init_unknown_vec=nd.ones)

    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])

    v1 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=1, unknown_token='<unk>',
                               reserved_tokens=['<pad>'])
    ce1 = text.embedding.CompositeEmbedding(v1, my_embed)

    assert ce1.token_to_idx == {'<unk>': 0, '<pad>': 1, 'c': 2, 'b': 3, 'a': 4, 'some_word$': 5}
    assert ce1.idx_to_token == ['<unk>', '<pad>', 'c', 'b', 'a', 'some_word$']

    assert_almost_equal(ce1.idx_to_vec.asnumpy(),
                        np.array([[1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [0.6, 0.7, 0.8, 0.9, 1],
                                  [0.1, 0.2, 0.3, 0.4, 0.5],
                                  [1, 1, 1, 1, 1]])
                        )

    assert ce1.vec_len == 5
    assert ce1.reserved_tokens == ['<pad>']

    assert_almost_equal(ce1.get_vecs_by_tokens('c').asnumpy(),
                        np.array([1, 1, 1, 1, 1])
                        )

    assert_almost_equal(ce1.get_vecs_by_tokens(['c']).asnumpy(),
                        np.array([[1, 1, 1, 1, 1]])
                        )

    assert_almost_equal(ce1.get_vecs_by_tokens(['a', 'not_exist']).asnumpy(),
                        np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
                                  [1, 1, 1, 1, 1]])
                        )

    assert_almost_equal(ce1.get_vecs_by_tokens(['a', 'b']).asnumpy(),
                        np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
                                  [0.6, 0.7, 0.8, 0.9, 1]])
                        )

    assert_almost_equal(ce1.get_vecs_by_tokens(['A', 'b']).asnumpy(),
                        np.array([[1, 1, 1, 1, 1],
                                  [0.6, 0.7, 0.8, 0.9, 1]])
                        )

    assert_almost_equal(ce1.get_vecs_by_tokens(['A', 'b'], lower_case_backup=True).asnumpy(),
                        np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
                                  [0.6, 0.7, 0.8, 0.9, 1]])
                        )

    ce1.update_token_vectors(['a', 'b'],
                             nd.array([[2, 2, 2, 2, 2],
                                      [3, 3, 3, 3, 3]])
                             )

    assert_almost_equal(ce1.idx_to_vec.asnumpy(),
                        np.array([[1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [3, 3, 3, 3, 3],
                                  [2, 2, 2, 2, 2],
                                  [1, 1, 1, 1, 1]])
                        )

    assertRaises(ValueError, ce1.update_token_vectors, 'unknown$$$', nd.array([0, 0, 0, 0, 0]))

    assertRaises(AssertionError, ce1.update_token_vectors, '<unk>',
                 nd.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]))

    assertRaises(AssertionError, ce1.update_token_vectors, '<unk>', nd.array([0]))

    ce1.update_token_vectors(['<unk>'], nd.array([0, 0, 0, 0, 0]))
    assert_almost_equal(ce1.idx_to_vec.asnumpy(),
                        np.array([[0, 0, 0, 0, 0],
                                  [1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [3, 3, 3, 3, 3],
                                  [2, 2, 2, 2, 2],
                                  [1, 1, 1, 1, 1]])
                        )
    ce1.update_token_vectors(['<unk>'], nd.array([[10, 10, 10, 10, 10]]))
    assert_almost_equal(ce1.idx_to_vec.asnumpy(),
                        np.array([[10, 10, 10, 10, 10],
                                  [1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [3, 3, 3, 3, 3],
                                  [2, 2, 2, 2, 2],
                                  [1, 1, 1, 1, 1]])
                        )
    ce1.update_token_vectors('<unk>', nd.array([0, 0, 0, 0, 0]))
    assert_almost_equal(ce1.idx_to_vec.asnumpy(),
                        np.array([[0, 0, 0, 0, 0],
                                  [1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [3, 3, 3, 3, 3],
                                  [2, 2, 2, 2, 2],
                                  [1, 1, 1, 1, 1]])
                        )
    ce1.update_token_vectors('<unk>', nd.array([[10, 10, 10, 10, 10]]))
    assert_almost_equal(ce1.idx_to_vec.asnumpy(),
                        np.array([[10, 10, 10, 10, 10],
                                  [1, 1, 1, 1, 1],
                                  [1, 1, 1, 1, 1],
                                  [3, 3, 3, 3, 3],
                                  [2, 2, 2, 2, 2],
                                  [1, 1, 1, 1, 1]])
                        )
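# `_mk_my_pretrain_file` above is a project-local helper that writes a tiny pretrained
# embedding file. A sketch consistent with the 5-dimensional vectors the test expects for
# tokens 'a' and 'b' (the original helper may differ in detail):
def _mk_my_pretrain_file(path, token_delim, pretrain_file):
    path = os.path.expanduser(path)
    if not os.path.exists(path):
        os.makedirs(path)
    seq = token_delim.join(['a', '0.1', '0.2', '0.3', '0.4', '0.5']) + '\n'
    seq += token_delim.join(['b', '0.6', '0.7', '0.8', '0.9', '1.0']) + '\n'
    with open(os.path.join(path, pretrain_file), 'w') as fout:
        fout.write(seq)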
Example #49
def check_invalid_rsp_pull_list(kv, key):
    # `shape` is provided by the enclosing test in the original source
    dns_val = [mx.nd.ones(shape) * 2] * len(key)
    assertRaises(MXNetError, kv.row_sparse_pull, key, out=dns_val,
                 row_ids=[mx.nd.array([1])] * len(key))
Example #50
def check_invalid_rsp_pull_single(kv, key):
    # `shape` is provided by the enclosing test in the original source
    dns_val = mx.nd.ones(shape) * 2
    assertRaises(MXNetError, kv.row_sparse_pull,
                 key, out=dns_val, row_ids=mx.nd.array([1]))
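# Hypothetical driver for the two checks above (a sketch: the key names, `shape`, and the
# kvstore type are assumptions; in the original tests they come from the enclosing test).
# Pulling a row_sparse key into dense output arrays is expected to fail with MXNetError.
shape = (4, 5)
kv = mx.kv.create('local')
kv.init('single_key', mx.nd.ones(shape).tostype('row_sparse'))
check_invalid_rsp_pull_single(kv, 'single_key')
kv.init(['key1', 'key2'], [mx.nd.ones(shape).tostype('row_sparse')] * 2)
check_invalid_rsp_pull_list(kv, ['key1', 'key2'])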
Example #51
def test_LibSVMIter():
    def check_libSVMIter_synthetic():
        cwd = os.getcwd()
        data_path = os.path.join(cwd, 'data.t')
        label_path = os.path.join(cwd, 'label.t')
        with open(data_path, 'w') as fout:
            fout.write('1.0 0:0.5 2:1.2\n')
            fout.write('-2.0\n')
            fout.write('-3.0 0:0.6 1:2.4 2:1.2\n')
            fout.write('4 2:-1.2\n')

        with open(label_path, 'w') as fout:
            fout.write('1.0\n')
            fout.write('-2.0 0:0.125\n')
            fout.write('-3.0 2:1.2\n')
            fout.write('4 1:1.0 2:-1.2\n')

        data_dir = os.path.join(cwd, 'data')
        data_train = mx.io.LibSVMIter(data_libsvm=data_path,
                                      label_libsvm=label_path,
                                      data_shape=(3, ),
                                      label_shape=(3, ),
                                      batch_size=3)

        first = mx.nd.array([[0.5, 0., 1.2], [0., 0., 0.], [0.6, 2.4, 1.2]])
        second = mx.nd.array([[0., 0., -1.2], [0.5, 0., 1.2], [0., 0., 0.]])
        i = 0
        for batch in iter(data_train):
            expected = first.asnumpy() if i == 0 else second.asnumpy()
            data = data_train.getdata()
            data.check_format(True)
            assert_almost_equal(data.asnumpy(), expected)
            i += 1

    def check_libSVMIter_news_data():
        news_metadata = {
            'name': 'news20.t',
            'origin_name': 'news20.t.bz2',
            'url':
            "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/news20.t.bz2",
            'feature_dim': 62060 + 1,
            'num_classes': 20,
            'num_examples': 3993,
        }
        batch_size = 33
        num_examples = news_metadata['num_examples']
        data_dir = os.path.join(os.getcwd(), 'data')
        get_bz2_data(data_dir, news_metadata['name'], news_metadata['url'],
                     news_metadata['origin_name'])
        path = os.path.join(data_dir, news_metadata['name'])
        data_train = mx.io.LibSVMIter(
            data_libsvm=path,
            data_shape=(news_metadata['feature_dim'], ),
            batch_size=batch_size)
        for epoch in range(2):
            num_batches = 0
            for batch in data_train:
                # check the range of labels
                data = batch.data[0]
                label = batch.label[0]
                data.check_format(True)
                assert (np.sum(label.asnumpy() > 20) == 0)
                assert (np.sum(label.asnumpy() <= 0) == 0)
                num_batches += 1
            expected_num_batches = num_examples / batch_size
            assert (num_batches == int(expected_num_batches)), num_batches
            data_train.reset()

    def check_libSVMIter_exception():
        cwd = os.getcwd()
        data_path = os.path.join(cwd, 'data.t')
        label_path = os.path.join(cwd, 'label.t')
        with open(data_path, 'w') as fout:
            fout.write('1.0 0:0.5 2:1.2\n')
            fout.write('-2.0\n')
            # The line below has a negative index and should trigger an MXNetError
            fout.write('-3.0 -1:0.6 1:2.4 2:1.2\n')
            fout.write('4 2:-1.2\n')

        with open(label_path, 'w') as fout:
            fout.write('1.0\n')
            fout.write('-2.0 0:0.125\n')
            fout.write('-3.0 2:1.2\n')
            fout.write('4 1:1.0 2:-1.2\n')
        data_dir = os.path.join(cwd, 'data')
        data_train = mx.io.LibSVMIter(data_libsvm=data_path,
                                      label_libsvm=label_path,
                                      data_shape=(3, ),
                                      label_shape=(3, ),
                                      batch_size=3)
        for batch in iter(data_train):
            data_train.getdata().asnumpy()

    check_libSVMIter_synthetic()
    check_libSVMIter_news_data()
    assertRaises(MXNetError, check_libSVMIter_exception)
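# For reference (a sketch): the first synthetic batch above corresponds to the CSR layout
# below, where the featureless '-2.0' line becomes an empty row.
example_csr = mx.nd.sparse.csr_matrix(
    ([0.5, 1.2, 0.6, 2.4, 1.2],   # non-zero values
     [0, 2, 0, 1, 2],             # column indices
     [0, 2, 2, 5]),               # row pointers: row 1 holds no values
    shape=(3, 3))
assert_almost_equal(example_csr.asnumpy(),
                    np.array([[0.5, 0., 1.2], [0., 0., 0.], [0.6, 2.4, 1.2]]))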
Example #52
def test_symbol_bool():
    x = mx.symbol.Variable('x')
    assertRaises(NotImplementedForSymbol, bool, x)
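# By contrast (a sketch), truth-value checks do work on concrete single-element NDArrays,
# while multi-element NDArrays raise, mirroring NumPy's behaviour:
assert bool(mx.nd.array([1.0]))
assert not bool(mx.nd.array([0.0]))
assertRaises(ValueError, bool, mx.nd.array([1.0, 2.0]))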
def test_composite_embedding_with_one_embedding():
    embed_root = 'embeddings'
    embed_name = 'my_embed'
    elem_delim = '\t'
    pretrain_file = 'my_pretrain_file1.txt'

    _mk_my_pretrain_file(os.path.join(embed_root, embed_name), elem_delim,
                         pretrain_file)

    pretrain_file_path = os.path.join(embed_root, embed_name, pretrain_file)

    my_embed = text.embedding.CustomEmbedding(pretrain_file_path,
                                              elem_delim,
                                              init_unknown_vec=nd.ones)

    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])

    v1 = text.vocab.Vocabulary(counter,
                               most_freq_count=None,
                               min_freq=1,
                               unknown_token='<unk>',
                               reserved_tokens=['<pad>'])
    ce1 = text.embedding.CompositeEmbedding(v1, my_embed)

    assert ce1.token_to_idx == {
        '<unk>': 0,
        '<pad>': 1,
        'c': 2,
        'b': 3,
        'a': 4,
        'some_word$': 5
    }
    assert ce1.idx_to_token == ['<unk>', '<pad>', 'c', 'b', 'a', 'some_word$']

    assert_almost_equal(
        ce1.idx_to_vec.asnumpy(),
        np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1], [1, 1, 1, 1, 1],
                  [0.6, 0.7, 0.8, 0.9, 1], [0.1, 0.2, 0.3, 0.4, 0.5],
                  [1, 1, 1, 1, 1]]))

    assert ce1.vec_len == 5
    assert ce1.reserved_tokens == ['<pad>']

    assert_almost_equal(
        ce1.get_vecs_by_tokens('c').asnumpy(), np.array([1, 1, 1, 1, 1]))

    assert_almost_equal(
        ce1.get_vecs_by_tokens(['c']).asnumpy(), np.array([[1, 1, 1, 1, 1]]))

    assert_almost_equal(
        ce1.get_vecs_by_tokens(['a', 'not_exist']).asnumpy(),
        np.array([[0.1, 0.2, 0.3, 0.4, 0.5], [1, 1, 1, 1, 1]]))

    assert_almost_equal(
        ce1.get_vecs_by_tokens(['a', 'b']).asnumpy(),
        np.array([[0.1, 0.2, 0.3, 0.4, 0.5], [0.6, 0.7, 0.8, 0.9, 1]]))

    assert_almost_equal(
        ce1.get_vecs_by_tokens(['A', 'b']).asnumpy(),
        np.array([[1, 1, 1, 1, 1], [0.6, 0.7, 0.8, 0.9, 1]]))

    assert_almost_equal(
        ce1.get_vecs_by_tokens(['A', 'b'], lower_case_backup=True).asnumpy(),
        np.array([[0.1, 0.2, 0.3, 0.4, 0.5], [0.6, 0.7, 0.8, 0.9, 1]]))

    ce1.update_token_vectors(['a', 'b'],
                             nd.array([[2, 2, 2, 2, 2], [3, 3, 3, 3, 3]]))

    assert_almost_equal(
        ce1.idx_to_vec.asnumpy(),
        np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1], [1, 1, 1, 1, 1],
                  [3, 3, 3, 3, 3], [2, 2, 2, 2, 2], [1, 1, 1, 1, 1]]))

    assertRaises(ValueError, ce1.update_token_vectors, 'unknown$$$',
                 nd.array([0, 0, 0, 0, 0]))

    assertRaises(AssertionError, ce1.update_token_vectors, '<unk>',
                 nd.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]))

    assertRaises(AssertionError, ce1.update_token_vectors, '<unk>',
                 nd.array([0]))

    ce1.update_token_vectors(['<unk>'], nd.array([0, 0, 0, 0, 0]))
    assert_almost_equal(
        ce1.idx_to_vec.asnumpy(),
        np.array([[0, 0, 0, 0, 0], [1, 1, 1, 1, 1], [1, 1, 1, 1, 1],
                  [3, 3, 3, 3, 3], [2, 2, 2, 2, 2], [1, 1, 1, 1, 1]]))
    ce1.update_token_vectors(['<unk>'], nd.array([[10, 10, 10, 10, 10]]))
    assert_almost_equal(
        ce1.idx_to_vec.asnumpy(),
        np.array([[10, 10, 10, 10, 10], [1, 1, 1, 1, 1], [1, 1, 1, 1, 1],
                  [3, 3, 3, 3, 3], [2, 2, 2, 2, 2], [1, 1, 1, 1, 1]]))
    ce1.update_token_vectors('<unk>', nd.array([0, 0, 0, 0, 0]))
    assert_almost_equal(
        ce1.idx_to_vec.asnumpy(),
        np.array([[0, 0, 0, 0, 0], [1, 1, 1, 1, 1], [1, 1, 1, 1, 1],
                  [3, 3, 3, 3, 3], [2, 2, 2, 2, 2], [1, 1, 1, 1, 1]]))
    ce1.update_token_vectors('<unk>', nd.array([[10, 10, 10, 10, 10]]))
    assert_almost_equal(
        ce1.idx_to_vec.asnumpy(),
        np.array([[10, 10, 10, 10, 10], [1, 1, 1, 1, 1], [1, 1, 1, 1, 1],
                  [3, 3, 3, 3, 3], [2, 2, 2, 2, 2], [1, 1, 1, 1, 1]]))

def test_LibSVMIter():

    def check_libSVMIter_synthetic():
        cwd = os.getcwd()
        data_path = os.path.join(cwd, 'data.t')
        label_path = os.path.join(cwd, 'label.t')
        with open(data_path, 'w') as fout:
            fout.write('1.0 0:0.5 2:1.2\n')
            fout.write('-2.0\n')
            fout.write('-3.0 0:0.6 1:2.4 2:1.2\n')
            fout.write('4 2:-1.2\n')

        with open(label_path, 'w') as fout:
            fout.write('1.0\n')
            fout.write('-2.0 0:0.125\n')
            fout.write('-3.0 2:1.2\n')
            fout.write('4 1:1.0 2:-1.2\n')

        data_dir = os.path.join(cwd, 'data')
        data_train = mx.io.LibSVMIter(data_libsvm=data_path, label_libsvm=label_path,
                                      data_shape=(3, ), label_shape=(3, ), batch_size=3)

        first = mx.nd.array([[0.5, 0., 1.2], [0., 0., 0.], [0.6, 2.4, 1.2]])
        second = mx.nd.array([[0., 0., -1.2], [0.5, 0., 1.2], [0., 0., 0.]])
        i = 0
        for batch in iter(data_train):
            expected = first.asnumpy() if i == 0 else second.asnumpy()
            data = data_train.getdata()
            data.check_format(True)
            assert_almost_equal(data.asnumpy(), expected)
            i += 1

    def check_libSVMIter_news_data():
        news_metadata = {
            'name': 'news20.t',
            'origin_name': 'news20.t.bz2',
            'url': "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/news20.t.bz2",
            'feature_dim': 62060 + 1,
            'num_classes': 20,
            'num_examples': 3993,
        }
        batch_size = 33
        num_examples = news_metadata['num_examples']
        data_dir = os.path.join(os.getcwd(), 'data')
        get_bz2_data(data_dir, news_metadata['name'], news_metadata['url'],
                     news_metadata['origin_name'])
        path = os.path.join(data_dir, news_metadata['name'])
        data_train = mx.io.LibSVMIter(data_libsvm=path, data_shape=(news_metadata['feature_dim'],),
                                      batch_size=batch_size)
        for epoch in range(2):
            num_batches = 0
            for batch in data_train:
                # check the range of labels
                data = batch.data[0]
                label = batch.label[0]
                data.check_format(True)
                assert(np.sum(label.asnumpy() > 20) == 0)
                assert(np.sum(label.asnumpy() <= 0) == 0)
                num_batches += 1
            expected_num_batches = num_examples / batch_size
            assert(num_batches == int(expected_num_batches)), num_batches
            data_train.reset()

    def check_libSVMIter_exception():
        cwd = os.getcwd()
        data_path = os.path.join(cwd, 'data.t')
        label_path = os.path.join(cwd, 'label.t')
        with open(data_path, 'w') as fout:
            fout.write('1.0 0:0.5 2:1.2\n')
            fout.write('-2.0\n')
            # The line below has a negative index and should trigger an MXNetError
            fout.write('-3.0 -1:0.6 1:2.4 2:1.2\n')
            fout.write('4 2:-1.2\n')

        with open(label_path, 'w') as fout:
            fout.write('1.0\n')
            fout.write('-2.0 0:0.125\n')
            fout.write('-3.0 2:1.2\n')
            fout.write('4 1:1.0 2:-1.2\n')
        data_dir = os.path.join(cwd, 'data')
        data_train = mx.io.LibSVMIter(data_libsvm=data_path, label_libsvm=label_path,
                                      data_shape=(3, ), label_shape=(3, ), batch_size=3)
        for batch in iter(data_train):
            data_train.getdata().asnumpy()

    check_libSVMIter_synthetic()
    check_libSVMIter_news_data()
    assertRaises(MXNetError, check_libSVMIter_exception)

def test_sparse_nd_check_format():
    """ test check_format for sparse ndarray """
    shape = rand_shape_2d()
    stypes = ["csr", "row_sparse"]
    for stype in stypes:
        arr, _ = rand_sparse_ndarray(shape, stype)
        arr.check_format()
        arr = mx.nd.sparse.zeros(stype, shape)
        arr.check_format()
    # CSR indptr values must be non-decreasing and must not exceed the number of stored values
    shape = (3, 4)
    data_list = [7, 8, 9]
    indices_list = [0, 2, 1]
    indptr_list = [0, 5, 2, 3]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
    # CSR format indices should be in ascending order per row
    indices_list = [2, 1, 1]
    indptr_list = [0, 2, 2, 3]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
    # CSR indptr must end with a value equal to the number of stored values (length of indices)
    indices_list = [1, 2, 1]
    indptr_list = [0, 2, 2, 4]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
    # CSR format indices should not be negative
    indices_list = [0, 2, 1]
    indptr_list = [0, -2, 2, 3]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
    # Row Sparse format indices should be less than the number of rows
    shape = (3, 2)
    data_list = [[1, 2], [3, 4]]
    indices_list = [1, 4]
    a = mx.nd.sparse.row_sparse_array((data_list, indices_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
    # Row Sparse format indices should be in ascending order
    indices_list = [1, 0]
    a = mx.nd.sparse.row_sparse_array((data_list, indices_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
    # Row Sparse format indices should not be negative
    indices_list = [1, -2]
    a = mx.nd.sparse.row_sparse_array((data_list, indices_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)

def test_sparse_nd_check_format():
    """ test check_format for sparse ndarray """
    shape = rand_shape_2d()
    stypes = ["csr", "row_sparse"]
    for stype in stypes:
        arr, _ = rand_sparse_ndarray(shape, stype)
        arr.check_format()
        arr = mx.nd.sparse.zeros(stype, shape)
        arr.check_format()
    # CSR indptr values must be non-decreasing and must not exceed the number of stored values
    shape = (3, 4)
    data_list = [7, 8, 9]
    indices_list = [0, 2, 1]
    indptr_list = [0, 5, 2, 3]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list),
                                shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
    # CSR format indices should be in ascending order per row
    indices_list = [2, 1, 1]
    indptr_list = [0, 2, 2, 3]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list),
                                shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
    # CSR indptr must end with a value equal to the number of stored values (length of indices)
    indices_list = [1, 2, 1]
    indptr_list = [0, 2, 2, 4]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list),
                                shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
    # CSR format indices should not be negative
    indices_list = [0, 2, 1]
    indptr_list = [0, -2, 2, 3]
    a = mx.nd.sparse.csr_matrix((data_list, indices_list, indptr_list),
                                shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
    # CSR arrays must be 2-dimensional; tostype('csr') on other ranks raises ValueError
    a = mx.nd.array([1, 2, 3])
    assertRaises(ValueError, a.tostype, 'csr')
    a = mx.nd.array([[[1, 2, 3]]])
    assertRaises(ValueError, a.tostype, 'csr')
    # Row Sparse format indices should be less than the number of rows
    shape = (3, 2)
    data_list = [[1, 2], [3, 4]]
    indices_list = [1, 4]
    a = mx.nd.sparse.row_sparse_array((data_list, indices_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
    # Row Sparse format indices should be in ascending order
    indices_list = [1, 0]
    a = mx.nd.sparse.row_sparse_array((data_list, indices_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
    # Row Sparse format indices should not be negative
    indices_list = [1, -2]
    a = mx.nd.sparse.row_sparse_array((data_list, indices_list), shape=shape)
    assertRaises(mx.base.MXNetError, a.check_format)
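# For contrast (a sketch): a well-formed CSR array, whose indptr is non-decreasing and ends
# at the number of stored values, and whose column indices are in range and sorted per row,
# passes check_format without raising.
good = mx.nd.sparse.csr_matrix(([7, 8, 9], [0, 2, 1], [0, 2, 2, 3]), shape=(3, 4))
good.check_format()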
def test_vocabulary():
    counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])

    v1 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=1, unknown_token='<unk>',
                               reserved_tokens=None)
    assert len(v1) == 5
    assert v1.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2, 'a': 3, 'some_word$': 4}
    assert v1.idx_to_token[1] == 'c'
    assert v1.unknown_token == '<unk>'
    assert v1.reserved_tokens is None

    v2 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=2, unknown_token='<unk>',
                               reserved_tokens=None)
    assert len(v2) == 3
    assert v2.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2}
    assert v2.idx_to_token[1] == 'c'
    assert v2.unknown_token == '<unk>'
    assert v2.reserved_tokens is None

    v3 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=100, unknown_token='<unk>',
                               reserved_tokens=None)
    assert len(v3) == 1
    assert v3.token_to_idx == {'<unk>': 0}
    assert v3.idx_to_token[0] == '<unk>'
    assert v3.unknown_token == '<unk>'
    assert v3.reserved_tokens is None

    v4 = text.vocab.Vocabulary(counter, most_freq_count=2, min_freq=1, unknown_token='<unk>',
                               reserved_tokens=None)
    assert len(v4) == 3
    assert v4.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2}
    assert v4.idx_to_token[1] == 'c'
    assert v4.unknown_token == '<unk>'
    assert v4.reserved_tokens is None

    v5 = text.vocab.Vocabulary(counter, most_freq_count=3, min_freq=1, unknown_token='<unk>',
                               reserved_tokens=None)
    assert len(v5) == 4
    assert v5.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2, 'a': 3}
    assert v5.idx_to_token[1] == 'c'
    assert v5.unknown_token == '<unk>'
    assert v5.reserved_tokens is None

    v6 = text.vocab.Vocabulary(counter, most_freq_count=100, min_freq=1, unknown_token='<unk>',
                               reserved_tokens=None)
    assert len(v6) == 5
    assert v6.token_to_idx == {'<unk>': 0, 'c': 1, 'b': 2, 'a': 3,
                               'some_word$': 4}
    assert v6.idx_to_token[1] == 'c'
    assert v6.unknown_token == '<unk>'
    assert v6.reserved_tokens is None

    v7 = text.vocab.Vocabulary(counter, most_freq_count=1, min_freq=2, unknown_token='<unk>',
                               reserved_tokens=None)
    assert len(v7) == 2
    assert v7.token_to_idx == {'<unk>': 0, 'c': 1}
    assert v7.idx_to_token[1] == 'c'
    assert v7.unknown_token == '<unk>'
    assert v7.reserved_tokens is None

    assertRaises(AssertionError, text.vocab.Vocabulary, counter, most_freq_count=None,
                 min_freq=0, unknown_token='<unknown>', reserved_tokens=['b'])

    assertRaises(AssertionError, text.vocab.Vocabulary, counter, most_freq_count=None,
                 min_freq=1, unknown_token='<unknown>', reserved_tokens=['b', 'b'])

    assertRaises(AssertionError, text.vocab.Vocabulary, counter, most_freq_count=None,
                 min_freq=1, unknown_token='<unknown>', reserved_tokens=['b', '<unknown>'])

    v8 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=1, unknown_token='<unknown>',
                               reserved_tokens=['b'])
    assert len(v8) == 5
    assert v8.token_to_idx == {'<unknown>': 0, 'b': 1, 'c': 2, 'a': 3, 'some_word$': 4}
    assert v8.idx_to_token[1] == 'b'
    assert v8.unknown_token == '<unknown>'
    assert v8.reserved_tokens == ['b']

    v9 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=2, unknown_token='<unk>',
                               reserved_tokens=['b', 'a'])
    assert len(v9) == 4
    assert v9.token_to_idx == {'<unk>': 0, 'b': 1, 'a': 2, 'c': 3}
    assert v9.idx_to_token[1] == 'b'
    assert v9.unknown_token == '<unk>'
    assert v9.reserved_tokens == ['b', 'a']

    v10 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=100, unknown_token='<unk>',
                                reserved_tokens=['b', 'c'])
    assert len(v10) == 3
    assert v10.token_to_idx == {'<unk>': 0, 'b': 1, 'c': 2}
    assert v10.idx_to_token[1] == 'b'
    assert v10.unknown_token == '<unk>'
    assert v10.reserved_tokens == ['b', 'c']

    v11 = text.vocab.Vocabulary(counter, most_freq_count=1, min_freq=2, unknown_token='<unk>',
                                reserved_tokens=['<pad>', 'b'])
    assert len(v11) == 4
    assert v11.token_to_idx == {'<unk>': 0, '<pad>': 1, 'b': 2, 'c': 3}
    assert v11.idx_to_token[1] == '<pad>'
    assert v11.unknown_token == '<unk>'
    assert v11.reserved_tokens == ['<pad>', 'b']

    v12 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=2, unknown_token='b',
                                reserved_tokens=['<pad>'])
    assert len(v12) == 3
    assert v12.token_to_idx == {'b': 0, '<pad>': 1, 'c': 2}
    assert v12.idx_to_token[1] == '<pad>'
    assert v12.unknown_token == 'b'
    assert v12.reserved_tokens == ['<pad>']

    v13 = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=2, unknown_token='a',
                                reserved_tokens=['<pad>'])
    assert len(v13) == 4
    assert v13.token_to_idx == {'a': 0, '<pad>': 1, 'c': 2, 'b': 3}
    assert v13.idx_to_token[1] == '<pad>'
    assert v13.unknown_token == 'a'
    assert v13.reserved_tokens == ['<pad>']

    counter_tuple = Counter([('a', 'a'), ('b', 'b'), ('b', 'b'), ('c', 'c'), ('c', 'c'), ('c', 'c'),
                             ('some_word$', 'some_word$')])

    v14 = text.vocab.Vocabulary(counter_tuple, most_freq_count=None, min_freq=1,
                                unknown_token=('<unk>', '<unk>'), reserved_tokens=None)
    assert len(v14) == 5
    assert v14.token_to_idx == {('<unk>', '<unk>'): 0, ('c', 'c'): 1, ('b', 'b'): 2, ('a', 'a'): 3,
                                ('some_word$', 'some_word$'): 4}
    assert v14.idx_to_token[1] == ('c', 'c')
    assert v14.unknown_token == ('<unk>', '<unk>')
    assert v14.reserved_tokens is None
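# Usage sketch for the vocabularies above: mapping between tokens and indices.
# (to_indices/to_tokens are assumed to be the standard Vocabulary helpers; unknown tokens
# resolve to index 0, the position of the unknown token.)
counter = Counter(['a', 'b', 'b', 'c', 'c', 'c', 'some_word$'])
v = text.vocab.Vocabulary(counter, most_freq_count=None, min_freq=1, unknown_token='<unk>',
                          reserved_tokens=None)
assert v.to_indices('c') == 1
assert v.to_indices(['c', 'not_exist']) == [1, 0]
assert v.to_tokens([3, 2]) == ['a', 'b']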