Ejemplo n.º 1
0
def test_char_2d_valid_length(vocab):
    """Check the length value returned by ``Char2DVectorizer.run``.

    Random maximum sizes are drawn, then a token count strictly below the
    drawn ``mxlen`` is chosen. The second value returned by ``run`` must be
    equal to that token count.
    """
    mxlen, mxwlen = np.random.randint(3, 15, size=2)
    # The upper bound of randint is exclusive, so 1 <= n_tokens < mxlen.
    n_tokens = np.random.randint(1, mxlen)
    tokens = ['a' for _ in range(n_tokens)]
    vectorizer = Char2DVectorizer(mxlen=mxlen, mxwlen=mxwlen)
    _, lengths = vectorizer.run(tokens, vocab)
    assert lengths == n_tokens
Ejemplo n.º 2
0
def test_char_2d_cuts_off_mxwlen(vocab):
    """Check that words longer than ``mxwlen`` are cut to ``mxwlen`` chars.

    The first word has eight characters while ``mxwlen`` is four; the
    expected output row holds only the index of ``'a'``, repeated
    ``mxwlen`` times.
    """
    mxlen = 2
    mxwlen = 4
    words = ['aaaabbbb', 'cccc']
    # One expected row per word: the index of its leading character,
    # repeated across the full word width.
    expected = np.array(
        [[vocab[first_char]] * mxwlen for first_char in ('a', 'c')],
        dtype=int,
    )
    vectorizer = Char2DVectorizer(mxlen=mxlen, mxwlen=mxwlen)
    output, _ = vectorizer.run(words, vocab)
    np.testing.assert_equal(output, expected)
Ejemplo n.º 3
0
def test_char_2d_run_values(vocab):
    """Check every cell of the output against the vocabulary.

    Builds ``mxlen`` words, each made of a single lowercase letter repeated
    ``mxwlen`` times, and asserts that position ``[row, col]`` of the result
    equals the vocabulary entry for the character at that position.
    """
    mxlen, mxwlen = np.random.randint(3, 15, size=2)
    # Word ``row`` is the row-th lowercase letter ('a', 'b', ...) repeated.
    words = [chr(ord('a') + row) * mxwlen for row in range(mxlen)]
    vectorizer = Char2DVectorizer(mxlen=mxlen, mxwlen=mxwlen)
    output, _ = vectorizer.run(words, vocab)
    for row, word in enumerate(words):
        for col, letter in enumerate(word):
            assert output[row, col] == vocab[letter]
Ejemplo n.º 4
0
 def __init__(self, nctx, chars_per_word):
     """Set up the reader with a character-level 'x' and a token-level 'y'.

     :param nctx: Forwarded unchanged to the parent constructor.
     :param chars_per_word: Passed to ``Char2DVectorizer`` as ``mxwlen`` and
         stored on the instance as ``self.chars_per_word``.
     """
     # Targets: 1D token vectorizer using ``baseline.lowercase`` as transform.
     token_vectorizer = Token1DVectorizer(transform_fn=baseline.lowercase)
     # Inputs: 2D character vectorizer with the requested word width.
     char_vectorizer = Char2DVectorizer(mxwlen=chars_per_word)
     vectorizers = {'x': char_vectorizer, 'y': token_vectorizer}
     super(TensorCharDatasetReader, self).__init__(nctx, vectorizers)
     self.chars_per_word = chars_per_word
Ejemplo n.º 5
0
def test_char_2d_cuts_off_mxlen(vocab):
    """Check that words beyond ``mxlen`` do not appear in the output.

    Three single-character words are given with ``mxlen`` of two. The result
    must have ``mxlen`` rows, the first column of each row must hold the
    index of the corresponding kept word, and the index of the remaining
    word must not occur anywhere in the result.
    """
    mxlen = 2
    mxwlen = 4
    words = ['a', 'b', 'c']
    vectorizer = Char2DVectorizer(mxlen=mxlen, mxwlen=mxwlen)
    output, _ = vectorizer.run(words, vocab)
    assert output.shape[0] == mxlen

    kept, dropped = words[:mxlen], words[mxlen:]
    for row, letter in enumerate(kept):
        assert output[row, 0] == vocab[letter]

    # Collect every index present in the output to test for absence.
    seen = set(output.flatten().tolist())
    for letter in dropped:
        assert vocab[letter] not in seen
Ejemplo n.º 6
0
def test_char_2d_shapes(vocab):
    """Check that the output shape is ``(mxlen, mxwlen)``.

    Sizes are drawn at random and the vectorizer is run on a single empty
    string; the shape of the result must match the drawn sizes.
    """
    mxlen, mxwlen = np.random.randint(1, 100, size=2)
    vectorizer = Char2DVectorizer(mxlen=mxlen, mxwlen=mxwlen)
    output, _ = vectorizer.run([''], vocab)
    assert output.shape == (mxlen, mxwlen)