Ejemplo n.º 1
0
def test_lemmatize_atomic_table():
    """Check that the lemmatized vector is longer than both table parts combined.

    Random atomic tables are drawn until one is found whose first element
    is a list; only such a table is passed to ``lemmatize_atomic_table``.
    """
    _, table = load_random_atomic_table()
    while not isinstance(table[0], list):
        _, table = load_random_atomic_table()

    lemmatized = lemmatize_atomic_table(table)
    vector_size = len(lemmatized)
    assert vector_size > len(table[0]) + len(table[1])
Ejemplo n.º 2
0
def test_categorize_table():
    """Check that ``categorize_table`` returns at least one category.

    Random atomic tables are drawn until one is found whose first element
    is a list.
    """
    _, table = load_random_atomic_table()
    while not isinstance(table[0], list):
        _, table = load_random_atomic_table()

    found_categories = categorize_table(table)
    category_count = len(found_categories)
    assert category_count > 0
Ejemplo n.º 3
0
def test_get_top_hash():
    """Check that ``get_top_hash`` returns a ``numpy.uint64`` for both vectorization types.

    Random atomic tables are drawn until one is found whose first element
    is a list. Both hashes are computed before either is checked.
    """
    _, table = load_random_atomic_table()
    while not isinstance(table[0], list):
        _, table = load_random_atomic_table()

    top_hashes = [
        get_top_hash(table, vectorization_type=kind)
        for kind in ("simple", "lemmatize")
    ]
    for top_hash in top_hashes:
        assert isinstance(top_hash, numpy.uint64)
Ejemplo n.º 4
0
def test_vectorize_atomic_table():
    """Check that ``vectorize_atomic_table`` returns a non-empty list of ``bytes``.

    Random atomic tables are drawn until one is found whose first element
    is a list; only such a table is vectorized.
    """
    while True:
        (_, atomic_table) = load_random_atomic_table()
        if isinstance(atomic_table[0], list):
            break
    vector = vectorize_atomic_table(atomic_table)
    assert isinstance(vector, list)
    # Check non-emptiness before indexing, so an empty result is reported as
    # an assertion failure rather than an IndexError on vector[0].
    assert len(vector) > 0
    assert isinstance(vector[0], bytes)
Ejemplo n.º 5
0
def test_load_random_atomic_table():
    """Check the shape and ID of a table returned by ``load_random_atomic_table``.

    Random atomic tables are drawn until one is found whose first element
    is a list. Original note: tables without a subject column were split
    into single columns (presumably those are the tables this filter
    skips -- verify against the loader).

    The selected table must have exactly two parts of equal length, and the
    returned ID must equal ``generate_atomic_table_id`` for that table.
    """
    while True:
        (atomic_table_id, atomic_table) = load_random_atomic_table()
        if isinstance(atomic_table[0], list):
            break
    # Check the number of parts before indexing atomic_table[1], so a
    # malformed table fails this assertion instead of raising IndexError.
    assert len(atomic_table) == 2
    assert len(atomic_table[0]) == len(atomic_table[1])
    expected_id = generate_atomic_table_id(atomic_table)
    assert atomic_table_id == expected_id
Ejemplo n.º 6
0
def test_get_hash_values():
    """Check that ``get_hash_values`` returns 128 ``numpy.uint64`` values per vectorization type.

    Random atomic tables are drawn until one is found whose first element
    is a list. Both hash sequences are computed first; their lengths are
    then checked, followed by the type of each sequence's first element.
    """
    _, table = load_random_atomic_table()
    while not isinstance(table[0], list):
        _, table = load_random_atomic_table()

    simple_hashes = get_hash_values(table, vectorization_type="simple")
    lemma_hashes = get_hash_values(table, vectorization_type="lemmatize")
    both = (simple_hashes, lemma_hashes)

    for hashes in both:
        assert len(hashes) == 128
    for hashes in both:
        assert isinstance(hashes[0], numpy.uint64)
Ejemplo n.º 7
0
def test_lemmatize_table():
    while True:
        (atomic_table_id, atomic_table) = load_random_atomic_table()
        if isinstance(atomic_table[0], list):
            break
    lemmas = lemmatize_table(atomic_table)