def test_lemmatize_atomic_table():
    """Check that the lemmatized vector is longer than both table parts combined.

    A random atomic table is redrawn until its first element is a list.
    The lemmatized result must then contain more items than
    ``atomic_table[0]`` and ``atomic_table[1]`` hold together.
    """
    _, table = load_random_atomic_table()
    while not isinstance(table[0], list):
        # Keep drawing until the table's first element is a list.
        _, table = load_random_atomic_table()

    lemma_vector = lemmatize_atomic_table(table)
    combined_length = len(table[0]) + len(table[1])
    assert len(lemma_vector) > combined_length
def test_categorize_table():
    """Check that categorizing a random atomic table yields at least one category.

    A random atomic table is redrawn until its first element is a list
    before it is handed to ``categorize_table``.
    """
    _, table = load_random_atomic_table()
    while not isinstance(table[0], list):
        # Keep drawing until the table's first element is a list.
        _, table = load_random_atomic_table()

    category_count = len(categorize_table(table))
    assert category_count > 0
def test_get_top_hash():
    """Check that ``get_top_hash`` returns a ``numpy.uint64`` for both modes.

    A random atomic table is redrawn until its first element is a list.
    The top hash is then computed with the "simple" and the "lemmatize"
    vectorization types, and each result is type-checked.
    """
    _, table = load_random_atomic_table()
    while not isinstance(table[0], list):
        # Keep drawing until the table's first element is a list.
        _, table = load_random_atomic_table()

    # Compute both hashes up front, then verify their types.
    simple_hash = get_top_hash(table, vectorization_type="simple")
    lemmatized_hash = get_top_hash(table, vectorization_type="lemmatize")

    for top_hash in (simple_hash, lemmatized_hash):
        assert isinstance(top_hash, numpy.uint64)
def test_vectorize_atomic_table():
    """Check that vectorizing a random atomic table yields a non-empty list of bytes.

    A random atomic table is redrawn until its first element is a list.
    The result of ``vectorize_atomic_table`` must be a ``list`` with at
    least one element, and its first element must be ``bytes``.
    """
    while True:
        (atomic_table_id, atomic_table) = load_random_atomic_table()
        if isinstance(atomic_table[0], list):
            break

    vector = vectorize_atomic_table(atomic_table)

    assert isinstance(vector, list)
    # Verify non-emptiness before indexing, so an empty result fails this
    # assertion instead of raising IndexError on ``vector[0]`` below.
    assert len(vector) > 0
    assert isinstance(vector[0], bytes)
def test_load_random_atomic_table():
    """Tables without subject column were split into single columns

    A random atomic table is redrawn until its first element is a list.
    The test then checks that the table has exactly two elements, that
    both have the same length, and that the returned ID equals the one
    ``generate_atomic_table_id`` computes for the table.
    """
    while True:
        (atomic_table_id, atomic_table) = load_random_atomic_table()
        if isinstance(atomic_table[0], list):
            break

    # Check the shape first: indexing ``atomic_table[1]`` on a wrongly
    # shaped table would raise IndexError instead of failing an assertion.
    assert len(atomic_table) == 2
    assert len(atomic_table[0]) == len(atomic_table[1])

    expected_id = generate_atomic_table_id(atomic_table)
    assert atomic_table_id == expected_id
def test_get_hash_values():
    """Check that ``get_hash_values`` returns 128 ``numpy.uint64`` values per mode.

    A random atomic table is redrawn until its first element is a list.
    Hash values are computed with the "simple" and the "lemmatize"
    vectorization types. Each result must have length 128, and its
    first entry must be a ``numpy.uint64``.
    """
    _, table = load_random_atomic_table()
    while not isinstance(table[0], list):
        # Keep drawing until the table's first element is a list.
        _, table = load_random_atomic_table()

    simple_hashes = get_hash_values(table, vectorization_type="simple")
    lemmatized_hashes = get_hash_values(table, vectorization_type="lemmatize")
    results = (simple_hashes, lemmatized_hashes)

    # Lengths are verified for both results before any element is inspected.
    for hash_values in results:
        assert len(hash_values) == 128
    for hash_values in results:
        assert isinstance(hash_values[0], numpy.uint64)
def test_lemmatize_table():
    """Smoke test: ``lemmatize_table`` runs on a random atomic table.

    A random atomic table is redrawn until its first element is a list.
    The return value of ``lemmatize_table`` is not inspected; the test
    only fails if the call raises.
    """
    _, table = load_random_atomic_table()
    while not isinstance(table[0], list):
        # Keep drawing until the table's first element is a list.
        _, table = load_random_atomic_table()

    lemmatize_table(table)