Esempio n. 1
0
def test_iterator():
    """Test the iterator of TextDirectory.

    Loads the test data directory, collects every item produced by iterating
    over the TextDirectory, and checks that ten items are returned and that
    the resolved path of the first one contains ``Text_``.
    """
    td = TextDirectory(directory='textdirectory/data/testdata/')
    td.load_files()
    files = list(td)
    assert len(files) == 10
    # Resolve once and attach the path as the assertion message, so a failure
    # reports the offending value without needing a debug print.
    first_path = files[0]['path'].resolve()
    assert 'Text_' in str(first_path), first_path
Esempio n. 2
0
def test_transformation_expand_contrations():
    """Test the expand English contractions transformation.

    Stages ``transformation_expand_english_contractions`` and checks that two
    sentences in their expanded form appear in the aggregated text.
    """
    td = TextDirectory(directory='textdirectory/data/testdata/')
    td.load_files(True, 'txt')
    td.stage_transformation(['transformation_expand_english_contractions'])
    # Aggregate once and reuse the result for both checks rather than
    # running the aggregation separately for each assertion.
    aggregated = td.aggregate_to_memory()
    assert 'She is the one who flew to Mars.' in aggregated
    assert 'I will finish the spaceship in time.' in aggregated
Esempio n. 3
0
def test_print_pipeline(capsys):
    """Check that print_pipeline reports an applied filter.

    After ``filter_by_chars_outliers`` has been called, the captured standard
    output of ``print_pipeline`` must mention that filter by name.
    """
    test_dir = 'textdirectory/data/testdata/'
    directory = TextDirectory(directory=test_dir)
    directory.load_files(True, 'txt')
    directory.filter_by_chars_outliers()
    directory.print_pipeline()
    # Only standard output is inspected; captured stderr is not used.
    captured = capsys.readouterr()
    assert 'filter_by_chars_outliers' in captured.out
Esempio n. 4
0
def test_transformation_test_arguments():
    """Check that arguments can be passed along with a transformation.

    The staged list starts with the transformation name followed by its
    arguments; afterwards ``dolor`` must not occur in the aggregated text.
    """
    directory = TextDirectory(directory='textdirectory/data/testdata/')
    directory.load_files(True, 'txt')
    stopword_transformation = [
        'transformation_remove_stopwords',
        'internal',
        'en',
        'en_core_web_sm',
        'dolor,dolore,dolores',
    ]
    directory.stage_transformation(stopword_transformation)
    assert 'dolor' not in directory.aggregate_to_memory()
Esempio n. 5
0
def test_tabulation(capsys):
    """Check that print_aggregation writes output containing ``path``."""
    test_dir = 'textdirectory/data/testdata/'
    directory = TextDirectory(directory=test_dir)
    directory.load_files(True, 'txt')
    directory.print_aggregation()
    # Only standard output is inspected; captured stderr is not used.
    captured = capsys.readouterr()
    assert 'path' in captured.out
Esempio n. 6
0
def test_filter_by_similar_documents():
    """Check the similarity filter against a fixed reference file.

    With ``Text_2_B.txt`` as the reference and a threshold of 0.7, two
    entries are expected to remain in the aggregation.
    """
    reference = 'textdirectory/data/testdata/level_2/Text_2_B.txt'
    directory = TextDirectory(directory='textdirectory/data/testdata/')
    directory.load_files(True, 'txt')
    directory.filter_by_similar_documents(
        reference_file=reference,
        threshold=0.7,
    )
    remaining = len(directory.aggregation)
    assert remaining == 2
Esempio n. 7
0
def test_transformation_uppercase():
    """Check that the uppercase transformation yields upper-case text."""
    test_dir = 'textdirectory/data/testdata/'
    directory = TextDirectory(directory=test_dir)
    directory.load_files(True, 'txt')
    directory.stage_transformation(['transformation_uppercase'])
    aggregated = directory.aggregate_to_memory()
    assert aggregated.isupper()
Esempio n. 8
0
def test_transformation_remove_nl():
    """Check that no newline remains after the remove_nl transformation."""
    test_dir = 'textdirectory/data/testdata/'
    directory = TextDirectory(directory=test_dir)
    directory.load_files(True, 'txt')
    directory.stage_transformation(['transformation_remove_nl'])
    aggregated = directory.aggregate_to_memory()
    assert '\n' not in aggregated
Esempio n. 9
0
def test_filter_by_min_filesize():
    """Check the minimum file size filter.

    With ``min_kb=2`` exactly one entry is expected to remain in the
    aggregation.
    """
    test_dir = 'textdirectory/data/testdata/'
    directory = TextDirectory(directory=test_dir)
    directory.load_files(True, 'txt')
    directory.filter_by_min_filesize(min_kb=2)
    remaining = len(directory.aggregation)
    assert remaining == 1
Esempio n. 10
0
def test_filter_by_chars_outliers():
    """Check the character-count outlier filter.

    Called with an argument of 1, the filter is expected to leave nine
    entries in the aggregation.
    """
    test_dir = 'textdirectory/data/testdata/'
    directory = TextDirectory(directory=test_dir)
    directory.load_files(True, 'txt')
    directory.filter_by_chars_outliers(1)
    remaining = len(directory.aggregation)
    assert remaining == 9
Esempio n. 11
0
def test_simpple_aggregations():
    """Check the plain aggregation with no filters or transformations staged.

    The aggregated result for the test data is expected to have a length of
    4179.
    """
    test_dir = 'textdirectory/data/testdata/'
    directory = TextDirectory(directory=test_dir)
    directory.load_files(True, 'txt')
    aggregated = directory.aggregate_to_memory()
    assert len(aggregated) == 4179
Esempio n. 12
0
def test_transform_to_memory():
    """Check that transform_to_memory fills in ``transformed_text``.

    After the in-memory transformation, the first entry returned by
    ``get_aggregation`` must carry a non-empty ``transformed_text`` value.
    """
    test_dir = 'textdirectory/data/testdata/'
    directory = TextDirectory(directory=test_dir)
    directory.load_files(True, 'txt')
    directory.transform_to_memory()
    entries = list(directory.get_aggregation())
    first_entry = entries[0]
    assert len(first_entry['transformed_text']) > 0
Esempio n. 13
0
def test_transformation_lemmatize():
    """Check the lemmatize transformation.

    The aggregated text must contain the phrase ``language be complicate``.
    """
    test_dir = 'textdirectory/data/testdata/'
    directory = TextDirectory(directory=test_dir)
    directory.load_files(True, 'txt')
    directory.stage_transformation(['transformation_lemmatize'])
    aggregated = directory.aggregate_to_memory()
    assert 'language be complicate' in aggregated
Esempio n. 14
0
def test_transformation_postag():
    """Check the postag transformation.

    The aggregated text must contain the string ``NN``.
    """
    test_dir = 'textdirectory/data/testdata/'
    directory = TextDirectory(directory=test_dir)
    directory.load_files(True, 'txt')
    directory.stage_transformation(['transformation_postag'])
    aggregated = directory.aggregate_to_memory()
    assert 'NN' in aggregated