Beispiel #1
0
def test_demeuk():
    testargs = [
        'demeuk', '-i', 'testdata/input1', '-o', 'testdata/output1', '-l',
        'testdata/log1'
    ]
    with patch.object(sys, 'argv', testargs):
        main()

    line_num_input1 = calculate_line_numbers('testdata/input1')
    line_num_output1 = calculate_line_numbers('testdata/output1')
    line_num_log1 = calculate_line_numbers('testdata/log1')

    assert line_num_log1 == 4
    assert line_num_output1 == 9
    assert line_num_input1 == (line_num_output1 + line_num_log1)
    with open('testdata/output1') as file:
        filecontent = file.read()
        assert 'Password123!@"\n' in filecontent
        assert 'ǓǝǪǼȧɠ\n' in filecontent
        assert 'ʄʛʨʾϑϡϣЄ\n' in filecontent
        assert 'ϽϾϿЀЁЂЃЄЅІЇЈ\n' in filecontent
        assert '做戏之说\n' in filecontent
        assert 'Hyggelig123åmøtedeg!\n' in filecontent
        assert 'бонусов$123\n' in filecontent
        assert '!!!ееместной%%@!\n' in filecontent
Beispiel #2
0
def test_check_hash():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input23',
        '-o',
        'testdata/output23',
        '-l',
        'testdata/log23',
        '--verbose',
        '--check-hash',
        '-c',
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    with open('testdata/output23') as f:
        filecontent = f.read()
        assert 'baabe00a81fc405af4ab9b0f99615498' not in filecontent
        assert '$h$7/uhfibmxg83yq6y1rh5y9wjee13kh.' not in filecontent
        assert '$6$/fasjdfsadj$safjasdfasjdfa' not in filecontent
        assert '$1$Tx6cx/cA$ouWREOn7' not in filecontent
        assert 'changeme!' in filecontent
        assert 'line5' not in filecontent
        assert '12345678' in filecontent
        assert 'aaaaaa' in filecontent
        assert '$aaa$test' in filecontent
        assert '$H$8abc' in filecontent
        assert '$2a$10$bcrypt' not in filecontent
        assert '$pizza$like' in filecontent
Beispiel #3
0
def test_delimiter():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input10',
        '-o',
        'testdata/output10',
        '-l',
        'testdata/log10',
        '--cut',
        '--delimiter',
        '/',
        '--cut-before',
        '--check-min-length',
        '1',
        '--check-max-length',
        '10',
        '--check-case',
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    line_num_output = calculate_line_numbers('testdata/output10')
    assert line_num_output == 1
    with open('testdata/output10') as f:
        filecontent = f.read()
        assert 'cijfer\n' in filecontent
        assert '3M\n' not in filecontent
        assert 'VERYVERYVERYVERYVERYVERYLONGLINE?\n' not in filecontent
Beispiel #4
0
def test_output_encoding():
    testargs = [
        'demeuk', '-i', 'testdata/input1', '-o', 'testdata/output1',
        '--output-encoding', 'C'
    ]
    with patch.object(sys, 'argv', testargs):
        with raises(UnicodeEncodeError):
            main()
Beispiel #5
0
def test_tabchar():
    testargs = ['demeuk', '-i', 'testdata/input4', '-o', 'testdata/output4']
    with patch.object(sys, 'argv', testargs):
        main()

    line_num_output1 = calculate_line_numbers('testdata/output4')
    assert line_num_output1 == 2
    with open('testdata/output4') as file:
        filecontent = file.read()
        assert 'line:entry\n' in filecontent
        assert 'line2:entry2\n' in filecontent
Beispiel #6
0
def test_multithread():
    testargs = [
        'demeuk', '-i', 'testdata/input2', '-o', 'testdata/output2', '-j', '3'
    ]
    with patch.object(sys, 'argv', testargs):
        main()

    line_num_input1 = calculate_line_numbers('testdata/input2')
    line_num_output1 = calculate_line_numbers('testdata/output2')

    assert line_num_output1 == 8
    assert line_num_input1 == line_num_output1
Beispiel #7
0
def test_fries():
    testargs = [
        'demeuk', '-i', 'testdata/input12', '-o', 'testdata/output12', '-l',
        'testdata/log12', '--no-mojibake'
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    with open('testdata/log12') as f:
        filecontent = f.read()
        assert 'West-Frysl' in filecontent
    with open('testdata/output12') as f:
        filecontent = f.read()
        assert 'West-Frysl‰n' not in filecontent
Beispiel #8
0
def test_input_encoding():
    testargs = [
        'demeuk', '-i', 'testdata/input9', '-o', 'testdata/output9',
        '--input-encoding', 'windows-1251,UTF-16'
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    line_num_output = calculate_line_numbers('testdata/output9')
    assert line_num_output == 2
    with open('testdata/output9') as f:
        filecontent = f.read()
        assert '16THEBEST!!!\n' in filecontent
        assert '!!!ееместной%%@!\n' in filecontent
Beispiel #9
0
def test_newline():
    testargs = ['demeuk', '-i', 'testdata/input3', '-o', 'testdata/output3']
    with patch.object(sys, 'argv', testargs):
        main()

    line_num_input1 = calculate_line_numbers('testdata/input3')
    line_num_output1 = calculate_line_numbers('testdata/output3')

    assert line_num_output1 == 8
    assert line_num_input1 == line_num_output1
    with open('testdata/output3') as file:
        filecontent = file.read()
        for x in range(7):
            assert f'line{x}\n' in filecontent
Beispiel #10
0
def test_coupe():
    testargs = [
        'demeuk', '-i', 'testdata/input7', '-o', 'testdata/output7', '-l',
        'testdata/log7'
    ]
    with patch.object(sys, 'argv', testargs):
        main()

    line_num_output = calculate_line_numbers('testdata/output7')
    assert line_num_output == 2
    with open('testdata/output7') as f:
        filecontent = f.read()
        assert 'coupÉ' in filecontent
        assert 'LANCIA AURELIA B20 COUPÉ GT\n' in filecontent
Beispiel #11
0
def test_googlengram():
    testargs = [
        'demeuk', '-i', 'testdata/input6', '-o', 'testdata/output6', '-g'
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    line_num_output = calculate_line_numbers('testdata/output6')
    assert line_num_output == 4
    with open('testdata/output6') as f:
        filecontent = f.read()
        assert 'I\'ain\n' in filecontent
        assert 'I\'Afrique occidental\n' in filecontent
        assert 'I\'Allemagne\n' in filecontent
        assert 'I\'ain a\n' in filecontent
Beispiel #12
0
def test_split():
    testargs = [
        'demeuk', '-i', 'testdata/input8', '-o', 'testdata/output8', '-c'
    ]
    with patch.object(sys, 'argv', testargs):
        main()

    line_num_output = calculate_line_numbers('testdata/output8')
    assert line_num_output == 4
    with open('testdata/output8') as f:
        filecontent = f.read()
        assert 'example.com' not in filecontent
        assert 'sub.example.com' not in filecontent
        assert 'example.guru' not in filecontent
        assert 'sub.test-example.com' not in filecontent
Beispiel #13
0
def test_split_email():
    testargs = [
        'demeuk', '-i', 'testdata/input5', '-o', 'testdata/output5',
        '--remove-email', '-c'
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    line_num_output = calculate_line_numbers('testdata/output5')
    assert line_num_output == 6
    with open('testdata/output5') as file:
        filecontent = file.read()
        assert 'line1\n' in filecontent
        assert '*****@*****.**' not in filecontent
        assert 'alcatel-sbell' not in filecontent
        assert '\nline4\n' in filecontent
        assert '\nline5\n' in filecontent
        assert '\nline6\n' in filecontent
Beispiel #14
0
def test_limit():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input19',
        '-o',
        'testdata/output19',
        '-l',
        'testdata/log19',
        '--limit',
        '5',
    ]
    with patch.object(sys, 'argv', testargs):
        main()

    line_num_output = calculate_line_numbers('testdata/output19')
    assert line_num_output == 5
Beispiel #15
0
def test_check_non_ascii():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input25',
        '-o',
        'testdata/output25',
        '-l',
        'testdata/log25',
        '--verbose',
        '--check-non-ascii',
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    with open('testdata/output25') as f:
        filecontent = f.read()
        assert 'laténight' not in filecontent
        assert 'thestrokes' in filecontent
Beispiel #16
0
def test_glob():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input*',
        '-o',
        'testdata/output27',
        '-l',
        'testdata/log27',
        '--verbose',
        '-c',
        '-d',
        ',;:',
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    with open('testdata/output27') as f:
        assert len(f.readlines()) == 115
Beispiel #17
0
def test_cut_fields_single():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input14',
        '-o',
        'testdata/output14',
        '-l',
        'testdata/log14',
        '-f',
        '5',
        '-c',
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    with open('testdata/output14') as f:
        filecontent = f.read()
        assert 'field5\n' in filecontent
        assert 'field4' not in filecontent
Beispiel #18
0
def test_verbose():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input18',
        '-o',
        'testdata/output18',
        '-l',
        'testdata/log18',
        '-f',
        '5-',
        '-c',
        '--verbose',
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    with open('testdata/log18') as f:
        filecontent = f.read()
        assert 'Clean_cut; ' in filecontent
Beispiel #19
0
def test_clean_add_umlaut():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input20',
        '-o',
        'testdata/output20',
        '-l',
        'testdata/log20',
        '--add-umlaut',
        '--verbose',
    ]
    with patch.object(sys, 'argv', testargs):
        main()

    with open('testdata/output20') as f:
        filecontent = f.read()
        assert 'Eselsbrücke' in filecontent
        assert 'Fremdschämen' in filecontent
        assert 'KÄSEHOCH' in filecontent
        assert 'KA"SEHOCH' in filecontent

    testargs = [
        'demeuk',
        '-i',
        'testdata/input20',
        '-o',
        'testdata/output20.2',
        '-l',
        'testdata/log20.2',
        '--umlaut',
        '--verbose',
    ]
    with patch.object(sys, 'argv', testargs):
        main()

    with open('testdata/output20.2') as f:
        filecontent = f.read()
        assert 'Eselsbrücke' in filecontent
        assert 'Fremdschämen' in filecontent
        assert 'KÄSEHOCH' in filecontent
        assert 'KA"SEHOCH' not in filecontent
Beispiel #20
0
def test_unhex():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input15',
        '-o',
        'testdata/output15',
        '-l',
        'testdata/log15',
        '--hex',
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    with open('testdata/output15') as f:
        filecontent = f.read()
        assert 'PEÑAROL\n' in filecontent
        assert 'QWERTYUIOPÅ\n' in filecontent
        assert 'Zsófi2000\n' in filecontent
        assert 'arañas\n' in filecontent
        assert '$HEX[' not in filecontent
Beispiel #21
0
def test_unhtml():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input16',
        '-o',
        'testdata/output16',
        '-l',
        'testdata/log16',
        '--html',
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    with open('testdata/output16') as f:
        filecontent = f.read()
        assert 'İSMAİL\n' in filecontent
        assert 'İSTANBUL\n' in filecontent
        assert 'şifreyok\n' in filecontent
        assert 'α\n' not in filecontent
        assert '>\n' in filecontent
Beispiel #22
0
def test_language_processing():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input11',
        '-o',
        'testdata/output11',
        '-l',
        'testdata/log11',
        '--cut',
        '--delimiter',
        '/',
        '--cut-before',
        '--check-min-length',
        '2',
        '--remove-punctuation',
        '--add-lower',
        '--add-latin-ligatures',
        '--add-split',
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    line_num_output = calculate_line_numbers('testdata/output11')
    assert line_num_output == 29
    with open('testdata/output11') as f:
        filecontent = f.read()
        assert 'cijfer\n' in filecontent
        assert 'cijfer\n' in filecontent
        assert '3M\n' in filecontent
        assert '3m\n' in filecontent
        assert '\ntest\n' in filecontent
        assert '3M-test\n' in filecontent
        assert 'St. Maarten\n' in filecontent
        assert 'St\n' in filecontent
        assert '\nMaarten\n' in filecontent
        assert 'Aai-Aai\n' in filecontent
        assert '3-hoekig\n' in filecontent
        assert '\nhoekig\n' in filecontent
        assert '3\n' not in filecontent
        assert 'Philipsburg.\n' not in filecontent
        assert 'Philipsburg\n' in filecontent
Beispiel #23
0
def test_clean_non_ascii():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input26',
        '-o',
        'testdata/output26',
        '-l',
        'testdata/log26',
        '--verbose',
        '--non-ascii',
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    with open('testdata/output26') as f:
        filecontent = f.read()

        assert 'polopaç' not in filecontent
        assert 'mündster' not in filecontent
        assert 'polopac' in filecontent
        assert 'mundster' in filecontent
Beispiel #24
0
def test_check_bug_comma_d():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input24',
        '-o',
        'testdata/output24',
        '-l',
        'testdata/log24',
        '--verbose',
        '-c',
        '-d',
        ',;:',
    ]
    with patch.object(sys, 'argv', testargs):
        main()
    with open('testdata/output24') as f:
        filecontent = f.read()
        assert 'line1' not in filecontent
        assert 'angus' in filecontent
        assert 'line2' not in filecontent
        assert 'snow' in filecontent
Beispiel #25
0
def test_check_email():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input22',
        '-o',
        'testdata/output22',
        '-l',
        'testdata/log22',
        '--verbose',
        '--check-email',
        '--remove-email',
    ]
    with patch.object(sys, 'argv', testargs):
        main()

    with open('testdata/output22') as f:
        filecontent = f.read()
        assert 'line1' in filecontent
        assert 'line2' not in filecontent
        assert 'line3' not in filecontent
        assert 'line4' not in filecontent
        assert 'line5\n' in filecontent
Beispiel #26
0
def test_multiple_delimiters():
    testargs = [
        'demeuk',
        '-i',
        'testdata/input21',
        '-o',
        'testdata/output21',
        '-l',
        'testdata/log20',
        '-c',
        '--verbose',
        '-d',
        ':,;,----',
    ]
    with patch.object(sys, 'argv', testargs):
        main()

    with open('testdata/output21') as f:
        filecontent = f.read()
        assert 'password\n' in filecontent
        assert 'password2\n' in filecontent
        assert 'password3\n' in filecontent
        assert 'user' not in filecontent