Example #1
0
def test_Parser_tokenize_only_words():
    """Check that tokenize_only_words reduces a sentence to bare words.

    In the expected tokens the hyphen, comma, full stops and exclamation
    mark all act as separators, while the underscore in 'miles_to' is
    kept inside the word.
    """
    sentence = "Eighty-seven miles_to go, yet.  OnWard! No.Stopping.here"

    parser = Parser([])
    parser.tokenize_only_words(sentence)
    tokens = parser.get_parsed_data()

    assert tokens == [
        'Eighty',
        'seven',
        'miles_to',
        'go',
        'yet',
        'OnWard',
        'No',
        'Stopping',
        'here',
    ]
Example #2
0
def test_Parser_tokenize_words_and_characters():
    """Check that tokenize_words_and_characters keeps symbols as tokens.

    The expected output lists identifiers and punctuation ('@', '(', ',',
    ')', '{') as separate tokens, keeps '==' together as one token, and
    contains no whitespace or newline entries.
    """
    java_source = "@Override \n public boolean makeChange(BasePanel panel,BibDatabase secondary, NamedCompound undoEdit) { \n if (onDisk == null) {"

    parser = Parser()
    parser.tokenize_words_and_characters(java_source)
    tokens = parser.get_parsed_data()

    assert tokens == [
        '@', 'Override',
        'public', 'boolean', 'makeChange',
        '(', 'BasePanel', 'panel',
        ',', 'BibDatabase', 'secondary',
        ',', 'NamedCompound', 'undoEdit', ')',
        '{',
        'if', '(', 'onDisk', '==', 'null', ')',
        '{',
    ]
Example #3
0
def test_Parser_tokenize_everything_from_parsed_data():
    """Check tokenizing of data that is already held by the parser.

    The parser is constructed with three text fragments; after
    tokenize_everything_from_parsed_data the parsed data is expected to
    be one flat list of word and symbol tokens spanning all fragments.
    """
    fragments = [
        "@Override \n public boolean makeChange(BasePanel ",
        " panel,BibDatabase secondary, NamedCompound undoEdit) { \n ",
        "if (onDisk == null) {",
    ]

    parser = Parser(fragments)
    parser.tokenize_everything_from_parsed_data()
    tokens = parser.get_parsed_data()

    assert tokens == [
        '@', 'Override',
        'public', 'boolean', 'makeChange',
        '(', 'BasePanel', 'panel',
        ',', 'BibDatabase', 'secondary',
        ',', 'NamedCompound', 'undoEdit', ')',
        '{',
        'if', '(', 'onDisk', '==', 'null', ')',
        '{',
    ]
Example #4
0
def test_Parser_lower_case_parsed_data():
    """Check that lower_case_parsed_data lower-cases every stored item.

    Each row pairs an input item with the value expected at the same
    position after the call; items that are already lower case are
    expected to come back unchanged.
    """
    # (item given to the parser, item expected back)
    cases = [
        ('EntryChange', 'entrychange'),
        ('LogFactory', 'logfactory'),
        ('isModifiedLocally', 'ismodifiedlocally'),
        ('memEntry', 'mementry'),
        ('getFieldNames', 'getfieldnames'),
        ('alltogether', 'alltogether'),
        ('weather', 'weather'),
        ('Logger', 'logger'),
        ('JComponent', 'jcomponent'),
    ]
    mixed_case_items = [original for original, _ in cases]
    lowered_items = [lowered for _, lowered in cases]

    parser = Parser()
    parser.set_data(mixed_case_items)
    parser.lower_case_parsed_data()

    assert parser.get_parsed_data() == lowered_items
Example #5
0
def test_Parser_stem_words_from_parsed_data():
    """Check that stem_words_from_parsed_data stems each stored item.

    Each row pairs an input token with the stem expected at the same
    position. Note the expectations as written: 'It' is expected to stay
    capitalised while 'All' is expected to come back as 'all', and the
    '.' tokens are expected unchanged.
    """
    # (token given to the parser, stem expected back)
    cases = [
        ('It', 'It'),
        ('is', 'is'),
        ('important', 'import'),
        ('to', 'to'),
        ('by', 'by'),
        ('very', 'veri'),
        ('pythonly', 'pythonli'),
        ('while', 'while'),
        ('you', 'you'),
        ('are', 'are'),
        ('pythoning', 'python'),
        ('with', 'with'),
        ('python', 'python'),
        ('.', '.'),
        ('All', 'all'),
        ('pythoners', 'python'),
        ('have', 'have'),
        ('pythoned', 'python'),
        ('poorly', 'poorli'),
        ('at', 'at'),
        ('least', 'least'),
        ('once', 'onc'),
        ('.', '.'),
    ]
    words = [word for word, _ in cases]
    stems = [stem for _, stem in cases]

    parser = Parser()
    parser.set_data(words)
    parser.stem_words_from_parsed_data()

    assert parser.get_parsed_data() == stems
Example #6
0
def test_Parser_remove_stopwords_from_parsed_data():
    """Check that remove_stopwords_from_parsed_data drops stop words.

    Of the items supplied, 'if', 'its', 'at', 'how' and 'will' are
    expected to be removed; the remaining items keep their order.
    """
    tokens = [
        'import',
        'sf.sp',
        'program',
        'package',
        'if',
        'its',
        'true',
        'return',
        'falsehopes',
        'at',
        'how',
        'will',
    ]

    parser = Parser()
    parser.set_data(tokens)
    parser.remove_stopwords_from_parsed_data()
    remaining = parser.get_parsed_data()

    assert remaining == [
        'import',
        'sf.sp',
        'program',
        'package',
        'true',
        'return',
        'falsehopes',
    ]
Example #7
0
def test_Parser_remove_java_keywords_from_parsed_data():
    """Check that remove_java_keywords_from_parsed_data drops keywords.

    Of the items supplied, "import", "package", "if", "true" and "return"
    are expected to be removed; the remaining items keep their order.
    """
    tokens = [
        "import", "sf.sp", "program", "package", "if",
        "its", "true", "return", "falsehopes",
    ]

    parser = Parser()
    parser.set_data(tokens)
    parser.remove_java_keywords_from_parsed_data()
    remaining = parser.get_parsed_data()

    assert remaining == ["sf.sp", "program", "its", "falsehopes"]
Example #8
0
def test_Parser_separate_compound_strings_from_parsed_data():
    """Check that separate_compound_words_from_parsed_data splits names.

    Each row pairs an input item with the pieces expected in its place.
    As written, the expectations leave 'alltogether', 'weather', 'Logger',
    'JComponent' and 'POISX' as single items.
    """
    # (item given to the parser, pieces expected in its place)
    cases = [
        ('EntryChange', ['Entry', 'Change']),
        ('LogFactory', ['Log', 'Factory']),
        ('isModifiedLocally', ['is', 'Modified', 'Locally']),
        ('memEntry', ['mem', 'Entry']),
        ('getFieldNames', ['get', 'Field', 'Names']),
        ('alltogether', ['alltogether']),
        ('weather', ['weather']),
        ('Logger', ['Logger']),
        ('JComponent', ['JComponent']),
        ('POISX', ['POISX']),
    ]
    compounds = [compound for compound, _ in cases]

    # Flatten the per-item pieces into the single list the parser should hold.
    expected_pieces = []
    for _, pieces in cases:
        expected_pieces.extend(pieces)

    parser = Parser()
    parser.set_data(compounds)
    parser.separate_compound_words_from_parsed_data()

    assert parser.get_parsed_data() == expected_pieces
Example #9
0
def test_Parser_remove_numeric_items_from_parsed_data():
    """Check that remove_numeric_items_from_parsed_data drops numbers.

    The all-digit items "23" and "1" are expected to be removed, while
    items that merely contain digits ("variable3", "num14") and the
    punctuation items are expected to remain in order.
    """
    tokens = ["23", "import", "sp.sf.collab", "variable3", ";", "num14", "1", "."]

    parser = Parser()
    parser.set_data(tokens)
    parser.remove_numeric_items_from_parsed_data()
    remaining = parser.get_parsed_data()

    assert remaining == ["import", "sp.sf.collab", "variable3", ";", "num14", "."]
Example #10
0
def test_Parser_single_characters_from_parsed_data():
    """Check that remove_single_characters_from_parsed_data drops them.

    Every one-character item in the input (".", ";", "(", "c", ")", "}")
    is expected to be removed; the longer items keep their order.
    """
    tokens = ["package", "net", ".", "collab", ";", "function", "(", "c", ")", "}"]

    parser = Parser()
    parser.set_data(tokens)
    parser.remove_single_characters_from_parsed_data()
    remaining = parser.get_parsed_data()

    assert remaining == ["package", "net", "collab", "function"]