def test_Parser_tokenize_only_words():
    """Check that ``tokenize_only_words`` yields only word tokens.

    The expected tokens show hyphens, commas, periods, exclamation marks and
    whitespace acting as separators, while the underscore in ``miles_to``
    stays inside the word and the original letter casing is kept.
    """
    sentence = "Eighty-seven miles_to go, yet. OnWard! No.Stopping.here"
    word_tokens = [
        'Eighty',
        'seven',
        'miles_to',
        'go',
        'yet',
        'OnWard',
        'No',
        'Stopping',
        'here',
    ]

    # Start from an explicitly empty data list, then tokenize the sentence.
    tokenizer = Parser([])
    tokenizer.tokenize_only_words(sentence)

    produced = tokenizer.get_parsed_data()
    assert produced == word_tokens
def test_Parser_tokenize_words_and_characters():
    """Check that ``tokenize_words_and_characters`` keeps words and symbols.

    The input is a small Java snippet containing newlines. The expected
    tokens contain identifiers as well as punctuation (``@``, ``(``, ``,``,
    ``)``, ``{``), with ``==`` expected as a single token and no whitespace
    or newline tokens.
    """
    java_snippet = "@Override \n public boolean makeChange(BasePanel panel,BibDatabase secondary, NamedCompound undoEdit) { \n if (onDisk == null) {"
    wanted_tokens = [
        '@', 'Override',
        'public', 'boolean', 'makeChange',
        '(', 'BasePanel', 'panel', ',',
        'BibDatabase', 'secondary', ',',
        'NamedCompound', 'undoEdit', ')', '{',
        'if', '(', 'onDisk', '==', 'null', ')', '{',
    ]

    tokenizer = Parser()
    tokenizer.tokenize_words_and_characters(java_snippet)

    produced = tokenizer.get_parsed_data()
    assert produced == wanted_tokens
def test_Parser_tokenize_everything_from_parsed_data():
    """Check tokenization of data that is already stored in the parser.

    The parser is constructed with three raw text fragments of a Java
    snippet; after ``tokenize_everything_from_parsed_data`` the parsed data
    is expected to be one flat list of word and symbol tokens covering all
    fragments in order.
    """
    raw_fragments = [
        "@Override \n public boolean makeChange(BasePanel ",
        " panel,BibDatabase secondary, NamedCompound undoEdit) { \n ",
        "if (onDisk == null) {",
    ]
    flat_tokens = [
        '@', 'Override',
        'public', 'boolean', 'makeChange',
        '(', 'BasePanel', 'panel', ',',
        'BibDatabase', 'secondary', ',',
        'NamedCompound', 'undoEdit', ')', '{',
        'if', '(', 'onDisk', '==', 'null', ')', '{',
    ]

    # The fragments are handed over through the constructor, not set_data.
    tokenizer = Parser(raw_fragments)
    tokenizer.tokenize_everything_from_parsed_data()

    produced = tokenizer.get_parsed_data()
    assert produced == flat_tokens
def test_Parser_lower_case_parsed_data():
    """Check that ``lower_case_parsed_data`` lower-cases every stored item.

    Mixed-case identifiers are expected fully lower-cased; items that are
    already lower case (``alltogether``, ``weather``) are expected unchanged.
    """
    mixed_case_items = [
        'EntryChange',
        'LogFactory',
        'isModifiedLocally',
        'memEntry',
        'getFieldNames',
        'alltogether',
        'weather',
        'Logger',
        'JComponent',
    ]
    lowered_items = [
        'entrychange',
        'logfactory',
        'ismodifiedlocally',
        'mementry',
        'getfieldnames',
        'alltogether',
        'weather',
        'logger',
        'jcomponent',
    ]

    subject = Parser()
    subject.set_data(mixed_case_items)
    subject.lower_case_parsed_data()

    produced = subject.get_parsed_data()
    assert produced == lowered_items
def test_Parser_stem_words_from_parsed_data():
    """Check that ``stem_words_from_parsed_data`` stems each stored word.

    Variants such as ``pythoning``, ``pythoners`` and ``pythoned`` are all
    expected to reduce to ``python``; the ``.`` tokens are expected to pass
    through unchanged.
    """
    unstemmed_words = [
        'It', 'is', 'important', 'to', 'by', 'very', 'pythonly',
        'while', 'you', 'are', 'pythoning', 'with', 'python', '.',
        'All', 'pythoners', 'have', 'pythoned', 'poorly',
        'at', 'least', 'once', '.',
    ]
    # NOTE(review): 'It' is expected to keep its capital letter while 'All'
    # is expected as 'all' -- confirm this asymmetry is intended.
    stemmed_words = [
        'It', 'is', 'import', 'to', 'by', 'veri', 'pythonli',
        'while', 'you', 'are', 'python', 'with', 'python', '.',
        'all', 'python', 'have', 'python', 'poorli',
        'at', 'least', 'onc', '.',
    ]

    subject = Parser()
    subject.set_data(unstemmed_words)
    subject.stem_words_from_parsed_data()

    produced = subject.get_parsed_data()
    assert produced == stemmed_words
def test_Parser_remove_stopwords_from_parsed_data():
    """Check that ``remove_stopwords_from_parsed_data`` drops stopwords.

    From the input, ``if``, ``its``, ``at``, ``how`` and ``will`` are
    expected to be removed; the remaining items are expected to keep their
    original order.
    """
    items_with_stopwords = [
        'import',
        'sf.sp',
        'program',
        'package',
        'if',
        'its',
        'true',
        'return',
        'falsehopes',
        'at',
        'how',
        'will',
    ]
    items_without_stopwords = [
        'import',
        'sf.sp',
        'program',
        'package',
        'true',
        'return',
        'falsehopes',
    ]

    subject = Parser()
    subject.set_data(items_with_stopwords)
    subject.remove_stopwords_from_parsed_data()

    produced = subject.get_parsed_data()
    assert produced == items_without_stopwords
def test_Parser_remove_java_keywords_from_parsed_data():
    """Check that ``remove_java_keywords_from_parsed_data`` drops keywords.

    From the input, ``import``, ``package``, ``if``, ``true`` and ``return``
    are expected to be removed, while ``falsehopes`` (which merely starts
    with ``false``) and the other items are expected to remain in order.
    """
    items_with_keywords = [
        "import",
        "sf.sp",
        "program",
        "package",
        "if",
        "its",
        "true",
        "return",
        "falsehopes",
    ]
    items_without_keywords = [
        "sf.sp",
        "program",
        "its",
        "falsehopes",
    ]

    subject = Parser()
    subject.set_data(items_with_keywords)
    subject.remove_java_keywords_from_parsed_data()

    produced = subject.get_parsed_data()
    assert produced == items_without_keywords
def test_Parser_separate_compound_strings_from_parsed_data():
    """Check ``separate_compound_words_from_parsed_data`` on identifiers.

    Camel-case identifiers such as ``isModifiedLocally`` are expected to be
    split into their parts (``is``, ``Modified``, ``Locally``) with casing
    preserved. ``alltogether``, ``weather``, ``Logger``, ``JComponent`` and
    ``POISX`` are expected to stay as single items.
    """
    compound_items = [
        'EntryChange',
        'LogFactory',
        'isModifiedLocally',
        'memEntry',
        'getFieldNames',
        'alltogether',
        'weather',
        'Logger',
        'JComponent',
        'POISX',
    ]
    separated_items = [
        'Entry', 'Change',
        'Log', 'Factory',
        'is', 'Modified', 'Locally',
        'mem', 'Entry',
        'get', 'Field', 'Names',
        'alltogether',
        'weather',
        'Logger',
        'JComponent',
        'POISX',
    ]

    subject = Parser()
    subject.set_data(compound_items)
    subject.separate_compound_words_from_parsed_data()

    produced = subject.get_parsed_data()
    assert produced == separated_items
def test_Parser_remove_numeric_items_from_parsed_data():
    """Check that ``remove_numeric_items_from_parsed_data`` drops numbers.

    The purely numeric items ``"23"`` and ``"1"`` are expected to be
    removed; items that only contain digits alongside letters
    (``variable3``, ``num14``) and punctuation items are expected to stay.
    """
    items_with_numbers = [
        "23",
        "import",
        "sp.sf.collab",
        "variable3",
        ";",
        "num14",
        "1",
        ".",
    ]
    items_without_numbers = [
        "import",
        "sp.sf.collab",
        "variable3",
        ";",
        "num14",
        ".",
    ]

    subject = Parser()
    subject.set_data(items_with_numbers)
    subject.remove_numeric_items_from_parsed_data()

    produced = subject.get_parsed_data()
    assert produced == items_without_numbers
def test_Parser_single_characters_from_parsed_data():
    """Check ``remove_single_characters_from_parsed_data``.

    Every one-character item in the input -- the punctuation ``.``, ``;``,
    ``(``, ``)``, ``}`` and the single letter ``c`` -- is expected to be
    removed, leaving only the longer words in their original order.
    """
    items_with_single_chars = [
        "package",
        "net",
        ".",
        "collab",
        ";",
        "function",
        "(",
        "c",
        ")",
        "}",
    ]
    multi_char_items = [
        "package",
        "net",
        "collab",
        "function",
    ]

    subject = Parser()
    subject.set_data(items_with_single_chars)
    subject.remove_single_characters_from_parsed_data()

    produced = subject.get_parsed_data()
    assert produced == multi_char_items