def search_by_phrase(input_phrase, language, transDict, Greek_word_num, Greek_search_dict, Greek_text, max_scoreboard_size, min_score):
    """Run one search for ``input_phrase`` and return its scoreboard.

    Returns ``ERROR`` when ``valid_search`` rejects the phrase. Otherwise
    returns a ``(scoreboard, translation_matrix)`` tuple: the scoreboard is
    created with ``max_scoreboard_size`` / ``min_score`` and handed to
    ``xls.try_all_search_combos`` together with the translation matrix.
    """
    # Guard clause: nothing to do for a phrase the validator rejects.
    if not valid_search(input_phrase):
        return ERROR

    # The matrix starts with a placeholder row that is dropped before returning.
    translation_rows = [[""]]

    # Lemma -> translations found so far; passed to every get_translation call.
    lemma_cache = {}

    board = scoreboard(max_scoreboard_size, min_score)
    phrase = search_phrase(input_phrase, language)

    # Look up the translations of each word in the phrase.
    for position in range(phrase.search_len):
        phrase.has_translation[position] = trn.get_translation(
            phrase.text[position], transDict, lemma_cache
        )

    xls.try_all_search_combos(
        phrase,
        board,
        Greek_word_num,
        Greek_search_dict,
        Greek_text,
        translation_rows,
    )

    # Strip the placeholder row added above.
    translation_rows.pop(0)
    return board, translation_rows
def main():
    """Run the interactive search prompt until the user quits.

    Loads the search data once through ``preprocessing()``, then loops:
    read a phrase from standard input, look up the translations of each of
    its words, run ``xls.try_all_search_combos`` and print the scoreboard.
    Phrases rejected by ``valid_search`` only print a hint.

    The loop exits with status 0 when the input is empty, consists solely of
    ``q`` characters and spaces, or is nothing but ``quit`` and spaces. It
    also exits with status 0 on Ctrl-C or end-of-input.
    """
    transDict, Greek_word_num, Greek_search_dict, Greek_text = preprocessing()

    # Save lemma to translations found. Created once, outside the loop, so the
    # same dict is passed to get_translation for every phrase entered.
    found_translist = {}

    try:
        while True:
            scoreKeeper = scoreboard(MAX_SCOREBOARD_SIZE, MIN_SCORE)
            input_phrase = input("Enter Search Phrase> ")

            # Quit when nothing is left after removing every "q" (or every
            # "quit") and every space. Plain str.replace is equivalent to the
            # regex substitution here because the patterns are literals.
            only_q = input_phrase.replace("q", "").replace(" ", "") == ""
            only_quit = input_phrase.replace("quit", "").replace(" ", "") == ""
            if only_q or only_quit:
                # sys.exit rather than the site-provided exit(): the latter is
                # meant for the interactive shell and is not always defined.
                sys.exit(0)

            if valid_search(input_phrase):
                search = search_phrase(input_phrase, "Latin")

                # Find all the translations of the given words.
                for i in range(search.search_len):
                    search.has_translation[i] = trn.get_translation(
                        search.text[i], transDict, found_translist
                    )

                xls.try_all_search_combos(
                    search, scoreKeeper, Greek_word_num, Greek_search_dict, Greek_text
                )
                print(scoreKeeper)
            else:
                print('Please enter a valid string\n')
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C, or stdin closed (Ctrl-D / exhausted pipe): leave quietly
        # instead of dumping a traceback.
        print('\nProgram Terminated\n')
        sys.exit(0)
def test_functions():
    """Run the hand-rolled unit checks and return the list of test records.

    Each check creates a ``test`` record, exercises one helper from
    ``XLingFunctions`` (``xls``) or ``translate`` (``trn``), and on any
    mismatch sets ``passed = False`` and appends a message to ``errors``.
    Later checks reuse fixtures built by earlier ones (``LA``, ``transDict``,
    ``L1``/``L2``, ``latin_text``, ``Greek_text``, ``Greek_search_dict``), so
    the order of the sections matters.

    Returns:
        list: one record per check, in execution order.
    """

    def record_failure(case, message):
        """Mark ``case`` as failed and remember why."""
        case.passed = False
        case.errors.append(message)

    print("Starting unit testing of simpleXLing.py")
    tests = []

    #====Build Search Dict====#
    # Attempts to build a search dictionary for a .txt file containing Latin
    # words (doesn't check the lemmatized text file).
    curr_test = test("Build latin search dictionary (XLingFunctions.py)")
    latin_filename = "./test_files/small_latin.txt"
    words_in_file = 663
    language = "Latin"
    lemmatized_version = False
    tests.append(test_build_search_dict(curr_test, latin_filename, words_in_file, language, lemmatized_version))

    # Attempts to build a search dictionary for a .txt file containing Greek
    # words (doesn't check the lemmatized text file).
    curr_test = test("Build greek search dictionary (XLingFunctions.py)")
    greek_filename = "./test_files/small_greek.txt"
    # Fixed: this used to assign a misspelled name, so the Greek check was
    # silently run with the Latin word count from the section above.
    words_in_file = 789
    language = "Greek"
    lemmatized_version = False
    tests.append(test_build_search_dict(curr_test, greek_filename, words_in_file, language, lemmatized_version))

    # Builds an arbitrary translation dictionary and attempts to find
    # translations for a valid word pair.
    curr_test = test("Get valid translations (translate.py)")
    LA = word("latina", 1, 3, None)
    LB = word("latinb", 2, 4, None)
    latin_a_translations = ["greek1", "greek2", "greek3"]
    latin_b_translations = ["greek4"]
    transDict = {
        "latina": latin_a_translations,
        "latinb": latin_b_translations,
    }
    result = trn.get_translation_pair(LA, LB, transDict)
    # This check treats -1 as "no translation pair found".
    if result == -1:
        record_failure(curr_test, "Translations for two valid dictionary entries were not found")
    if not (LA.translations == latin_a_translations):
        record_failure(curr_test, "Latina.translations doesn't match the actual translations")
    if not (LB.translations == latin_b_translations):
        record_failure(curr_test, "Latinb.translations doesn't match the actual translations")
    tests.append(curr_test)

    # Attempts to find a translation for an invalid word pair ("latinc" is
    # not a key of transDict).
    curr_test = test("Get invalid translations (translate.py)")
    LC = word("latinc", 2, 3, None)
    result = trn.get_translation_pair(LA, LC, transDict)
    if not (result == -1):
        # Fixed: the old message was copied from the valid-pair check and
        # described the opposite failure.
        record_failure(curr_test, "A translation pair was reported even though one word is not in the dictionary")
    if not (LA.translations == latin_a_translations):
        record_failure(curr_test, "Latin_a.translations doesn't match the actual translations")
    if LC.translations is not None:
        record_failure(curr_test, "Latin_c. shouldn't have any translations")
    tests.append(curr_test)

    # Search for a translation of a pair of Latin words in a Greek search
    # dictionary.
    curr_test = test("Get Greek translation pair (XLingFunctions.py)")
    L1_translation = "greek1"
    L2_translation = "greek2"
    G_search_dict = {'greek1': [1, 3], 'greek2': [2]}
    G1, G2 = xls.get_GreekPair(L1_translation, L2_translation, G_search_dict)
    if not (G1 == [1, 3]):
        record_failure(curr_test, "Translation array (array of indices in greek text where translation of latin word appear) is incorrect")
    if not (G2 == [2]):
        record_failure(curr_test, "Translation array (array of indices in greek text where translation of latin word appear) is incorrect")
    tests.append(curr_test)

    # Same lookup when one translation ("greek3") is missing from the search
    # dictionary: both halves of the pair are expected to come back as None.
    curr_test = test("Get Greek translation pair with incomplete search dict (XLingFunctions.py)")
    L3_translation = "greek3"
    G1, G3 = xls.get_GreekPair(L1_translation, L3_translation, G_search_dict)
    if G1 is not None:
        record_failure(curr_test, "Translation array (array of indices in greek text where translation of latin word appear) is incorrect")
    if G3 is not None:
        record_failure(curr_test, "Translation array (array of indices in greek text where translation of latin word appear) is incorrect")
    tests.append(curr_test)

    # Get Latin word stats.
    curr_test = test("Get Latin word stats (XLingFunctions.py)")
    latin_text = ['L1', 'L2', 'L1', 'L1', 'L2', 'L3', 'L4', 'L5', 'L5', 'L5']
    latin_search_dict = {'L1': [0, 2, 3], 'L2': [1, 4], 'L3': [5], 'L4': [6], 'L5': [7, 8, 9]}
    i = 3
    j = 4
    L1, L2 = xls.get_LatinWordStats(latin_text, latin_search_dict, i, j)
    if not (L1.word == "L1"):
        record_failure(curr_test, "Latin word object #1 corresponds to the wrong word (" + str(L1.word) + ")")
    if not (L1.pos == 3):
        record_failure(curr_test, "Latin word object #1 corresponds to the wrong position in text (" + str(L1.pos) + ")")
    if not (L1.occurences == 3):
        record_failure(curr_test, "Latin word object #1 should occur 3 times, only occurs" + str(L1.occurences) + " times")
    if not (L2.word == "L5"):
        record_failure(curr_test, "Latin word object #2 corresponds to the wrong word (" + str(L2.word) + ")")
    if not (L2.pos == 7):
        record_failure(curr_test, "Latin word object #2 corresponds to the wrong position in text (" + str(L2.pos) + ")")
    if not (L2.occurences == 3):
        record_failure(curr_test, "Latin word object #2 should occur 3 times, only occurs" + str(L2.occurences) + " times")
    tests.append(curr_test)

    # Find match pair test: three possible matches in the Greek text, and the
    # one with the two words side by side should win. Reuses L1, L2 and
    # latin_text from the section above.
    curr_test = test("Find match pair given three matches in the greek corpus (XLingFunctions.py")
    L1_translation = "g1"
    L2_translation = "g2"
    Greek_text = ["g1", "x", "x", "g2", "g1", "x", "g2", "x", "x", "x", "g1"]
    Greek_search_dict = {"g1": [0, 4, 10], "g2": [3, 6], "x": [1, 2, 5, 7, 8, 9]}
    bestMatch = xls.find_match_pair(
        L1, L2, L1_translation, L2_translation, Greek_search_dict,
        latin_text, len(latin_text), Greek_text, len(Greek_text), None,
    )
    if not bestMatch:
        record_failure(curr_test, "No match was found")
    elif bestMatch.G1_pos != 4 or bestMatch.G2_pos != 3:
        record_failure(curr_test, "The best match did not occur in the expected position ( G1 = " + str(bestMatch.G1_pos) + " G2 = " + str(bestMatch.G2_pos))
    tests.append(curr_test)

    # Ensures that try_all_search_combos produces all combos of position
    # indices. Reuses Greek_text and Greek_search_dict from the section above.
    curr_test = test("Try all search combos (finds best match in a greek text given a search prhase)")
    search = search_phrase("L1 L2 L3", "Latin")
    search.has_translation = [1, 1, 1]
    search.text[0].translations = ["g1", "g4", "g3"]
    search.text[1].translations = ["g6"]
    search.text[2].translations = ["g5", "g2"]
    score = scoreboard(1)
    xls.try_all_search_combos(search, score, len(Greek_text), Greek_search_dict, Greek_text)
    if not (score.matches[0].G1_pos == 4 and score.matches[0].G2_pos == 3):
        record_failure(curr_test, "The wrong top match was found for the search")
    tests.append(curr_test)

    return tests