def find_match_pair(L1, L2, L1_translation, L2_translation, Greek_search_dict,
                    Latin_text, Latin_word_num, Greek_text, Greek_word_num,
                    max_distance_greek=None):
    """Return the best-scoring cross-language match for a pair of Latin words.

    Every combination of a position from the first Greek translation array
    and a position from the second one is scored with
    ``calc_match_pair_score``; the highest-scoring combination is wrapped in
    an ``xling_match`` and returned.

    Args:
        L1, L2: Latin word records; their ``pos`` and ``occurences``
            attributes are read.
        L1_translation, L2_translation: Passed straight to ``get_GreekPair``
            together with ``Greek_search_dict``.
        Greek_search_dict: Lookup structure handed to ``get_GreekPair``.
        Latin_text: Forwarded unchanged to ``xling_match``.
        Latin_word_num: Forwarded unchanged to ``calc_match_pair_score``.
        Greek_text: Forwarded unchanged to ``xling_match``.
        Greek_word_num: Forwarded unchanged to ``calc_match_pair_score``.
        max_distance_greek: Optional upper bound (inclusive) on
            ``abs(G2_pos - G1_pos)``. ``None`` disables the constraint.

    Returns:
        The ``xling_match`` built for the highest-scoring position pair, or
        ``None`` when ``get_GreekPair`` yields ``None`` for either word or
        when no pair satisfies the distance constraint. On equal scores the
        first pair encountered is kept.
    """
    # Each array presumably lists the positions at which a translation of the
    # corresponding Latin word occurs in the Greek text (verify against
    # get_GreekPair).
    G1_translation_array, G2_translation_array = get_GreekPair(
        L1_translation, L2_translation, Greek_search_dict)

    # No candidates for at least one of the two words: nothing to match.
    if G1_translation_array is None or G2_translation_array is None:
        return None

    # The occurrence counts do not change while iterating, so compute once.
    G1_count = len(G1_translation_array)
    G2_count = len(G2_translation_array)

    maxScore = None
    bestMatch = None

    # Try every pairing of a G1 position with a G2 position.
    for G1_pos in G1_translation_array:
        for G2_pos in G2_translation_array:
            # Skip pairs whose Greek words lie too far apart.
            within_range = (max_distance_greek is None
                            or abs(G2_pos - G1_pos) <= max_distance_greek)
            if not within_range:
                continue

            currScore = calc_match_pair_score(
                L1.pos, L1.occurences, L2.pos, L2.occurences, Latin_word_num,
                G1_pos, G1_count, G2_pos, G2_count, Greek_word_num)

            # Strict comparison: an equal score never displaces the
            # earlier best match.
            if maxScore is None or currScore > maxScore:
                maxScore = currScore
                bestMatch = xling_match(L1.pos, L2.pos, Latin_text,
                                        G1_pos, G2_pos, Greek_text, maxScore)

    # bestMatch is still None when no pair passed the distance filter.
    return bestMatch
def calc_match_phrase_score(search, translated_indices, scoreKeeper,
                            Greek_word_num, Greek_search_dict, Greek_text):
    """Score a translated phrase and record the match in ``scoreKeeper``.

    The score is ``(f_l + f_g) / (d_l + d_g)`` where

    * ``f_l`` is fixed at 1 (all Latin words are contained in the search
      phrase),
    * ``f_g`` is the sum, over every index in ``translated_indices``, of
      ``Greek_word_num`` divided by the number of entries
      ``Greek_search_dict`` holds for the Greek word at that index,
    * ``d_l`` is ``search.search_len``,
    * ``d_g`` is the distance between the first and last translated index,
      with a minimum of 1.

    Args:
        search: Search phrase object; ``search_len`` and ``text`` are read.
        translated_indices: Indices into ``Greek_text`` of the matched words.
        scoreKeeper: Collector whose ``add_newMatch`` receives the match.
        Greek_word_num: Numerator of each per-word frequency term.
        Greek_search_dict: Maps a Greek word to a sized collection
            (presumably its occurrence positions; verify against the
            code that builds it).
        Greek_text: Sequence of Greek words indexed by position.

    Returns:
        None. When the accumulated ``f_g`` is not positive (for example when
        ``translated_indices`` is empty) nothing is recorded.
    """
    # Accumulate the term across all matched words. The previous
    # ``f_g = + float(...)`` re-assigned instead of adding, so only the last
    # word contributed to the score.
    f_g = sum(
        Greek_word_num / float(len(Greek_search_dict[Greek_text[word_index]]))
        for word_index in translated_indices
    )
    if f_g <= 0:
        return

    first_index = translated_indices[0]
    last_index = translated_indices[-1]

    # Length is the position of the last word minus that of the first.
    d_g = abs(last_index - first_index)
    # Several words mapping to the same position would give a zero length;
    # clamp to 1 so such a match is treated as one word long.
    if d_g == 0:
        d_g = 1

    # f_l is 1 since all the Latin words we have are in the search phrase.
    f_l = 1
    # d_l is the length of the search phrase.
    d_l = search.search_len

    score = (f_l + f_g) / (d_l + d_g)
    curr_match = xling_match(0, search.search_len - 1, search.text,
                             first_index, last_index, Greek_text, score)
    scoreKeeper.add_newMatch(curr_match)
    return