def extract_matches(excelFile):
    """Collect position-aligned word pairs from the first sheet of a workbook.

    Each row is read as a phrase pair: column 0 holds the Greek phrase and
    column 1 holds the Latin phrase.  Both phrases are split on single
    spaces and, only when they produce the same number of tokens, the tokens
    are paired by position.  Every token is passed through
    ``trn.normalize_word``; a pair is kept only if neither normalized word
    is the empty string.

    Args:
        excelFile: Workbook location, passed unchanged to
            ``xlrd.open_workbook``.

    Returns:
        A ``(count, latin_words, greek_words)`` tuple.  The two lists are
        parallel (``latin_words[i]`` was paired with ``greek_words[i]``) and
        ``count`` is their common length.
    """
    latin_words = []
    greek_words = []

    with xlrd.open_workbook(excelFile) as wb:
        sh = wb.sheet_by_index(0)
        for row in range(sh.nrows):
            # Split on a literal space (not arbitrary whitespace): repeated
            # spaces therefore produce empty tokens, which still count
            # toward the length comparison below.
            latin_phrase = sh.cell(row, 1).value.split(" ")
            greek_phrase = sh.cell(row, 0).value.split(" ")

            # Rows whose phrases differ in token count cannot be aligned
            # word-for-word, so they contribute nothing.
            if len(latin_phrase) != len(greek_phrase):
                continue

            for latin_token, greek_token in zip(latin_phrase, greek_phrase):
                new_latin_word = trn.normalize_word(latin_token)
                new_greek_word = trn.normalize_word(greek_token)
                # Drop the whole pair if either side normalizes to "", so
                # the two output lists stay index-aligned.
                if new_latin_word != "" and new_greek_word != "":
                    latin_words.append(new_latin_word)
                    greek_words.append(new_greek_word)

    return len(latin_words), latin_words, greek_words
def __init__(self, word, pos=None, occurences=None, language=None):
    """Initialise the entry from a word and optional metadata.

    Args:
        word: Stored as ``self.word``; also the input for the lemma.
        pos: Stored unchanged as ``self.pos``.
        occurences: Stored unchanged as ``self.occurences``.
        language: When truthy, the lemma is obtained from
            ``trn.get_lemma(word, language)`` and then passed through
            ``trn.normalize_word``.  When falsy, ``word`` itself is used as
            the lemma.
    """
    self.word = word
    self.pos = pos
    self.occurences = occurences

    if not language:
        # No language given: fall back to the word as its own lemma.
        self.lemma = word
    else:
        raw_lemma = trn.get_lemma(word, language)
        self.lemma = trn.normalize_word(raw_lemma)

    # Starts empty on every new instance.
    self.translations = []