def extract_matches(excelFile):
	"""Collect position-aligned Latin/Greek word pairs from a workbook.

	Only the first sheet is read. In every row, column 0 is taken as the
	Greek phrase and column 1 as the Latin phrase. Both phrases are split
	on single spaces, and a row contributes only when the two phrases yield
	the same number of tokens; tokens are then paired by position. Each
	token is passed through ``trn.normalize_word`` and a pair is kept only
	when neither normalized word is the empty string.

	Args:
		excelFile: Workbook location, handed to ``xlrd.open_workbook`` as is.

	Returns:
		A tuple ``(count, latin_words, greek_words)``. The two lists are
		parallel (index ``i`` of one corresponds to index ``i`` of the
		other) and ``count`` is their common length.
	"""
	latin_words = []
	greek_words = []

	with xlrd.open_workbook(excelFile) as wb:
		sh = wb.sheet_by_index(0)
		for row in range(sh.nrows):
			# NOTE(review): both cells are assumed to hold text; a numeric
			# cell would fail on .split() -- confirm against the input data.
			latin_value = sh.cell(row, 1).value
			greek_value = sh.cell(row, 0).value

			# Split on a single space (not arbitrary whitespace) so the token
			# counts compared below stay exactly as they were computed before.
			latin_tokens = latin_value.split(" ")
			greek_tokens = greek_value.split(" ")

			# Phrases with differing token counts cannot be aligned one to one.
			if len(latin_tokens) != len(greek_tokens):
				continue

			for latin_token, greek_token in zip(latin_tokens, greek_tokens):
				latin_word = trn.normalize_word(latin_token)
				greek_word = trn.normalize_word(greek_token)

				# Drop the pair if either side normalizes to nothing.
				if latin_word != "" and greek_word != "":
					latin_words.append(latin_word)
					greek_words.append(greek_word)

	# The lists grow in lockstep, so one length is the match count.
	return len(latin_words), latin_words, greek_words
# Example #2
# 0
	def __init__(self, word, pos=None, occurences=None, language=None):
			"""Set up a word entry that starts with no translations.

			Args:
				word: Stored unchanged as ``self.word``.
				pos: Stored as ``self.pos``; defaults to ``None``.
				occurences: Stored as ``self.occurences``; defaults to ``None``.
				language: When truthy, ``trn.get_lemma(word, language)`` is
					called and its result, passed through
					``trn.normalize_word``, becomes ``self.lemma``. When
					falsy, ``word`` itself is used as the lemma.
			"""
			self.word, self.pos, self.occurences = word, pos, occurences

			# Without a language no lemma lookup is attempted.
			self.lemma = (
				trn.normalize_word(trn.get_lemma(word, language))
				if language
				else word
			)

			self.translations = []