#Read optional params lp_dir = args.d min_fms = float(args.min_fms) min_len = int(args.min_len) max_len = int(args.max_len) if args.max_len else max(len(s_sentence.split()), len(s1_sentence.split())) #Calculate FMS between S and S1. fms = FMS(s_sentence, s1_sentence).calculate() #Exit if low FMS. assertion(fms >= min_fms, "Sentences have low fuzzy match score of %.02f." %fms) #Get A set phrase_extractor = PhraseExtractor(s_sentence, s1_sentence, min_len, max_len) a_set = phrase_extractor.extract_pairs() #Initiate and check Apertium apertium = Apertium(lps[0], lps[1]) (out, err) = apertium.check_installations(lp_dir) assertion(out, err) # Prepare to Generate D set. S = s_sentence.split() S1 = s1_sentence.split() src = "" src1 = "" for a,b,c,d in a_set:
def _do_edit_distace_alignment(self, min_len, max_len):
    """Run the edit-distance alignment and store its results on the instance.

    A ``PhraseExtractor`` is built from ``self.s_sentence`` and
    ``self.s1_sentence``; ``min_len`` and ``max_len`` are passed straight
    through to it.

    Sets:
        self.phrases: whatever ``extract_pairs()`` returns.
        self.src_mismatches, self.tgt_mismatches: the two values returned
            by ``find_non_alignments()``, in that order.
    """
    extractor = PhraseExtractor(
        self.s_sentence, self.s1_sentence, min_len, max_len
    )

    # extract_pairs() is called before find_non_alignments(), as in the
    # original; keep that order in case the extractor carries state.
    self.phrases = extractor.extract_pairs()

    non_alignments = extractor.find_non_alignments()
    self.src_mismatches, self.tgt_mismatches = non_alignments