def _do_patching(self, t_app, tau, tau1, covered_pos, grounded_only): (a,b) = tau t_app = t_app.split() if(any(a<=c<=b for c in covered_pos)): return None, None seg = ' '.join(t_app[a:b+1]) seg_left = ' '.join(t_app[:a]) seg_right = ' '.join(t_app[b+1:]) if grounded_only: pe = PhraseExtractor(seg.lower(), tau1.lower()) aligns = pe.find_alignments() if aligns == []: return None, None p = min(a[0] for a in aligns) q = max(a[0] for a in aligns) r = min(a[1] for a in aligns) s = max(a[1] for a in aligns) if p == q or r ==s or p != 0 or q != (b-a) or r != 0 or s != len(tau1.split())-1 : return None, None seg = tau1.split() pe = PhraseExtractor(' '.join(t_app[a:b+1]).lower(), tau1.lower()) aligns = pe.find_alignments() tg_aligns = [x for (_, x) in aligns] cp = [a+i for i in range(len(seg)) if i not in tg_aligns] cp += covered_pos # print(cp) if seg_left != '': tau1 = tau1.lower() return (seg_left + ' ' + tau1 + ' ' + seg_right).strip(), cp
assertion(len(lps) == 2, "LP should be of type a-b, eg, 'en-eo'")

# Optional parameters. When no maximum length is supplied, fall back
# to the token count of the longer of the two sentences.
lp_dir = args.d
min_fms = float(args.min_fms)
min_len = int(args.min_len)
if args.max_len:
    max_len = int(args.max_len)
else:
    max_len = max(len(s_sentence.split()), len(s1_sentence.split()))

# Fuzzy match score between S and S1; checked against the minimum.
fms = FMS(s_sentence, s1_sentence).calculate()
assertion(fms >= min_fms, "Sentences have low fuzzy match score of %.02f." % fms)

# Build the A set from the phrase pairs of S and S1.
phrase_extractor = PhraseExtractor(s_sentence, s1_sentence, min_len, max_len)
a_set = phrase_extractor.extract_pairs()

# Set up Apertium for the language pair and check its installation.
apertium = Apertium(lps[0], lps[1])
out, err = apertium.check_installations(lp_dir)
assertion(out, err)

# State used when generating the D set.
S = s_sentence.split()
S1 = s1_sentence.split()
src = src1 = ""
def _do_edit_distace_alignment(self, min_len, max_len):
    """Run the phrase extractor over the two stored sentences.

    A PhraseExtractor is built from ``self.s_sentence`` and
    ``self.s1_sentence``, with ``min_len`` and ``max_len`` passed
    through. The result of ``extract_pairs()`` is stored in
    ``self.phrases``; the two values returned by
    ``find_non_alignments()`` are stored in ``self.src_mismatches``
    and ``self.tgt_mismatches``.
    """
    extractor = PhraseExtractor(
        self.s_sentence,
        self.s1_sentence,
        min_len,
        max_len,
    )
    self.phrases = extractor.extract_pairs()
    mismatches = extractor.find_non_alignments()
    self.src_mismatches, self.tgt_mismatches = mismatches