# Numeric options taken from the parsed command line.
min_fms = float(args.min_fms)
min_len = int(args.min_len)

# Use the explicit max_len option when it is set (truthy); otherwise fall
# back to the word count of the longer of the two sentences.
if args.max_len:
    max_len = int(args.max_len)
else:
    max_len = max(len(s_sentence.split()), len(s1_sentence.split()))

# Calculate the fuzzy match score (FMS) between S and S1.
fms = FMS(s_sentence, s1_sentence).calculate()

# Check the score against the threshold. The original comment reads
# "Exit if low FMS", so assertion() presumably aborts when the condition is
# false -- confirm against its definition.
assertion(fms >= min_fms, "Sentences have low fuzzy match score of %.02f." % fms)

# Get the A set: the pairs produced by the phrase extractor for S and S1.
phrase_extractor = PhraseExtractor(s_sentence, s1_sentence, min_len, max_len)
a_set = phrase_extractor.extract_pairs()

# Initiate Apertium for the language pair and check the installation;
# the (out, err) result is handed to assertion() as (condition, message).
apertium = Apertium(lps[0], lps[1])
out, err = apertium.check_installations(lp_dir)
assertion(out, err)

# Prepare to generate the D set: whitespace-tokenised sentences.
S = s_sentence.split()
S1 = s1_sentence.split()
src = src1 = ""

# Each entry of a_set unpacks to four indices: (a, b) is used as an
# inclusive word range into S and (c, d) as an inclusive word range into S1.
for a, b, c, d in a_set:
    str1 = ' '.join(S[a:b + 1])
    str2 = ' '.join(S1[c:d + 1])
    print('("{0}", "{1}")'.format(str1, str2))
# Only the three known modes are accepted.
assertion(mode in ('all', 'cam', 'compare'), "Mode couldn't be identified.")

# Options taken from the parsed command line.
grounded = args.go
best_only = args.bo
min_fms = float(args.min_fms)
min_len = int(args.min_len)
max_len = int(args.max_len)

# NOTE(review): `&` binds tighter than `>`, so the original unparenthesised
# `min_len > 1 & grounded` is evaluated exactly as written below: the bound
# is 1 when `grounded` is true and 0 otherwise. The parentheses only make
# that explicit; confirm this is the intended condition for warning().
warning(min_len > (1 & grounded), "min_len should be greater than 1")

# An empty `cache` string means no cache database file is used.
cache_db_file = cache if cache != '' else None
use_caching = bool(cache_db_file)

# Initiate Apertium for the language pair and check the installation;
# the (out, err) result is handed to assertion() as (condition, message).
apertium = Apertium(lps[0], lps[1])
out, err = apertium.check_installations(lp_dir)
assertion(out, err)

# Reopen the output file (default open mode, i.e. text read).
# NOTE(review): the handle is not closed within this fragment -- make sure
# the code that follows closes it.
file1 = open(args.out)

# Global values, initialised empty before processing starts.
gl_wer, best_wer, gl_up_wer = [], [], []
gl_no_of_patches = 0.0

# A second set of lists is only created in 'compare' mode.
if mode == 'compare':
    gl_wer2, best_wer2 = [], []