file2.close()

# Scan the SLFT/TLFT files line by line, in lockstep, and write to OUT every
# combination of a known (s, t) sentence pair and a file line pair whose
# FMS against the source-side line is at least min_fms but below 1.0.
#
# The three files are opened in one `with` statement so that they are all
# closed (and the output flushed) even if preprocessing or FMS computation
# raises part-way through.
with open(args.SLFT) as file3, open(args.TLFT) as file4, \
        open(args.OUT, 'w') as file5:
    while True:
        line = preprocess(file3.readline())
        line1 = preprocess(file4.readline())

        # readline() returns '' at end of file; stop as soon as either side
        # yields a falsy preprocessed line.
        # NOTE(review): if preprocess() maps a blank line to '', a blank line
        # in the middle of either file also ends the scan - confirm intended.
        if not line or not line1:
            break

        # Skip source-side lines with more than max_len whitespace tokens.
        if len(line.split()) > max_len:
            continue

        for s, t in zip(src_sentences, tgt_sentences):
            fms = FMS(s, line)
            # Get max possible FMS for the pair; used as a pre-filter before
            # the actual computation.
            max_fms = fms.get_max_fms()
            if max_fms < min_fms:
                continue

            # Get actual FMS
            fms = fms.calculate_using_wanger_fischer()

            # A score of exactly 1.0 is excluded.
            if min_fms <= fms < 1.0:
                # One record = four lines: s, t, source line, target line.
                file5.write(s + "\n" + t + "\n" + line + "\n" + line1 + "\n")
# Caching is enabled whenever a cache database file was supplied.
use_caching = bool(cache_db_file)

# Initiate and check Apertium
apertium = Apertium(lps[0], lps[1])
(out, err) = apertium.check_installations(lp_dir)
assertion(out, err)

# Load the translation units from the TMX file for the language pair.
tmxf = TMXFile(tmxfile, lps[0], lps[1])
tmunits = tmxf.getunits()

# Map (source, target) -> FMS against s_sentence, for every unit whose
# score reaches min_fms.
fmses = {}
for tmxu in tmunits:
    src = preprocess(tmxu.getsource())
    tgt = preprocess(tmxu.gettarget())

    fms = FMS(s_sentence, src)
    # The maximum possible FMS is used as a pre-filter before the actual
    # computation.
    max_fms = fms.get_max_fms()
    if max_fms < min_fms:
        continue

    fms = fms.calculate_using_wanger_fischer()
    if fms >= min_fms:
        fmses[(src, tgt)] = fms

assertion(fmses != {}, "No proper match with FMS > {0} could be found".format(min_fms))

# Sort candidates by score, highest first, so that index 0 really is the
# best match. (An ascending sort would put the weakest accepted match at
# index 0.)
sorted_fms = sorted(fmses, key=fmses.get, reverse=True)
(src, tgt) = sorted_fms[0]  # Best match

# Patch the best-matching unit and pick the best resulting patch.
patcher = Patcher(apertium, src, s_sentence, tgt, use_caching, cache_db_file)
patches = patcher.patch(min_len, max_len, grounded, lp_dir)
best_patch = patcher.get_best_patch()