Ejemplo n.º 1
0
# Numeric thresholds taken from the parsed command-line arguments.
min_fms = float(args.min_fms)
min_len = int(args.min_len)
if args.max_len:
	max_len = int(args.max_len)
else:
	# No explicit upper bound: use the word count of the longer sentence.
	max_len = max(len(s_sentence.split()), len(s1_sentence.split()))

# Fuzzy match score (FMS) between S and S1.
fms = FMS(s_sentence, s1_sentence).calculate()

# A score below the threshold is reported through assertion() with the
# formatted score in the message.
assertion(fms >= min_fms, "Sentences have low fuzzy match score of %.02f." % fms)

# A set: the index 4-tuples returned by the phrase extractor.
phrase_extractor = PhraseExtractor(s_sentence, s1_sentence, min_len, max_len)
a_set = phrase_extractor.extract_pairs()

# Create the Apertium wrapper for the language pair and verify the
# installation; `err` is the message handed to assertion() alongside `out`.
apertium = Apertium(lps[0], lps[1])
out, err = apertium.check_installations(lp_dir)
assertion(out, err)


# Prepare to generate the D set: tokenise both sentences on whitespace.
S, S1 = s_sentence.split(), s1_sentence.split()

src = src1 = ""

# Each A-set entry holds inclusive start/end word indices into S and S1;
# print the two corresponding phrases as a quoted pair.
for s_first, s_last, s1_first, s1_last in a_set:
	str1 = ' '.join(S[s_first: s_last + 1])
	str2 = ' '.join(S1[s1_first: s1_last + 1])
	print('("{0}", "{1}")'.format(str1, str2))
Ejemplo n.º 2
0
# Only the three known modes are accepted.
assertion(mode in ('all', 'cam', 'compare'), "Mode couldn't be identified.")

# Option flags and numeric thresholds from the parsed arguments.
grounded, best_only = args.go, args.bo
min_fms = float(args.min_fms)
min_len = int(args.min_len)
max_len = int(args.max_len)

# NOTE(review): `&` binds tighter than `>`, so this condition has always been
# evaluated as min_len > (1 & grounded); the parentheses only make that
# explicit. If the intent was "(min_len > 1) and grounded" or
# "not grounded or min_len > 1", the expression needs changing - confirm
# against the definition of warning().
warning(min_len > (1 & grounded), "min_len should be greater than 1")

# An empty cache argument means no cache database; caching is enabled only
# when a (truthy) cache file name is present.
cache_db_file = cache if cache != '' else None
use_caching = bool(cache_db_file)

# Create the Apertium wrapper for the language pair and verify the
# installation; `err` is the message handed to assertion() alongside `out`.
apertium = Apertium(lps[0], lps[1])
out, err = apertium.check_installations(lp_dir)
assertion(out, err)

# Reopen the file named by args.out (default mode, i.e. reading).
# NOTE(review): no matching close() is visible in this excerpt.
file1 = open(args.out)

# Global accumulators.
gl_wer, best_wer, gl_up_wer = [], [], []
gl_no_of_patches = 0.0

# Compare mode only: create a second pair of empty result lists
# (gl_wer2, best_wer2) next to gl_wer / best_wer.
# NOTE(review): presumably these collect the scores of the second system
# being compared - confirm against the code that fills them.
if mode == 'compare':
	gl_wer2 = []
	best_wer2 = []