def _sameResultForEval(self, de, fr, s2t, t2s):
    """Assert that swapping source and target yields mirrored alignments.

    ``Aligner`` is run twice: first with ``de`` as 'srcfile' and ``fr`` as
    'targetfile', then with the two files (and the ``s2t`` / ``t2s``
    translation arguments) exchanged.  The source-side lines of one run
    must equal the target-side lines of the other run, and vice versa.
    """
    def run(srcfile, targetfile, srctotarget, targettosrc):
        # One alignment pass; returns (source lines, target lines).
        aligner = Aligner({
            'srcfile': srcfile,
            'targetfile': targetfile,
            'srctotarget': srctotarget,
            'targettosrc': targettosrc,
            'verbosity': 0,
        })
        aligner.mainloop()
        src_out, trg_out = aligner.results()
        return src_out.getvalue().splitlines(), trg_out.getvalue().splitlines()

    forward_src, forward_trg = run(de, fr, s2t, t2s)
    backward_src, backward_trg = run(fr, de, t2s, s2t)

    self.assertEqual(forward_src, backward_trg)
    self.assertEqual(forward_trg, backward_src)
def _sameResultForEval(self, de, fr, s2t, t2s):
    """Check that aligning in both directions gives symmetric results.

    The first pass uses ``de`` as source and ``fr`` as target; the second
    pass exchanges the files and the two translation arguments.  The test
    passes when each side of the first pass matches the opposite side of
    the second pass line for line.
    """
    directions = (
        (de, fr, s2t, t2s),  # original direction
        (fr, de, t2s, s2t),  # source and target exchanged
    )
    aligned = []
    for srcfile, targetfile, srctotarget, targettosrc in directions:
        options = {
            'srcfile': srcfile,
            'targetfile': targetfile,
            'srctotarget': srctotarget,
            'targettosrc': targettosrc,
            'verbosity': 0,
        }
        a = Aligner(options)
        a.mainloop()
        src_buffer, trg_buffer = a.results()
        aligned.append((
            src_buffer.getvalue().splitlines(),
            trg_buffer.getvalue().splitlines(),
        ))

    (s2t_src, s2t_trg), (t2s_src, t2s_trg) = aligned
    self.assertEqual(s2t_src, t2s_trg)
    self.assertEqual(s2t_trg, t2s_src)
def align(srcfile, tgtfile, approx_src_tgt_file):
    """Align ``srcfile`` with ``tgtfile`` and return the aligned lines.

    Args:
        srcfile: source text, passed to ``Aligner`` as 'srcfile'.
        tgtfile: target text, passed to ``Aligner`` as 'targetfile'.
        approx_src_tgt_file: translation of the source into the target
            language; passed as the single entry of 'srctotarget'.

    Returns:
        A ``(src, tgt)`` tuple holding the lines of the two outputs
        returned by ``Aligner.results()``.
    """
    # Imported locally because the file's import block is not part of this
    # function and may not already provide ``os``.
    import os

    # The aligner's log output is discarded.  The sink is opened through
    # os.devnull (portable) inside a ``with`` block so the handle is closed
    # once alignment has finished instead of being leaked.
    with open(os.devnull, "w+") as log_sink:
        options = {
            # Source and target files needed by Aligner; they can be
            # filenames, arrays of strings or io objects.
            'srcfile': srcfile,
            'targetfile': tgtfile,
            # Translations of srcfile and targetfile, not influenced by
            # 'factored'; filenames, arrays of strings or io objects, too.
            'srctotarget': [approx_src_tgt_file],
            'targettosrc': [],
            # Filenames or io objects for the two outputs respectively.
            # When omitted or None, StringIO is used to save the results.
            'output-src': None,
            'output-target': None,
            # other options ...
            'log_to': log_sink,
        }
        a = Aligner(options)
        a.mainloop()
        # Both outputs are StringIO because 'output-src'/'output-target'
        # are None above.
        output_src, output_target = a.results()
        src = output_src.getvalue().splitlines()
        tgt = output_target.getvalue().splitlines()
    return (src, tgt)
def main_test(self, option_function, close_file_object=None, remove_file=None):
    """Align several combinations of the eval files and compare with references.

    For each of the two test sets, the ``.fr`` and ``.de`` files in the
    ``eval`` directory (three dot-separated name parts, middle part not
    'clean') are collected and sorted, combined into several
    (source-to-target, target-to-source) selections, and each selection is
    aligned.  After all runs, every result is checked with
    ``self.cmp_files`` against the file of the same name under ``refer``.

    Args:
        option_function: name of a method on ``self``; it is called with
            ``(test_argument, srctotarget_file, targettosrc_file,
            output_path)`` and its return value is passed to ``Aligner``.
        close_file_object: optional name of a method on ``self``.  When
            given, it is called after each run with lists containing the
            aligner outputs and several option values (presumably to close
            them -- confirm against the named method).
        remove_file: optional name of a method on ``self``.  When given, it
            is called with each result path after that path was compared.
    """
    test_dir = os.path.dirname(os.path.abspath(__file__))
    eval_dir = os.path.join(test_dir, '..', 'eval')
    result_dir = os.path.join(test_dir, 'result')
    refer_dir = os.path.join(test_dir, 'refer')
    # Replace the module-level log function with a no-op.
    bleualign.log = lambda a, b: None

    # Resolve the callbacks once instead of on every use.
    build_options = getattr(self, option_function)
    close_objects = None
    if close_file_object is not None:
        close_objects = getattr(self, close_file_object)

    compare_files = []
    for test_set, test_argument in [('eval1957', '-d'), ('eval1989', '-e')]:
        fr_text = []
        de_text = []
        for filename in os.listdir(eval_dir):
            if not filename.startswith(test_set):
                continue
            attr = filename.split('.')
            if len(attr) != 3 or attr[1] == 'clean':
                continue
            filepath = os.path.join(eval_dir, filename)
            if attr[2] == 'fr':
                fr_text.append(filepath)
            elif attr[2] == 'de':
                de_text.append(filepath)
        fr_text.sort()
        de_text.sort()

        test_files = [
            (fr_text[0:1], de_text[-3:-2]),
            (fr_text, []),
            (fr_text[1::3], de_text[-2:-1]),
            (fr_text[2:3], de_text[3:4]),
            (fr_text[0:1], []),
            (fr_text[2:], de_text[:3]),
            (fr_text, de_text),
            # TODO: add the cases with no source-to-target files in a later
            # test: ([], []), ([], de_text), ([], de_text[-1:])
        ]
        for srctotarget_file, targettosrc_file in test_files:
            output_file = self.output_file_path(srctotarget_file, targettosrc_file)
            output_path = os.path.join(result_dir, output_file)
            options = build_options(
                test_argument, srctotarget_file, targettosrc_file, output_path)
            a = Aligner(options)
            a.mainloop()
            output_src, output_target = a.results()
            if close_objects is not None:
                close_objects([output_src, output_target])
                close_objects([options['targetfile']])
                close_objects(options['targettosrc'])
                # Source-side options are only handed over for the
                # 'fileObjectOptions' configuration.
                if option_function == 'fileObjectOptions':
                    close_objects([options['srcfile']])
                    close_objects(options['srctotarget'])
            refer_path = os.path.join(refer_dir, output_file)
            compare_files.append((output_path + '-s', refer_path + '-s', output_src))
            compare_files.append((output_path + '-t', refer_path + '-t', output_target))

    # Compare results with the data in refer.
    remove_result = None
    if remove_file is not None:
        remove_result = getattr(self, remove_file)
    for result_path, refer_path, output_object in compare_files:
        self.cmp_files(result_path, refer_path, output_object)
        if remove_result is not None:
            remove_result(result_path)
def 對齊(self, 原來, 目標, 原來翻目標, 目標翻原來, 原來對齊=None, 目標對齊=None):
    """Run ``Aligner`` for one source/target pair and return its results.

    The options start as a copy of ``self.公家參數`` so the stored mapping
    itself is not modified; the six arguments then fill the keys
    'srcfile', 'targetfile', 'srctotarget', 'targettosrc', 'output-src'
    and 'output-target', in that order.

    Returns:
        Whatever ``Aligner.results()`` returns after ``mainloop()`` ran.
    """
    options = self.公家參數.copy()
    options.update({
        'srcfile': 原來,
        'targetfile': 目標,
        'srctotarget': 原來翻目標,
        'targettosrc': 目標翻原來,
        'output-src': 原來對齊,
        'output-target': 目標對齊,
    })
    aligner = Aligner(options)
    aligner.mainloop()
    return aligner.results()
def main_test(self, option_function):
    """Align eval file selections and compare good/bad outputs with references.

    ``option_function`` names a method on ``self`` that is called with
    ``(test_argument, filter_type, srctotarget_file, targettosrc_file,
    output_path)`` and returns the options for ``Aligner``.  For every run
    the two ``results()`` outputs and the two ``results_bad()`` outputs are
    recorded and, after all runs, checked with ``self.cmp_files`` against
    the matching path under ``refer``.  Result paths are removed afterwards
    when ``option_function`` starts with 'file'.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    eval_dir = os.path.join(here, '..', 'eval')
    result_dir = os.path.join(here, 'result')
    refer_dir = os.path.join(here, 'refer')
    # Replace the module-level log function with a no-op.
    bleualign.log = lambda a, b: None
    pending = []
    for test_set, test_argument in [('eval1957', '-d'), ('eval1989', '-e')]:
        fr_text = []
        de_text = []
        by_extension = {'fr': fr_text, 'de': de_text}
        for filename in os.listdir(eval_dir):
            if not filename.startswith(test_set):
                continue
            parts = filename.split('.')
            # Only three-part names whose middle part is not 'clean' count.
            if len(parts) != 3 or parts[1] == 'clean':
                continue
            bucket = by_extension.get(parts[2])
            if bucket is not None:
                bucket.append(os.path.join(eval_dir, filename))
        fr_text.sort()
        de_text.sort()

        test_files = [
            (fr_text[0:1], de_text[-3:-2], 'articles'),
            (fr_text, [], 'sentences'),
            (fr_text, de_text, 'sentences'),
        ]
        for srctotarget_file, targettosrc_file, filter_type in test_files:
            output_file = self.output_file_path(srctotarget_file, targettosrc_file)
            output_path = os.path.join(result_dir, output_file)
            options = getattr(self, option_function)(
                test_argument, filter_type,
                srctotarget_file, targettosrc_file, output_path)
            aligner = Aligner(options)
            aligner.mainloop()
            output_src, output_target = aligner.results()
            output_src_bad, output_target_bad = aligner.results_bad()
            outputs = (
                ('-good-s', output_src),
                ('-good-t', output_target),
                ('-bad-s', output_src_bad),
                ('-bad-t', output_target_bad),
            )
            if option_function == 'fileObjectOptions':
                for _, stream in outputs:
                    stream.close()
            refer_path = os.path.join(refer_dir, output_file)
            for suffix, stream in outputs:
                pending.append((output_path + suffix, refer_path + suffix, stream))

    remove_results = option_function.startswith('file')
    for result_path, refer_path, output_object in pending:
        self.cmp_files(result_path, refer_path, output_object)
        if remove_results:
            os.remove(result_path)
def bleu_align(self, srcfile, tgtfile, hyp_src_tgt_file):
    """Align ``srcfile`` with ``tgtfile`` using one source-to-target translation.

    Args:
        srcfile: passed to ``Aligner`` as 'srcfile'.
        tgtfile: passed to ``Aligner`` as 'targetfile'.
        hyp_src_tgt_file: translation of the source text; passed as the
            single entry of 'srctotarget'.

    Returns:
        A ``(srcs, tgts)`` tuple with the lines of the two outputs returned
        by ``Aligner.results()``.
    """
    # No 'output-src'/'output-target' entries are supplied; the outputs
    # returned by results() are read back with getvalue() below.
    options = {
        'verbosity': 0,
        'srcfile': srcfile,
        'targetfile': tgtfile,
        'srctotarget': [hyp_src_tgt_file],
        'targettosrc': [],
    }
    a = Aligner(options)
    a.mainloop()
    src_out, tgt_out = a.results()
    srcs = src_out.getvalue().splitlines()
    tgts = tgt_out.getvalue().splitlines()
    return srcs, tgts
def test_gale_church(self):
    """Align both eval sets with '--srctotarget -' and compare with references.

    For each test set the options come from ``load_arguments`` with the
    set's flag plus ``'--srctotarget', '-'``.  The two outputs of
    ``Aligner.results()`` are recorded under ``<set>-galechurch-s`` / ``-t``
    and, once both sets have run, checked with ``self.cmp_files`` against
    the same names under ``refer``.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    result_dir = os.path.join(here, 'result')
    refer_dir = os.path.join(here, 'refer')
    # Replace the module-level log function with a no-op.
    bleualign.log = lambda a, b: None
    compare_files = []
    for test_set, test_argument in [('eval1957', '-d'), ('eval1989', '-e')]:
        options = load_arguments(['', test_argument, '--srctotarget', '-'])
        output_file = test_set + '-galechurch'
        output_path = os.path.join(result_dir, output_file)
        refer_path = os.path.join(refer_dir, output_file)
        # Note: options['output'] is intentionally left unset here.
        aligner = Aligner(options)
        aligner.mainloop()
        output_src, output_target = aligner.results()
        for suffix, stream in (('-s', output_src), ('-t', output_target)):
            compare_files.append((output_path + suffix, refer_path + suffix, stream))
    # Compare results with the data in refer.
    for result_path, refer_path, output_object in compare_files:
        self.cmp_files(result_path, refer_path, output_object)
def bleu_align(self, srcfile, tgtfile, hyp_src_tgt_file=None):
    """Align ``srcfile`` with ``tgtfile``, optionally using a translation.

    Args:
        srcfile: passed to ``Aligner`` as 'srcfile'.
        tgtfile: passed to ``Aligner`` as 'targetfile'.
        hyp_src_tgt_file: optional translation of the source text.  When it
            is ``None`` the 'galechurch' and 'no_translation_override'
            options are switched on; when it is truthy it becomes the
            single entry of 'srctotarget'.

    Returns:
        A ``(srcs, tgts)`` tuple with the lines of the two outputs returned
        by ``Aligner.results()``.
    """
    # Both flags depend only on whether a hypothesis was passed at all.
    no_hypothesis = hyp_src_tgt_file is None
    options = {
        'srcfile': srcfile,
        'targetfile': tgtfile,
        'galechurch': no_hypothesis,
        'no_translation_override': no_hypothesis,
        'srctotarget': [hyp_src_tgt_file] if hyp_src_tgt_file else [],
        'targettosrc': [],
        'verbosity': 0,
    }
    a = Aligner(options)
    a.mainloop()
    src_out, tgt_out = a.results()
    srcs = src_out.getvalue().splitlines()
    tgts = tgt_out.getvalue().splitlines()
    return srcs, tgts
def test_gale_church(self):
    """Run the aligner on both eval sets with '--srctotarget -' and verify.

    Options are produced by ``load_arguments`` from the test set's flag
    followed by ``'--srctotarget', '-'``.  Each run contributes its source
    and target outputs, and after both runs every entry is handed to
    ``self.cmp_files`` together with its result and reference paths.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    result_dir = os.path.join(base_dir, 'result')
    refer_dir = os.path.join(base_dir, 'refer')
    # Replace the module-level log function with a no-op.
    bleualign.log = lambda a, b: None
    compare_files = []
    for test_set, test_argument in [('eval1957', '-d'), ('eval1989', '-e')]:
        argv = ['', test_argument, '--srctotarget', '-']
        options = load_arguments(argv)
        output_file = test_set + '-galechurch'
        output_path = os.path.join(result_dir, output_file)
        # Note: options['output'] is intentionally left unset here.
        a = Aligner(options)
        a.mainloop()
        output_src, output_target = a.results()
        refer_path = os.path.join(refer_dir, output_file)
        compare_files.extend([
            (output_path + '-s', refer_path + '-s', output_src),
            (output_path + '-t', refer_path + '-t', output_target),
        ])
    # Compare results with the data in refer.
    for entry in compare_files:
        result_path, refer_path, output_object = entry
        self.cmp_files(result_path, refer_path, output_object)
# source and target files needed by Aligner # they can be filenames, arrays of strings or io objects. 'srcfile': os.path.join(current_path, '..', 'eval', 'eval1989.de'), 'targetfile': os.path.join(current_path, '..', 'eval', 'eval1989.fr'), # translations of srcfile and targetfile, not influenced by 'factored' # they can be filenames, arrays of strings or io objects, too. 'srctotarget': [os.path.join(current_path, '..', 'eval', 'eval1957.europarlfull.fr')], 'targettosrc': [], # passing filenames or io object for them in respectly. # if not passing anything or assigning None, they will use StringIO to save results. 'output-src': None, 'output-target': None, # other options ... } a = Aligner(options) a.mainloop() output_src, output_target = a.results() # output_src, output_target is StringIO because options['output-src'] is None src = output_src.getvalue() # StringIO member function trg = output_target.getvalue().splitlines() # array of string print('output_src.getvalue()') print(src[:30]) print() print('output_target.getvalue().splitlines()') print(trg[:3])
import os

from bleualign.align import Aligner

# Example script: align eval1989.de with eval1989.fr from the sibling
# ``eval`` directory and print the beginning of both outputs.
if __name__ == '__main__':
    current_path = os.path.dirname(os.path.abspath(__file__))
    eval_dir = os.path.join(current_path, '..', 'eval')
    options = {
        # Source and target files needed by Aligner; they can be
        # filenames, arrays of strings or io objects.
        'srcfile': os.path.join(eval_dir, 'eval1989.de'),
        'targetfile': os.path.join(eval_dir, 'eval1989.fr'),
        # Translations of srcfile and targetfile, not influenced by
        # 'factored'; they can be filenames, arrays of strings or io
        # objects, too.
        'srctotarget': [os.path.join(eval_dir, 'eval1957.europarlfull.fr')],
        'targettosrc': [],
        # Filenames or io objects for the two outputs respectively.  When
        # omitted or None, StringIO is used to save the results.
        'output-src': None,
        'output-target': None,
        # other options ...
    }
    a = Aligner(options)
    a.mainloop()
    # Both outputs are StringIO because options['output-src'] is None.
    output_src, output_target = a.results()
    src = output_src.getvalue()  # whole output as a single string
    trg = output_target.getvalue().splitlines()  # list of lines

    print('output_src.getvalue()')
    print(src[:30])  # first 30 characters
    print()
    print('output_target.getvalue().splitlines()')
    print(trg[:3])  # first 3 lines
def main_test(self, option_function):
    """Run the aligner on eval file selections and check all four outputs.

    ``option_function`` is the name of a method on ``self`` that builds the
    ``Aligner`` options from ``(test_argument, filter_type,
    srctotarget_file, targettosrc_file, output_path)``.  Each run yields
    the outputs of ``results()`` and ``results_bad()``; they are queued
    with '-good-s', '-good-t', '-bad-s' and '-bad-t' suffixes and compared
    via ``self.cmp_files`` once every run has finished.  When
    ``option_function`` starts with 'file', each result path is deleted
    after its comparison.
    """
    test_dir = os.path.dirname(os.path.abspath(__file__))
    eval_dir = os.path.join(test_dir, '..', 'eval')
    result_dir = os.path.join(test_dir, 'result')
    refer_dir = os.path.join(test_dir, 'refer')
    # Replace the module-level log function with a no-op.
    bleualign.log = lambda a, b: None
    compare_files = []
    for test_set, test_argument in [('eval1957', '-d'), ('eval1989', '-e')]:
        # (name parts, full path) for every three-part, non-'clean' file
        # belonging to this test set.
        candidates = [
            (name.split('.'), os.path.join(eval_dir, name))
            for name in os.listdir(eval_dir)
            if name.startswith(test_set)
        ]
        usable = [
            (parts, path) for parts, path in candidates
            if len(parts) == 3 and parts[1] != 'clean'
        ]
        fr_text = sorted(path for parts, path in usable if parts[2] == 'fr')
        de_text = sorted(path for parts, path in usable if parts[2] == 'de')

        test_files = (
            (fr_text[0:1], de_text[-3:-2], 'articles'),
            (fr_text, [], 'sentences'),
            (fr_text, de_text, 'sentences'),
        )
        for fr_file, de_file, filter_type in test_files:
            output_file = self.output_file_path(fr_file, de_file)
            output_path = os.path.join(result_dir, output_file)
            make_options = getattr(self, option_function)
            options = make_options(
                test_argument, filter_type, fr_file, de_file, output_path)
            a = Aligner(options)
            a.mainloop()
            good_src, good_trg = a.results()
            bad_src, bad_trg = a.results_bad()
            if option_function == 'fileObjectOptions':
                good_src.close()
                good_trg.close()
                bad_src.close()
                bad_trg.close()
            refer_path = os.path.join(refer_dir, output_file)
            compare_files.extend([
                (output_path + '-good-s', refer_path + '-good-s', good_src),
                (output_path + '-good-t', refer_path + '-good-t', good_trg),
                (output_path + '-bad-s', refer_path + '-bad-s', bad_src),
                (output_path + '-bad-t', refer_path + '-bad-t', bad_trg),
            ])

    for result_path, refer_path, output_object in compare_files:
        self.cmp_files(result_path, refer_path, output_object)
        if option_function.startswith('file'):
            os.remove(result_path)