def _sameResultForEval(self, de, fr, s2t, t2s):
    """Assert that swapping source and target mirrors the alignment output.

    The aligner is run once with ``de`` as source and ``fr`` as target, and
    once with the two files and the two translation lists exchanged. The
    source output of each run must equal the target output of the other.
    """

    def run(src, trg, src_to_trg, trg_to_src):
        # Align one direction and return (source lines, target lines).
        aligner = Aligner({
            'srcfile': src,
            'targetfile': trg,
            'srctotarget': src_to_trg,
            'targettosrc': trg_to_src,
            'verbosity': 0,
        })
        aligner.mainloop()
        out_src, out_trg = aligner.results()
        return (out_src.getvalue().splitlines(),
                out_trg.getvalue().splitlines())

    forward_src, forward_trg = run(de, fr, s2t, t2s)
    backward_src, backward_trg = run(fr, de, t2s, s2t)
    self.assertEqual(forward_src, backward_trg)
    self.assertEqual(forward_trg, backward_src)
Ejemplo n.º 2
0
def align(srcfile, tgtfile, approx_src_tgt_file):
    """Align ``srcfile`` with ``tgtfile`` using one source-to-target translation.

    Args:
        srcfile: Source text handed to ``Aligner`` as ``'srcfile'``.
        tgtfile: Target text handed to ``Aligner`` as ``'targetfile'``.
        approx_src_tgt_file: Translation of the source, passed as the only
            entry of ``'srctotarget'``.

    Returns:
        A ``(src, tgt)`` tuple holding the aligned source lines and the
        aligned target lines, each as a list of strings.
    """
    # Function-scope import so the block does not rely on a module-level one.
    import os

    # The log sink is opened in a ``with`` block so the handle is released
    # once alignment is done; os.devnull avoids hard-coding "/dev/null".
    with open(os.devnull, "w+") as log_sink:
        options = {
            # source and target files needed by Aligner
            # they can be filenames, arrays of strings or io objects.
            'srcfile': srcfile,
            'targetfile': tgtfile,
            # translations of srcfile and targetfile, not influenced by
            # 'factored'; they can be filenames, arrays of strings or io
            # objects, too.
            'srctotarget': [approx_src_tgt_file],
            'targettosrc': [],
            # Output destinations: filenames or io objects may be passed.
            # When left out or set to None, StringIO is used for the results.
            'output-src': None,
            'output-target': None,
            # other options ...
            'log_to': log_sink,
        }
        a = Aligner(options)
        a.mainloop()
        output_src, output_target = a.results()
        # Both outputs are StringIO because options['output-src'] is None.
        src = output_src.getvalue().splitlines()
        tgt = output_target.getvalue().splitlines()
    return (src, tgt)
	def main_test(self, option_function,
			close_file_object = None, remove_file = None):
		"""Align the evaluation sets and compare every output with its reference.

		For each of the two evaluation sets, the ``fr`` and ``de`` files found
		in the ``eval`` directory are combined into several
		srctotarget/targettosrc selections. Each selection is aligned and both
		outputs are queued; after all runs, every queued output is compared
		with the file of the same name in the ``refer`` directory.

		Args:
			option_function: Name of a method on ``self`` that is called with
				``(test_argument, srctotarget_file, targettosrc_file,
				output_path)`` and returns the options passed to ``Aligner``.
			close_file_object: Optional name of a method on ``self`` that is
				called with lists of objects taken from the aligner results
				and from the options. Skipped when ``None``.
			remove_file: Optional name of a method on ``self`` that is called
				with each result path after the comparison. Skipped when
				``None``.
		"""
		test_dir = os.path.dirname(os.path.abspath(__file__))
		eval_dir = os.path.join(test_dir, '..', 'eval')
		result_dir = os.path.join(test_dir, 'result')
		refer_dir = os.path.join(test_dir, 'refer')
		# Replace bleualign.log with a function that ignores its arguments.
		bleualign.log = lambda a, b:None
		compare_files = []
		for test_set, test_argument in [('eval1957', '-d'), ('eval1989', '-e')]:
			fr_text = []
			de_text = []
			for filename in os.listdir(eval_dir):
				if not filename.startswith(test_set):
					continue
				attr = filename.split('.')
				# Only names with exactly three dot-separated parts are used,
				# and those whose middle part is 'clean' are skipped.
				if len(attr) != 3 or attr[1] == 'clean':
					continue
				filepath = os.path.join(eval_dir, filename)
				if attr[2] == 'fr':
					fr_text.append(filepath)
				elif attr[2] == 'de':
					de_text.append(filepath)
			# os.listdir order is arbitrary; sort so the slices below are stable.
			fr_text.sort()
			de_text.sort()
			test_files = []
			test_files.append((fr_text[0:1], de_text[-3:-2]))
			test_files.append((fr_text, []))
			test_files.append((fr_text[1::3], de_text[-2:-1]))
			test_files.append((fr_text[2:3], de_text[3:4]))
			test_files.append((fr_text[0:1], []))
			test_files.append((fr_text[2:], de_text[:3]))
			test_files.append((fr_text, de_text))
			# Disabled cases with an empty srctotarget list, kept from the
			# original with its note "add in another test after":
			# test_files.append(([], []))
			# test_files.append(([], de_text))
			# test_files.append(([], de_text[-1:]))
			for fr_file, de_file in test_files:
				srctotarget_file = fr_file
				targettosrc_file = de_file
				output_file = self.output_file_path(srctotarget_file, targettosrc_file)
				output_path = os.path.join(result_dir , output_file)
				options = getattr(self, option_function)(test_argument,
					srctotarget_file, targettosrc_file, output_path)
				a = Aligner(options)
				a.mainloop()
				output_src, output_target = a.results()
				if close_file_object is not None:
					close = getattr(self, close_file_object)
					close([output_src, output_target])
					close([options['targetfile']])
					close(options['targettosrc'])
					if option_function == 'fileObjectOptions':
						close([options['srcfile']])
						close(options['srctotarget'])
				refer_path = os.path.join(refer_dir , output_file)
				compare_files.append((output_path + '-s', refer_path + '-s', output_src))
				compare_files.append((output_path + '-t', refer_path + '-t', output_target))
		# compare result with data in refer
		for result_path, refer_path, output_object in compare_files:
			self.cmp_files(result_path, refer_path, output_object)
			if remove_file is not None:
				getattr(self, remove_file)(result_path)
Ejemplo n.º 4
0
 def test_no_translation(self):
     """Check which option sets let ``Aligner`` be constructed.

     With only source and target files, construction is expected to raise
     ``ValueError``. Adding ``no_translation_override``, a ``srctotarget``
     list or a ``targettosrc`` list must each allow construction; every
     aligner built that way has its file streams closed again.
     """
     self.assertRaises(ValueError, Aligner, {
         'srcfile': self.srcfile,
         'targetfile': self.targetfile
     })

     def build_then_close(extra):
         # Construct an aligner from the base files plus ``extra`` options.
         opts = {
             'srcfile': self.srcfile,
             'targetfile': self.targetfile,
         }
         opts.update(extra)
         aligner = Aligner(opts)
         aligner.close_file_streams()

     build_then_close({'no_translation_override': True})
     build_then_close({'srctotarget': [self.targetfile]})
     build_then_close({'targettosrc': [self.srcfile]})
Ejemplo n.º 5
0
    def bleu_align(self, srcfile, tgtfile, hyp_src_tgt_file):
        """Align ``srcfile`` with ``tgtfile`` using one translated hypothesis.

        Args:
            srcfile: Source text handed to ``Aligner`` as ``'srcfile'``.
            tgtfile: Target text handed to ``Aligner`` as ``'targetfile'``.
            hyp_src_tgt_file: Source-to-target translation, passed as the
                only entry of ``'srctotarget'``.

        Returns:
            A ``(srcs, tgts)`` tuple with the aligned source lines and the
            aligned target lines, each as a list of strings.
        """
        options = {
            'verbosity': 0,
            'srcfile': srcfile,
            'targetfile': tgtfile,
            'srctotarget': [hyp_src_tgt_file],
            'targettosrc': [],
        }

        a = Aligner(options)
        a.mainloop()
        # No output destination is set in ``options``; the results are read
        # back from the returned objects via getvalue().
        src_out, tgt_out = a.results()
        srcs = src_out.getvalue().splitlines()
        tgts = tgt_out.getvalue().splitlines()
        return srcs, tgts
Ejemplo n.º 6
0
    def bleu_align(self, srcfile, tgtfile, hyp_src_tgt_file=None):
        """Align ``srcfile`` with ``tgtfile``, optionally using a hypothesis.

        Args:
            srcfile: Source text handed to ``Aligner`` as ``'srcfile'``.
            tgtfile: Target text handed to ``Aligner`` as ``'targetfile'``.
            hyp_src_tgt_file: Optional source-to-target translation. When it
                is ``None``, the ``'galechurch'`` and
                ``'no_translation_override'`` options are switched on and no
                translation is passed.

        Returns:
            A ``(srcs, tgts)`` tuple with the aligned source lines and the
            aligned target lines, each as a list of strings.
        """
        no_hypothesis = hyp_src_tgt_file is None
        options = {
            'srcfile': srcfile,
            'targetfile': tgtfile,
            'galechurch': no_hypothesis,
            'no_translation_override': no_hypothesis,
            # Truthiness test kept from the original: a falsy but non-None
            # hypothesis yields an empty list without enabling the two
            # flags above.
            'srctotarget': [hyp_src_tgt_file] if hyp_src_tgt_file else [],
            'targettosrc': [],
            'verbosity': 0,
        }

        a = Aligner(options)
        a.mainloop()
        src_out, tgt_out = a.results()

        srcs = src_out.getvalue().splitlines()
        tgts = tgt_out.getvalue().splitlines()

        return srcs, tgts
Ejemplo n.º 7
0
 def test_gale_church(self):
     """Align both evaluation sets with ``--srctotarget -`` and check them.

     For each set, the options come from ``load_arguments``, the aligner
     is run, and its two outputs are queued. After both runs, every queued
     output is handed to ``cmp_files`` together with its result path and
     its reference path.
     """
     here = os.path.dirname(os.path.abspath(__file__))
     result_dir = os.path.join(here, 'result')
     refer_dir = os.path.join(here, 'refer')
     # Replace bleualign.log with a function that ignores its arguments.
     bleualign.log = lambda a, b: None
     pending = []
     for test_set, test_argument in (('eval1957', '-d'), ('eval1989', '-e')):
         options = load_arguments(['', test_argument, '--srctotarget', '-'])
         aligner = Aligner(options)
         aligner.mainloop()
         out_src, out_trg = aligner.results()
         stem = test_set + '-galechurch'
         result_base = os.path.join(result_dir, stem)
         refer_base = os.path.join(refer_dir, stem)
         for suffix, stream in (('-s', out_src), ('-t', out_trg)):
             pending.append(
                 (result_base + suffix, refer_base + suffix, stream))
     # Compare each result with the data in refer.
     for result_path, refer_path, output_object in pending:
         self.cmp_files(result_path, refer_path, output_object)
Ejemplo n.º 8
0
# Collect one (source, target, translation) job for every file in `directory`
# that ends in '.' + source_suffix, then align each job in turn.
jobs = []

for source_document in [
        d for d in os.listdir(directory) if d.endswith('.' + source_suffix)
]:

    source_document = os.path.join(directory, source_document)
    # Sibling files share the source's name with the suffix swapped.
    target_document = source_document[:-len(source_suffix)] + target_suffix
    translation_document = source_document[:-len(source_suffix
                                                 )] + translation_suffix

    # Sanity checks: all three files of a job must exist before any
    # alignment starts.
    for f in source_document, target_document, translation_document:
        if not os.path.isfile(f):
            sys.stderr.write(
                'ERROR: File {0} expected, but not found\n'.format(f))
            # Exit with a non-zero status so callers can detect the failure;
            # the bare exit() helper reported success here.
            sys.exit(1)

    jobs.append((source_document, target_document, translation_document))

for (source_document, target_document, translation_document) in jobs:

    # `options` is reused across jobs; only the per-job entries are replaced.
    options['srcfile'] = source_document
    options['targetfile'] = target_document
    options['srctotarget'] = [translation_document]
    options['output-src'] = source_document + '.aligned'
    options['output-target'] = target_document + '.aligned'

    a = Aligner(options)
    a.mainloop()
 def main_test(self, option_function):
     """Align the evaluation sets and compare good and bad outputs.

     For each of the two evaluation sets, the ``fr`` and ``de`` files found
     in the ``eval`` directory are combined into three selections, each
     paired with a filter type. Every selection is aligned; the regular
     results and the ``results_bad`` results are queued and afterwards
     compared with the matching files in the ``refer`` directory.

     ``option_function`` names a method on ``self`` that is called with
     ``(test_argument, filter_type, srctotarget_file, targettosrc_file,
     output_path)`` and returns the options for ``Aligner``. When it equals
     ``'fileObjectOptions'`` the four outputs are closed after each run,
     and when it starts with ``'file'`` each result path is removed after
     it was compared.
     """
     here = os.path.dirname(os.path.abspath(__file__))
     eval_dir = os.path.join(here, '..', 'eval')
     result_dir = os.path.join(here, 'result')
     refer_dir = os.path.join(here, 'refer')
     # Replace bleualign.log with a function that ignores its arguments.
     bleualign.log = lambda a, b: None
     build_options = None
     pending = []
     for test_set, test_argument in (('eval1957', '-d'), ('eval1989', '-e')):
         fr_text = []
         de_text = []
         for name in os.listdir(eval_dir):
             if not name.startswith(test_set):
                 continue
             parts = name.split('.')
             # Only three-part names whose middle part is not 'clean'.
             if len(parts) != 3 or parts[1] == 'clean':
                 continue
             full_path = os.path.join(eval_dir, name)
             language = parts[2]
             if language == 'fr':
                 fr_text.append(full_path)
             elif language == 'de':
                 de_text.append(full_path)
         fr_text.sort()
         de_text.sort()
         test_files = [
             (fr_text[0:1], de_text[-3:-2], 'articles'),
             (fr_text, [], 'sentences'),
             (fr_text, de_text, 'sentences'),
         ]
         for srctotarget_file, targettosrc_file, filter_type in test_files:
             output_file = self.output_file_path(srctotarget_file,
                                                 targettosrc_file)
             output_path = os.path.join(result_dir, output_file)
             build_options = getattr(self, option_function)
             options = build_options(test_argument, filter_type,
                                     srctotarget_file, targettosrc_file,
                                     output_path)
             aligner = Aligner(options)
             aligner.mainloop()
             good_src, good_trg = aligner.results()
             bad_src, bad_trg = aligner.results_bad()
             if option_function == 'fileObjectOptions':
                 for stream in (good_src, good_trg, bad_src, bad_trg):
                     stream.close()
             refer_path = os.path.join(refer_dir, output_file)
             labelled = (
                 ('-good-s', good_src),
                 ('-good-t', good_trg),
                 ('-bad-s', bad_src),
                 ('-bad-t', bad_trg),
             )
             for suffix, stream in labelled:
                 pending.append(
                     (output_path + suffix, refer_path + suffix, stream))
     for result_path, refer_path, output_object in pending:
         self.cmp_files(result_path, refer_path, output_object)
         if option_function.startswith('file'):
             os.remove(result_path)