예제 #1
0
 def setUp(self):
     """Set up corpus file paths and two extractors built without a model.

     Stores on the test instance:
       * ``module_path`` -- directory containing this test module;
       * ``src_name`` / ``tg_name`` -- paths to the ``corpus.de.1000`` and
         ``corpus.en.1000`` fixtures, resolved relative to ``module_path``;
       * ``aligner_no_model`` -- extractor created with default arguments;
       * ``aligner_no_model_2`` -- extractor created with ``context_size=2``.
     """
     here = os.path.dirname(os.path.realpath(__file__))
     self.module_path = here

     # Both corpus fixtures live in the preprocessing package's test data.
     src_path = os.path.join(
         here, '../../preprocessing/tests/test_data/corpus.de.1000')
     tg_path = os.path.join(
         here, '../../preprocessing/tests/test_data/corpus.en.1000')
     self.src_name = src_path
     self.tg_name = tg_path

     self.aligner_no_model = AlignmentFeatureExtractor()
     self.aligner_no_model_2 = AlignmentFeatureExtractor(context_size=2)
예제 #2
0
 def test_alignment_no_target(self):
     """``get_features`` raises ``NoDataError`` without a 'target' entry."""
     extractor = AlignmentFeatureExtractor()
     # Context deliberately omits the 'target' key.
     context = {
         'token': u'hits',
         'index': 2,
         'source': [u'un', u'garcon', u'frappe', u'un', u'chien'],
     }
     self.assertRaises(NoDataError, extractor.get_features, context)
예제 #3
0
 def test_alignment_no_source(self):
     """``get_features`` raises ``NoDataError`` without a 'source' entry."""
     extractor = AlignmentFeatureExtractor()
     # Context deliberately omits the 'source' key.
     context = {
         'token': u'hits',
         'index': 2,
         'target': [u'a', u'boy', u'hits', u'a', u'dog'],
     }
     self.assertRaises(NoDataError, extractor.get_features, context)
예제 #4
0
 def test_align_model_in_extractor(self):
     """Check features from an extractor built with an ``align_model`` path.

     The extractor is pointed at ``test_data/alignments/align_model`` under
     the module directory. For the target token ``boy`` (index 1) the test
     expects ``get_features`` to return a 3-tuple whose first element is
     ``junge`` and to leave an ``'alignments'`` key in the context dict.
     """
     model_path = os.path.join(
         self.module_path, 'test_data/alignments/align_model')
     extractor = AlignmentFeatureExtractor(align_model=model_path)

     context = {
         'token': u'boy',
         'index': 1,
         'source': [u'ein', u'junge', u'schlägt', u'einen', u'Hund'],
         'target': [u'a', u'boy', u'hits', u'a', u'dog'],
     }
     # Only the first element is checked; the other two are unpacked so a
     # result of the wrong length still fails here.
     aligned_word, _left, _right = extractor.get_features(context)

     self.assertTrue('alignments' in context)
     self.assertEqual(aligned_word, u'junge')
예제 #5
0
 def test_alignment_on_the_fly(self):
     """Check features from an extractor built from raw corpus files.

     The extractor receives ``src_file`` / ``tg_file`` instead of a
     ready-made alignment model. For the target token ``boy`` (index 1)
     the test expects ``get_features`` to return a 3-tuple whose first
     element is ``junge`` and to leave an ``'alignments'`` key in the
     context dict.

     Files matching ``align_model.*`` and ``<src basename>_<tg basename>*``
     in the current working directory are removed afterwards (presumably
     artefacts of building the alignment model -- confirm against the
     extractor). The removal sits in a ``finally`` clause so a failing
     assertion or an exception does not leave those files behind.
     """
     obj = {
         'token': u'boy',
         'index': 1,
         'source': [u'ein', u'junge', u'schlägt', u'einen', u'Hund'],
         'target': [u'a', u'boy', u'hits', u'a', u'dog']
     }
     # Relative glob patterns, so they resolve against the current
     # working directory.
     generated_patterns = (
         'align_model.*',
         os.path.basename(self.src_name) + '_' +
         os.path.basename(self.tg_name) + '*',
     )
     try:
         aligner_corpus = AlignmentFeatureExtractor(src_file=self.src_name,
                                                    tg_file=self.tg_name)
         # Only the first element is checked; unpacking three names still
         # verifies the length of the returned tuple.
         (cont_word, _left, _right) = aligner_corpus.get_features(obj)
         self.assertIn('alignments', obj)
         self.assertEqual(cont_word, u'junge')
     finally:
         # Always clean up, even when the checks above fail.
         for pattern in generated_patterns:
             for a_file in glob.glob(pattern):
                 os.remove(a_file)