Example #1
0
 def test_source_lm_no_source(self):
     # A context that carries no 'source' entry must be rejected with
     # NoDataError rather than yielding features.
     corpus_path = os.path.join(
         self.module_path,
         '../../experiment/tiny_test/europarl.1000.en',
     )
     extractor = SourceLMFeatureExtractor(corpus_path)

     # Only target-side information is supplied here.
     target_only_context = {
         'token': u'hits',
         'index': 2,
         'target': [u'a', u'boy', u'hits', u'a', u'dog'],
     }

     with self.assertRaises(NoDataError):
         extractor.get_features(target_only_context)
class LMFeatureExtractorTests(unittest.TestCase):
    """Exercise SourceLMFeatureExtractor.get_features on small contexts.

    Every test queries the target token 'est' (index 2) and only varies
    the 'source' and 'alignments' entries of the context dict.
    """

    def setUp(self):
        """Create two extractors over test_data/training.txt.

        ``lm3Extractor`` uses the extractor's default order (presumably 3,
        judging by the attribute name -- confirm against the extractor);
        ``lm5Extractor`` is built with ``order=5``.
        """
        here = os.path.dirname(os.path.realpath(__file__))
        corpus = os.path.join(here, 'test_data/training.txt')
        self.module_path = here
        self.lm3Extractor = SourceLMFeatureExtractor(corpus)
        self.lm5Extractor = SourceLMFeatureExtractor(corpus, order=5)

    @staticmethod
    def _english_source():
        """Return a fresh copy of the 15-token English source sentence."""
        return [
            u'It', u'becomes', u'more', u'and', u'more', u'difficult',
            u'for', u'us', u'to', u'protect', u'her', u'brands', u'in',
            u'China', '.',
        ]

    @staticmethod
    def _context(**fields):
        """Return a fresh context dict for token 'est', plus ``fields``.

        Layout of a full context:
        { 'token': <token>, index: <idx>, 'source': [<source toks>],
          'target': [<target toks>], 'tag': <tag>}
        """
        context = {
            'token': 'est',
            'index': 2,
            'target': [u'c', u'\'', u'est', u'un', u'garçon'],
            'tag': 'G',
        }
        context.update(fields)
        return context

    def test_get_features(self):
        # Target token 2 is aligned to source token 6 (u'for'); the pair
        # returned is presumably (left n-gram order, right n-gram order).
        left3, right3 = self.lm3Extractor.get_features(
            self._context(source=self._english_source(),
                          alignments=[[], [], [6], [], []]))
        left5, right5 = self.lm5Extractor.get_features(
            self._context(source=self._english_source(),
                          alignments=[[], [], [6], [], []]))

        for actual, expected in ((left3, 3), (right3, 2),
                                 (left5, 5), (right5, 2)):
            self.assertEqual(actual, expected)

    # TODO: if source or alignment don't exist, an error should be thrown
    def test_no_source(self):
        # Neither 'source' nor 'alignments' is present in the context.
        extracted = self.lm3Extractor.get_features(self._context())
        self.assertEqual(extracted, [])

    def test_no_alignments(self):
        # 'source' is present but 'alignments' is missing.
        extracted = self.lm3Extractor.get_features(
            self._context(source=self._english_source()))
        self.assertEqual(extracted, [])

    def test_unaligned(self):
        # The queried token (index 2) has an empty alignment list.
        ngram_pair = self.lm3Extractor.get_features(
            self._context(source=[u'this', u'is', u'a', u'boy'],
                          alignments=[[0], [1], [], [3], [4]]))
        left_ngram, right_ngram = ngram_pair
        self.assertEqual(left_ngram, 0)
        self.assertEqual(right_ngram, 0)

    def test_multi_alignment(self):
        # The queried token is aligned to two source tokens (6 and 7).
        left3, right3 = self.lm3Extractor.get_features(
            self._context(source=self._english_source(),
                          alignments=[[], [], [6, 7], [], []]))
        self.assertEqual(left3, 2)
        self.assertEqual(right3, 2)
Example #3
0
 def test_source_lm_no_alignments(self):
     # A context with 'source' but no 'alignments' entry must be rejected
     # with NoDataError.
     corpus_path = os.path.join(
         self.module_path,
         '../../experiment/tiny_test/europarl.1000.en',
     )
     extractor = SourceLMFeatureExtractor(corpus_path)

     unaligned_context = {
         'token': u'hits',
         'index': 2,
         'target': [u'a', u'boy', u'hits', u'a', u'dog'],
         'source': [u'un', u'garcon', u'frappe', u'un', u'chien'],
     }

     with self.assertRaises(NoDataError):
         extractor.get_features(unaligned_context)
Example #4
0
class LMFeatureExtractorTests(unittest.TestCase):
    """Exercise SourceLMFeatureExtractor.get_features on small contexts.

    Every test queries the target token 'est' (index 2) and only varies
    the 'source' and 'alignments' entries of the context dict. In this
    format 'alignments' holds one entry per target token: a source index,
    or None when the token is unaligned.
    """

    def setUp(self):
        """Create two extractors over test_data/training.txt.

        ``lm3Extractor`` uses the extractor's default order (presumably 3,
        judging by the attribute name -- confirm against the extractor);
        ``lm5Extractor`` is built with ``order=5``.
        """
        module_path = os.path.dirname(os.path.realpath(__file__))
        corpus_file = os.path.join(module_path, 'test_data/training.txt')
        self.module_path = module_path
        self.lm3Extractor = SourceLMFeatureExtractor(corpus_file=corpus_file)
        self.lm5Extractor = SourceLMFeatureExtractor(corpus_file=corpus_file,
                                                     order=5)

    @staticmethod
    def _english_source():
        """Return a fresh copy of the 15-token English source sentence."""
        return [
            u'It', u'becomes', u'more', u'and', u'more', u'difficult',
            u'for', u'us', u'to', u'protect', u'her', u'brands', u'in',
            u'China', '.',
        ]

    @staticmethod
    def _context(**fields):
        """Return a fresh context dict for token 'est', plus ``fields``.

        Layout of a full context:
        { 'token': <token>, index: <idx>, 'source': [<source toks>],
          'target': [<target toks>], 'tag': <tag>}

        A new dict (and new lists) is built on every call so that tests
        never share mutable state.
        """
        context = {
            'token': 'est',
            'index': 2,
            'target': [u'c', u'\'', u'est', u'un', u'garçon'],
            'tag': 'G',
        }
        context.update(fields)
        return context

    def test_get_features(self):
        # Target token 2 is aligned to source token 6 (u'for'); the pair
        # returned is presumably (left n-gram order, right n-gram order).
        (left3, right3) = self.lm3Extractor.get_features(
            self._context(source=self._english_source(),
                          alignments=[None, None, 6, None, None]))
        (left5, right5) = self.lm5Extractor.get_features(
            self._context(source=self._english_source(),
                          alignments=[None, None, 6, None, None]))
        self.assertEqual(left3, 3)
        self.assertEqual(right3, 2)
        self.assertEqual(left5, 5)
        self.assertEqual(right5, 2)

    def test_no_source(self):
        # A context without 'source' (and without 'alignments') must raise.
        with self.assertRaises(NoDataError):
            self.lm3Extractor.get_features(self._context())

    def test_no_alignments(self):
        # A context with 'source' but without 'alignments' must raise.
        with self.assertRaises(NoDataError):
            self.lm3Extractor.get_features(
                self._context(source=self._english_source()))

    def test_unaligned(self):
        # The queried token (index 2) has no aligned source token (None).
        left_ngram, right_ngram = self.lm3Extractor.get_features(
            self._context(source=[u'this', u'is', u'a', u'boy'],
                          alignments=[0, 1, None, 3, 4]))
        self.assertEqual(left_ngram, 0)
        self.assertEqual(right_ngram, 0)

    def test_multi_alignment(self):
        # NOTE(review): despite its name, this test uses the same single
        # alignment (6) and the same expectations as test_get_features;
        # presumably the one-index-per-token format can no longer express
        # a multi-alignment -- confirm, then extend or drop this test.
        (left3, right3) = self.lm3Extractor.get_features(
            self._context(source=self._english_source(),
                          alignments=[None, None, 6, None, None]))
        self.assertEqual(left3, 3)
        self.assertEqual(right3, 2)