Esempio n. 1
0
 def test_pos_no_target(self):
     """get_features must raise NoDataError when the context has no 'target'."""
     source_words = [u'un', u'garcon', u'frappe', u'un', u'chien']
     # Deliberately incomplete context: the 'target' sentence is missing.
     context = {'token': u'hits', 'index': 2, 'source': source_words}
     extractor = POSFeatureExtractor()
     self.assertRaises(NoDataError, extractor.get_features, context)
Esempio n. 2
0
 def test_pos_no_tagger_params(self):
     """A tagger path given without parameter files must raise NoResourceError."""
     source_words = [u'un', u'garcon', u'frappe', u'un', u'chien']
     target_words = [u'a', u'boy', u'hits', u'a', u'dog']
     context = {
         'token': u'hits',
         'index': 2,
         'source': source_words,
         'target': target_words,
     }
     # Only the tagger is configured; neither parameter file is passed.
     extractor = POSFeatureExtractor(
         tagger='../../experiment/tiny_test/tree-tagger')
     self.assertRaises(NoResourceError, extractor.get_features, context)
class POSFeatureExtractorTests(unittest.TestCase):

  # check: POS rerpresentation in context_obj
  # no POS representation
    def setUp(self):
        tagger_root = os.environ['TREE_TAGGER'] if os.environ.has_key('TREE_TAGGER') else ''
        if tagger_root == '':
            sys.stderr('TREE_TAGGER environment variable should be defined so that $TREE_TAGGER/bin/tree-tagger exists\n')
            sys.exit(2)
        self.tagger = tagger_root+'/bin/tree-tagger'
        self.par_src = tagger_root+'/lib/english-utf8.par'
        module_path = os.path.dirname(os.path.realpath(__file__))
        self.par_tg = os.path.join(module_path, 'test_data/spanish-par-linux-3.2-utf8.bin')
        self.extractor_pos = POSFeatureExtractor( tagger=self.tagger, par_file_src=self.par_src, par_file_tg=self.par_tg )
        self.extractor_no_pos = POSFeatureExtractor()


    def test_pos_in_obj(self):
        obj = {'token':u'a', 'index':0, 'target':[u'a',u'boy',u'hits',u'a',u'dog'], 'source':[u'un', u'garcon',u'frappe', u'un', u'chien'], 'target_pos':['DT','NN','VBZ', 'DT', 'NN'], 'source_pos':['DT','NN','VBZ', 'DT', 'NN'], 'alignments':[[0],[1],[2],[3],[4]]}
        obj_no_align = {'token':u'a', 'index':0, 'target':[u'a',u'boy',u'hits',u'a',u'dog'], 'source':[u'un', u'garcon',u'frappe', u'un', u'chien'], 'target_pos':['DT','NN','VBZ', 'DT', 'NN'], 'source_pos':['DT','NN','VBZ', 'DT', 'NN']}
        (t1, s1) = self.extractor_no_pos.get_features(obj)
        (t2, s2) = self.extractor_no_pos.get_features(obj_no_align)
        self.assertEqual(t1, u'DT')
        self.assertEqual(s1, ['DT'])
        self.assertEqual(t2, 'DT')
        self.assertEqual(s2, [])

    def test_tag_on_the_fly(self):
        # tagging on the fly, adding tagging to the object
        obj = {'token':u'niño', 'index':1, 'source':[u'a',u'boy',u'hits',u'a',u'dog'], 'target':[ u'un', u'niño', u'vapulea', u'un', u'perro'], 'alignments':[[0],[1],[2],[3],[4]]}
        (t1, s1) = self.extractor_pos.get_features(obj)
        self.assertEqual(t1, 'NC')
        self.assertEqual(s1, [u'NN'])
        self.assertTrue(obj.has_key('target_pos'))
        self.assertTrue(obj.has_key('source_pos'))

    def test_no_tagger(self):
        # no information for tagging
        err = StringIO.StringIO()
        sys.stderr = err

        obj2 = {'token':u'niño', 'index':1, 'source':[u'a',u'boy',u'hits',u'a',u'dog'], 'target':[ u'un', u'niño', u'vapulea', u'un', u'perro'], 'alignments':[[0],[1],[2],[3],[4]]}
        (t2, s2) = self.extractor_no_pos.get_features(obj2)
        self.assertEqual( err.getvalue(), 'Tagging script and parameter file should be provided\nTagging script and parameter file should be provided\n' )
        err.close()
        self.assertEqual(t2, u'')
        self.assertEqual(s2, [])

    def test_only_target_tagging(self):
        # no alignments
        obj = {'token':u'niño', 'index':1, 'source':[u'a',u'boy',u'hits',u'a',u'dog'], 'target':[ u'un', u'niño', u'vapulea', u'un', u'perro']}
        (t1, s1) = self.extractor_pos.get_features(obj)
        self.assertEqual(t1, 'NC')
        self.assertEqual(s1, [])
Esempio n. 4
0
 def test_pos_no_source(self):
     """get_features must raise NoDataError when the context has no 'source'."""
     def in_module(relative):
         # Resolve a resource path relative to this test module's directory.
         return os.path.join(self.module_path, relative)

     extractor = POSFeatureExtractor(
         tagger=in_module('../../experiment/tiny_test/tree-tagger'),
         par_file_src=in_module(
             '../../experiment/tiny_test/spanish-par-linux-3.2-utf8.bin'),
         par_file_tg=in_module('../../experiment/tiny_test/english-utf8.par'))
     target_words = [u'a', u'boy', u'hits', u'a', u'dog']
     # Deliberately incomplete context: the 'source' sentence is missing.
     context = {'token': u'hits', 'index': 2, 'target': target_words}
     self.assertRaises(NoDataError, extractor.get_features, context)
Esempio n. 5
0
 def test_pos_no_tagger_params(self):
     """A tagger path given without parameter files must raise NoResourceError."""
     tagger_path = '../../experiment/tiny_test/tree-tagger'
     context = {
         'token': u'hits',
         'index': 2,
         'source': [u'un', u'garcon', u'frappe', u'un', u'chien'],
         'target': [u'a', u'boy', u'hits', u'a', u'dog'],
     }
     # Only the tagger is configured; neither parameter file is passed.
     extractor = POSFeatureExtractor(tagger=tagger_path)
     self.assertRaises(NoResourceError, extractor.get_features, context)
Esempio n. 6
0
 def test_pos_no_tagger(self):
     """An extractor built with no arguments must raise NoResourceError."""
     context = {
         'token': u'hits',
         'index': 2,
         'source': [u'un', u'garcon', u'frappe', u'un', u'chien'],
         'target': [u'a', u'boy', u'hits', u'a', u'dog'],
     }
     # No tagger and no parameter files are supplied to the constructor.
     extractor = POSFeatureExtractor()
     self.assertRaises(NoResourceError, extractor.get_features, context)
Esempio n. 7
0
 def test_pos_no_source(self):
     """get_features must raise NoDataError when the context has no 'source'."""
     tagger_path = os.path.join(self.module_path, '../../experiment/tiny_test/tree-tagger')
     src_params = os.path.join(self.module_path, '../../experiment/tiny_test/spanish-par-linux-3.2-utf8.bin')
     tg_params = os.path.join(self.module_path, '../../experiment/tiny_test/english-utf8.par')
     extractor = POSFeatureExtractor(
         tagger=tagger_path,
         par_file_src=src_params,
         par_file_tg=tg_params)
     # Deliberately incomplete context: the 'source' sentence is missing.
     context = {
         'token': u'hits',
         'index': 2,
         'target': [u'a', u'boy', u'hits', u'a', u'dog'],
     }
     self.assertRaises(NoDataError, extractor.get_features, context)
class POSFeatureExtractorTests(unittest.TestCase):

    # check: POS rerpresentation in context_obj
    # no POS representation
    def setUp(self):
        tagger_root = os.environ['TREE_TAGGER'] if os.environ.has_key(
            'TREE_TAGGER') else ''
        if tagger_root == '':
            sys.stderr(
                'TREE_TAGGER environment variable should be defined so that $TREE_TAGGER/bin/tree-tagger exists\n'
            )
            sys.exit(2)
        self.tagger = tagger_root + '/bin/tree-tagger'
        self.par_src = tagger_root + '/lib/english-utf8.par'
        module_path = os.path.dirname(os.path.realpath(__file__))
        self.par_tg = os.path.join(module_path,
                                   'test_data/spanish-par-linux-3.2-utf8.bin')
        self.extractor_pos = POSFeatureExtractor(tagger=self.tagger,
                                                 par_file_src=self.par_src,
                                                 par_file_tg=self.par_tg)
        self.extractor_no_pos = POSFeatureExtractor()

    def test_pos_in_obj(self):
        obj = {
            'token': u'a',
            'index': 0,
            'target': [u'a', u'boy', u'hits', u'a', u'dog'],
            'source': [u'un', u'garcon', u'frappe', u'un', u'chien'],
            'target_pos': ['DT', 'NN', 'VBZ', 'DT', 'NN'],
            'source_pos': ['DT', 'NN', 'VBZ', 'DT', 'NN'],
            'alignments': [[0], [1], [2], [3], [4]]
        }
        obj_no_align = {
            'token': u'a',
            'index': 0,
            'target': [u'a', u'boy', u'hits', u'a', u'dog'],
            'source': [u'un', u'garcon', u'frappe', u'un', u'chien'],
            'target_pos': ['DT', 'NN', 'VBZ', 'DT', 'NN'],
            'source_pos': ['DT', 'NN', 'VBZ', 'DT', 'NN']
        }
        (t1, s1) = self.extractor_no_pos.get_features(obj)
        (t2, s2) = self.extractor_no_pos.get_features(obj_no_align)
        self.assertEqual(t1, u'DT')
        self.assertEqual(s1, ['DT'])
        self.assertEqual(t2, 'DT')
        self.assertEqual(s2, [])

    def test_tag_on_the_fly(self):
        # tagging on the fly, adding tagging to the object
        obj = {
            'token': u'niño',
            'index': 1,
            'source': [u'a', u'boy', u'hits', u'a', u'dog'],
            'target': [u'un', u'niño', u'vapulea', u'un', u'perro'],
            'alignments': [[0], [1], [2], [3], [4]]
        }
        (t1, s1) = self.extractor_pos.get_features(obj)
        self.assertEqual(t1, 'NC')
        self.assertEqual(s1, [u'NN'])
        self.assertTrue(obj.has_key('target_pos'))
        self.assertTrue(obj.has_key('source_pos'))

    def test_no_tagger(self):
        # no information for tagging
        err = StringIO.StringIO()
        sys.stderr = err

        obj2 = {
            'token': u'niño',
            'index': 1,
            'source': [u'a', u'boy', u'hits', u'a', u'dog'],
            'target': [u'un', u'niño', u'vapulea', u'un', u'perro'],
            'alignments': [[0], [1], [2], [3], [4]]
        }
        (t2, s2) = self.extractor_no_pos.get_features(obj2)
        self.assertEqual(
            err.getvalue(),
            'Tagging script and parameter file should be provided\nTagging script and parameter file should be provided\n'
        )
        err.close()
        self.assertEqual(t2, u'')
        self.assertEqual(s2, [])

    def test_only_target_tagging(self):
        # no alignments
        obj = {
            'token': u'niño',
            'index': 1,
            'source': [u'a', u'boy', u'hits', u'a', u'dog'],
            'target': [u'un', u'niño', u'vapulea', u'un', u'perro']
        }
        (t1, s1) = self.extractor_pos.get_features(obj)
        self.assertEqual(t1, 'NC')
        self.assertEqual(s1, [])