Ejemplo n.º 1
0
class TestThirdParty(unittest.TestCase):
    """Exercise document parsing with two token normalizers taken from the
    ``stemmer`` application configuration.

    """
    def setUp(self):
        """Load the configuration and build a non-shared instance factory."""
        self.config = AppConfig('stemmer')
        self.fac = ImportConfigFactory(self.config, shared=False)

    def test_stemmer(self):
        """Parse one sentence with each normalizer and compare the tokens."""
        # normalizers come from their own factory; parsers come from self.fac
        normalizers = ImportConfigFactory(self.config)
        text = 'Bobby is fast and runs with dogs, armies, and sheep from the police.'

        # first pass: the ``nonorm_token_normalizer`` entry
        plain_parser = self.fac.instance(
            'doc_parser',
            token_normalizer=normalizers.instance('nonorm_token_normalizer'))
        plain_doc = plain_parser.parse(text)
        plain_norms = tuple(plain_doc.norm_token_iter())
        expected_plain = (
            'Bobby', 'is', 'fast', 'and', 'runs', 'with', 'dogs', ',',
            'armies', ',', 'and', 'sheep', 'from', 'the', 'police', '.')
        self.assertEqual(expected_plain, plain_norms)
        expected_lemmas = (
            'Bobby', 'be', 'fast', 'and', 'run', 'with', 'dog', ',', 'army',
            ',', 'and', 'sheep', 'from', 'the', 'police', '.')
        lemmas = tuple(tok.lemma_ for tok in plain_doc.token_iter())
        self.assertEqual(expected_lemmas, lemmas)

        # second pass: the ``stemmer_token_normalizer`` entry
        stem_parser = self.fac.instance(
            'doc_parser',
            token_normalizer=normalizers.instance('stemmer_token_normalizer'))
        stem_doc = stem_parser.parse(text)
        stem_norms = tuple(stem_doc.norm_token_iter())
        expected_stems = (
            'bobbi', 'is', 'fast', 'and', 'run', 'with', 'dog', ',', 'armi',
            ',', 'and', 'sheep', 'from', 'the', 'polic', '.')
        self.assertEqual(expected_stems, stem_norms)
Ejemplo n.º 2
0
 def create_facade(self) -> ModelFacade:
     """Build a new facade, either from configuration or from a saved model.

     When ``config_overwrites`` is set, it is merged into a deep copy of the
     configuration so the original configuration is left untouched.  When
     ``model_path`` is ``None`` the facade is instantiated by name from a
     new configuration factory; otherwise the facade class is resolved by
     name and the facade is loaded from ``model_path``.

     :return: the new facade, which is also registered in
              ``dealloc_resources``

     """
     # a new (non-shared) facade is created on every call because it will
     # get deallocated after the work is complete
     conf = self.config
     model_path = self.model_path
     if self.config_overwrites is not None:
         conf = cp.deepcopy(conf)
         conf.merge(self.config_overwrites)
     if model_path is not None:
         if logger.isEnabledFor(logging.INFO):
             logger.info(f'loading model from {model_path}')
         # the factory is used only inside the ``dealloc`` context to
         # resolve the facade class
         with dealloc(ImportConfigFactory(
                 conf, **self.config_factory_args)) as factory:
             facade_class = factory.get_class(self.facade_name)
         facade = facade_class.load_from_path(model_path)
         if logger.isEnabledFor(logging.DEBUG):
             logger.debug(f'created facade: {type(facade)} '
                          f'from path: {model_path}')
         self.dealloc_resources.append(facade)
         return facade
     # no saved model: the factory stays alive with the facade, so both are
     # registered for later deallocation
     factory = ImportConfigFactory(conf, **self.config_factory_args)
     facade = factory.instance(self.facade_name)
     if logger.isEnabledFor(logging.DEBUG):
         logger.debug(f'created facade: {facade}')
     self.dealloc_resources.extend((factory, facade))
     return facade
Ejemplo n.º 3
0
 def test_filter_features(self):
     """Compare token norms produced under three token normalizer entries."""
     normalizers = ImportConfigFactory(self.config)
     text = 'I am a citizen of the United States of America.'

     # ``feature_no_filter_token_normalizer``; this parser is invoked by
     # calling it directly, the later ones through ``parse``
     parser = self.fac(
         'default_doc_parser',
         token_normalizer=normalizers.instance(
             'feature_no_filter_token_normalizer'))
     tokens = parser(text).token_iter()
     self.assertEqual(
         ('I', 'am', 'a', 'citizen', 'of',
          'the United States of America', '.'),
         tuple(tok.norm for tok in tokens))

     # ``feature_default_filter_token_normalizer``
     parser = self.fac(
         'default_doc_parser',
         token_normalizer=normalizers.instance(
             'feature_default_filter_token_normalizer'))
     tokens = parser.parse(text).token_iter()
     self.assertEqual(
         ('I', 'am', 'citizen', 'of', 'the United States of America'),
         tuple(tok.norm for tok in tokens))

     # ``feature_stop_filter_token_normalizer``
     parser = self.fac(
         'default_doc_parser',
         token_normalizer=normalizers.instance(
             'feature_stop_filter_token_normalizer'))
     tokens = parser.parse(text).token_iter()
     self.assertEqual(
         ('citizen', 'the United States of America'),
         tuple(tok.norm for tok in tokens))
Ejemplo n.º 4
0
 def setUp(self):
     """Create the shared factory, the sample sentences and, unless the
     class defines ``NO_VECTORIZER``, the feature vectorizer manager.

     The configuration path is the class's ``CONF_FILE`` when defined,
     otherwise ``test-resources/features.conf``.

     """
     has_conf_file = hasattr(self.__class__, 'CONF_FILE')
     path = self.CONF_FILE if has_conf_file else 'test-resources/features.conf'
     self.fac = ImportConfigFactory(AppConfig(path), shared=True)
     self.sent_text = 'I am a citizen of the United States of America.'
     self.def_parse = (
         'I', 'am', 'a', 'citizen', 'of',
         'the United States of America', '.')
     if not hasattr(self.__class__, 'NO_VECTORIZER'):
         self.vmng = self.fac.instance('feature_vectorizer_manager')
     self.sent_text2 = f'{self.sent_text} My name is Paul Landes.'
Ejemplo n.º 5
0
class TestFeatureVectorization(unittest.TestCase):
    """Base fixture and tensor helpers for feature vectorization tests."""

    def setUp(self):
        """Create the shared factory, the sample sentences and, unless the
        class defines ``NO_VECTORIZER``, the feature vectorizer manager.

        The configuration path is the class's ``CONF_FILE`` when defined,
        otherwise ``test-resources/features.conf``.

        """
        has_conf_file = hasattr(self.__class__, 'CONF_FILE')
        path = (self.CONF_FILE if has_conf_file
                else 'test-resources/features.conf')
        self.fac = ImportConfigFactory(AppConfig(path), shared=True)
        self.sent_text = 'I am a citizen of the United States of America.'
        self.def_parse = (
            'I', 'am', 'a', 'citizen', 'of',
            'the United States of America', '.')
        if not hasattr(self.__class__, 'NO_VECTORIZER'):
            self.vmng = self.fac.instance('feature_vectorizer_manager')
        self.sent_text2 = f'{self.sent_text} My name is Paul Landes.'

    def assertTensorEquals(self, should, tensor):
        """Assert that two tensors have the same shape and compare equal.

        :param should: the expected tensor

        :param tensor: the tensor under test

        :raises RuntimeError: re-raised, after logging, when the comparison
                              itself fails

        """
        self.assertEqual(should.shape, tensor.shape)
        try:
            tensors_match = TorchConfig.equal(should, tensor)
        except RuntimeError as err:
            logger.error(f'error comparing {should} with {tensor}')
            raise err
        # log both tensors before failing so the mismatch is visible
        if not tensors_match:
            logger.error(f'tensor {should} does not equal {tensor}')
        self.assertTrue(tensors_match)

    def _to_sparse(self, arr: Tensor):
        """Return element ``[0][0]`` of the sparse conversion of ``arr``."""
        converted = SparseTensorFeatureContext.to_sparse(arr)
        return converted[0][0]
Ejemplo n.º 6
0
 def setUp(self):
     """Create the shared factory from the class's ``CONFIG`` path and set
     the sample sentences used by the tests.

     """
     self.fac = ImportConfigFactory(AppConfig(self.CONFIG), shared=True)
     self.sent_text = 'I am a citizen of the United States of America.'
     self.def_parse = (
         'I', 'am', 'a', 'citizen', 'of',
         'the United States of America', '.')
     self.sent_text2 = f'{self.sent_text} My name is Paul Landes.'
Ejemplo n.º 7
0
 def recreate_factory(self):
     """Rebuild ``self.config`` and ``self.fac``.

     A class that defines ``CONF_FILE`` gets an ``AppEnvConfig`` created
     with ``app_root`` set to ``.``; any other class gets an ``AppConfig``
     read from ``test-resources/<CONF>.conf``.

     """
     if not hasattr(self.__class__, 'CONF_FILE'):
         self.config = AppConfig(f'test-resources/{self.CONF}.conf')
     else:
         self.config = AppEnvConfig(self.CONF_FILE, env={'app_root': '.'})
     self.fac = ImportConfigFactory(self.config)
Ejemplo n.º 8
0
 def test_feature(self):
     """Compare the parsed document with the stored JSON expectation, then
     check the token norms produced with ``nonorm_token_normalizer``.

     """
     normalizers = ImportConfigFactory(self.config, shared=False)
     normalizer = normalizers.instance('default_token_normalizer')
     parser = self.fac('default_doc_parser', token_normalizer=normalizer)
     self.assertEqual(
         'MapTokenNormalizer: embed=True, reload=False, lemma_token_mapper',
         str(normalizer))
     feature_doc = parser(self.sent)
     actual = feature_doc.asdict()
     # developer switch: set to True to overwrite the expected-output file
     # with the current parse
     regenerate_expected = False
     if regenerate_expected:
         with open(self.config.feature_path, 'w') as f:
             f.write(feature_doc.asjson(indent=4))
     with open(self.config.feature_path) as f:
         expected = json.load(f)
     self.assertEqual(rec_sort(expected), rec_sort(actual))

     normalizer = normalizers.instance('nonorm_token_normalizer')
     parser = self.fac('default_doc_parser', token_normalizer=normalizer)
     norms = tuple(tok.norm for tok in parser(self.sent).token_iter())
     self.assertEqual(('Dan', 'throws', 'the', 'ball', '.'), norms)
Ejemplo n.º 9
0
def factory():
    """Return a configuration factory for the MNIST test configuration.

    The configuration is read from ``test-resources/mnist/mnist.conf`` with
    ``app_root`` set to ``.``; the factory is created with ``reload=False``.

    """
    mnist_config = AppConfig(
        'test-resources/mnist/mnist.conf', env={'app_root': '.'})
    return ImportConfigFactory(mnist_config, reload=False)
Ejemplo n.º 10
0
 def setUp(self):
     """Create the shared factory from ``test-resources/dl.conf`` and set
     the sample sentence.

     """
     ini_config = ImportIniConfig('test-resources/dl.conf')
     self.fac = ImportConfigFactory(ini_config, shared=True)
     self.sent_text = 'I am a citizen of the United States of America.'
Ejemplo n.º 11
0
    def config_factory(self):
        """Return a new configuration factory for creating facades.

        The factory is built from this object's configuration and the
        keyword arguments stored in ``_config_factory_params``.

        """
        factory_params = self._config_factory_params
        return ImportConfigFactory(self.config, **factory_params)
Ejemplo n.º 12
0
 def setUp(self):
     """Load the ``stemmer`` configuration and build a non-shared factory."""
     stemmer_config = AppConfig('stemmer')
     self.config = stemmer_config
     self.fac = ImportConfigFactory(stemmer_config, shared=False)
Ejemplo n.º 13
0
 def setUp(self):
     """Create the factory from ``test-resources/transformer.conf`` and the
     feature vectorizer manager used by the tests.

     """
     transformer_config = AppConfig('test-resources/transformer.conf')
     self.fac = ImportConfigFactory(transformer_config)
     self.vmng = self.fac.instance('feature_vectorizer_manager')
Ejemplo n.º 14
0
 def setUp(self):
     """Initialize ``TorchConfig`` and create the shared, non-reloading
     factory for the iris test configuration.

     """
     TorchConfig.init()
     self.config = AppConfig(
         'test-resources/iris/iris.conf', env={'app_root': '.'})
     self.fac = ImportConfigFactory(self.config, shared=True, reload=False)
Ejemplo n.º 15
0
class TestWordPieceTokenization(unittest.TestCase):
    """Check how the transformer vectorizers map tokens to word pieces and
    the shape of the arrays they produce.

    """
    def setUp(self):
        """Create the factory from ``test-resources/transformer.conf`` and
        the feature vectorizer manager used by the tests.

        """
        transformer_config = AppConfig('test-resources/transformer.conf')
        self.fac = ImportConfigFactory(transformer_config)
        self.vmng = self.fac.instance('feature_vectorizer_manager')

    def _test_tok(self, vec_name: str, sent: str, should_tok_len: int,
                  should: Tuple[Tuple[str, Tuple[str]]]):
        """Tokenize ``sent`` with one vectorizer and compare the result.

        :param vec_name: the key of the vectorizer in the manager

        :param sent: the text to parse

        :param should_tok_len: the expected size of the second dimension of
                               the transformed array

        :param should: for each expected sentence, the expected
                       ``(token norm, word pieces)`` pairs

        """
        doc = self.vmng.parse(sent)
        vectorizer = self.vmng[vec_name]
        tokenized = vectorizer.tokenize(doc)
        self.assertEqual(TokenizedFeatureDocument, type(tokenized))
        sent_maps = tokenized.map_word_pieces_to_tokens()
        self.assertEqual(len(should), len(sent_maps))
        for sent_map, expected_pairs in zip(sent_maps, should):
            # the sentence entry is looked up but not compared; only its
            # token map is checked
            _feature_sent = sent_map['sent']
            actual_pairs = sent_map['map']
            for actual, expected in zip(actual_pairs, expected_pairs):
                token, pieces = actual
                expected_norm, expected_pieces = expected
                self.assertEqual(FeatureToken, type(token))
                self.assertEqual(str, type(pieces[0]))
                self.assertEqual(token.norm, expected_norm)
                self.assertEqual(pieces, expected_pieces)
        arr = vectorizer.transform(doc)
        expected_shape = (len(should), should_tok_len, 768)
        self.assertEqual(expected_shape, tuple(arr.shape))

    def _test_sent_1(self, vec_name: str):
        """Two sentences; ``gunships`` is expected to split in two pieces."""
        text = 'The gunships are nearer than you think. Their heading is changing.'
        first = (
            ('The', ('The', )),
            ('gunships', ('guns', 'hips')),
            ('are', ('are', )),
            ('nearer', ('nearer', )),
            ('than', ('than', )),
            ('you', ('you', )),
            ('think', ('think', )),
            ('.', ('.', )),
        )
        second = (
            ('Their', ('Their', )),
            ('heading', ('heading', )),
            ('is', ('is', )),
            ('changing', ('changing', )),
            ('.', ('.', )),
        )
        self._test_tok(vec_name, text, 11, (first, second))

    def _test_sent_2(self, vec_name: str):
        """Two sentences; the split token is in the second sentence."""
        text = 'The guns are near. Their heading is changing to the gunships.'
        first = (
            ('The', ('The', )),
            ('guns', ('guns', )),
            ('are', ('are', )),
            ('near', ('near', )),
            ('.', ('.', )),
        )
        second = (
            ('Their', ('Their', )),
            ('heading', ('heading', )),
            ('is', ('is', )),
            ('changing', ('changing', )),
            ('to', ('to', )),
            ('the', ('the', )),
            ('gunships', ('guns', 'hips')),
            ('.', ('.', )),
        )
        self._test_tok(vec_name, text, 11, (first, second))

    def _test_sent_3(self, vec_name: str):
        """A single sentence ending in the split token."""
        text = 'Their heading is changing to the gunships.'
        only = (
            ('Their', ('Their', )),
            ('heading', ('heading', )),
            ('is', ('is', )),
            ('changing', ('changing', )),
            ('to', ('to', )),
            ('the', ('the', )),
            ('gunships', ('guns', 'hips')),
            ('.', ('.', )),
        )
        self._test_tok(vec_name, text, 11, (only, ))

    def _test_sent_4(self, vec_name: str):
        """Three sentences; the expected pieces for ``schooner`` and
        ``gridlocking``, and the expected length, depend on whether the
        vectorizer is ``transformer_roberta``.

        """
        is_roberta = vec_name == 'transformer_roberta'
        text = (
            'The guns are near. Their heading is changing to the gunships.'
            ' The United States schooner created a gridlocking situation.')
        if is_roberta:
            schooner = ('schooner', ('sch', 'oon', 'er'))
            gridlocking = ('gridlocking', ('grid', 'locking'))
            tok_len = 14
        else:
            schooner = ('schooner', ('schooner', ))
            gridlocking = ('gridlocking', ('grid', 'lock', 'ing'))
            tok_len = 13
        first = (
            ('The', ('The', )),
            ('guns', ('guns', )),
            ('are', ('are', )),
            ('near', ('near', )),
            ('.', ('.', )),
        )
        second = (
            ('Their', ('Their', )),
            ('heading', ('heading', )),
            ('is', ('is', )),
            ('changing', ('changing', )),
            ('to', ('to', )),
            ('the', ('the', )),
            ('gunships', ('guns', 'hips')),
            ('.', ('.', )),
        )
        third = (
            ('The', ('The', )),
            ('United States', ('United', 'States')),
            schooner,
            ('created', ('created', )),
            ('a', ('a', )),
            gridlocking,
            ('situation', ('situation', )),
            ('.', ('.', )),
        )
        self._test_tok(vec_name, text, tok_len, (first, second, third))

    def test_bert(self):
        """Run all sentence checks against ``transformer_bert``."""
        name = 'transformer_bert'
        self._test_sent_1(name)
        self._test_sent_2(name)
        self._test_sent_3(name)
        self._test_sent_4(name)

    def test_roberta(self):
        """Run all sentence checks against ``transformer_roberta``."""
        name = 'transformer_roberta'
        self._test_sent_1(name)
        self._test_sent_2(name)
        self._test_sent_3(name)
        self._test_sent_4(name)

    def test_distilbert(self):
        """Run all sentence checks against ``transformer_distilbert``."""
        name = 'transformer_distilbert'
        self._test_sent_1(name)
        self._test_sent_2(name)
        self._test_sent_3(name)
        self._test_sent_4(name)
Ejemplo n.º 16
0
#!/usr/bin/env python

from io import StringIO
from zensols.config import ImportIniConfig, ImportConfigFactory
from zensols.nlp import FeatureDocument, FeatureDocumentParser

# In-memory INI configuration read by the script below through ``StringIO``.
# It imports ``resources/obj.conf`` from ``zensols.nlp`` and then overrides
# ``token_feature_ids`` in the ``doc_parser`` section.
# NOTE(review): the comment embedded in the text says "norm, ent" but the
# configured ids are ``ent_`` and ``tag_`` -- confirm which is intended.
CONFIG = """
[import]
sections = list: imp_conf

# import the ``zensols.nlp`` library
[imp_conf]
type = importini
config_files = list: resource(zensols.nlp): resources/obj.conf

# override the parse to keep only the norm, ent
[doc_parser]
token_feature_ids = eval: set('ent_ tag_'.split())
"""

if __name__ == '__main__':
    # build the factory from the in-memory configuration text
    ini_config = ImportIniConfig(StringIO(CONFIG))
    fac = ImportConfigFactory(ini_config)
    doc_parser: FeatureDocumentParser = fac('doc_parser')
    doc: FeatureDocument = doc_parser(
        'He was George Washington and first president of the United States.')
    # write out every token of the parsed document
    for token in doc.tokens:
        token.write()
Ejemplo n.º 17
0
class SqliteTestCase(unittest.TestCase):
    """Test case that exercises a database persister created by name from
    the application configuration.

    """
    def setUp(self):
        """Delete any existing ``./target`` directory and create the factory,
        so each test starts without files left by a previous run.

        """
        self.config = AppConfig.instance()
        self.target_path = Path('./target')
        if self.target_path.exists():
            shutil.rmtree(self.target_path)
        self.fac = ImportConfigFactory(self.config)

    @staticmethod
    def init_logging():
        """Configure root logging at ``INFO`` and this module's logger at
        ``DEBUG``.

        """
        logging.basicConfig(level=logging.INFO)
        logger.setLevel(logging.DEBUG)

    def _test_inst_persister(self):
        """Walk the ``inst_db_persister`` through insert, read, update and
        delete operations using ``Person`` as the row factory.

        The leading underscore keeps the default unittest loader from
        collecting this method as a test.

        :return: the persister that was exercised

        """
        persister = self.fac.instance('inst_db_persister', row_factory=Person)
        db_path = Path(self.target_path, 'sql-test2.db')
        # the database file is expected to appear only after the first write
        self.assertFalse(db_path.exists())
        self.assertEqual(0, persister.get_count())
        self.assertEqual(1, persister.insert_row('paul', 23))
        self.assertEqual(2, persister.insert_row('sue', 33))
        self.assertTrue(db_path.exists())
        peeps = persister.get()
        # ``assertTrue(2, len(peeps))`` would always pass since ``2`` is the
        # tested expression; compare the length instead
        self.assertEqual(2, len(peeps))
        self.assertEqual({'id': 1, 'name': 'paul', 'age': 23}, peeps[0].get_attrs())
        self.assertEqual({'id': 2, 'name': 'sue', 'age': 33}, peeps[1].get_attrs())
        peeps = persister.get()
        self.assertEqual((1, 'paul', 23), peeps[0].get_row())
        self.assertEqual(('paul', 23), peeps[0].get_insert_row())
        peeps = persister.get()
        self.assertEqual('id: 1, name: paul, age: 23', str(peeps[0]))
        self.assertEqual('id: 2, name: sue, age: 33', str(peeps[1]))
        peeps = persister.get()
        self.assertEqual('id: 1, name: paul, age: 23', str(peeps[0]))
        self.assertEqual('id: 2, name: sue, age: 33', str(peeps[1]))
        new_peeps = (('bob', 42), ('jane', 90),)
        self.assertEqual(4, persister.insert_rows(new_peeps))
        peeps = persister.get()
        self.assertEqual({'id': 3, 'name': 'bob', 'age': 42}, peeps[0].get_attrs())
        self.assertEqual({'id': 4, 'name': 'jane', 'age': 90}, peeps[1].get_attrs())
        # inserting a bean is expected to assign its ``id``
        bean = Person('kyle', 52)
        self.assertEqual(None, bean.id)
        self.assertEqual(5, persister.insert(bean))
        self.assertEqual(5, bean.id)
        self.assertEqual(((5,),), persister.execute_by_name('people_count', row_factory='tuple'))
        peep = persister.get_by_id(2)
        self.assertEqual('id: 2, name: sue, age: 33', str(peep))
        peep = persister.get_by_id(5)
        self.assertEqual('id: 5, name: kyle, age: 52', str(peep))
        self.assertEqual(None, persister.get_by_id(100))
        self.assertTrue(persister.exists(1))
        self.assertTrue(persister.exists(5))
        self.assertFalse(persister.exists(100))
        peep = persister.get_by_id(2)
        peep.age = 41
        # the return value is deliberately not asserted: its expected value
        # cannot be determined from this test; the update is verified by
        # re-reading the row below
        persister.update(peep)
        peep = persister.get_by_id(2)
        self.assertEqual('id: 2, name: sue, age: 41', str(peep))
        self.assertTrue(persister.exists(2))
        # as with the update, the delete is verified by the ``exists`` check
        # that follows rather than by its return value
        persister.delete(2)
        self.assertFalse(persister.exists(2))
        self.assertEqual(((4,),), persister.execute_by_name('people_count', row_factory='tuple'))
        self.assertEqual(4, persister.get_count())
        self.assertEqual((1, 3, 4, 5), tuple(persister.get_keys()))
        new_peeps = (Person('jake', 62), Person('christina', 22),)
        self.assertEqual(7, persister.insert_beans(new_peeps))
        peeps = persister.get()
        self.assertEqual({'id': 6, 'name': 'jake', 'age': 62}, peeps[2].get_attrs())
        self.assertEqual({'id': 7, 'name': 'christina', 'age': 22}, peeps[1].get_attrs())
        return persister
Ejemplo n.º 18
0
 def setUp(self):
     """Create the default configuration, a non-shared factory, the default
     document parser and the sample sentence.

     """
     # effectively removes the limit on assertion diff output
     self.maxDiff = 999999
     self.config = AppConfig()
     self.fac = ImportConfigFactory(self.config, shared=False)
     self.doc_parser = self.fac('default_doc_parser')
     self.sent = 'Dan throws the ball.'
Ejemplo n.º 19
0
 def setUp(self):
     """Delete any existing ``./target`` directory and create the factory,
     so each test starts without files left by a previous run.

     """
     self.config = AppConfig.instance()
     target = Path('./target')
     self.target_path = target
     if target.exists():
         shutil.rmtree(target)
     self.fac = ImportConfigFactory(self.config)
Ejemplo n.º 20
0
 def _parser(self, conf, s):
     """Parse ``s`` with the ``doc_parser`` of a named test configuration.

     :param conf: the prefix of the configuration file name, which is
                  expanded to ``test-resources/<conf>-comp.conf``

     :param s: the text to parse

     :return: whatever the document parser returns for ``s``

     """
     ini_config = ImportIniConfig(f'test-resources/{conf}-comp.conf')
     parser = ImportConfigFactory(ini_config)('doc_parser')
     return parser(s)
Ejemplo n.º 21
0
def factory(reload=True):
    """Return a shared configuration factory for the adult test
    configuration.

    The configuration is read from ``test-resources/adult/adult.conf`` with
    ``app_root`` set to ``.``.

    :param reload: NOTE(review): accepted but not used by this function --
                   confirm whether it should be passed to the factory

    """
    adult_config = AppConfig(
        'test-resources/adult/adult.conf', env={'app_root': '.'})
    return ImportConfigFactory(adult_config, shared=True)
Ejemplo n.º 22
0
 def __post_init__(self):
     """Instantiate every entry of ``mapper_class_list`` through a new
     configuration factory and store the results, in order, as the
     ``mappers`` tuple.

     """
     mapper_factory = ImportConfigFactory(self.config, reload=self.reload)
     self.mappers = tuple(
         mapper_factory.instance(name) for name in self.mapper_class_list)