Esempio n. 1
0
class TestThirdParty(unittest.TestCase):
    """Compare the output of the ``nonorm_token_normalizer`` and
    ``stemmer_token_normalizer`` configured instances on the same sentence.

    """
    def setUp(self):
        self.config = AppConfig('stemmer')
        # the document parser factory does not share instances, so every
        # ``instance`` call below yields a parser with its own normalizer
        self.fac = ImportConfigFactory(self.config, shared=False)

    def test_stemmer(self):
        """Parse one sentence with each token normalizer and check the
        normalized tokens (and, for the non-normalizing parser, the lemmas).

        """
        text = 'Bobby is fast and runs with dogs, armies, and sheep from the police.'
        normalizer_factory = ImportConfigFactory(self.config)

        def parse_with(normalizer_name):
            # create a parser wired to the named normalizer and parse the text
            normalizer = normalizer_factory.instance(normalizer_name)
            parser = self.fac.instance(
                'doc_parser', token_normalizer=normalizer)
            return parser.parse(text)

        # no normalization: tokens come back verbatim
        plain_doc = parse_with('nonorm_token_normalizer')
        plain_norms = tuple(plain_doc.norm_token_iter())
        self.assertEqual(
            ('Bobby', 'is', 'fast', 'and', 'runs', 'with', 'dogs', ',',
             'armies', ',', 'and', 'sheep', 'from', 'the', 'police', '.'),
            plain_norms)
        plain_lemmas = tuple(tok.lemma_ for tok in plain_doc.token_iter())
        self.assertEqual(
            ('Bobby', 'be', 'fast', 'and', 'run', 'with', 'dog', ',', 'army',
             ',', 'and', 'sheep', 'from', 'the', 'police', '.'),
            plain_lemmas)

        # stemming normalizer: tokens are reduced to their stems
        stemmed_doc = parse_with('stemmer_token_normalizer')
        stemmed_norms = tuple(stemmed_doc.norm_token_iter())
        self.assertEqual(
            ('bobbi', 'is', 'fast', 'and', 'run', 'with', 'dog', ',', 'armi',
             ',', 'and', 'sheep', 'from', 'the', 'polic', '.'),
            stemmed_norms)
Esempio n. 2
0
 def test_filter_features(self):
     """Check which normalized tokens remain for each of the three
     ``feature_*_filter_token_normalizer`` configurations.

     """
     text = 'I am a citizen of the United States of America.'
     tnfac = ImportConfigFactory(self.config)

     def parser_for(normalizer_name):
         # default document parser wired to the named token normalizer
         return self.fac(
             'default_doc_parser',
             token_normalizer=tnfac.instance(normalizer_name))

     def norms_of(tokens):
         return tuple(tok.norm for tok in tokens)

     # no filtering: every token is kept
     # (this case invokes the parser directly rather than through ``parse``)
     dp = parser_for('feature_no_filter_token_normalizer')
     tokens = dp(text).token_iter()
     self.assertEqual(
         ('I', 'am', 'a', 'citizen', 'of',
          'the United States of America', '.'),
         norms_of(tokens))

     # default filter
     dp = parser_for('feature_default_filter_token_normalizer')
     tokens = dp.parse(text).token_iter()
     self.assertEqual(
         ('I', 'am', 'citizen', 'of', 'the United States of America'),
         norms_of(tokens))

     # stop word filter
     dp = parser_for('feature_stop_filter_token_normalizer')
     tokens = dp.parse(text).token_iter()
     self.assertEqual(
         ('citizen', 'the United States of America'),
         norms_of(tokens))
Esempio n. 3
0
 def create_facade(self) -> ModelFacade:
     """Create a new instance of the facade.

     When ``model_path`` is ``None`` the facade is instantiated from the
     configuration by name; otherwise the facade class is resolved from the
     configuration and the facade is loaded from ``model_path``.  Any
     ``config_overwrites`` are merged into a copy of the configuration
     first.

     :return: the newly created facade, which is also registered in
              ``dealloc_resources``

     """
     # a new (non-shared) instance of the facade must be created since it
     # will get deallocated after complete
     facade: ModelFacade
     config = self.config
     model_path = self.model_path
     if self.config_overwrites is not None:
         # merge into a deep copy so this instance's configuration is left
         # untouched
         config = cp.deepcopy(config)
         config.merge(self.config_overwrites)

     if model_path is None:
         factory = ImportConfigFactory(config, **self.config_factory_args)
         facade = factory.instance(self.facade_name)
         if logger.isEnabledFor(logging.DEBUG):
             logger.debug(f'created facade: {facade}')
         # both the factory and the facade are tracked for deallocation
         self.dealloc_resources.extend((factory, facade))
         return facade

     if logger.isEnabledFor(logging.INFO):
         logger.info(f'loading model from {model_path}')
     # the factory is only needed to resolve the facade class
     with dealloc(ImportConfigFactory(
             config, **self.config_factory_args)) as factory:
         facade_class: Type[ModelFacade] = factory.get_class(
             self.facade_name)
     facade = facade_class.load_from_path(model_path)
     if logger.isEnabledFor(logging.DEBUG):
         logger.debug(f'created facade: {type(facade)} ' +
                      f'from path: {model_path}')
     self.dealloc_resources.append(facade)
     return facade
Esempio n. 4
0
class TestFeatureVectorization(unittest.TestCase):
    """Shared fixture and helpers for feature vectorization tests.

    The class attribute ``CONF_FILE``, when defined, overrides the
    configuration file path; defining ``NO_VECTORIZER`` skips creation of
    the vectorizer manager in :meth:`setUp`.

    """
    def setUp(self):
        test_class = self.__class__
        if hasattr(test_class, 'CONF_FILE'):
            conf_path = self.CONF_FILE
        else:
            conf_path = 'test-resources/features.conf'
        self.fac = ImportConfigFactory(AppConfig(conf_path), shared=True)
        self.sent_text = 'I am a citizen of the United States of America.'
        self.def_parse = (
            'I', 'am', 'a', 'citizen', 'of',
            'the United States of America', '.')
        if not hasattr(test_class, 'NO_VECTORIZER'):
            self.vmng = self.fac.instance('feature_vectorizer_manager')
        self.sent_text2 = self.sent_text + " My name is Paul Landes."

    def assertTensorEquals(self, should, tensor):
        """Assert that two tensors have the same shape and are equal
        according to ``TorchConfig.equal``, logging the operands on failure.

        :param should: the expected tensor

        :param tensor: the tensor under test

        """
        self.assertEqual(should.shape, tensor.shape)
        try:
            tensors_match = TorchConfig.equal(should, tensor)
        except RuntimeError as e:
            # log the operands before propagating the comparison error
            logger.error(f'error comparing {should} with {tensor}')
            raise e
        if not tensors_match:
            logger.error(f'tensor {should} does not equal {tensor}')
        self.assertTrue(tensors_match)

    def _to_sparse(self, arr: Tensor):
        """Return the first element of the first item produced by
        ``SparseTensorFeatureContext.to_sparse`` for ``arr``.

        """
        converted = SparseTensorFeatureContext.to_sparse(arr)
        first_item = converted[0]
        return first_item[0]
Esempio n. 5
0
 def test_feature(self):
     """Compare the parsed document of ``self.sent`` against the JSON
     fixture stored at ``config.feature_path``, then check the tokens
     produced with the non-normalizing token normalizer.

     """
     normalizer_factory = ImportConfigFactory(self.config, shared=False)

     # default normalizer: verify its description and the full parse
     normalizer = normalizer_factory.instance('default_token_normalizer')
     parser = self.fac('default_doc_parser', token_normalizer=normalizer)
     self.assertEqual(
         'MapTokenNormalizer: embed=True, reload=False, lemma_token_mapper',
         str(normalizer))
     feature_doc = parser(self.sent)
     actual = feature_doc.asdict()
     # manual toggle (disabled): when enabled, overwrites the fixture file
     # with the current parse
     if False:
         with open(self.config.feature_path, 'w') as f:
             f.write(feature_doc.asjson(indent=4))
     with open(self.config.feature_path) as f:
         expected = json.load(f)
     self.assertEqual(rec_sort(expected), rec_sort(actual))

     # no normalization: tokens come back verbatim
     normalizer = normalizer_factory.instance('nonorm_token_normalizer')
     parser = self.fac('default_doc_parser', token_normalizer=normalizer)
     norms = tuple(tok.norm for tok in parser(self.sent).token_iter())
     self.assertEqual(('Dan', 'throws', 'the', 'ball', '.'), norms)
Esempio n. 6
0
class SqliteTestCase(unittest.TestCase):
    """Base test case for SQLite backed persisters.

    :meth:`setUp` removes the ``./target`` directory so that database files
    are created from scratch by each test.

    """
    def setUp(self):
        self.config = AppConfig.instance()
        self.target_path = Path('./target')
        # start from a clean slate: the persister test asserts the database
        # file does not exist before the first insert
        if self.target_path.exists():
            shutil.rmtree(self.target_path)
        self.fac = ImportConfigFactory(self.config)

    @staticmethod
    def init_logging():
        """Configure root logging at INFO and this module's logger at DEBUG."""
        logging.basicConfig(level=logging.INFO)
        logger.setLevel(logging.DEBUG)

    def _test_inst_persister(self):
        """Exercise insert, read, update and delete on the
        ``inst_db_persister`` instance using :class:`Person` beans.

        The leading underscore keeps the default unittest loader from
        collecting this method; it has to be invoked explicitly.

        :return: the persister used by the test

        """
        persister = self.fac.instance('inst_db_persister', row_factory=Person)
        db_path = Path(self.target_path, 'sql-test2.db')

        # the database file does not exist until the first insert
        self.assertFalse(db_path.exists())
        self.assertEqual(0, persister.get_count())
        self.assertEqual(1, persister.insert_row('paul', 23))
        self.assertEqual(2, persister.insert_row('sue', 33))
        self.assertTrue(db_path.exists())

        peeps = persister.get()
        # fixed: was ``assertTrue(2, len(peeps))``, which always passes since
        # the second argument of ``assertTrue`` is the failure message
        self.assertEqual(2, len(peeps))
        self.assertEqual({'id': 1, 'name': 'paul', 'age': 23},
                         peeps[0].get_attrs())
        self.assertEqual({'id': 2, 'name': 'sue', 'age': 33},
                         peeps[1].get_attrs())
        peeps = persister.get()
        self.assertEqual((1, 'paul', 23), peeps[0].get_row())
        self.assertEqual(('paul', 23), peeps[0].get_insert_row())
        peeps = persister.get()
        self.assertEqual('id: 1, name: paul, age: 23', str(peeps[0]))
        self.assertEqual('id: 2, name: sue, age: 33', str(peeps[1]))
        peeps = persister.get()
        self.assertEqual('id: 1, name: paul, age: 23', str(peeps[0]))
        self.assertEqual('id: 2, name: sue, age: 33', str(peeps[1]))

        # bulk insert of raw rows
        new_peeps = (('bob', 42), ('jane', 90),)
        self.assertEqual(4, persister.insert_rows(new_peeps))
        peeps = persister.get()
        self.assertEqual({'id': 3, 'name': 'bob', 'age': 42},
                         peeps[0].get_attrs())
        self.assertEqual({'id': 4, 'name': 'jane', 'age': 90},
                         peeps[1].get_attrs())

        # inserting a bean populates its ``id``
        bean = Person('kyle', 52)
        self.assertIsNone(bean.id)
        self.assertEqual(5, persister.insert(bean))
        self.assertEqual(5, bean.id)
        self.assertEqual(
            ((5,),),
            persister.execute_by_name('people_count', row_factory='tuple'))

        # lookups by ID and existence checks
        peep = persister.get_by_id(2)
        self.assertEqual('id: 2, name: sue, age: 33', str(peep))
        peep = persister.get_by_id(5)
        self.assertEqual('id: 5, name: kyle, age: 52', str(peep))
        self.assertIsNone(persister.get_by_id(100))
        self.assertTrue(persister.exists(1))
        self.assertTrue(persister.exists(5))
        self.assertFalse(persister.exists(100))

        # update: the original ``assertTrue(2, persister.update(peep))`` was
        # vacuous (always passes); the effect is verified by the read back
        # below.  NOTE(review): assert the return value of ``update`` once
        # its contract is confirmed.
        peep = persister.get_by_id(2)
        peep.age = 41
        persister.update(peep)
        peep = persister.get_by_id(2)
        self.assertEqual('id: 2, name: sue, age: 41', str(peep))

        # delete: same vacuous ``assertTrue(2, ...)`` removed; the effect is
        # verified by ``exists`` and the counts below.  NOTE(review): assert
        # the return value of ``delete`` once its contract is confirmed.
        self.assertTrue(persister.exists(2))
        persister.delete(2)
        self.assertFalse(persister.exists(2))
        self.assertEqual(
            ((4,),),
            persister.execute_by_name('people_count', row_factory='tuple'))
        self.assertEqual(4, persister.get_count())
        self.assertEqual((1, 3, 4, 5), tuple(persister.get_keys()))

        # bulk insert of beans
        new_peeps = (Person('jake', 62), Person('christina', 22),)
        self.assertEqual(7, persister.insert_beans(new_peeps))
        peeps = persister.get()
        self.assertEqual({'id': 6, 'name': 'jake', 'age': 62},
                         peeps[2].get_attrs())
        self.assertEqual({'id': 7, 'name': 'christina', 'age': 22},
                         peeps[1].get_attrs())
        return persister
Esempio n. 7
0
class TestWordPieceTokenization(unittest.TestCase):
    """Check the mapping of tokens to word pieces, and the shape of the
    transformed output, for the transformer vectorizers configured in
    ``test-resources/transformer.conf``.

    """
    def setUp(self):
        config = AppConfig('test-resources/transformer.conf')
        self.fac = ImportConfigFactory(config)
        self.vmng = self.fac.instance('feature_vectorizer_manager')

    @staticmethod
    def _whole(*words):
        """Return the expectation entries for tokens that are expected to
        map to exactly one word piece equal to the token itself.

        """
        return tuple((word, (word,)) for word in words)

    def _test_tok(self, vec_name: str, sent: str, should_tok_len: int,
                  should: Tuple[Tuple[str, Tuple[str]]]):
        """Tokenize ``sent`` with the named vectorizer and compare the token
        to word piece mapping with ``should``.

        :param vec_name: the key of the vectorizer in the manager

        :param sent: the text to parse and tokenize

        :param should_tok_len: the expected size of the second dimension of
                               the transformed output

        :param should: per sentence, the expected ``(token norm, word
                       pieces)`` pairs

        """
        doc: FeatureDocument = self.vmng.parse(sent)
        vec = self.vmng[vec_name]
        tdoc: TokenizedFeatureDocument = vec.tokenize(doc)
        self.assertEqual(TokenizedFeatureDocument, type(tdoc))
        sent_maps = tdoc.map_word_pieces_to_tokens()
        self.assertEqual(len(should), len(sent_maps))
        for sent_map, should_sent in zip(sent_maps, should):
            # each entry carries the sentence and its token mapping; the
            # sentence itself is only looked up, not inspected
            feature_sent: FeatureSentence = sent_map['sent']
            tok_map: Tuple[FeatureToken, Tuple[str]] = sent_map['map']
            for actual, expected in zip(tok_map, should_sent):
                tok, pieces = actual
                should_tok, should_pieces = expected
                self.assertEqual(FeatureToken, type(tok))
                self.assertEqual(str, type(pieces[0]))
                self.assertEqual(tok.norm, should_tok)
                self.assertEqual(pieces, should_pieces)
        arr = vec.transform(doc)
        self.assertEqual((len(should), should_tok_len, 768), tuple(arr.shape))

    def _test_sent_1(self, vec_name: str):
        """Two sentences, the first containing a token split in two."""
        sent = 'The gunships are nearer than you think. Their heading is changing.'
        gunships = ('gunships', ('guns', 'hips'))
        first = (self._whole('The') + (gunships,) +
                 self._whole('are', 'nearer', 'than', 'you', 'think', '.'))
        second = self._whole('Their', 'heading', 'is', 'changing', '.')
        self._test_tok(vec_name, sent, 11, (first, second))

    def _test_sent_2(self, vec_name: str):
        """Two sentences, the second containing a token split in two."""
        sent = 'The guns are near. Their heading is changing to the gunships.'
        gunships = ('gunships', ('guns', 'hips'))
        first = self._whole('The', 'guns', 'are', 'near', '.')
        second = (
            self._whole('Their', 'heading', 'is', 'changing', 'to', 'the') +
            (gunships,) + self._whole('.'))
        self._test_tok(vec_name, sent, 11, (first, second))

    def _test_sent_3(self, vec_name: str):
        """A single sentence containing a token split in two."""
        sent = 'Their heading is changing to the gunships.'
        gunships = ('gunships', ('guns', 'hips'))
        only = (
            self._whole('Their', 'heading', 'is', 'changing', 'to', 'the') +
            (gunships,) + self._whole('.'))
        self._test_tok(vec_name, sent, 11, (only,))

    def _test_sent_4(self, vec_name: str):
        """Three sentences whose expected word pieces differ for the
        ``transformer_roberta`` vectorizer.

        """
        sent = (
            'The guns are near. Their heading is changing to the gunships.' +
            ' The United States schooner created a gridlocking situation.')
        is_roberta = vec_name == 'transformer_roberta'
        gunships = ('gunships', ('guns', 'hips'))
        if is_roberta:
            schooner = ('schooner', ('sch', 'oon', 'er'))
            gridlocking = ('gridlocking', ('grid', 'locking'))
            tok_len = 14
        else:
            schooner = ('schooner', ('schooner', ))
            gridlocking = ('gridlocking', ('grid', 'lock', 'ing'))
            tok_len = 13
        first = self._whole('The', 'guns', 'are', 'near', '.')
        second = (
            self._whole('Their', 'heading', 'is', 'changing', 'to', 'the') +
            (gunships,) + self._whole('.'))
        third = (
            self._whole('The') +
            (('United States', ('United', 'States')), schooner) +
            self._whole('created', 'a') +
            (gridlocking,) +
            self._whole('situation', '.'))
        self._test_tok(vec_name, sent, tok_len, (first, second, third))

    def _run_all_sents(self, vec_name: str):
        """Run every sentence check, in order, for the named vectorizer."""
        checks = (self._test_sent_1, self._test_sent_2,
                  self._test_sent_3, self._test_sent_4)
        for check in checks:
            check(vec_name)

    def test_bert(self):
        self._run_all_sents('transformer_bert')

    def test_roberta(self):
        self._run_all_sents('transformer_roberta')

    def test_distilbert(self):
        self._run_all_sents('transformer_distilbert')