Example #1
0
 def test_pattern_lookups(self):
     """Check a Pattern pairing a token's 'lower' with the next 'token'.

     The expected values show one 'lower/token[+1]' entry per token, with
     the last entry ending in the configured out_value ('OUT').
     """
     lookup = Pattern((0, 'lower'), (1, 'token'), out_value='OUT')
     extractor = HtmlFeatureExtractor(
         token_features=[token_lower, token_identity],
         global_features=[lookup],
     )
     feature_dicts = extractor.transform_single(self.html_tokens)

     observed = [fd['lower/token[+1]'] for fd in feature_dicts]
     expected = ['hello/John', 'john/Doe', 'doe/Mary', 'mary/said', 'said/OUT']
     self.assertListEqual(observed, expected)
 def test_pattern_lookups(self):
     """Verify the 'lower/token[+1]' feature produced by a two-part Pattern.

     The Pattern is built from (0, 'lower') and (1, 'token') with
     out_value='OUT'; the assertion pins the value emitted for each token.
     """
     feature_key = 'lower/token[+1]'
     token_funcs = [token_lower, token_identity]
     global_funcs = [Pattern((0, 'lower'), (1, 'token'), out_value='OUT')]

     rows = HtmlFeatureExtractor(
         token_features=token_funcs,
         global_features=global_funcs,
     ).transform_single(self.html_tokens)

     self.assertListEqual(
         [row[feature_key] for row in rows],
         ['hello/John', 'john/Doe', 'doe/Mary', 'mary/said', 'said/OUT'],
     )
Example #3
0
    def test_pattern(self):
        """Check a Pattern over the two preceding tokens' 'lower' values.

        No out_value is passed here. The assertions require that the first
        token's feature dict lacks the key, and pin the values for the rest.
        """
        key = 'lower[-2]/lower[-1]'
        preceding_pair = Pattern((-2, 'lower'), (-1, 'lower'))
        extractor = HtmlFeatureExtractor(
            token_features=[token_lower, token_identity],
            global_features=[preceding_pair],
        )
        feature_dicts = extractor.transform_single(self.html_tokens)

        # The first token must not carry the feature at all.
        self.assertNotIn(key, feature_dicts[0])

        observed = [fd[key] for fd in feature_dicts[1:]]
        expected = ['?/hello', 'hello/john', 'john/doe', 'doe/mary']
        self.assertListEqual(observed, expected)
    def test_pattern(self):
        """Verify the 'lower[-2]/lower[-1]' feature built by a Pattern.

        The key is expected to be missing from the first token's features
        and present, with the pinned values, on every later token.
        """
        key = 'lower[-2]/lower[-1]'
        token_funcs = [token_lower, token_identity]
        global_funcs = [Pattern((-2, 'lower'), (-1, 'lower'))]

        rows = HtmlFeatureExtractor(
            token_features=token_funcs,
            global_features=global_funcs,
        ).transform_single(self.html_tokens)

        self.assertNotIn(key, rows[0])
        self.assertListEqual(
            [row[key] for row in rows[1:]],
            ['?/hello', 'hello/john', 'john/doe', 'doe/mary'],
        )