def test_pattern_lookups(self):
    """Check the combined 'lower/token[+1]' feature built by ``Pattern``.

    The extractor is configured with ``token_lower`` and ``token_identity``
    as token features and one global ``Pattern`` pairing ``'lower'`` at
    offset 0 with ``'token'`` at offset 1 (``out_value='OUT'``).
    """
    # NOTE(review): a second method with this same name appears in this
    # source; if both live in one class the later definition wins - confirm.
    per_token = [token_lower, token_identity]
    lookup = Pattern((0, 'lower'), (1, 'token'), out_value='OUT')
    extractor = HtmlFeatureExtractor(
        token_features=per_token,
        global_features=[lookup],
    )

    feature_dicts = extractor.transform_single(self.html_tokens)

    observed = [fd['lower/token[+1]'] for fd in feature_dicts]
    # The final entry is expected to use 'OUT' - presumably because offset
    # +1 falls past the last token; verify against Pattern's implementation.
    expected = [
        'hello/John',
        'john/Doe',
        'doe/Mary',
        'mary/said',
        'said/OUT',
    ]
    self.assertListEqual(observed, expected)
def test_pattern_lookups(self):
    """Verify the 'lower/token[+1]' values produced for ``self.html_tokens``.

    A ``Pattern`` over ``(0, 'lower')`` and ``(1, 'token')`` with
    ``out_value='OUT'`` is registered as the only global feature; the
    token features are ``token_lower`` and ``token_identity``.
    """
    # NOTE(review): this name is also defined earlier in this source; if both
    # definitions share a class, this one shadows the earlier one - confirm.
    extractor_kwargs = {
        'token_features': [token_lower, token_identity],
        'global_features': [
            Pattern((0, 'lower'), (1, 'token'), out_value='OUT'),
        ],
    }
    extractor = HtmlFeatureExtractor(**extractor_kwargs)
    rows = extractor.transform_single(self.html_tokens)

    # Collect the combined feature for every token, in order.
    combined = []
    for row in rows:
        combined.append(row['lower/token[+1]'])

    self.assertListEqual(
        combined,
        ['hello/John', 'john/Doe', 'doe/Mary', 'mary/said', 'said/OUT'],
    )
def test_pattern(self):
    """Check the 'lower[-2]/lower[-1]' feature built from two look-behinds.

    The global ``Pattern`` combines ``'lower'`` at offsets -2 and -1. The
    test expects the key to be absent from the first token's features and
    present, with the listed values, on the remaining tokens.
    """
    # NOTE(review): a second method with this same name appears in this
    # source; if both live in one class the later definition wins - confirm.
    per_token = [token_lower, token_identity]
    look_behind = Pattern((-2, 'lower'), (-1, 'lower'))
    extractor = HtmlFeatureExtractor(
        token_features=per_token,
        global_features=[look_behind],
    )
    feature_dicts = extractor.transform_single(self.html_tokens)

    key = 'lower[-2]/lower[-1]'

    # First token: the combined key must not be emitted at all.
    self.assertNotIn(key, feature_dicts[0])

    # Remaining tokens: the second one is expected to carry '?' in the
    # -2 slot.
    observed = [fd[key] for fd in feature_dicts[1:]]
    expected = ['?/hello', 'hello/john', 'john/doe', 'doe/mary']
    self.assertListEqual(observed, expected)
def test_pattern(self):
    """Verify 'lower[-2]/lower[-1]' values produced for ``self.html_tokens``.

    Uses ``token_lower`` and ``token_identity`` as token features and one
    global ``Pattern`` over ``(-2, 'lower')`` and ``(-1, 'lower')``.
    """
    # NOTE(review): this name is also defined earlier in this source; if both
    # definitions share a class, this one shadows the earlier one - confirm.
    extractor_kwargs = {
        'token_features': [token_lower, token_identity],
        'global_features': [Pattern((-2, 'lower'), (-1, 'lower'))],
    }
    extractor = HtmlFeatureExtractor(**extractor_kwargs)
    rows = extractor.transform_single(self.html_tokens)

    key = 'lower[-2]/lower[-1]'
    head_row = rows[0]
    tail_rows = rows[1:]

    # The test expects no combined feature on the very first token.
    self.assertNotIn(key, head_row)

    combined = []
    for row in tail_rows:
        combined.append(row[key])

    self.assertListEqual(
        combined,
        ['?/hello', 'hello/john', 'john/doe', 'doe/mary'],
    )