Python Featurizer Examples

Programming Language: Python

Namespace/Package Name: featurize

Method/Function: Featurizer

Examples at hotexamples.com: 7

Python Featurizer - 7 examples found. These are the top-rated real-world Python examples of featurize.Featurizer, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

 def test_regex_extract_not_enough_input2(self):
     """Dataset must not be full for Featurizer(6, 11) on the cased input.

     A WebCorpusExtractor built with a single regex filter is passed as
     the label extractor.
     """
     patterns = [
         r'<CAS<([^<>]+)>',
     ]
     extractor = featurize.WebCorpusExtractor(regex_filter=patterns)
     featurizer = featurize.Featurizer(6, 11, label_extractor=extractor)
     stream = io.StringIO(input_with_cases)
     featurizer.featurize_stream(stream)
     self.assertFalse(featurizer.dataset.full)

Example #2

Show file

 def test_regex_extract(self):
     """Featurizer(2, 20) with a regex-filtering extractor yields 4 items.

     The input is the cased fixture, read through an in-memory stream.
     """
     patterns = [
         r'<CAS<([^<>]+)>',
     ]
     extractor = featurize.WebCorpusExtractor(regex_filter=patterns)
     featurizer = featurize.Featurizer(2, 20, label_extractor=extractor)
     stream = io.StringIO(input_with_cases)
     featurizer.featurize_stream(stream)
     dataset_size = len(featurizer.dataset)
     self.assertEqual(dataset_size, 4)

Example #3

Show file

 def test_keep_duplicates(self):
     """With skip_duplicates=False the dataset size equals the line count.

     The expected count is the number of newline-separated pieces of the
     duplicates fixture (no stripping is applied before splitting).
     """
     pieces = input_with_duplicates.split('\n')
     expected_size = len(pieces)
     featurizer = featurize.Featurizer(30, 300, skip_duplicates=False)
     stream = io.StringIO(input_with_duplicates)
     featurizer.featurize_stream(stream)
     self.assertEqual(len(featurizer.dataset), expected_size)

Example #4

Show file

 def test_empty_extractor2(self):
     """Featurizer(3) without an explicit extractor keeps the full label.

     After featurizing the cased fixture, the expected label string must
     be present in the dataset's labels.
     """
     featurizer = featurize.Featurizer(3)
     stream = io.StringIO(input_with_cases)
     featurizer.featurize_stream(stream)
     seen_labels = featurizer.dataset.labels
     self.assertIn('részletez/VERB<INF>', seen_labels)

Example #5

Show file

 def test_empty_extractor(self):
     """Featurizer(3) without an explicit extractor keeps one item per line.

     The expected size is the number of lines in the stripped cased
     fixture.
     """
     fixture_lines = input_with_cases.strip().split('\n')
     expected_size = len(fixture_lines)
     featurizer = featurize.Featurizer(3)
     stream = io.StringIO(input_with_cases)
     featurizer.featurize_stream(stream)
     self.assertEqual(len(featurizer.dataset), expected_size)

Example #6

Show file

 def test_pos_extract_not_enough_input(self):
     """Dataset must not be full for Featurizer(200, 20) on the simple input.

     A WebCorpusExtractor with a NOUN/VERB grep filter is passed as the
     label extractor.
     """
     wanted_tags = ["NOUN", "VERB"]
     extractor = featurize.WebCorpusExtractor(grep_filter=wanted_tags)
     featurizer = featurize.Featurizer(200, 20, label_extractor=extractor)
     stream = io.StringIO(input_simple)
     featurizer.featurize_stream(stream)
     self.assertFalse(featurizer.dataset.full)

Example #7

Show file

 def test_pos_extract(self):
     """Featurizer(2, 20) with a NOUN/VERB grep filter fills the dataset.

     After featurizing the simple fixture the dataset must hold 4 items
     and report itself as full.
     """
     wanted_tags = ["NOUN", "VERB"]
     extractor = featurize.WebCorpusExtractor(grep_filter=wanted_tags)
     featurizer = featurize.Featurizer(2, 20, label_extractor=extractor)
     stream = io.StringIO(input_simple)
     featurizer.featurize_stream(stream)
     # Size is checked first so a wrong count is reported before fullness.
     self.assertEqual(len(featurizer.dataset), 4)
     self.assertTrue(featurizer.dataset.full)