class TestConllExtractor(unittest.TestCase):
    """Checks ConllExtractor.extract, ConllExtractor._parse_sentence and the
    filter_insignificant helper against a short sample text about Python.
    """

    def setUp(self):
        """Build the extractor under test plus the paragraph and sentence fixtures."""
        self.extractor = ConllExtractor()
        # Paragraph handed to extract(); kept as a single literal so its value is unchanged.
        self.text = ''' Python is a widely used general-purpose, high-level programming language. Its design philosophy emphasizes code readability, and its syntax allows programmers to express concepts in fewer lines of code than would be possible in other languages. The language provides constructs intended to enable clear programs on both a small and large scale. '''
        # Single sentence handed to _parse_sentence().
        self.sentence = "Python is a widely used general-purpose, high-level programming language"

    @attr('slow')
    def test_extract(self):
        # Every expected phrase has to be among the phrases extracted from
        # the paragraph; the first missing one fails the test.
        found_phrases = self.extractor.extract(self.text)
        for expected in ("Python", "design philosophy", "code readability"):
            assert_true(expected in found_phrases)

    @attr('slow')
    def test_parse_sentence(self):
        # Parsing a sentence must hand back an nltk Tree instance.
        tree = self.extractor._parse_sentence(self.sentence)
        is_tree = isinstance(tree, nltk.tree.Tree)
        assert_true(is_tree)

    @attr('slow')
    def test_filter_insignificant(self):
        # The parsed sentence carries a 'DT' tag on one of its leaves ...
        parsed = self.extractor._parse_sentence(self.sentence)
        tags_before = [tag for _, tag in parsed.leaves()]
        assert_true('DT' in tags_before)
        # ... and filter_insignificant must drop every leaf with that tag.
        kept = filter_insignificant(parsed.leaves())
        tags_after = [tag for _, tag in kept]
        assert_true("DT" not in tags_after)
def setUp(self):
    """Prepare the fixtures: a one-sentence sample, a longer paragraph and a
    new ConllExtractor instance stored on the test case.
    """
    # Short single-sentence sample.
    self.sentence = "Python is a widely used general-purpose, high-level programming language"
    # Longer paragraph sample; kept as a single literal so its value is unchanged.
    self.text = ''' Python is a widely used general-purpose, high-level programming language. Its design philosophy emphasizes code readability, and its syntax allows programmers to express concepts in fewer lines of code than would be possible in other languages. The language provides constructs intended to enable clear programs on both a small and large scale. '''
    # Extractor instance used by the tests.
    self.extractor = ConllExtractor()
def test_overrides(self):
    # A Blobber built with an explicit tokenizer and noun-phrase extractor
    # is called twice; both resulting blobs are then inspected.
    make_blob = tb.Blobber(
        tokenizer=SentenceTokenizer(),
        np_extractor=ConllExtractor(),
    )
    first = make_blob("How now? Brown cow?")
    assert_true(isinstance(first.tokenizer, SentenceTokenizer))
    actual_tokens = first.tokens
    assert_equal(actual_tokens, tb.WordList(["How now?", "Brown cow?"]))
    second = make_blob("Another blob")
    # blobs have the same tokenizer
    shares_tokenizer = first.tokenizer is second.tokenizer
    assert_true(shares_tokenizer)
    # but aren't the same object
    assert_not_equal(first, second)
def __init__(self):
    """Create the custom components and, unless DEV_ENV is truthy, train them.

    Training consists of building TextBlob objects over a short sample
    string and reading their ``sentiment``, ``noun_phrases`` and
    ``pos_tags`` attributes once each.
    """
    # create custom components
    self.naive_bayes_analyzer = NaiveBayesAnalyzer()
    self.conll_extractor = ConllExtractor()
    self.nltk_tagger = NLTKTagger()
    self.perceptron_tagger = PerceptronTagger()

    # Skip the warm-up below entirely when DEV_ENV is set.
    if DEV_ENV:
        return

    # train all components (default and custom)
    sample = 'TextBlob blobs great!'
    warmup_attrs = ('sentiment', 'noun_phrases', 'pos_tags')

    # Blob using the default components.
    stock_blob = TextBlob(sample)
    for attr_name in warmup_attrs:
        getattr(stock_blob, attr_name)

    # Blob wired with the custom analyzer, extractor and NLTK tagger.
    tuned_blob = TextBlob(
        sample,
        analyzer=self.naive_bayes_analyzer,
        np_extractor=self.conll_extractor,
        pos_tagger=self.nltk_tagger,
    )
    for attr_name in warmup_attrs:
        getattr(tuned_blob, attr_name)

    # Blob using the perceptron tagger; only its tags are read.
    perceptron_blob = TextBlob(sample, pos_tagger=self.perceptron_tagger)
    getattr(perceptron_blob, 'pos_tags')
def test_can_pass_np_extractor_to_constructor(self):
    # An extractor given through the np_extractor keyword must be what the
    # blob exposes as its np_extractor attribute.
    extractor = ConllExtractor()
    greeting = tb.TextBlob('Hello world!', np_extractor=extractor)
    uses_conll = isinstance(greeting.np_extractor, ConllExtractor)
    assert_true(uses_conll)
def test_can_use_different_np_extractors(self):
    # Assigning np_extractor after construction must be reflected when the
    # attribute is read back.
    extractor = ConllExtractor()
    sample = "Python is a high-level scripting language."
    blob = tb.TextBlob(sample)
    blob.np_extractor = extractor
    uses_conll = isinstance(blob.np_extractor, ConllExtractor)
    assert_true(uses_conll)