class TestConllExtractor(unittest.TestCase):
    """Exercise ``ConllExtractor``: phrase extraction, parsing and filtering.

    Assertions use the ``unittest.TestCase`` methods so that a failure
    reports the offending values instead of a bare "False is not true".
    """

    def setUp(self):
        """Build a fresh extractor plus the sample paragraph and sentence."""
        self.extractor = ConllExtractor()
        self.text = '''
Python is a widely used general-purpose,
high-level programming language. Its design philosophy emphasizes code
readability, and its syntax allows programmers to express concepts in fewer lines
of code than would be possible in other languages. The language provides
constructs intended to enable clear programs on both a small and large scale.
'''
        self.sentence = "Python is a widely used general-purpose, high-level programming language"

    @attr('slow')
    def test_extract(self):
        # extract() on the sample paragraph must yield these phrases.
        noun_phrases = self.extractor.extract(self.text)
        self.assertIn("Python", noun_phrases)
        self.assertIn("design philosophy", noun_phrases)
        self.assertIn("code readability", noun_phrases)

    @attr('slow')
    def test_parse_sentence(self):
        # _parse_sentence() must hand back an nltk parse tree.
        parsed = self.extractor._parse_sentence(self.sentence)
        self.assertIsInstance(parsed, nltk.tree.Tree)

    @attr('slow')
    def test_filter_insignificant(self):
        # The parsed sentence carries a 'DT' tag before filtering ...
        chunk = self.extractor._parse_sentence(self.sentence)
        tags = [tag for _, tag in chunk.leaves()]
        self.assertIn('DT', tags)
        # ... and filter_insignificant() must drop every 'DT'-tagged leaf.
        filtered = filter_insignificant(chunk.leaves())
        tags = [tag for _, tag in filtered]
        self.assertNotIn("DT", tags)
Beispiel #2
0
class TestConllExtractor(unittest.TestCase):
    """Tests for ``ConllExtractor`` noun-phrase extraction and parsing.

    Membership and type checks go through ``assertIn`` / ``assertNotIn`` /
    ``assertIsInstance`` so failures show the values that were compared.
    """

    def setUp(self):
        """Create the extractor and the text fixtures shared by the tests."""
        self.extractor = ConllExtractor()
        self.text = '''
Python is a widely used general-purpose,
high-level programming language. Its design philosophy emphasizes code
readability, and its syntax allows programmers to express concepts in fewer lines
of code than would be possible in other languages. The language provides
constructs intended to enable clear programs on both a small and large scale.
'''
        self.sentence = "Python is a widely used general-purpose, high-level programming language"

    @attr('slow')
    def test_extract(self):
        # The sample paragraph must produce each of these noun phrases.
        noun_phrases = self.extractor.extract(self.text)
        self.assertIn("Python", noun_phrases)
        self.assertIn("design philosophy", noun_phrases)
        self.assertIn("code readability", noun_phrases)

    @attr('slow')
    def test_parse_sentence(self):
        # Parsing a single sentence returns an nltk Tree.
        parsed = self.extractor._parse_sentence(self.sentence)
        self.assertIsInstance(parsed, nltk.tree.Tree)

    @attr('slow')
    def test_filter_insignificant(self):
        chunk = self.extractor._parse_sentence(self.sentence)
        # Precondition: a 'DT' tag is present among the unfiltered leaves.
        tags = [tag for _, tag in chunk.leaves()]
        self.assertIn('DT', tags)
        # After filtering, no leaf may still carry the 'DT' tag.
        filtered = filter_insignificant(chunk.leaves())
        tags = [tag for _, tag in filtered]
        self.assertNotIn("DT", tags)
Beispiel #3
0
    def setUp(self):
        self.extractor = ConllExtractor()
        self.text = '''
Python is a widely used general-purpose,
high-level programming language. Its design philosophy emphasizes code
readability, and its syntax allows programmers to express concepts in fewer lines
of code than would be possible in other languages. The language provides
constructs intended to enable clear programs on both a small and large scale.
'''
        self.sentence = "Python is a widely used general-purpose, high-level programming language"
    def setUp(self):
        self.extractor = ConllExtractor()
        self.text = '''
Python is a widely used general-purpose,
high-level programming language. Its design philosophy emphasizes code
readability, and its syntax allows programmers to express concepts in fewer lines
of code than would be possible in other languages. The language provides
constructs intended to enable clear programs on both a small and large scale.
'''
        self.sentence = "Python is a widely used general-purpose, high-level programming language"
Beispiel #5
0
 def test_overrides(self):
     # A Blobber built with custom components passes them on to its blobs.
     tokenizer = SentenceTokenizer()
     extractor = ConllExtractor()
     make_blob = tb.Blobber(tokenizer=tokenizer, np_extractor=extractor)

     first = make_blob("How now? Brown cow?")
     assert_true(isinstance(first.tokenizer, SentenceTokenizer))
     actual_tokens = first.tokens
     expected_tokens = tb.WordList(["How now?", "Brown cow?"])
     assert_equal(actual_tokens, expected_tokens)

     second = make_blob("Another blob")
     # both blobs share one tokenizer instance
     shares_tokenizer = first.tokenizer is second.tokenizer
     assert_true(shares_tokenizer)
     # yet the blobs themselves compare as different
     assert_not_equal(first, second)
 def __init__(self):
     """Instantiate the custom components and, outside DEV_ENV, exercise them.

     When ``DEV_ENV`` is truthy only the components are created. Otherwise
     throwaway blobs are built and their properties are read once; the
     original comment describes this as training all components.
     """
     # create custom components
     self.naive_bayes_analyzer = NaiveBayesAnalyzer()
     self.conll_extractor = ConllExtractor()
     self.nltk_tagger = NLTKTagger()
     self.perceptron_tagger = PerceptronTagger()

     if DEV_ENV:
         return

     # train all components (default and custom)
     warmup_text = 'TextBlob blobs great!'
     touched_props = ('sentiment', 'noun_phrases', 'pos_tags')

     # Blob with the default components.
     stock_blob = TextBlob(warmup_text)
     for prop_name in touched_props:
         getattr(stock_blob, prop_name)

     # Blob wired with the custom analyzer, extractor and NLTK tagger.
     tuned_blob = TextBlob(
         warmup_text,
         analyzer=self.naive_bayes_analyzer,
         np_extractor=self.conll_extractor,
         pos_tagger=self.nltk_tagger,
     )
     for prop_name in touched_props:
         getattr(tuned_blob, prop_name)

     # Blob using the perceptron tagger; only its tags are read.
     perceptron_blob = TextBlob(warmup_text, pos_tagger=self.perceptron_tagger)
     getattr(perceptron_blob, 'pos_tags')
Beispiel #7
0
 def test_can_pass_np_extractor_to_constructor(self):
     # An extractor given to the constructor is exposed as blob.np_extractor.
     extractor = ConllExtractor()
     blob = tb.TextBlob('Hello world!', np_extractor=extractor)
     attached = blob.np_extractor
     assert_true(isinstance(attached, ConllExtractor))
Beispiel #8
0
 def test_can_use_different_np_extractors(self):
     # np_extractor can be reassigned after the blob has been created.
     replacement = ConllExtractor()
     sample = "Python is a high-level scripting language."
     blob = tb.TextBlob(sample)
     blob.np_extractor = replacement
     current = blob.np_extractor
     assert_true(isinstance(current, ConllExtractor))