コード例 #1
0
    def test_dirty_text_summary(self):
        """Summarize text that starts with a noisy fragment.

        The input is ``self.test_text[0]`` prefixed with a short run of
        mixed CJK words, a bare ``http:`` token and assorted symbol
        characters.  The test pins the exact summary sentences, keywords
        and key noun phrases returned for that input.
        """
        noise = '最新 科技 http: 新闻和创业 公司信息 ”∆˙∫˚˜ ˜µ∆∫˙© ∆∆˚µ˚'
        dirty_text = noise + self.test_text[0]

        # The expected summary lists these two sentences twice in a row, so
        # they are spelled out once here and reused below.
        tianjin_sentence = (
            'This is happening in the city of Tianjin, about an hours drive south of '
            'Beijing, within a gleaming office building that belongs to iFlytek, one of '
            'Chinas rapidly rising artificial-intelligence companies.'
        )
        showroom_sentence = (
            'Beyond guarded gates, inside a glitzy showroom, the US president is on a '
            'large TV screen heaping praise on the Chinese company.'
        )
        expected_summary = [
            tianjin_sentence,
            showroom_sentence,
            tianjin_sentence,
            showroom_sentence,
            'However, AI itself could change all that.',
            'A more advanced chip industry will help China realize its dream of becoming '
            'a true technology superpower.',
            'China wont be playing catch-up with these new chips, as it has done with '
            'more conventional chips for decades.',
            'Chinas chip ambitions have geopolitical implications, too.',
            'A successful chip industry would make China more economically competitive '
            'and independent.',
        ]
        expected_keywords = [
            'chip', 'china', 'ai', 'company', 'iflytek', 'technology',
            'algorithm', 'microchip', 'time', 'silicon', 'advanced',
            'industry', 'beijing', 'belongs', 'inside',
        ]
        expected_noun_phrases = [
            'chinese company', 'tsinghua unigroup', 'donald trump',
            'gleaming office building', 'artificial-intelligence companies',
        ]

        # Parse once, then collect all three outputs before asserting on any.
        summarizer = Summarizer(dirty_text)
        summarizer.parse()
        actual_summary = summarizer.summarize()
        actual_keywords = summarizer.keywords()
        actual_noun_phrases = summarizer.key_noun_phrases()

        self.assertEqual(actual_summary, expected_summary)
        self.assertEqual(actual_keywords, expected_keywords)
        self.assertEqual(actual_noun_phrases, expected_noun_phrases)
コード例 #2
0
        def test(test_text):
            summarizer = Summarizer(test_text)
            summarizer.parse()

            text = summarizer.summarize()
            keywords = summarizer.keywords()