def test_dirty_text_summary(self):
    """Check the summarizer output for an article prefixed with noisy text.

    The first fixture text (``self.test_text[0]``) is prepended with a run of
    non-English words and stray symbols.  The summary sentences, keywords and
    key noun phrases produced for that input are compared against fixed
    expected lists.
    """
    # Expected sentences that occur twice in the summary are spelled out once.
    tianjin_sentence = (
        'This is happening in the city of Tianjin, about an hours drive south of '
        'Beijing, within a gleaming office building that belongs to iFlytek, one of '
        'Chinas rapidly rising artificial-intelligence companies.'
    )
    showroom_sentence = (
        'Beyond guarded gates, inside a glitzy showroom, the US president is on a '
        'large TV screen heaping praise on the Chinese company.'
    )
    expected_summary = [
        tianjin_sentence,
        showroom_sentence,
        tianjin_sentence,
        showroom_sentence,
        'However, AI itself could change all that.',
        'A more advanced chip industry will help China realize its dream of becoming '
        'a true technology superpower.',
        'China wont be playing catch-up with these new chips, as it has done with '
        'more conventional chips for decades.',
        'Chinas chip ambitions have geopolitical implications, too.',
        'A successful chip industry would make China more economically competitive '
        'and independent.',
    ]
    expected_keywords = [
        'chip', 'china', 'ai', 'company', 'iflytek',
        'technology', 'algorithm', 'microchip', 'time', 'silicon',
        'advanced', 'industry', 'beijing', 'belongs', 'inside',
    ]
    expected_noun_phrases = [
        'chinese company',
        'tsinghua unigroup',
        'donald trump',
        'gleaming office building',
        'artificial-intelligence companies',
    ]

    noise_prefix = '最新 科技 http: 新闻和创业 公司信息 ”∆˙∫˚˜ ˜µ∆∫˙© ∆∆˚µ˚'
    dirty_input = noise_prefix + self.test_text[0]

    engine = Summarizer(dirty_input)
    engine.parse()

    # Collect every result before asserting, so all three summarizer calls
    # run even when an early assertion fails.
    summary = engine.summarize()
    found_keywords = engine.keywords()
    found_noun_phrases = engine.key_noun_phrases()

    self.assertEqual(summary, expected_summary)
    self.assertEqual(found_keywords, expected_keywords)
    self.assertEqual(found_noun_phrases, expected_noun_phrases)
def test(test_text):
    """Run the summarizer on ``test_text`` and collect its summary and keywords.

    Builds a ``Summarizer`` from ``test_text``, calls ``parse()`` on it, then
    stores the results of ``summarize()`` and ``keywords()`` in locals.

    NOTE(review): in the visible source neither result is used afterwards;
    the function may continue beyond this point -- confirm against the full
    file before treating these locals as dead.
    """
    summarizer = Summarizer(test_text)
    # parse() is called before any result is requested.
    summarizer.parse()
    text = summarizer.summarize()
    keywords = summarizer.keywords()