def get_article(link, news, date):
    """Download the article at ``link`` and return its fields as a dict.

    The article is first fetched with ``Article``'s default settings and
    run through ``nlp()``; the language is then reported as ``'ENGLISH'``.
    If the parsed title or text is shorter than 5 characters, the article
    is fetched again with ``language='id'``, its language is taken from
    ``xgb_language.predict`` and its summary and keywords are taken from
    ``malaya.summarize_lsa`` (``nlp()`` is not called on that second fetch).

    Args:
        link: URL of the article; also returned under ``'url'``.
        news: Passed through unchanged under ``'news'``.
        date: Passed through unchanged under ``'date'``.

    Returns:
        A dict with the keys ``'title'``, ``'url'``, ``'authors'``,
        ``'top-image'``, ``'text'``, ``'keyword'``, ``'summary'``,
        ``'news'``, ``'date'`` and ``'language'``.
    """
    # NOTE(review): Article is presumably newspaper's Article class -- confirm.
    parsed = Article(link)
    parsed.download()
    parsed.parse()
    parsed.nlp()
    language = 'ENGLISH'

    # A very short title or body is treated as a failed default parse.
    parsed_ok = len(parsed.title) >= 5 and len(parsed.text) >= 5
    if not parsed_ok:
        print('found BM/ID article')
        parsed = Article(link, language='id')
        parsed.download()
        parsed.parse()
        language = xgb_language.predict(parsed.text)

        # The summariser is given the text as a list of lines.
        lines = parsed.text.split('\n')
        lsa_result = malaya.summarize_lsa(lines, important_words=20)
        parsed.summary = lsa_result['summary']
        parsed.keywords = lsa_result['cluster-top-words']

    record = {
        'title': parsed.title,
        'url': link,
        'authors': parsed.authors,
        'top-image': parsed.top_image,
        'text': parsed.text,
        'keyword': parsed.keywords,
        'summary': parsed.summary,
        'news': news,
        'date': date,
        'language': language,
    }
    return record
def test_lsa_not_cluster():
    """``summarize_lsa`` with ``return_cluster=False`` yields non-empty 'top-words'."""
    summary = malaya.summarize_lsa(isu_kerajaan, return_cluster=False)
    top_words = summary['top-words']
    assert len(top_words) > 0
def test_lsa():
    """``summarize_lsa`` with default arguments yields non-empty 'top-words'."""
    summary = malaya.summarize_lsa(isu_kerajaan)
    top_words = summary['top-words']
    assert len(top_words) > 0
def test_lsa_original():
    """``summarize_lsa`` with ``maintain_original=True`` yields non-empty 'top-words'."""
    summary = malaya.summarize_lsa(isu_kerajaan, maintain_original=True)
    top_words = summary['top-words']
    assert len(top_words) > 0
def get_malaya_summary(text):
    """Summarise ``text`` with ``malaya.summarize_lsa`` using 20 important words.

    Args:
        text: Input forwarded as the first argument to ``summarize_lsa``.

    Returns:
        Whatever ``malaya.summarize_lsa`` returns for this input.
    """
    # Imported at call time rather than at module import.
    import malaya

    lsa_options = {'important_words': 20}
    return malaya.summarize_lsa(text, **lsa_options)