Exemple #1
0
def get_article(link, news, date):
    """Fetch the article at ``link`` and return its fields as a dict.

    The page is first downloaded, parsed and run through ``nlp()`` with the
    default ``Article`` settings, and the language is reported as
    ``'ENGLISH'``. When the resulting title or text is shorter than five
    characters, the page is fetched again with ``language='id'``; in that
    case the language comes from ``xgb_language.predict`` and the summary
    and keywords come from ``malaya.summarize_lsa`` applied to the text
    split on newlines.

    Args:
        link: URL handed to ``Article``; echoed back under ``'url'``.
        news: Passed through unchanged under ``'news'``.
        date: Passed through unchanged under ``'date'``.

    Returns:
        A dict with the keys ``title``, ``url``, ``authors``, ``top-image``,
        ``text``, ``keyword``, ``summary``, ``news``, ``date`` and
        ``language``.
    """
    # NOTE(review): Article is presumably newspaper's Article class; confirm
    # against this file's imports.
    page = Article(link)
    page.download()
    page.parse()
    page.nlp()
    detected_language = 'ENGLISH'

    # A near-empty title or body is treated as a failed default-language parse.
    looks_unparsed = len(page.title) < 5 or len(page.text) < 5
    if looks_unparsed:
        print('found BM/ID article')
        page = Article(link, language='id')
        page.download()
        page.parse()
        detected_language = xgb_language.predict(page.text)
        paragraphs = page.text.split('\n')
        lsa_result = malaya.summarize_lsa(paragraphs, important_words = 20)
        page.summary = lsa_result['summary']
        page.keywords = lsa_result['cluster-top-words']

    return {
        'title': page.title,
        'url': link,
        'authors': page.authors,
        'top-image': page.top_image,
        'text': page.text,
        'keyword': page.keywords,
        'summary': page.summary,
        'news': news,
        'date': date,
        'language': detected_language,
    }
Exemple #2
0
def test_lsa_not_cluster():
    """LSA summary with ``return_cluster=False`` yields non-empty 'top-words'."""
    result = malaya.summarize_lsa(isu_kerajaan, return_cluster=False)
    top_words = result['top-words']
    assert len(top_words)
Exemple #3
0
def test_lsa():
    """LSA summary with default arguments yields non-empty 'top-words'."""
    result = malaya.summarize_lsa(isu_kerajaan)
    top_words = result['top-words']
    assert len(top_words)
Exemple #4
0
def test_lsa_original():
    """LSA summary with ``maintain_original=True`` yields non-empty 'top-words'."""
    result = malaya.summarize_lsa(isu_kerajaan, maintain_original=True)
    top_words = result['top-words']
    assert len(top_words)
Exemple #5
0
def get_malaya_summary(text):
    """Return ``malaya.summarize_lsa(text, important_words=20)``.

    ``malaya`` is imported inside the function, so the package is only
    loaded when this function is first called.
    """
    import malaya

    lsa_result = malaya.summarize_lsa(text, important_words=20)
    return lsa_result