Exemple #1
0
def get_article(link, news, date):
    """Fetch the article at ``link`` and return its extracted fields as a dict.

    The article is first downloaded, parsed and run through ``nlp()`` with
    the default ``Article`` settings and labelled ``'ENGLISH'``.  If either
    the title or the text comes back shorter than 5 characters, the article
    is fetched again with ``language='id'`` and labelled ``'INDONESIA'``.
    On that second pass:

    * when the module-level ``xgb_language`` is truthy, the language label is
      replaced by ``xgb_language.predict(text)`` and the summary / keywords
      are taken from ``get_malaya_summary`` (called with the text split on
      newlines);
    * otherwise ``nlp()`` is run on the re-fetched article.

    NOTE(review): ``Article`` looks like newspaper's ``Article`` class --
    confirm against the file's imports.

    Args:
        link: URL handed to ``Article``; echoed back under ``'url'``.
        news: Passed through unchanged under ``'news'``.
        date: Passed through unchanged under ``'date'``.

    Returns:
        dict with keys ``'title'``, ``'url'``, ``'authors'``, ``'top-image'``,
        ``'text'``, ``'keyword'``, ``'summary'``, ``'news'``, ``'date'`` and
        ``'language'``.
    """

    def _fetch(**options):
        # Build, download and parse an Article for ``link``.
        fetched = Article(link, **options)
        fetched.download()
        fetched.parse()
        return fetched

    doc = _fetch()
    doc.nlp()
    detected = 'ENGLISH'

    has_enough_content = len(doc.title) >= 5 and len(doc.text) >= 5
    if not has_enough_content:
        # Too little was extracted with the defaults: retry as Indonesian.
        detected = 'INDONESIA'
        print('found BM/ID article')
        doc = _fetch(language='id')
        if not xgb_language:
            doc.nlp()
        else:
            detected = xgb_language.predict(doc.text)
            paragraphs = doc.text.split('\n')
            digest = get_malaya_summary(paragraphs)
            doc.summary = digest['summary']
            doc.keywords = digest['cluster-top-words']

    record = {
        'title': doc.title,
        'url': link,
        'authors': doc.authors,
        'top-image': doc.top_image,
        'text': doc.text,
        'keyword': doc.keywords,
        'summary': doc.summary,
        'news': news,
        'date': date,
        'language': detected,
    }
    return record