Exemple #1
0
    def get_text(self, url: str) -> None:
        """Process the article located at ``url`` and hand it to ``save_article``.

        An ``Article`` is created for the URL (honouring ``self.keep_html``),
        supplied with the HTML returned by ``self.get_page_content``, then
        parsed and run through ``nlp()``. A metadata dict (title, keywords,
        authors, images, description, date, url) is attached to the article
        before it is passed to ``self.save_article``.

        Args:
            url: Address of the article to process.

        Returns:
            None. ``ConnectionError``, ``InvalidSchema``, ``MissingSchema``,
            ``HTTPError``, ``TooManyRedirects`` and ``ArticleException`` are
            logged via ``logger.error`` and not propagated.
        """
        try:
            doc = Article(url, keep_article_html=self.keep_html)

            # The page is fetched by our own helper; the article only
            # receives the resulting HTML as its download input.
            page_html = self.get_page_content(url=url)
            doc.download(input_html=page_html)

            doc.parse()
            doc.nlp()

            # A truthy publish date is rendered via isoformat(); a falsy
            # value is stored unchanged.
            published = doc.publish_date
            date_value = published.isoformat() if published else published

            metadata = {
                'title': doc.title,
                'keywords': doc.keywords,
                'authors': doc.authors,
                # Stored as a list rather than the article's own collection.
                'images': list(doc.images),
                'description': doc.meta_description,
                'date': date_value,
                'url': url,
            }
            doc.set_meta_data(metadata)

            self.save_article(article=doc)
        except (
            ConnectionError,
            InvalidSchema,
            MissingSchema,
            HTTPError,
            TooManyRedirects,
            ArticleException,
        ) as exc:
            logger.error("URL %s with error %s", url, str(exc))