Exemple #1
0
    def download(self):
        """Fetch, parse and run NLP on the article behind the Pocket item's URL.

        On success, the attributes of the processed ``Article`` whose names
        appear in ``self.ARTICLE_ATTRIBUTES_TO_KEEP`` are stored as a dict on
        ``self._pocket_item.article``.

        Returns:
            ``None`` (implicitly) on success. An empty dict when an
            ``ArticleException`` is raised; in that case
            ``self._pocket_item.article`` is not assigned by this call.
        """
        url = self._pocket_item.url
        try:
            logger.info('Downloading article for {}'.format(url))
            article = Article(url)
            article.download()
            logger.info('Parsing article for {}'.format(url))
            article.parse()
            logger.info('Performing NLP on article for {}'.format(url))
            article.nlp()

            # Normalise fields in place before taking the snapshot below.
            # NOTE(review): presumably tags/images are sets and publish_date
            # is a datetime, converted here so the stored dict is
            # serialisable -- confirm against the Article implementation.
            article.tags = list(article.tags)
            if article.publish_date:
                article.publish_date = article.publish_date.timestamp()
            article.images = list(article.images)

            # Keep only the whitelisted attributes of the article object.
            self._pocket_item.article = {
                name: value
                for name, value in vars(article).items()
                if name in self.ARTICLE_ATTRIBUTES_TO_KEEP
            }
        except ArticleException as err:
            # Include the underlying error so the log explains *why* the
            # article could not be processed, not merely that it failed.
            logger.warning('Could not download article for {}: {}'.format(
                url, err))
            return {}