Example #1
0
 def request(self, url):
     """Fetch the Diffbot analysis of an article and return the decoded JSON.

     Sends a GET request to the configured Diffbot endpoint asking for the
     ``title``, ``text`` and ``images`` fields of the page at ``url``.

     Configuration read from ``app.config``:

     * ``DIFFBOT_API_TOKEN`` -- required.
     * ``DIFFBOT_API_URL`` -- optional, defaults to the v3 ``analyze`` URL.
     * ``DIFFBOT_API_TIMEOUT`` -- optional, seconds to wait for the API
       (defaults to 30).

     :param url: article URL; passed through ``http.encode_url`` before
         being sent.
     :return: the response body decoded by ``response.json()``.
     :raises AssertionError: if ``url`` is empty or no API token is
         configured.
     """
     # Raise explicitly rather than use ``assert`` so the checks still run
     # under ``python -O``. AssertionError is kept on purpose: it is the
     # exception type existing callers already see.
     if not url:
         raise AssertionError('Article url is empty')
     token = app.config.get('DIFFBOT_API_TOKEN', None)
     if not token:
         raise AssertionError('DIFFBOT_API_TOKEN is required parameter for using Diffbot provider')
     api_url = app.config.get('DIFFBOT_API_URL', 'http://api.diffbot.com/v3/analyze')
     # Without a timeout the call can block indefinitely on a stalled
     # connection.
     timeout = app.config.get('DIFFBOT_API_TIMEOUT', 30)
     response = requests.get(
         api_url,
         params={
             'token': token,
             'url': http.encode_url(url),
             'fields': 'title,text,images',
         },
         timeout=timeout,
     )
     return response.json()
Example #2
0
    def _resolve(self, query):
        """Resolve ``query`` into a list of ``Feed`` records.

        ``query`` is parsed both as a feed (``FeedExtractor``) and as a source
        page (``SourceExtractor``). When it is not a valid feed, the URLs in
        ``source.feed_urls`` are tried instead: URLs that match a stored
        ``Feed`` reuse that record, the others are parsed as feeds. Every
        successfully parsed extractor is stored as a new inactive ``Feed``
        together with its alias rows and, when one can be retrieved, an icon
        image. If ``self.load_articles`` is set, articles are synchronized for
        every returned feed.

        :param query: URL to resolve; passed through ``http.encode_url``.
        :return: list of ``Feed`` objects (reused and newly created); empty
            when the query is not a valid feed and the source offers no
            alternate feed URLs.
        """
        query = http.encode_url(query)
        extractors = []
        feeds = []
        ex = FeedExtractor(query)
        source = SourceExtractor(query)
        try:
            source.parse()
        except Exception as e:
            # Source data is optional: log the failure and go on without it.
            app.logger.exception(e)
            source = None
        try:
            ex.parse()
            extractors.append(ex)
            if not source:
                # The query itself gave no source data; retry with the URL
                # reported by the parsed feed.
                app.logger.info('Try to extract source data via rss site url')
                source = SourceExtractor(ex.url)
                try:
                    source.parse()
                except Exception as e:
                    app.logger.exception(e)
                    source = None
        except InvalidFeedException as e:
            app.logger.exception(e)
            if not source or not source.feed_urls:
                return []
            app.logger.info('Try to get alternate RSS feeds from site url')
            for url in source.feed_urls:
                match_feed = Feed.query.filter((Feed.url == url) | (Feed.channel_url == url)).first()
                if match_feed:
                    feeds.append(match_feed)
                    continue
                # todo: multi threading parsing
                app.logger.info('Try to extract feeds from alternate source: "%s"', url)
                alternate = FeedExtractor(url)
                try:
                    alternate.parse()
                except InvalidFeedException as alternate_error:
                    # One broken alternate URL must not discard the feeds
                    # already collected nor block the remaining candidates.
                    app.logger.exception(alternate_error)
                    continue
                extractors.append(alternate)
        for extractor in extractors:
            # Language fallback chain: feed -> source -> guess from the domain.
            lang = extractor.language
            if not lang and source:
                lang = source.language
            if not lang:
                lang = try_to_resolve_lang_by_domain(query)
            if lang:
                # Keep only the first two characters of the language value.
                lang = lang[:2]
            feed = Feed(url=extractor.url,
                        channel_url=extractor.channel_url,
                        title=extractor.title or 'Unnamed feed',
                        language=lang,
                        active=False,
                        created=datetime.now())
            db.session.add(feed)
            # Commit first so ``feed.id`` is available for the alias rows.
            db.session.commit()
            db.session.add(FeedAliasKeyword(keyword=feed.title, feed_id=feed.id))
            if query != feed.url and query != feed.channel_url:
                db.session.add(FeedAliasUrl(url=query, feed_id=feed.id))
            db.session.commit()
            feeds.append(feed)
            try:
                # Prefer the source image; fall back to the feed's own image.
                image = None
                if source:
                    image = source.retrieve_image()
                if not image:
                    image = extractor.retrieve_image()
                if image:
                    image.owner_id = None

                    db.session.add(image)
                    db.session.commit()

                    feed.icon_image_id = image.id
                    db.session.commit()
            except Exception as e:
                # The icon is best-effort. Roll back so a failed flush/commit
                # does not leave the session unusable for the next feed.
                app.logger.exception(e)
                db.session.rollback()
        if self.load_articles:
            app.logger.info('Load articles')
            # todo: multi threading loading
            for feed in feeds:
                try:
                    # Imported lazily inside the ``try``, so an import failure
                    # is logged like any other synchronization error.
                    from rdr.modules.feeds.articles.sync import ArticlesSynchronizer
                    synchronizer = ArticlesSynchronizer(feed)
                    synchronizer.sync()
                except Exception as e:
                    app.logger.exception(e)
        return feeds