def fetch_feed_articles(feed_id):
    """Synchronize articles for one feed, guarded by a cache-based lock.

    Args:
        feed_id: Primary key of the Feed to synchronize.

    Returns:
        Tuple ``(feed_id, sync_count)`` where ``sync_count`` is the number of
        articles synchronized — 0 when the lock is already held by another
        worker or the feed does not exist.
    """
    from rdr.modules.feeds.articles.sync import ArticlesSynchronizer
    from rdr.modules.feeds.models import Feed
    from rdr.application.cache import cache

    sync_count = 0
    cache_key = 'tasks.fetch_feed_articles.lock.' + str(feed_id)
    # Skip the run entirely if another worker currently holds the lock.
    if not cache.get(cache_key):
        cache.set(cache_key, True, timeout=ARTICLE_FETCH_TIMEOUT)
        try:
            feed = Feed.query.filter(Feed.id == feed_id).first()
            if feed:
                synchronizer = ArticlesSynchronizer(feed)
                articles = synchronizer.sync()
                sync_count = len(articles)
        finally:
            # Release the lock even when synchronization raises; otherwise
            # the feed stays locked until ARTICLE_FETCH_TIMEOUT expires.
            cache.delete(cache_key)
    return feed_id, sync_count
def articles_sync(feed_id, page=1):
    """Return one page of a feed's active articles, refreshing the feed first.

    The feed is re-synchronized from its remote source at most once per
    pause window (300 s since ``feed.last_update``).

    Args:
        feed_id: Primary key of the Feed to read.
        page: 1-based page number of the article listing (default 1; new
            keyword parameter, backward-compatible with existing callers).

    Returns:
        Dict with ``success`` flag, serialized ``articles`` page, and the
        serialized ``feed`` record.

    Raises:
        json.InvalidRequest: If no feed exists with ``feed_id``.
    """
    feed = Feed.query.filter(Feed.id == feed_id).first()
    if not feed:
        raise json.InvalidRequest("Unknown feed id")
    # Throttle remote fetches: only re-sync when the last update is older
    # than the pause window (or the feed was never updated).
    default_update_pause_seconds = 300
    if not feed.last_update or \
            (datetime.now() - feed.last_update) > timedelta(seconds=default_update_pause_seconds):
        synchronizer = ArticlesSynchronizer(feed)
        synchronizer.sync()
    articles_pagination = Article.query \
        .filter((Article.feed_id == feed_id) & (Article.active == True)) \
        .order_by(Article.published.desc()) \
        .options(db.joinedload(Article.feed)) \
        .paginate(page, per_page=40, error_out=False)
    records = UserArticleRecord.wrap_articles_list(articles_pagination.items)
    feed_record = UserPackageRecord.wrap_package(feed)
    return {
        'success': True,
        'articles': [x.to_dict() for x in records],
        'feed': feed_record.to_dict()
    }
def _resolve(self, query):
    """Resolve *query* (a URL) into a list of Feed records.

    Strategy: parse the URL directly as a feed and as a source page; when
    direct feed parsing fails, fall back to alternate feed URLs discovered
    on the source page. New feeds are persisted (inactive) together with
    alias keyword/url records and, when available, an icon image.

    Returns a list of Feed instances (existing matches plus newly created
    ones); empty list when nothing could be resolved.
    """
    query = http.encode_url(query)
    extractors = []
    ex = FeedExtractor(query)
    source = SourceExtractor(query)
    feeds = []
    # Best-effort parse of the query as a source (site) page; a failure
    # only disables the source-based fallbacks below.
    try:
        source.parse()
    except Exception as e:
        app.logger.exception(e)
        source = None
    try:
        ex.parse()
        extractors.append(ex)
        if not source:
            app.logger.info('Try to extract source data via rss site url')
            # Retry source extraction using the URL the feed itself reports.
            source = SourceExtractor(ex.url)
            try:
                source.parse()
            except Exception as e:
                app.logger.exception(e)
                source = None
    except InvalidFeedException as e:
        # The query URL is not a valid feed; fall back to alternate feed
        # URLs advertised by the source page (if we have one).
        app.logger.exception(e)
        if not source or not source.feed_urls:
            return []
        app.logger.info('Try to get alternate RSS feeds from site url')
        for url in source.feed_urls:
            # Reuse an existing Feed when the alternate URL is already known.
            match_feed = Feed.query.filter((Feed.url == url) | (Feed.channel_url == url)).first()
            if match_feed:
                feeds.append(match_feed)
            else:
                # todo: multi threading parsing
                app.logger.info('Try to extract feeds from alternate source: "%s"' % url)
                ex = FeedExtractor(url)
                ex.parse()
                extractors.append(ex)
    if extractors:
        for extractor in extractors:
            # Language resolution order: feed metadata, then source page,
            # then a domain-based guess; truncate to a 2-letter code.
            lang = extractor.language
            if not lang and source:
                lang = source.language
            if not lang:
                lang = try_to_resolve_lang_by_domain(query)
            if lang:
                lang = lang[:2]
            feed = Feed(url=extractor.url,
                        channel_url=extractor.channel_url,
                        title=extractor.title or 'Unnamed feed',
                        language=lang,
                        active=False,
                        created=datetime.now())
            db.session.add(feed)
            # Commit first so feed.id is assigned before the alias rows.
            db.session.commit()
            db.session.add(FeedAliasKeyword(keyword=feed.title, feed_id=feed.id))
            # Record the original query as an alias only when it differs
            # from the feed's own URLs.
            if query != feed.url and query != feed.channel_url:
                db.session.add(FeedAliasUrl(url=query, feed_id=feed.id))
            db.session.commit()
            feeds.append(feed)
            # Icon retrieval is best-effort; failures must not abort
            # feed creation.
            try:
                image = None
                if source:
                    image = source.retrieve_image()
                if not image:
                    image = extractor.retrieve_image()
                if image:
                    image.owner_id = None
                    db.session.add(image)
                    # Commit to obtain image.id before linking it to the feed.
                    db.session.commit()
                    feed.icon_image_id = image.id
                    db.session.commit()
            except Exception as e:
                app.logger.exception(e)
    if self.load_articles:
        app.logger.info('Load articles')
        # todo: multi threading loading
        for feed in feeds:
            # Per-feed best effort: one failing sync must not stop the rest.
            try:
                from rdr.modules.feeds.articles.sync import ArticlesSynchronizer
                synchronizer = ArticlesSynchronizer(feed)
                synchronizer.sync()
            except Exception as e:
                app.logger.exception(e)
    return feeds
def activate_feed(feed):
    """Run a full article synchronization for *feed* and return its result."""
    return ArticlesSynchronizer(feed).sync()