def callback(self, response):
    """Process the result of the challenge request.

    Creates the articles the server reported as not matched, then pushes
    the feed attributes that changed.

    ``response`` is a future-like object: ``response.result()`` must yield
    an HTTP response exposing ``status_code``, ``raise_for_status()`` and
    ``json()``. A 204 status is treated as "nothing to create".

    If the request failed or returned an error status, the error is logged
    and nothing is created or updated.
    """
    # Resolve the future once; a failed request or an HTTP error status
    # must not be parsed as a list of entries to create.
    try:
        response = response.result()
        response.raise_for_status()
    except Exception:
        logger.exception('error while contacting JARR:')
        return

    article_created = False
    if response.status_code != 204:
        results = response.json()
        logger.debug('%r %r - %d entries were not matched '
                     'and will be created',
                     self.feed['id'], self.feed['title'], len(results))
        for id_to_create in results:
            article_created = True
            # self.entries is keyed by the sorted (key, value) pairs of
            # the identifiers.
            entry = construct_article(
                self.entries[tuple(sorted(id_to_create.items()))],
                self.feed)
            logger.info('%r %r - creating %r for %r - %r',
                        self.feed['id'], self.feed['title'],
                        entry['title'], entry['user_id'], id_to_create)
            self.query_jarr('post', 'article', entry)

    logger.debug('%r %r - updating feed etag %r last_mod %r',
                 self.feed['id'], self.feed['title'],
                 self.headers.get('etag', ''),
                 self.headers.get('last-modified', ''))

    up_feed = {
        'error_count': 0,
        'last_error': None,
        'etag': self.headers.get('etag', ''),
        # fall back on the current time when the header is absent
        'last_modified': self.headers.get(
            'last-modified',
            strftime('%a, %d %b %Y %X %Z', gmtime())),
    }

    fresh_feed = construct_feed_from(url=self.feed['link'],
                                     fp_parsed=self.parsed_feed)
    if fresh_feed.get('description'):
        fresh_feed['description'] = html.unescape(fresh_feed['description'])
    # only non-empty values that differ from the stored ones are pushed
    for key in ('description', 'site_link', 'icon_url'):
        if fresh_feed.get(key) and fresh_feed[key] != self.feed.get(key):
            up_feed[key] = fresh_feed[key]
    if not self.feed.get('title'):
        up_feed['title'] = html.unescape(fresh_feed.get('title', ''))
    up_feed['user_id'] = self.feed['user_id']
    # re-getting that feed earlier since new entries appeared
    if article_created:
        up_feed['last_retrieved'] = datetime.utcnow()

    # Computed once, in up_feed order so the logged dict keeps that order.
    changed_keys = [key for key in up_feed
                    if up_feed[key] != self.feed.get(key)]
    if not changed_keys:
        return  # no change in the feed, no update
    if not article_created \
            and set(changed_keys) == {'last_modified', 'etag'}:
        return  # meaningless if no new article has been published

    # each logged value reads "<value being pushed> -> <value currently held>"
    logger.info('%r %r - pushing feed attrs %r',
                self.feed['id'], self.feed['title'],
                {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
                 for key in changed_keys})
    self.query_jarr('put', 'feed/%d' % self.feed['id'], up_feed)
async def insert_database(user, feed):
    """Insert or update in the database the articles parsed from ``feed``.

    The articles come from ``await parse_feed(user, feed)``. For each one,
    an existing row is looked up through ``ArticleController(user.id)``
    using ``feed.id`` and the identifiers given by ``extract_id``:

    * if a row exists, its title and content are refreshed when they
      differ (a content change also resets ``readed`` to ``False``);
    * otherwise a new article is built and created.

    A failure while creating one article is logged and does not stop the
    processing of the remaining ones.

    Returns the list of values returned by the controller's ``create`` for
    the newly inserted articles; an empty list when ``parse_feed``
    returned ``None``.
    """
    articles = await parse_feed(user, feed)
    if articles is None:
        return []

    logger.debug('inserting articles for %s', feed.title)
    logger.info("Database insertion...")

    new_articles = []
    art_contr = ArticleController(user.id)
    for article in articles:
        existing_article_req = art_contr.read(feed_id=feed.id,
                                              **extract_id(article))
        if existing_article_req.count() != 0:
            existing_article = existing_article_req.first()
            is_updated = False
            logger.debug("Article %r (%r) already in the database.",
                         article['title'], article['link'])
            content = get_article_content(article)
            if existing_article.title != article['title']:
                existing_article.title = article['title']
                is_updated = True
            if existing_article.content != content:
                existing_article.content = content
                # changed content makes the article unread again
                existing_article.readed = False
                is_updated = True
            if is_updated:
                art_contr.update({'entry_id': existing_article.entry_id},
                                 existing_article.dump())
            continue

        article = construct_article(article, feed.dump())
        try:
            new_articles.append(art_contr.create(**article))
            # "%r (%r)": the previous "% (%r)" was not a valid format
            # specification, so this message could never be rendered.
            logger.info("New article %r (%r) added.",
                        article['title'], article['link'])
        except Exception:
            # best effort: one failing article must not abort the others
            logger.exception("Error when inserting article in database:")
    return new_articles
def callback(self, response):
    """Handle the answer to the challenge request.

    Every entry the server reported as not matched is posted as a new
    article; afterwards the feed attributes that changed are pushed,
    unless nothing worth saving changed.
    """
    try:
        response = response.result()
        response.raise_for_status()
    except Exception:
        # ignore errors when contacting JARR,
        # leave it to the next iteration
        logger.exception('error while contacting JARR:')
        return

    article_created = False
    if response.status_code != 204:
        unmatched = response.json()
        logger.debug(
            '%r %r - %d entries were not matched and will be created',
            self.feed['id'], self.feed['title'], len(unmatched))
        for entry_ids in unmatched:
            article_created = True
            lookup_key = tuple(sorted(entry_ids.items()))
            new_article = construct_article(self.entries[lookup_key],
                                            self.feed)
            logger.info('%r %r - creating %r for %r - %r',
                        self.feed['id'], self.feed['title'],
                        new_article['title'], new_article['user_id'],
                        entry_ids)
            self.query_jarr('post', 'article', new_article)

    logger.debug('%r %r - updating feed etag %r last_mod %r',
                 self.feed['id'], self.feed['title'],
                 self.headers.get('etag', ''),
                 self.headers.get('last-modified', ''))

    current_stamp = strftime('%a, %d %b %Y %X %Z', gmtime())
    up_feed = dict(
        error_count=0,
        last_error=None,
        etag=self.headers.get('etag', ''),
        last_modified=self.headers.get('last-modified', current_stamp),
    )

    fresh_feed = construct_feed_from(url=self.feed['link'],
                                     fp_parsed=self.parsed_feed)
    raw_description = fresh_feed.get('description')
    if raw_description:
        fresh_feed['description'] = html.unescape(raw_description)

    # copy over the non-empty attributes that differ from the stored ones
    for attr in ('description', 'site_link', 'icon_url'):
        fresh_value = fresh_feed.get(attr)
        if fresh_value and fresh_value != self.feed.get(attr):
            up_feed[attr] = fresh_value

    if not self.feed.get('title'):
        up_feed['title'] = html.unescape(fresh_feed.get('title', ''))
    up_feed['user_id'] = self.feed['user_id']

    # re-getting that feed earlier since new entries appeared
    if article_created:
        up_feed['last_retrieved'] = datetime.utcnow()

    changed = [key for key in up_feed
               if up_feed[key] != self.feed.get(key)]
    if not changed:
        # no change in the feed, no update
        return
    if not article_created and set(changed) == {'last_modified', 'etag'}:
        # meaningless if no new article has been published
        return

    pushed = {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
              for key in changed}
    logger.info('%r %r - pushing feed attrs %r',
                self.feed['id'], self.feed['title'], pushed)
    self.query_jarr('put', 'feed/%d' % self.feed['id'], up_feed)