def callback(self, response):
    """Handle a completed feed fetch.

    Interprets HTTP cache results (304, etag match) and bails out early
    when the feed is unchanged; otherwise parses the response with
    feedparser and challenges pyagg to compare the fetched entries with
    the existing ones, delegating the follow-up to a PyAggUpdater.

    response -- a future wrapping the HTTP response of the feed fetch.
    """
    try:
        response = response.result()
        response.raise_for_status()
    except Exception as error:
        # Fetch failed: bump the feed's error counter server-side and stop.
        error_count = self.feed['error_count'] + 1
        logger.exception('%r %r - an error occured while fetching '
                         'feed; bumping error count to %r',
                         self.feed['id'], self.feed['title'], error_count)
        # Fire-and-forget update; the returned future is intentionally
        # not kept (the original bound it to an unused variable).
        self.query_pyagg('put', 'feed/%d' % self.feed['id'],
                         {'error_count': error_count,
                          'last_error': str(error),
                          'user_id': self.feed['user_id']})
        return

    if response.status_code == 304:
        # Server-side cache hit: nothing new to process.
        logger.info("%r %r - feed responded with 304",
                    self.feed['id'], self.feed['title'])
        self.clean_feed()
        return

    if 'etag' not in response.headers:
        # No server etag: synthesize one from the body so the next fetch
        # can still short-circuit on identical content.
        logger.debug('%r %r - manually generating etag',
                     self.feed['id'], self.feed['title'])
        response.headers['etag'] = 'pyagg/"%s"' % to_hash(response.text)

    if response.headers['etag'] and self.feed['etag'] \
            and response.headers['etag'] == self.feed['etag']:
        # Etag unchanged -> content unchanged; the 'pyagg' prefix tells a
        # locally computed hash apart from a genuine server etag.
        if 'pyagg' in self.feed['etag']:
            logger.info("%r %r - calculated hash matches (%d)",
                        self.feed['id'], self.feed['title'],
                        response.status_code)
        else:
            logger.info("%r %r - feed responded with same etag (%d)",
                        self.feed['id'], self.feed['title'],
                        response.status_code)
        self.clean_feed()
        return
    else:
        logger.debug('%r %r - etag mismatch %r != %r',
                     self.feed['id'], self.feed['title'],
                     response.headers['etag'], self.feed['etag'])

    logger.info('%r %r - cache validation failed, challenging entries',
                self.feed['id'], self.feed['title'])

    # Build the id list / entry map used to ask pyagg which entries are new.
    ids, entries = [], {}
    parsed_response = feedparser.parse(response.content)
    for entry in parsed_response['entries']:
        entry_ids = extract_id(entry)
        entry_ids['feed_id'] = self.feed['id']
        entry_ids['user_id'] = self.feed['user_id']
        entries[tuple(sorted(entry_ids.items()))] = entry
        ids.append(entry_ids)
    logger.debug('%r %r - found %d entries %r',
                 self.feed['id'], self.feed['title'], len(ids), ids)

    future = self.query_pyagg('get', 'articles/challenge', {'ids': ids})
    updater = PyAggUpdater(self.feed, entries, response.headers,
                           parsed_response, self.auth, self.pool,
                           self.session)
    future.add_done_callback(updater.callback)
async def insert_database(user, feed):
    """Fetch *feed* for *user* and persist its articles.

    Existing articles are updated in place (date, title, content);
    unseen ones are created.

    Returns the list of newly created articles, or [] when the feed
    could not be parsed.
    """
    articles = await parse_feed(user, feed)
    if articles is None:
        return []
    # Lazy %-style args so formatting only happens if the level is enabled.
    logger.debug('inserting articles for %s', feed.title)
    logger.info("Database insertion...")
    new_articles = []
    art_contr = ArticleController(user.id)
    for article in articles:
        existing_article_req = art_contr.read(feed_id=feed.id,
                                              **extract_id(article))
        if existing_article_req.count() != 0:
            # The article has already been retrieved: only refresh the
            # updated date, title and content.
            logger.debug("Article %r (%r) already in the database.",
                         article['title'], article['link'])
            existing_article = existing_article_req.first()
            new_updated_date = None
            try:
                new_updated_date = dateutil.parser.parse(article['updated'])
            except Exception:
                # Missing/unparsable 'updated' field: keep the stored date.
                # (Original did print(e); log instead of writing to stdout.)
                logger.exception("Unable to parse updated date of %r",
                                 article['link'])
            # Guard: new_updated_date is None when parsing failed; the
            # original dereferenced it unconditionally and crashed.
            if new_updated_date is not None:
                if existing_article.updated_date is None:
                    existing_article.updated_date = \
                        new_updated_date.replace(tzinfo=None)
                elif existing_article.updated_date \
                        .strftime('%Y-%m-%dT%H:%M:%S') != \
                        new_updated_date.strftime('%Y-%m-%dT%H:%M:%S'):
                    existing_article.updated_date = \
                        new_updated_date.replace(tzinfo=None)
            if existing_article.title != article['title']:
                existing_article.title = article['title']
            content = get_article_content(article)
            if existing_article.content != content:
                existing_article.content = content
                # Content changed: mark the article unread again.
                existing_article.readed = False
            art_contr.update({'entry_id': existing_article.entry_id},
                             existing_article.dump())
            continue
        article = construct_article(article, feed)
        try:
            new_articles.append(art_contr.create(**article))
            # Fixed format string: original had "% (%r)" with two args.
            logger.info("New article %r (%r) added.",
                        article['title'], article['link'])
        except Exception:
            logger.exception("Error when inserting article in database:")
            continue
    return new_articles
# NOTE(review): this file defines insert_database twice with identical
# bodies; this later definition shadows the earlier one — consider
# deleting one of them.
async def insert_database(user, feed):
    """Fetch *feed* for *user* and persist its articles.

    Existing articles are updated in place (date, title, content);
    unseen ones are created.

    Returns the list of newly created articles, or [] when the feed
    could not be parsed.
    """
    articles = await parse_feed(user, feed)
    if articles is None:
        return []
    # Lazy %-style args so formatting only happens if the level is enabled.
    logger.debug('inserting articles for %s', feed.title)
    logger.info("Database insertion...")
    new_articles = []
    art_contr = ArticleController(user.id)
    for article in articles:
        existing_article_req = art_contr.read(feed_id=feed.id,
                                              **extract_id(article))
        if existing_article_req.count() != 0:
            # The article has already been retrieved: only refresh the
            # updated date, title and content.
            logger.debug("Article %r (%r) already in the database.",
                         article['title'], article['link'])
            existing_article = existing_article_req.first()
            new_updated_date = None
            try:
                new_updated_date = dateutil.parser.parse(article['updated'])
            except Exception:
                # Missing/unparsable 'updated' field: keep the stored date.
                # (Original did print(e); log instead of writing to stdout.)
                logger.exception("Unable to parse updated date of %r",
                                 article['link'])
            # Guard: new_updated_date is None when parsing failed; the
            # original dereferenced it unconditionally and crashed.
            if new_updated_date is not None:
                if existing_article.updated_date is None:
                    existing_article.updated_date = \
                        new_updated_date.replace(tzinfo=None)
                elif existing_article.updated_date \
                        .strftime('%Y-%m-%dT%H:%M:%S') != \
                        new_updated_date.strftime('%Y-%m-%dT%H:%M:%S'):
                    existing_article.updated_date = \
                        new_updated_date.replace(tzinfo=None)
            if existing_article.title != article['title']:
                existing_article.title = article['title']
            content = get_article_content(article)
            if existing_article.content != content:
                existing_article.content = content
                # Content changed: mark the article unread again.
                existing_article.readed = False
            art_contr.update({'entry_id': existing_article.entry_id},
                             existing_article.dump())
            continue
        article = construct_article(article, feed)
        try:
            new_articles.append(art_contr.create(**article))
            # Fixed format string: original had "% (%r)" with two args.
            logger.info("New article %r (%r) added.",
                        article['title'], article['link'])
        except Exception:
            logger.exception("Error when inserting article in database:")
            continue
    return new_articles