Exemple #1
0
    def callback(self, response):
        """Will process the result from the challenge, creating missing article
        and updating the feed"""
        article_created = False
        if response.result().status_code != 204:
            results = response.result().json()
            logger.debug('%r %r - %d entries were not matched '
                         'and will be created',
                         self.feed['id'], self.feed['title'], len(results))
            for id_to_create in results:
                article_created = True
                entry = construct_article(
                        self.entries[tuple(sorted(id_to_create.items()))],
                        self.feed)
                logger.info('%r %r - creating %r for %r - %r', self.feed['id'],
                            self.feed['title'], entry['title'],
                            entry['user_id'], id_to_create)
                self.query_jarr('post', 'article', entry)

        logger.debug('%r %r - updating feed etag %r last_mod %r',
                     self.feed['id'], self.feed['title'],
                     self.headers.get('etag', ''),
                     self.headers.get('last-modified', ''))

        up_feed = {'error_count': 0, 'last_error': None,
                   'etag': self.headers.get('etag', ''),
                   'last_modified': self.headers.get('last-modified',
                                    strftime('%a, %d %b %Y %X %Z', gmtime()))}
        fresh_feed = construct_feed_from(url=self.feed['link'],
                                         fp_parsed=self.parsed_feed)
        if fresh_feed.get('description'):
            fresh_feed['description'] \
                    = html.unescape(fresh_feed['description'])

        for key in ('description', 'site_link', 'icon_url'):
            if fresh_feed.get(key) and fresh_feed[key] != self.feed.get(key):
                up_feed[key] = fresh_feed[key]
        if not self.feed.get('title'):
            up_feed['title'] = html.unescape(fresh_feed.get('title', ''))
        up_feed['user_id'] = self.feed['user_id']
        # re-getting that feed earlier since new entries appeared
        if article_created:
            up_feed['last_retrieved'] = datetime.utcnow()

        diff_keys = {key for key in up_feed
                     if up_feed[key] != self.feed.get(key)}
        if not diff_keys:
            return  # no change in the feed, no update
        if not article_created and diff_keys == {'last_modified', 'etag'}:
            return  # meaningless if no new article has been published
        logger.info('%r %r - pushing feed attrs %r',
                self.feed['id'], self.feed['title'],
                {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
                 for key in up_feed if up_feed[key] != self.feed.get(key)})

        self.query_jarr('put', 'feed/%d' % self.feed['id'], up_feed)
Exemple #2
0
async def insert_database(user, feed):

    articles = await parse_feed(user, feed)
    if None is articles:
        return []

    logger.debug('inserting articles for {}'.format(feed.title))

    logger.info("Database insertion...")
    new_articles = []
    art_contr = ArticleController(user.id)
    for article in articles:
        existing_article_req = art_contr.read(feed_id=feed.id,
                        **extract_id(article))
        exist = existing_article_req.count() != 0
        if exist:
            existing_article = existing_article_req.first()
            is_updated = False
            logger.debug("Article %r (%r) already in the database.",
                         article['title'], article['link'])
            content = get_article_content(article)
            if existing_article.title != article['title']:
                existing_article.title = article['title']
                is_updated = True
            if existing_article.content != content:
                existing_article.content = content
                existing_article.readed = False
                is_updated = True
            if is_updated:
                art_contr.update({'entry_id': existing_article.entry_id},
                                 existing_article.dump())
            continue
        article = construct_article(article, feed.dump())
        try:
            new_articles.append(art_contr.create(**article))
            logger.info("New article % (%r) added.",
                        article['title'], article['link'])
        except Exception:
            logger.exception("Error when inserting article in database:")
            continue
    return new_articles
Exemple #3
0
async def insert_database(user, feed):

    articles = await parse_feed(user, feed)
    if None is articles:
        return []

    logger.debug('inserting articles for {}'.format(feed.title))

    logger.info("Database insertion...")
    new_articles = []
    art_contr = ArticleController(user.id)
    for article in articles:
        existing_article_req = art_contr.read(feed_id=feed.id,
                                              **extract_id(article))
        exist = existing_article_req.count() != 0
        if exist:
            existing_article = existing_article_req.first()
            is_updated = False
            logger.debug("Article %r (%r) already in the database.",
                         article['title'], article['link'])
            content = get_article_content(article)
            if existing_article.title != article['title']:
                existing_article.title = article['title']
                is_updated = True
            if existing_article.content != content:
                existing_article.content = content
                existing_article.readed = False
                is_updated = True
            if is_updated:
                art_contr.update({'entry_id': existing_article.entry_id},
                                 existing_article.dump())
            continue
        article = construct_article(article, feed.dump())
        try:
            new_articles.append(art_contr.create(**article))
            logger.info("New article % (%r) added.", article['title'],
                        article['link'])
        except Exception:
            logger.exception("Error when inserting article in database:")
            continue
    return new_articles
Exemple #4
0
    def callback(self, response):
        """Will process the result from the challenge, creating missing article
        and updating the feed"""
        article_created = False
        try:
            response = response.result()
            response.raise_for_status()
        except Exception:
            logger.exception('error while contacting JARR:')
            # ignore error on when contacting JARR
            # leave it to the next iteration
            return
        if response.status_code != 204:
            results = response.json()
            logger.debug(
                '%r %r - %d entries were not matched '
                'and will be created', self.feed['id'], self.feed['title'],
                len(results))
            for id_to_create in results:
                article_created = True
                entry = construct_article(
                    self.entries[tuple(sorted(id_to_create.items()))],
                    self.feed)
                logger.info('%r %r - creating %r for %r - %r', self.feed['id'],
                            self.feed['title'], entry['title'],
                            entry['user_id'], id_to_create)
                self.query_jarr('post', 'article', entry)

        logger.debug('%r %r - updating feed etag %r last_mod %r',
                     self.feed['id'], self.feed['title'],
                     self.headers.get('etag', ''),
                     self.headers.get('last-modified', ''))

        up_feed = {
            'error_count':
            0,
            'last_error':
            None,
            'etag':
            self.headers.get('etag', ''),
            'last_modified':
            self.headers.get('last-modified',
                             strftime('%a, %d %b %Y %X %Z', gmtime()))
        }
        fresh_feed = construct_feed_from(url=self.feed['link'],
                                         fp_parsed=self.parsed_feed)
        if fresh_feed.get('description'):
            fresh_feed['description'] \
                    = html.unescape(fresh_feed['description'])

        for key in ('description', 'site_link', 'icon_url'):
            if fresh_feed.get(key) and fresh_feed[key] != self.feed.get(key):
                up_feed[key] = fresh_feed[key]
        if not self.feed.get('title'):
            up_feed['title'] = html.unescape(fresh_feed.get('title', ''))
        up_feed['user_id'] = self.feed['user_id']
        # re-getting that feed earlier since new entries appeared
        if article_created:
            up_feed['last_retrieved'] = datetime.utcnow()

        diff_keys = {
            key
            for key in up_feed if up_feed[key] != self.feed.get(key)
        }
        if not diff_keys:
            return  # no change in the feed, no update
        if not article_created and diff_keys == {'last_modified', 'etag'}:
            return  # meaningless if no new article has been published
        logger.info(
            '%r %r - pushing feed attrs %r', self.feed['id'],
            self.feed['title'], {
                key: "%s -> %s" % (up_feed[key], self.feed.get(key))
                for key in up_feed if up_feed[key] != self.feed.get(key)
            })

        self.query_jarr('put', 'feed/%d' % self.feed['id'], up_feed)