Esempio n. 1
0
File: feed.py Progetto: bzero/JARR
def bookmarklet():
    feed_contr = FeedController(g.user.id)
    url = (request.args if request.method == 'GET' else request.form)\
            .get('url', None)
    if not url:
        flash(gettext("Couldn't add feed: url missing."), "error")
        raise BadRequest("url is missing")

    feed_exists = list(feed_contr.read(__or__={'link': url, 'site_link': url}))
    if feed_exists:
        flash(gettext("Couldn't add feed: feed already exists."),
                "warning")
        return redirect(url_for('feed.form', feed_id=feed_exists[0].id))

    try:
        feed = construct_feed_from(url)
    except requests.exceptions.ConnectionError:
        flash(gettext("Impossible to connect to the address: {}.".format(url)),
              "danger")
        return redirect(url_for('home'))
    except Exception:
        logger.exception('something bad happened when fetching %r', url)
        return redirect(url_for('home'))
    if not feed.get('link'):
        feed['enabled'] = False
        flash(gettext("Couldn't find a feed url, you'll need to find a Atom or"
                      " RSS link manually and reactivate this feed"),
              'warning')
    feed = feed_contr.create(**feed)
    flash(gettext('Feed was successfully created.'), 'success')
    if feed.enabled and conf.CRAWLING_METHOD == "classic":
        utils.fetch(g.user.id, feed.id)
        flash(gettext("Downloading articles for the new feed..."), 'info')
    return redirect(url_for('feed.form', feed_id=feed.id))
Esempio n. 2
0
def bookmarklet():
    feed_contr = FeedController(current_user.id)
    url = (request.args if request.method == 'GET' else request.form)\
            .get('url', None)
    if not url:
        flash(gettext("Couldn't add feed: url missing."), "error")
        raise BadRequest("url is missing")

    feed_exists = list(feed_contr.read(__or__=[{'link': url},
                                               {'site_link': url}]))
    if feed_exists:
        flash(gettext("Couldn't add feed: feed already exists."),
                "warning")
        return redirect(url_for('home', at='f', ai=feed_exists[0].id))

    try:
        feed = construct_feed_from(url)
    except requests.exceptions.ConnectionError:
        flash(gettext("Impossible to connect to the address: {}.".format(url)),
              "danger")
        return redirect(url_for('home'))
    except Exception:
        logger.exception('something bad happened when fetching %r', url)
        return redirect(url_for('home'))
    if not feed.get('link'):
        feed['enabled'] = False
        flash(gettext("Couldn't find a feed url, you'll need to find a Atom or"
                      " RSS link manually and reactivate this feed"),
              'warning')
    feed = feed_contr.create(**feed)
    flash(gettext('Feed was successfully created.'), 'success')
    if feed.enabled and conf.CRAWLER_TYPE == "classic":
        utils.fetch(current_user.id, feed.id)
        flash(gettext("Downloading articles for the new feed..."), 'info')
    return redirect(url_for('home', at='f', ai=feed.id))
Esempio n. 3
0
    def callback(self, response):
        """Will process the result from the challenge, creating missing article
        and updating the feed"""
        article_created = False
        if response.result().status_code != 204:
            results = response.result().json()
            logger.debug('%r %r - %d entries were not matched '
                         'and will be created',
                         self.feed['id'], self.feed['title'], len(results))
            for id_to_create in results:
                article_created = True
                entry = construct_article(
                        self.entries[tuple(sorted(id_to_create.items()))],
                        self.feed)
                logger.info('%r %r - creating %r for %r - %r', self.feed['id'],
                            self.feed['title'], entry['title'],
                            entry['user_id'], id_to_create)
                self.query_jarr('post', 'article', entry)

        logger.debug('%r %r - updating feed etag %r last_mod %r',
                     self.feed['id'], self.feed['title'],
                     self.headers.get('etag', ''),
                     self.headers.get('last-modified', ''))

        up_feed = {'error_count': 0, 'last_error': None,
                   'etag': self.headers.get('etag', ''),
                   'last_modified': self.headers.get('last-modified',
                                    strftime('%a, %d %b %Y %X %Z', gmtime()))}
        fresh_feed = construct_feed_from(url=self.feed['link'],
                                         fp_parsed=self.parsed_feed)
        if fresh_feed.get('description'):
            fresh_feed['description'] \
                    = html.unescape(fresh_feed['description'])

        for key in ('description', 'site_link', 'icon_url'):
            if fresh_feed.get(key) and fresh_feed[key] != self.feed.get(key):
                up_feed[key] = fresh_feed[key]
        if not self.feed.get('title'):
            up_feed['title'] = html.unescape(fresh_feed.get('title', ''))
        up_feed['user_id'] = self.feed['user_id']
        # re-getting that feed earlier since new entries appeared
        if article_created:
            up_feed['last_retrieved'] = datetime.utcnow()

        diff_keys = {key for key in up_feed
                     if up_feed[key] != self.feed.get(key)}
        if not diff_keys:
            return  # no change in the feed, no update
        if not article_created and diff_keys == {'last_modified', 'etag'}:
            return  # meaningless if no new article has been published
        logger.info('%r %r - pushing feed attrs %r',
                self.feed['id'], self.feed['title'],
                {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
                 for key in up_feed if up_feed[key] != self.feed.get(key)})

        self.query_jarr('put', 'feed/%d' % self.feed['id'], up_feed)
Esempio n. 4
0
    def callback(self, response):
        """Will process the result from the challenge, creating missing article
        and updating the feed"""
        article_created = False
        if response.result().status_code != 204:
            results = response.result().json()
            logger.debug('%r %r - %d entries were not matched '
                         'and will be created',
                         self.feed['id'], self.feed['title'], len(results))
            for id_to_create in results:
                article_created = True
                entry = construct_article(
                        self.entries[tuple(sorted(id_to_create.items()))],
                        self.feed)
                logger.info('%r %r - creating %r for %r - %r', self.feed['id'],
                            self.feed['title'], entry['title'],
                            entry['user_id'], id_to_create)
                self.query_pyagg('post', 'article', entry)

        logger.debug('%r %r - updating feed etag %r last_mod %r',
                     self.feed['id'], self.feed['title'],
                     self.headers.get('etag', ''),
                     self.headers.get('last-modified', ''))

        up_feed = {'error_count': 0, 'last_error': None,
                   'etag': self.headers.get('etag', ''),
                   'last_modified': self.headers.get('last-modified',
                                    strftime('%a, %d %b %Y %X %Z', gmtime()))}
        fresh_feed = construct_feed_from(url=self.feed['link'],
                                         fp_parsed=self.parsed_feed)
        for key in ('description', 'site_link', 'icon_url'):
            if fresh_feed.get(key) and fresh_feed[key] != self.feed.get(key):
                up_feed[key] = fresh_feed[key]
        if not self.feed.get('title'):
            up_feed['title'] = fresh_feed.get('title', '')
        up_feed['user_id'] = self.feed['user_id']
        # re-getting that feed earlier since new entries appeared
        if article_created:
            up_feed['last_retrieved'] \
                    = (datetime.now() - timedelta(minutes=45)).isoformat()

        diff_keys = {key for key in up_feed
                     if up_feed[key] != self.feed.get(key)}
        if not diff_keys:
            return  # no change in the feed, no update
        if not article_created and diff_keys == {'last_modified', 'etag'}:
            return  # meaningless if no new article has been published
        logger.info('%r %r - pushing feed attrs %r',
                self.feed['id'], self.feed['title'],
                {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
                 for key in up_feed if up_feed[key] != self.feed.get(key)})

        self.query_pyagg('put', 'feed/%d' % self.feed['id'], up_feed)
Esempio n. 5
0
async def parse_feed(user, feed):
    """
    Fetch a feed.
    Update the feed and return the articles.
    """
    parsed_feed = None
    up_feed = {}
    articles = []
    with (await sem):
        try:
            parsed_feed = await get(feed.link)
        except Exception as e:
            up_feed['last_error'] = str(e)
            up_feed['error_count'] = feed.error_count + 1
        finally:
            up_feed['last_retrieved'] = datetime.utcnow()
            if parsed_feed is None:
                FeedController().update({'id': feed.id}, up_feed)
                return

    if not is_parsing_ok(parsed_feed):
        up_feed['last_error'] = str(parsed_feed['bozo_exception'])
        up_feed['error_count'] = feed.error_count + 1
        FeedController().update({'id': feed.id}, up_feed)
        return
    if parsed_feed['entries'] != []:
        articles = parsed_feed['entries']

    up_feed['error_count'] = 0
    up_feed['last_error'] = ""

    # Feed informations
    construct_feed_from(feed.link, parsed_feed).update(up_feed)
    if feed.title and 'title' in up_feed:
        # do not override the title set by the user
        del up_feed['title']
    FeedController().update({'id': feed.id}, up_feed)

    return articles
Esempio n. 6
0
async def parse_feed(user, feed):
    """
    Fetch a feed.
    Update the feed and return the articles.
    """
    parsed_feed = None
    up_feed = {}
    articles = []
    with (await sem):
        try:
            parsed_feed = await get(feed.link)
        except Exception as e:
            up_feed['last_error'] = str(e)
            up_feed['error_count'] = feed.error_count + 1
        finally:
            up_feed['last_retrieved'] = datetime.now(dateutil.tz.tzlocal())
            if parsed_feed is None:
                FeedController().update({'id': feed.id}, up_feed)
                return

    if not is_parsing_ok(parsed_feed):
        up_feed['last_error'] = str(parsed_feed['bozo_exception'])
        up_feed['error_count'] = feed.error_count + 1
        FeedController().update({'id': feed.id}, up_feed)
        return
    if parsed_feed['entries'] != []:
        articles = parsed_feed['entries']

    up_feed['error_count'] = 0
    up_feed['last_error'] = ""

    # Feed informations
    construct_feed_from(feed.link, parsed_feed).update(up_feed)
    if feed.title and 'title' in up_feed:
        # do not override the title set by the user
        del up_feed['title']
    FeedController().update({'id': feed.id}, up_feed)

    return articles