Ejemplo n.º 1
0
    def create_feed(self, url, hub=None, find_feed=True):
        """
        Creates a Feed, searching the website for a feed and PuSH links if find_feed is True.

        The new Feed is added to the database session and committed before it
        is returned. When discovery also yields a parsed feed, its link is
        used to set ``site_url`` and a ``notification_received`` signal is
        sent with the parsed feed before the commit.

        :param url: The URL of a website or feed.
        :param hub: The URL of the PuSH hub to use. When empty, the hub found
            during discovery is used, then ``app.config['DEFAULT_HUB']``.
        :param find_feed: Set to True to search the website for a feed and override the url
            if feed is found.
        :return: Feed
        """
        topic = url
        parsed_feed = None

        if find_feed:
            app.logger.info(u'Finding feed for url {0}'.format(url))
            # The result is indexed below as (hub, topic, parsed feed).
            data = get_feed(url)
            app.logger.info(u'Found feed {0} with hub {1} for url {2}'
                            .format(data[1], data[0], url))
            # An explicitly supplied hub always wins over a discovered one.
            if data[0] and not hub:
                hub = data[0]
            if data[1]:
                topic = data[1]
            if data[2]:
                parsed_feed = data[2]

        if not hub:
            hub = app.config['DEFAULT_HUB']

        app.logger.info(u'Creating Feed with topic {0} and hub {1}'
                        .format(topic, hub))
        feed = Feed(topic=topic, hub=hub)

        if parsed_feed:
            # Setting site_url is best effort: a feed without a usable link
            # must not prevent the Feed from being created. Only ordinary
            # exceptions are swallowed (and logged) so that SystemExit and
            # KeyboardInterrupt still propagate.
            try:
                site_link = parsed_feed.feed['link']
                domain = get_domain(site_link)
                if domain:
                    app.logger.info('Set site_url for Feed {0} to {1}'
                                    .format(feed, domain))
                    feed.site_url = domain
            except Exception as e:
                app.logger.warning(u'Could not set site_url for Feed {0}: {1}'
                                   .format(feed, e))

        db.session.add(feed)

        if parsed_feed:
            app.logger.info(u'Feed creation for topic {0} parsed a feed, '
                            'sending notification'.format(topic))
            notification_received.send(self,
                                       feed=feed,
                                       content_type='application/rss+xml',
                                       content=parsed_feed,
                                       parsed=True)

        db.session.commit()
        return feed
Ejemplo n.º 2
0
    def handle_subscription_response(self, feed, mode, resp):
        """
        Handles the response for a subscription/unsubscription request.

        For a 200, 202 or 204 response to a subscribe or retrieve request the
        response body is forwarded through the notification_received signal,
        provided the Content-Type header could be parsed. The outcome is
        logged in every case.

        :param feed: Feed object
        :param mode: Type of request, subscribe, unsubscribe, or retrieve
        :param resp: HTTP Response
        :return: tuple(HTTP status code, string)
        """

        if resp.status_code in [200, 202, 204]:
            # Start from None so that a header which cannot be parsed skips
            # the signal below instead of raising UnboundLocalError.
            content_type = None
            try:
                content_type = parse_options_header(resp.headers.get(
                    'content-type'))
            except Exception:
                app.logger.error(u'Could not parse Content-Type header '
                                 'from {0}'.format(feed.hub))

            if content_type is not None and mode in ('subscribe', 'retrieve'):
                notification_received.send(self,
                                           feed=feed,
                                           content_type=content_type,
                                           content=resp.text)

            message = (u'Status {0}: {1} request for {2} to {3} was successful'
                       .format(resp.status_code, mode, feed.topic, feed.hub))
        elif resp.text is not None:
            message = (u'Status {0}: {1} request for {2} to {3} was '
                       'unsuccessful. Reason: {4}'
                       .format(resp.status_code, mode, feed.topic,
                               feed.hub, resp.text))
        else:
            message = (u'Status {0}: {1} request for {2} to {3} was '
                       'unsuccessful.'.format(resp.status_code, mode,
                                              feed.topic, feed.hub))

        app.logger.info(message)
        return (resp.status_code, message)
Ejemplo n.º 3
0
    def get_rss(self, feed):
        """
        Fetches and parses the RSS feed behind ``feed.topic``.

        When parsing yields a result, it is published through the
        notification_received signal, flagged as already parsed.

        :param feed: Feed object
        :return: True if a parsed feed was obtained and the signal was sent,
            False otherwise
        """
        result = get_parsed_feed(feed.topic)

        # Guard clause: nothing to publish when parsing produced nothing.
        if not result:
            app.logger.info(u'Failed to parse RSS for Feed {0}'.format(feed))
            return False

        app.logger.info(
            u'Parsed RSS for Feed {0}, sending notification'.format(feed))
        signal_kwargs = dict(feed=feed,
                             content_type='application/rss+xml',
                             content=result,
                             parsed=True)
        notification_received.send(self, **signal_kwargs)
        return True
Ejemplo n.º 4
0
    def get_rss(self, feed):
        """
        Fetches and parses the RSS feed behind ``feed.topic``.

        A successfully parsed feed is published through the
        notification_received signal with ``parsed=True``.

        :param feed: Feed object
        :return: True if a parsed feed was obtained and the signal was sent,
            False otherwise
        """
        result = get_parsed_feed(feed.topic)

        # Bail out early when there is nothing to publish.
        if not result:
            app.logger.info("Failed to parse RSS for %s", feed)
            return False

        app.logger.info("Parsed RSS for %s, sending notification", feed)
        signal_kwargs = {
            "feed": feed,
            "content_type": "application/rss+xml",
            "content": result,
            "parsed": True,
        }
        notification_received.send(self, **signal_kwargs)
        return True
Ejemplo n.º 5
0
    def handle_subscription_response(
        self, feed: Feed, mode: str, resp: Response
    ) -> Tuple[int, str]:
        """
        Handles the response for a subscription/unsubscription request.

        A 200, 202 or 204 status counts as success. On success for a
        subscribe or retrieve request the response body is forwarded through
        the notification_received signal. A summary of the outcome is logged
        and returned in every case.

        :param feed: Feed object
        :param mode: Type of request, subscribe, unsubscribe, or retrieve
        :param resp: HTTP Response
        :return: tuple(HTTP status code, string)
        """
        succeeded = resp.status_code in (200, 202, 204)

        if succeeded:
            parsed_content_type = parse_options_header(
                resp.headers.get("content-type")
            )

            if mode in (Mode.SUBSCRIBE, Mode.RETRIEVE):
                notification_received.send(
                    self,
                    feed=feed,
                    content_type=parsed_content_type,
                    content=resp.text,
                    encoding=resp.encoding,
                )

        # The summary is built after the signal has been sent, as before.
        summary = (
            f"Status {resp.status_code}: {mode} request for {feed} to {feed.hub} was "
        )
        if succeeded:
            summary += "successful."
        elif resp.text:
            summary += f"unsuccessful. Reason: {resp.text}"
        else:
            summary += "unsuccessful."

        app.logger.info(summary)
        return resp.status_code, summary
Ejemplo n.º 6
0
def notification(url):
    """Receives a notification from the hub.

    Looks up the Feed by its unique url, optionally verifies the
    ``X-Hub-Signature`` header against the feed secret, records the time of
    the notification and then either fetches the feed itself (when
    ``fetch_feed_on_notify`` is set) or forwards the request body through the
    notification_received signal.

    :param url: unique url identifying the Feed the notification is for
    :return: Response with status 200 once the notification has been handled,
        including when the signature does not match or the body is empty
    :raises: aborts with 404 for an unknown or unsubscribed feed and with 400
        when a secret is configured but no signature header was sent
    """

    topic = None
    hub = None

    feed = Feed.query.filter_by(unique_url=url).first()
    if not feed:
        app.logger.warning(u'No feed found for url {0}'.format(url))
        abort(404)

    if feed.status != STATUS.SUBSCRIBED:
        app.logger.warning(u'Received notification for unsubscribed feed '
                           '{0} from {1}'.format(feed.topic,
                                                 request.remote_addr))
        abort(404)

    # The Link header may carry the hub and topic ("self") urls; a header
    # that cannot be parsed is logged and the stored values are used instead.
    try:
        lh = request.headers.get('link')

        if lh:
            p = parse_link_header(lh)

            for link in p.links:
                if link.rel == 'hub':
                    hub = link.href
                elif link.rel == 'self':
                    topic = link.href

    except Exception as e:
        app.logger.error(u'Could not parse link header for {0}: {1}'
                         .format(url, e))

    if topic is None:
        topic = feed.topic

    if hub is None:
        hub = feed.hub

    app.logger.info(u'Received notification for {0} from {1}'
                    .format(topic, hub))

    if feed.secret:
        sig = request.headers.get('X-Hub-Signature')
        if not sig:
            app.logger.warning(u'Notification for {0} from {1} did not '
                               'contain secret signature'.format(topic, hub))
            abort(400)

        try:
            h = hmac.new(bytes(feed.secret, 'UTF-8'), digestmod=hashlib.sha1)
            h.update(request.get_data())
            digest = h.hexdigest()
        except Exception:
            app.logger.exception(u'Could not compute hmac signature for {0}'
                                 .format(topic))
            return Response(status=200)

        # Constant-time comparison so the expected signature cannot be
        # recovered through timing. Both sides are compared as bytes because
        # compare_digest rejects str values containing non-ASCII characters.
        expected = "sha1=" + digest
        if not hmac.compare_digest(sig.encode('UTF-8'),
                                   expected.encode('UTF-8')):
            app.logger.warning(u'Signature for {0} from {1} did not match'
                               .format(topic, hub))
            return Response(status=200)

    feed.last_update_received = datetime.utcnow()

    if feed.fetch_feed_on_notify:
        app.logger.info(u'Fetching RSS for Feed {0}, discarding notification'
                        .format(feed))
        success = Subscriber().get_rss(feed)
        # Commit here only when the fetch failed and so sent no signal.
        # NOTE(review): presumably the signal receiver commits on success -
        # confirm against the receiver.
        if not success:
            db.session.commit()
        return Response(status=200)

    # An empty body arrives as an empty byte string rather than None, so the
    # check is on truthiness.
    data = request.get_data()
    if not data:
        app.logger.warning(u'No data in notification for {0} from {1}'
                           .format(topic, hub))
        return Response(status=200)

    encoding = request.content_encoding or 'UTF-8'

    notification_received.send(bp,
                               feed=feed,
                               content_type=request.mimetype,
                               content=data,
                               encoding=encoding)

    return Response(status=200)
Ejemplo n.º 7
0
    def create_feed(self, url, hub=None, find_feed=True):
        """
        Build, store and return a new Feed for ``url``.

        With ``find_feed`` enabled the website is searched for a feed and
        PuSH links first; a discovered topic replaces ``url`` and a
        discovered hub is used when no hub was passed in. If discovery also
        produced a parsed feed, its link is used to set ``site_url`` and the
        parsed feed is published through the notification_received signal
        before the session is committed.

        :param url: The URL of a website or feed.
        :param hub: The URL of the PuSH hub to use. Falls back to the
            discovered hub, then to the ``DEFAULT_HUB`` config value.
        :param find_feed: Set to True to search the website for a feed
            and override the url if feed is found.
        :return: Feed
        """
        topic = url
        parsed_feed = None

        if find_feed:
            app.logger.info("Finding feed for url %s", url)
            # The result is indexed below as (hub, topic, parsed feed).
            discovery = get_feed(url)
            app.logger.info(
                "Found feed %s with hub %s for url %s",
                discovery[1],
                discovery[0],
                url,
            )
            # A hub supplied by the caller takes precedence.
            if discovery[0] and not hub:
                hub = discovery[0]
            topic = discovery[1] or topic
            parsed_feed = discovery[2] or None

        if not hub:
            hub = app.config.get("DEFAULT_HUB")

        app.logger.info("Creating Feed with topic %s and hub %s", topic, hub)
        feed = Feed(topic=topic, hub=hub)

        if parsed_feed:
            # Best effort: failing to derive the site url is only logged.
            try:
                site_domain = get_domain(parsed_feed.feed.get("link"))
                if site_domain:
                    app.logger.info(
                        "Set site_url for %s to %s", feed, site_domain
                    )
                    feed.site_url = site_domain
            except Exception as e:
                app.logger.error("Error getting domain for %s: %s", feed, e)

        db.session.add(feed)

        if parsed_feed:
            app.logger.info(
                "Feed creation for topic %s parsed a feed, sending notification",
                topic,
            )
            signal_kwargs = {
                "feed": feed,
                "content_type": "application/rss+xml",
                "content": parsed_feed,
                "parsed": True,
            }
            notification_received.send(self, **signal_kwargs)

        db.session.commit()
        return feed
Ejemplo n.º 8
0
    def fetch_feed(cls, feed: Feed, force: bool = False) -> bool:
        """
        Fetch a single Feed.

        Will attempt to fetch latest version of Feed. Statistics will be saved and next fetch
        scheduled regardless of fetch success. If fetch failed or there is no change in the feed, then
        no notification_received signal will be sent and the process will exit.

        When the response carries no ``content-location`` header, the final
        url of the response is stored under that key before the headers are
        passed on with the signal.

        :param feed: Feed to be fetched
        :param force: Always fetch regardless of last-modified values
        :return: True if fetch was successful with new content, otherwise the
            result of ``cls.no_change``
        """
        fetched: bool = False

        # Defaults used for logging and stats when the request fails before
        # a response is available.
        content: str = ""
        content_length: int = 0
        response_url: str = ""
        headers: dict = {}
        status_code: int = 500
        encoding: str = ""

        app.logger.info("Fetching %s", feed)

        start = time.perf_counter()
        try:
            with requests.get(
                feed.topic,
                headers=feed.fetch_headers(force),
                timeout=(3.05, 10),
                stream=True,
            ) as response:

                # Capture response variables before raising any exceptions
                status_code = response.status_code
                headers = response.headers
                encoding = response.encoding or "utf-8"
                # Final url of the response; used further down as the
                # content-location fallback.
                response_url = response.url

                # Only set content if content length is acceptable, else raise ContentLengthException
                content_length = int(response.headers.get("Content-Length", 0))
                if content_length < app.config.get("MAX_FEED_LENGTH"):
                    content = response.text
                else:
                    raise ContentLengthException()

                response.raise_for_status()

        except requests.Timeout as t:
            app.logger.warning("Timeout fetching Feed %s: %s", feed, t)
        except requests.ConnectionError as c:
            app.logger.warning("ConnectionError fetching feed %s: %s", feed, c)
        except requests.RequestException as e:
            app.logger.warning("Error fetching Feed %s: %s", feed, e)
        except ContentLengthException:
            app.logger.warning("TOO BIG: feed=%s size=%s", feed, content_length)
        else:
            fetched = True

        request_time_ms = int((time.perf_counter() - start) * 1000)

        # Read content length from content only if available and required
        if content and content_length == 0:
            content_length = utf8len(content)

        app.logger.info(
            "FETCHED: topic=%s duration=%dms status=%s size=%sb",
            feed.topic,
            request_time_ms,
            status_code,
            content_length,
        )

        feed.last_status_code = status_code
        feed.last_fetch = datetime.utcnow()
        feed.set_next_scheduled_update()

        stats = FeedStats.create_stats(
            feed.id,
            FetchType.PULL,
            status_code=status_code,
            latency=request_time_ms,
            content_length=content_length,
        )

        # Set feed to inactive if Feed is Gone
        if status_code == 410:
            feed.gone = True

        # Exit if Feed not successfully fetched
        if not fetched:
            return cls.no_change(feed, stats)

        # Set Feed ETag from response
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag
        etag = headers.get("etag")
        if etag:
            feed.etag = etag

        # Set Feed Last-Modified from response
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified
        last_modified = headers.get("last-modified")
        if last_modified:
            feed.last_modified = feed.set_last_modified(last_modified)

        # Save and return if not modified code received
        if status_code == 304:
            return cls.no_change(feed, stats)

        # Check if content hash is new
        new_content = feed.new_content(content, encoding or "UTF-8")

        # Save and return if no new content and not forcing update
        if not new_content and not force:
            return cls.no_change(feed, stats)

        db.session.commit()

        # Set content-location header to final url if not already set
        if "content-location" not in headers:
            headers["content-location"] = response_url

        notification_received.send(
            cls,
            feed=feed,
            content_type=parse_options_header(headers.get("content-type")),
            content=content,
            encoding=encoding,
            stats=stats,
            headers=headers_to_dict(headers),
        )
        return True
Ejemplo n.º 9
0
def notification(url):
    """Receives a notification from the hub.

    Steps, in order: reject bodies whose declared length exceeds
    ``MAX_FEED_LENGTH``, read the body, look up the Feed by its unique url,
    record the time of the notification, create push stats, and then either
    queue a fetch task (``fetch_feed_on_notify``) or validate the secret,
    check for new content and publish the body through the
    notification_received signal.

    :param url: unique url identifying the Feed the notification is for
    :return: an empty 400 response for an oversized or unreadable body, an
        empty 404 for an unknown feed, otherwise the response produced by
        ``save_stats_and_return_response`` or an empty 200 after the signal
        has been sent
    """
    body_size = request.content_length
    if body_size and body_size > app.config.get("MAX_FEED_LENGTH"):
        app.logger.warning("TOO BIG: size=%s url=%s", body_size, url)
        return make_response("", 400)

    # Read data first, should prevent issue with UWSGI sockets.
    # https://uwsgi-docs.readthedocs.io/en/latest/ThingsToKnow.html
    try:
        payload = request.get_data()
    except Exception as e:
        app.logger.error("Failed to get data for notification %s: %s", url, e)
        return make_response("", 400)

    feed = Feed.query.filter_by(unique_url=url).first()
    if not feed:
        app.logger.warning("No feed found for url %s", url)
        return make_response("", 404)

    # Topic and hub come from the request link headers; only the hub is
    # used further down.
    _topic, hub = check_headers(request.headers, feed)

    # The receipt time is stored for every notification, whatever happens
    # to it afterwards.
    feed.last_update_received = datetime.utcnow()
    feed.save()

    push_stats = FeedStats.create_stats(
        feed.id,
        FetchType.PUSH,
        content_length=body_size,
    )

    if not feed.subscribed:
        app.logger.warning(
            "Received notification for unsubscribed %s from hub %s at %s",
            feed,
            hub,
            request.remote_addr,
        )
        return save_stats_and_return_response(push_stats, 410)

    app.logger.info("Received notification for %s from %s", feed, hub)

    charset = request.content_encoding or "UTF-8"

    if feed.fetch_feed_on_notify:
        # The notification body is discarded; a task fetches the feed
        # instead. This yields two FeedStats: one for the notification and
        # one for the fetch.
        app.logger.info("Fetching RSS for %s, discarding notification", feed)
        task_fetch_feed.delay(feed.id)
        return save_stats_and_return_response(push_stats, 200)

    if not payload:
        app.logger.warning("No data in notification for %s from %s", feed, hub)
        return save_stats_and_return_response(push_stats, 200, 204)

    # A configured secret must be matched by the request signature.
    secret_rejected = feed.secret and not valid_secret(
        request.headers, payload, feed.secret, charset
    )
    if secret_rejected:
        app.logger.warning(
            "Notification for %s from %s did not contain valid secret signature",
            feed,
            feed.hub,
        )
        return save_stats_and_return_response(push_stats, 200, 400)

    # Nothing to publish when the content has not changed.
    if not feed.new_content(payload, charset):
        return save_stats_and_return_response(push_stats, 200, 304)

    FeedStats.record_latency(push_stats)
    signal_kwargs = {
        "feed": feed,
        "content_type": request.mimetype,
        "content": payload,
        "encoding": charset,
        "stats": push_stats,
        "headers": headers_to_dict(request.headers),
    }
    notification_received.send(bp, **signal_kwargs)

    return make_response("", 200)