Example #1
0
    def test_faulty_dates2(self):
        """Check sorting of entries whose ``published_parsed`` is ``None``.

        Each entry only carries a raw ``updated`` string.  After sorting,
        the first entry must convert to a later datetime than the second,
        and sorting must give the same result for already sorted and for
        reversed input.
        """
        entries = entries_by_date([
            {"title": title, "updated": updated, "published_parsed": None}
            for title, updated in ((u"first", u"06/01/2010 CET"),
                                   (u"second", u"23/12/2009 CET"))
        ])

        # Convert the two leading entries, in order, with a fresh converter
        # for each of them.
        newer, older = [
            date_to_datetime("published_parsed")(None, entries[position])
            for position in (0, 1)
        ]
        self.assertTrue(newer > older)

        # Sorting an already sorted list must leave it untouched.
        self.assertEqual(entries, entries_by_date(entries))

        # Sorting the flipped list must restore the original order.
        flipped = list(reversed(entries))
        self.assertNotEqual(entries, flipped)
        self.assertEqual(entries, entries_by_date(flipped))
Example #2
0
    def test_entries_by_date(self):
        """Check that ``entries_by_date`` keeps these lists unchanged.

        Two fixtures are used: entries with ``date_parsed`` values that are
        already ordered from newest to oldest, and entries that have no
        date information at all.
        """
        now = datetime.now(pytz.utc)

        # Four entries, each ten seconds older than the previous one.
        dated = [
            {"title": "proper %d" % number,
             "date_parsed": now - timedelta(seconds=10 * (number - 1))}
            for number in range(1, 5)
        ]
        self.assertEqual(dated, entries_by_date(dated))

        # Entries carrying nothing but a title.
        undated = [{"title": "improper %d" % number}
                   for number in range(1, 5)]
        self.assertEqual(undated, entries_by_date(undated))
Example #3
0
    def test_entries_by_date(self):
        """Verify ``entries_by_date`` returns both fixture lists as given.

        The first list holds entries whose ``date_parsed`` runs from newest
        to oldest; the second holds entries without any date key.
        """
        now = datetime.now(pytz.utc)

        # Age of each dated entry, in seconds, relative to ``now``.
        ages = (0, 10, 20, 30)
        proper_list = []
        for index, age in enumerate(ages):
            proper_list.append({
                "title": "proper %d" % (index + 1),
                "date_parsed": now - timedelta(seconds=age),
            })
        self.assertEqual(proper_list, entries_by_date(proper_list))

        # Title-only entries.
        improper_list = []
        for index in range(len(ages)):
            improper_list.append({"title": "improper %d" % (index + 1)})
        self.assertEqual(improper_list, entries_by_date(improper_list))
Example #4
0
    def test_faulty_dates2(self):
        """Check sorting of entries whose ``updated_parsed`` is ``None``.

        Each entry only carries a raw ``updated`` string.  After sorting,
        the first entry must convert to a later datetime than the second,
        and sorting must give the same result for already sorted and for
        reversed input.
        """
        # NOTE(review): the fixtures set "updated_parsed" while the converter
        # below is keyed on "published_parsed" -- confirm this is intended.
        entries = entries_by_date([
            {"title": title, "updated": updated, "updated_parsed": None}
            for title, updated in ((u"first", u"06/01/2010 CET"),
                                   (u"second", u"23/12/2009 CET"))
        ])

        # Convert the two leading entries, in order, with a fresh converter
        # for each of them.
        newer, older = [
            date_to_datetime("published_parsed")(None, entries[position])
            for position in (0, 1)
        ]
        self.assertTrue(newer > older)

        # Sorting an already sorted list must leave it untouched.
        self.assertEqual(entries, entries_by_date(entries))

        # Sorting the flipped list must restore the original order.
        flipped = list(reversed(entries))
        self.assertNotEqual(entries, flipped)
        self.assertEqual(entries, entries_by_date(flipped))
Example #5
0
    def update_feed(self, feed_obj, feed=None, force=False):
        """Update (refresh) feed.

        The feed must already exist in the system, if not you have
        to import it using :meth:`import_feed`.

        :param feed_obj: the Feed object
        :keyword feed: If feed has already been parsed you can pass the
            structure returned by the parser so it doesn't have to be parsed
            twice.
        :keyword force: Force refresh of the feed even if it has been
            recently refreshed already.

        :returns: ``feed_obj`` when the refresh is skipped, the feed is
            not modified, or the refresh succeeds.  When fetching fails or
            the status is an error status, the return value of the
            corresponding ``feed_obj`` error helper is returned instead.

        """
        now = datetime.utcnow().replace(tzinfo=utc)
        already_fresh = (
            feed_obj.date_last_refresh and
            now < feed_obj.date_last_refresh + conf.MIN_REFRESH_INTERVAL)

        if already_fresh and not force:
            self.logger.info("Feed %s is fresh. Skipping refresh.",
                             feed_obj.feed_url)
            return feed_obj

        limit = self.post_limit
        if not feed:
            # A forced refresh sends no validators, so the request is
            # unconditional.
            last_modified = None
            if feed_obj.http_last_modified and not force:
                last_modified = feed_obj.http_last_modified.timetuple()
            etag = feed_obj.http_etag if not force else None

            try:
                feed = self.parse_feed(feed_obj.feed_url,
                                       etag=etag,
                                       modified=last_modified)
            except socket.timeout:
                return feed_obj.save_timeout_error()
            except Exception:
                return feed_obj.save_generic_error()

        # Feed can be local/ not fetched with HTTP client.
        status = feed.get("status", http.OK)
        if status == http.NOT_MODIFIED and not force:
            return feed_obj

        if feed_obj.is_error_status(status):
            return feed_obj.set_error_status(status)

        if feed.entries:
            for entry in feedutil.entries_by_date(feed.entries, limit):
                self.import_entry(entry, feed_obj)

        feed_obj.date_last_refresh = now
        feed_obj.http_etag = feed.get("etag", "")

        modified = getattr(feed, "modified", None)
        if modified:
            # Build the datetime straight from the time tuple's fields.
            # Going through time.mktime()/datetime.fromtimestamp() would
            # interpret the tuple in the server's local timezone and can
            # shift the value (e.g. by the DST offset) before it is
            # labelled as UTC.
            # NOTE(review): assumes the tuple is expressed in UTC, like the
            # timetuple sent with the request above -- confirm.
            try:
                feed_obj.http_last_modified = datetime(*modified[:6],
                                                       tzinfo=utc)
            except (TypeError, ValueError):
                # Not a usable time tuple (e.g. a plain string or
                # out-of-range fields): keep the stored value.
                pass

        self.logger.debug("uf: %s Saving feed object...", feed_obj.feed_url)

        feed_obj.save()
        return feed_obj
Example #6
0
    def update_feed(self, feed_obj, feed=None, force=False):
        """Update (refresh) feed.

        The feed must already exist in the system, if not you have
        to import it using :meth:`import_feed`.

        :param feed_obj: the Feed object
        :keyword feed: If feed has already been parsed you can pass the
            structure returned by the parser so it doesn't have to be parsed
            twice.
        :keyword force: Force refresh of the feed even if it has been
            recently refreshed already.

        :returns: ``feed_obj`` when the refresh is skipped, the feed is
            not modified, or the refresh succeeds.  When fetching fails or
            the status is an error status, the return value of the
            corresponding ``feed_obj`` error helper is returned instead.

        """
        now = datetime.utcnow().replace(tzinfo=utc)
        already_fresh = (
            feed_obj.date_last_refresh
            and now < feed_obj.date_last_refresh + conf.MIN_REFRESH_INTERVAL)

        if already_fresh and not force:
            self.logger.info("Feed %s is fresh. Skipping refresh.",
                             feed_obj.feed_url)
            return feed_obj

        limit = self.post_limit
        if not feed:
            # A forced refresh sends no validators, so the request is
            # unconditional.
            last_modified = None
            if feed_obj.http_last_modified and not force:
                last_modified = feed_obj.http_last_modified.timetuple()
            etag = feed_obj.http_etag if not force else None

            try:
                feed = self.parse_feed(feed_obj.feed_url,
                                       etag=etag,
                                       modified=last_modified)
            except socket.timeout:
                return feed_obj.save_timeout_error()
            except Exception:
                return feed_obj.save_generic_error()

        # Feed can be local/ not fetched with HTTP client.
        status = feed.get("status", http.OK)
        if status == http.NOT_MODIFIED and not force:
            return feed_obj

        if feed_obj.is_error_status(status):
            return feed_obj.set_error_status(status)

        if feed.entries:
            for entry in feedutil.entries_by_date(feed.entries, limit):
                self.import_entry(entry, feed_obj)

        feed_obj.date_last_refresh = now
        feed_obj.http_etag = feed.get("etag", "")

        modified = getattr(feed, "modified", None)
        if modified:
            # Build the datetime straight from the time tuple's fields.
            # Going through time.mktime()/datetime.fromtimestamp() would
            # interpret the tuple in the server's local timezone and can
            # shift the value (e.g. by the DST offset) before it is
            # labelled as UTC.
            # NOTE(review): assumes the tuple is expressed in UTC, like the
            # timetuple sent with the request above -- confirm.
            try:
                feed_obj.http_last_modified = datetime(*modified[:6],
                                                       tzinfo=utc)
            except (TypeError, ValueError):
                # Not a usable time tuple (e.g. a plain string or
                # out-of-range fields): keep the stored value.
                pass

        self.logger.debug("uf: %s Saving feed object...", feed_obj.feed_url)

        feed_obj.save()
        return feed_obj