def test_faulty_dates2(self):
    """Entries whose only date is an unparseable string still sort stably.

    Both entries have ``published_parsed`` unset; ``entries_by_date`` must
    still order them newest-first and be idempotent on its own output.
    """
    entries = entries_by_date([
        {"title": u"first",
         "updated": u"06/01/2010 CET",
         "published_parsed": None},
        {"title": u"second",
         "updated": u"23/12/2009 CET",
         "published_parsed": None},
    ])
    d1 = date_to_datetime("published_parsed")(None, entries[0])
    d2 = date_to_datetime("published_parsed")(None, entries[1])
    self.assertTrue(d1 > d2)
    # Re-sorting an already sorted list must be a no-op.
    self.assertEqual(entries, entries_by_date(entries))
    flipped = list(reversed(entries))
    self.assertNotEqual(entries, flipped)
    # Sorting the reversed list must restore the original order.
    self.assertEqual(entries, entries_by_date(flipped))
def test_entries_by_date(self):
    """A newest-first list is returned unchanged; undated entries keep order."""
    now = datetime.now(pytz.utc)
    # Four entries, each 10 seconds older than the previous one.
    dated = [
        {"title": "proper %d" % (index + 1),
         "date_parsed": now - timedelta(seconds=10 * index)}
        for index in range(4)
    ]
    self.assertEqual(dated, entries_by_date(dated))
    # Entries with no date field at all must keep their input order.
    undated = [{"title": "improper %d" % (index + 1)} for index in range(4)]
    self.assertEqual(undated, entries_by_date(undated))
def test_entries_by_date(self):
    """Sorting preserves an already-sorted list and leaves undated entries alone."""
    now = datetime.now(pytz.utc)
    with_dates = [
        {"title": "proper 1", "date_parsed": now},
        {"title": "proper 2", "date_parsed": now - timedelta(seconds=10)},
        {"title": "proper 3", "date_parsed": now - timedelta(seconds=20)},
        {"title": "proper 4", "date_parsed": now - timedelta(seconds=30)},
    ]
    # Already newest-first, so the sort must be the identity.
    self.assertEqual(with_dates, entries_by_date(with_dates))
    without_dates = [
        {"title": "improper 1"},
        {"title": "improper 2"},
        {"title": "improper 3"},
        {"title": "improper 4"},
    ]
    # No date information available: input order must be preserved.
    self.assertEqual(without_dates, entries_by_date(without_dates))
def test_faulty_dates2(self):
    """Entries with ``updated_parsed`` unset still get a stable newest-first order.

    NOTE(review): the entries set ``updated_parsed`` while extraction uses
    ``"published_parsed"`` — presumably ``entries_by_date`` backfills the
    published date; verify against feedutil.
    """
    entries = entries_by_date([
        {"title": u"first",
         "updated": u"06/01/2010 CET",
         "updated_parsed": None},
        {"title": u"second",
         "updated": u"23/12/2009 CET",
         "updated_parsed": None},
    ])
    d1 = date_to_datetime("published_parsed")(None, entries[0])
    d2 = date_to_datetime("published_parsed")(None, entries[1])
    self.assertTrue(d1 > d2)
    # Idempotence: sorting sorted output changes nothing.
    self.assertEqual(entries, entries_by_date(entries))
    flipped = list(reversed(entries))
    self.assertNotEqual(entries, flipped)
    self.assertEqual(entries, entries_by_date(flipped))
def update_feed(self, feed_obj, feed=None, force=False):
    """Update (refresh) a feed that already exists in the system.

    Feeds not yet known must first be imported via :meth:`import_feed`.

    :param feed_obj: the Feed object to refresh.
    :keyword feed: pre-parsed feed structure, passed in to avoid parsing
        the same feed twice.
    :keyword force: refresh even if the feed was refreshed recently.
    """
    now = datetime.utcnow().replace(tzinfo=utc)
    # Skip feeds refreshed within MIN_REFRESH_INTERVAL unless forced.
    if not force and feed_obj.date_last_refresh:
        if now < feed_obj.date_last_refresh + conf.MIN_REFRESH_INTERVAL:
            self.logger.info(
                "Feed %s is fresh. Skipping refresh." % feed_obj.feed_url)
            return feed_obj

    limit = self.post_limit
    if not feed:
        # Send conditional-GET headers (ETag / Last-Modified) unless forced.
        if feed_obj.http_last_modified and not force:
            modified = feed_obj.http_last_modified.timetuple()
        else:
            modified = None
        etag = None if force else feed_obj.http_etag
        try:
            feed = self.parse_feed(feed_obj.feed_url,
                                   etag=etag, modified=modified)
        except socket.timeout:
            return feed_obj.save_timeout_error()
        except Exception:
            # Any other parse/fetch failure is recorded on the feed object.
            return feed_obj.save_generic_error()

    # Feed can be local/ not fetched with HTTP client.
    status = feed.get("status", http.OK)
    if status == http.NOT_MODIFIED and not force:
        return feed_obj
    if feed_obj.is_error_status(status):
        return feed_obj.set_error_status(status)

    if feed.entries:
        for entry in feedutil.entries_by_date(feed.entries, limit):
            self.import_entry(entry, feed_obj)

    feed_obj.date_last_refresh = now
    feed_obj.http_etag = feed.get("etag", "")
    if getattr(feed, "modified", None):
        try:
            feed_obj.http_last_modified = datetime.fromtimestamp(
                time.mktime(feed.modified)).replace(tzinfo=utc)
        except TypeError:
            # feed.modified was not a valid time tuple; keep old value.
            pass
    self.logger.debug("uf: %s Saving feed object..." % (feed_obj.feed_url))
    feed_obj.save()
    return feed_obj
def update_feed(self, feed_obj, feed=None, force=False):
    """Update (refresh) a feed already registered in the system.

    Use :meth:`import_feed` first for feeds the system does not know yet.

    :param feed_obj: the Feed object to refresh.
    :keyword feed: an already-parsed feed structure, so the feed does not
        have to be parsed a second time.
    :keyword force: refresh even when the feed was recently refreshed.
    """
    now = datetime.utcnow().replace(tzinfo=utc)
    last = feed_obj.date_last_refresh
    # A feed refreshed within MIN_REFRESH_INTERVAL is considered fresh.
    still_fresh = last and now < last + conf.MIN_REFRESH_INTERVAL
    if still_fresh and not force:
        self.logger.info("Feed %s is fresh. Skipping refresh." %
                         feed_obj.feed_url)
        return feed_obj

    limit = self.post_limit
    if not feed:
        # Conditional GET: reuse stored ETag/Last-Modified unless forcing.
        modified = None
        if feed_obj.http_last_modified and not force:
            modified = feed_obj.http_last_modified.timetuple()
        etag = feed_obj.http_etag if not force else None
        try:
            feed = self.parse_feed(feed_obj.feed_url,
                                   etag=etag, modified=modified)
        except socket.timeout:
            return feed_obj.save_timeout_error()
        except Exception:
            # Record any other fetch/parse failure on the feed object.
            return feed_obj.save_generic_error()

    # Feed can be local/ not fetched with HTTP client.
    status = feed.get("status", http.OK)
    if status == http.NOT_MODIFIED and not force:
        return feed_obj
    if feed_obj.is_error_status(status):
        return feed_obj.set_error_status(status)

    if feed.entries:
        for entry in feedutil.entries_by_date(feed.entries, limit):
            self.import_entry(entry, feed_obj)

    feed_obj.date_last_refresh = now
    feed_obj.http_etag = feed.get("etag", "")
    if getattr(feed, "modified", None):
        try:
            stamp = time.mktime(feed.modified)
            feed_obj.http_last_modified = datetime.fromtimestamp(
                stamp).replace(tzinfo=utc)
        except TypeError:
            # Not a valid time tuple; leave the stored value untouched.
            pass
    self.logger.debug("uf: %s Saving feed object..." % (feed_obj.feed_url))
    feed_obj.save()
    return feed_obj