def test_newer_date(self):
    """Check that a date 3 days before the reference is not older than 6 days."""
    from inspire.utils.datefilter import date_older_than

    date_format = "%Y-%m-%d"
    candidate = datetime.strptime("2015-01-01", date_format)
    reference = datetime.strptime("2015-01-04", date_format)

    # The two dates are only 3 days apart, so a 6-day threshold must not trip.
    is_older = date_older_than(candidate, reference, days=6)
    self.assertFalse(is_older)
def is_too_old(record, days_ago=5):
    """Return True if the record is more than ``days_ago`` days old.

    An older record is probably an update of an earlier record, and we
    don't want those.

    The dates are read from ``record.get('defense_date.date', '')``; each
    one is parsed as ``YYYY-MM-DD`` and compared against the current time
    with ``date_older_than``.

    :param record: object supporting ``get(key, default)``.
        NOTE(review): the value under ``'defense_date.date'`` is presumably
        an iterable of date strings -- confirm against the record type.
    :param days_ago: threshold in days forwarded to ``date_older_than``.
    :returns: True if any defense date is judged too old by
        ``date_older_than``; False otherwise (including when the record
        has no defense dates).
    :raises ValueError: if a date string is not in ``YYYY-MM-DD`` format.
    """
    defense_dates = record.get('defense_date.date', '')

    # Take "now" once so every date is compared against the same instant.
    now = datetime.datetime.now()

    # any() short-circuits on the first old date, like the original loop,
    # but always yields a real bool instead of falling through to None.
    return any(
        date_older_than(
            datetime.datetime.strptime(defense_date, "%Y-%m-%d"),
            now,
            days=days_ago,
        )
        for defense_date in defense_dates
    )
def is_too_old(record, days_ago=5):
    """Return True if the record is more than ``days_ago`` days old.

    An older record is probably an update of an earlier record, and we
    don't want those.

    The date is taken from ``earliest_date``, falling back to
    ``preprint_date`` when the former is missing or empty. It is parsed as
    ``YYYY-MM-DD`` and compared against the current time with
    ``date_older_than``.

    :param record: object supporting ``get(key, default)``.
    :param days_ago: threshold in days forwarded to ``date_older_than``.
    :returns: True if ``date_older_than`` judges the date too old; False
        otherwise, including when the record carries no usable date.
    :raises ValueError: if the date string is not in ``YYYY-MM-DD`` format.
    """
    record_date = (
        record.get('earliest_date', '') or record.get('preprint_date', '')
    )

    if not record_date:
        # Without a date the age is unknown. Previously this fell through to
        # strptime('') and crashed with ValueError; treat it as "not too old".
        # NOTE(review): confirm that undated records should be kept.
        return False

    parsed_date = datetime.datetime.strptime(record_date, "%Y-%m-%d")

    # Always hand back a real bool rather than an implicit None.
    return bool(
        date_older_than(parsed_date, datetime.datetime.now(), days=days_ago)
    )
def exists_in_inspire_or_rejected(obj, eng):
    """Check if the record exists on INSPIRE or was already rejected.

    Three checks are made in order, and the first that applies wins:

    1. ``match_record_arxiv_remote_oaiharvest(obj, eng)`` reports a match.
    2. Any entry of ``obj.data.get("subject_term.term", [])``, lower-cased,
       is listed in the ``INSPIRE_ACCEPTED_CATEGORIES`` config value.
    3. Outside debug mode (``DEBUG`` config value falsy), the record has a
       ``preprint_info.date`` that ``date_older_than`` judges to be more
       than 2 days old.

    Each positive check logs an informational message on ``obj.log``.

    :param obj: object exposing ``data`` (with ``get``) and ``log``.
        NOTE(review): presumably a workflow object -- confirm with callers.
    :param eng: passed through unchanged to the remote matcher.
    :returns: True if any check applies, False otherwise.
    :raises ValueError: if ``preprint_info.date`` is set but is not in
        ``YYYY-MM-DD`` format.
    """
    # Does record exist on production yet?
    if match_record_arxiv_remote_oaiharvest(obj, eng):
        obj.log.info("Record already exists in INSPIRE.")
        return True

    # FIXME: Let's filter away CORE categories for now.
    # Later all harvesting will happen here.
    # The accepted list is loop-invariant, so look it up only once.
    accepted_categories = cfg.get("INSPIRE_ACCEPTED_CATEGORIES", [])
    categories = obj.data.get("subject_term.term", [])
    if any(category.lower() in accepted_categories for category in categories):
        obj.log.info("Record is already being harvested on INSPIRE.")
        return True

    # Check if this record should already have been rejected
    # (only on non-debug mode) E.g. if it is older than 2 days.
    if not cfg.get("DEBUG"):
        preprint_date = obj.data.get("preprint_info.date", "")
        if preprint_date:
            parsed_date = datetime.datetime.strptime(preprint_date, "%Y-%m-%d")
            if date_older_than(parsed_date, datetime.datetime.now(), days=2):
                obj.log.info("Record is likely rejected previously.")
                return True

    # Explicit negative result instead of falling through to None.
    return False