def check_records(self):
    """
    Split the harvested records into new and already existing ones.

    Every record in self.records_harvested that has no recid yet is
    looked up by its DOI through get_record_from_doi(). When that lookup
    raises APSHarvesterSearchError the error is logged, the record is
    handed to generate_xml_for_records() (directory self.out_folder,
    suffix "problem.xml"), a problem report is sent with
    submit_records_via_mail() and the record ends up in neither list.

    @return: a tuple of (new_records, existing_records)
    @rtype: tuple
    """
    new_records = []
    existing_records = []
    for record in self.records_harvested:
        if not record.recid:
            # No record id known yet: try to resolve one from the DOI.
            try:
                record.recid = get_record_from_doi(record.doi)
            except APSHarvesterSearchError as err:
                write_message("Error while getting recid from %s: %s" %
                              (record.doi, str(err)))

                # Problem detected, send mail immediately.
                problem_rec = generate_xml_for_records(
                    records=[record],
                    directory=self.out_folder,
                    suffix="problem.xml")
                subject = "APS harvest problem: %s" % \
                    (self.date_started.strftime("%Y-%m-%d %H:%M:%S"),)
                body = "There was a problem harvesting %s. \n %s \n Path: \n%s" % \
                    (record.doi, str(err), problem_rec)
                submit_records_via_mail(subject, body, CFG_APSHARVEST_EMAIL)
                # Skip the record: it is neither new nor existing.
                continue

        # A truthy recid (given up front or just resolved) marks the
        # record as existing; anything else is treated as new.
        if record.recid:
            existing_records.append(record)
        else:
            new_records.append(record)

    # The docstring promises this tuple; without the return statement
    # every caller would receive None.
    return new_records, existing_records
def check_records(self):
    """
    Split the harvested records into new and already existing ones.

    Every record in self.records_harvested that has no recid yet is
    looked up by its DOI through get_record_from_doi(). When that lookup
    raises APSHarvesterSearchError the error is logged, the record is
    handed to generate_xml_for_records() (directory self.out_folder,
    suffix "problem.xml"), a problem report is sent with
    submit_records_via_mail() and the record ends up in neither list.

    @return: a tuple of (new_records, existing_records)
    @rtype: tuple
    """
    new_records = []
    existing_records = []
    for record in self.records_harvested:
        if not record.recid:
            # No record id known yet: try to resolve one from the DOI.
            try:
                record.recid = get_record_from_doi(record.doi)
            except APSHarvesterSearchError as err:
                write_message("Error while getting recid from %s: %s" %
                              (record.doi, str(err)))

                # Problem detected, send mail immediately.
                problem_rec = generate_xml_for_records(
                    records=[record],
                    directory=self.out_folder,
                    suffix="problem.xml")
                subject = "APS harvest problem: %s" % \
                    (self.date_started.strftime("%Y-%m-%d %H:%M:%S"),)
                body = "There was a problem harvesting %s. \n %s \n Path: \n%s" % \
                    (record.doi, str(err), problem_rec)
                submit_records_via_mail(subject, body, CFG_APSHARVEST_EMAIL)
                # Skip the record: it is neither new nor existing.
                continue

        # A truthy recid (given up front or just resolved) marks the
        # record as existing; anything else is treated as new.
        if record.recid:
            existing_records.append(record)
        else:
            new_records.append(record)

    # The docstring promises this tuple; without the return statement
    # every caller would receive None.
    return new_records, existing_records
# Turn dates back into strings (away from datetime object) harvest_from_date = harvest_from_date.strftime("%Y-%m-%d") harvest_until_date = harvest_until_date.strftime("%Y-%m-%d") final_record_list = harvest_aps(harvest_from_date, harvest_until_date, perpage) else: # We use any given IDs or records from the local Invenio instance. if len(parameters.get("dois")) > 0: write_message("Parsing DOIs...") # We are doing DOIs, we need to get record ids for doi in parameters.get("dois").split(','): doi = doi.strip() try: recid = get_record_from_doi(doi) except APSHarvesterSearchError, e: write_message("Error while getting recid from %s: %s" % (doi, str(e))) continue if not recid: # Record not found on the system, we harvest from APS write_message("No recid found, we get record from APS") recid = None final_record_list.append(APSRecord(recid, doi)) if len(parameters.get("recids")) > 0: write_message("Parsing record IDs...") # We are doing rec ids recids = split_cli_ids_arg(parameters.get("recids"))