def check_records(self):
    """
    Split the harvested records into new and already-existing ones.

    Every record in self.records_harvested that has no recid yet is
    looked up by its DOI with get_record_from_doi(). A record that ends
    up with a recid counts as existing, any other as new.

    If the lookup raises APSHarvesterSearchError the error is logged with
    write_message(), generate_xml_for_records() is called for that single
    record (directory self.out_folder, suffix "problem.xml"), a mail is
    sent to CFG_APSHARVEST_EMAIL and the record is left out of both
    result lists.

    @return: a tuple of (new_records, existing_records)
    @rtype: tuple
    """
    # NOTE(review): this method appears to be defined more than once in
    # this file; in Python the last definition in a scope wins - confirm
    # which copy is meant to stay.
    new_records = []
    existing_records = []
    for record in self.records_harvested:
        # Do we already have the record id perhaps?
        if not record.recid:
            try:
                record.recid = get_record_from_doi(record.doi)
            except APSHarvesterSearchError as e:
                write_message("Error while getting recid from %s: %s" %
                              (record.doi, str(e)))

                # Problem detected, send mail immediately:
                problem_rec = generate_xml_for_records(
                    records=[record],
                    directory=self.out_folder,
                    suffix="problem.xml")
                subject = "APS harvest problem: %s" % \
                    (self.date_started.strftime("%Y-%m-%d %H:%M:%S"),)
                body = "There was a problem harvesting %s. \n %s \n Path: \n%s" % \
                    (record.doi, str(e), problem_rec)
                submit_records_via_mail(subject, body, CFG_APSHARVEST_EMAIL)
                # The failed record is reported only; it is neither new
                # nor existing.
                continue

        # What about now?
        if record.recid:
            existing_records.append(record)
        else:
            new_records.append(record)

    # Callers unpack this result, so the tuple promised by the docstring
    # has to be returned explicitly.
    return new_records, existing_records
def check_records(self):
    """
    Sort self.records_harvested into new records and existing records.

    A record without a recid is first resolved through
    get_record_from_doi(record.doi). Afterwards, records with a recid are
    collected as existing and records without one as new.

    When the DOI lookup raises APSHarvesterSearchError, the error is
    logged via write_message(), generate_xml_for_records() is invoked for
    just that record (directory self.out_folder, suffix "problem.xml"),
    a notification is mailed to CFG_APSHARVEST_EMAIL and the record is
    skipped, i.e. it appears in neither returned list.

    @return: a tuple of (new_records, existing_records)
    @rtype: tuple
    """
    # NOTE(review): a second definition of check_records appears to exist
    # in this file; only the last one in the scope takes effect - confirm
    # and drop the redundant copy.
    new_records = []
    existing_records = []
    for record in self.records_harvested:
        # Do we already have the record id perhaps?
        if not record.recid:
            try:
                record.recid = get_record_from_doi(record.doi)
            except APSHarvesterSearchError as e:
                write_message("Error while getting recid from %s: %s" %
                              (record.doi, str(e)))

                # Problem detected, send mail immediately:
                problem_rec = generate_xml_for_records(records=[record],
                                                       directory=self.out_folder,
                                                       suffix="problem.xml")
                subject = "APS harvest problem: %s" % \
                    (self.date_started.strftime("%Y-%m-%d %H:%M:%S"),)
                body = "There was a problem harvesting %s. \n %s \n Path: \n%s" % \
                    (record.doi, str(e), problem_rec)
                submit_records_via_mail(subject, body, CFG_APSHARVEST_EMAIL)
                # Reported by mail only; do not classify this record.
                continue

        # What about now?
        if record.recid:
            existing_records.append(record)
        else:
            new_records.append(record)

    # The docstring promises a tuple and callers unpack it, so return it
    # explicitly instead of falling off the end with None.
    return new_records, existing_records
def process_record_submission(self, parameters):
    """
    Run the submission process for the harvested records.

    Steps, in order:
      1. Fill self.records_to_insert / self.records_to_update, either by
         matching through self.check_records() (when "match" is set) or
         by treating every harvested record as new.
      2. For each non-empty list, write the records out with
         generate_xml_for_records() and, unless "devmode" is set, hand
         the result to self.submit_records(). The list is emptied
         afterwards.
      3. If self.records_failed is non-empty, mail a summary to
         CFG_APSHARVEST_EMAIL (skipped in "devmode").

    @param parameters: run options, accessed with .get(). Keys read here:
        "match", "devmode", "new_mode", "update_mode" and "records";
        the object is also passed on to self.get_file_prefix().

    @raise APSHarvesterSubmissionError: when self.submit_records()
        returns a false value for the insert or the update batch.
    """
    if parameters.get("match"):
        # We will do a simple match with the database
        new_records, existing_records = self.check_records()
        self.records_to_insert.extend(new_records)
        self.records_to_update.extend(existing_records)
    else:
        # We insert everything
        self.records_to_insert.extend(self.records_harvested)

    if self.records_to_insert:
        # Submit new records
        record_filename = generate_xml_for_records(
            self.records_to_insert,
            self.out_folder,
            prefix=self.get_file_prefix(parameters),
            suffix="_insert.xml"
        )
        if not parameters.get("devmode"):
            taskid = self.submit_records(record_filename,
                                         parameters.get("new_mode"))
            if not taskid:
                # Something went wrong
                err_string = "New records (%s)" \
                             " were not submitted correctly" % \
                             (record_filename,)
                raise APSHarvesterSubmissionError(err_string)
        # Reached in devmode too: the batch is considered handled once
        # its XML has been generated.
        self.records_to_insert = []

    if self.records_to_update:
        # Submit existing records
        record_filename = generate_xml_for_records(
            self.records_to_update,
            self.out_folder,
            prefix=self.get_file_prefix(parameters),
            suffix="_update.xml"
        )
        if not parameters.get("devmode"):
            taskid = self.submit_records(
                record_filename,
                parameters.get("update_mode"),
                update=True,
                # Normalised to a strict True/False for the callee.
                silent=bool(parameters.get("records")),
            )
            if not taskid:
                # Something went wrong
                err_string = "Existing records (%s)" \
                             " were not submitted correctly" % \
                             (record_filename,)
                raise APSHarvesterSubmissionError(err_string)
        self.records_to_update = []

    if self.records_failed:
        # Each entry is a (record, message) pair; identify the record by
        # DOI, falling back to recid when the DOI is empty.
        body = "\n".join(["%s failed with error: %s"
                          % (rec.doi or rec.recid, msg)
                          for rec, msg in self.records_failed])
        if not parameters.get("devmode"):
            submit_records_via_mail(subject="%s (failed records)" % (self.mail_subject,),
                                    body=body,
                                    toaddr=CFG_APSHARVEST_EMAIL)