def update_file(inspire_id, recid, only_record_information=False,
                send_tweet=False):
    self = Migrator()
    output_location = self.prepare_files_for_submission(inspire_id,
                                                        force_retrieval=True)
    if output_location:
        updated_record_information = self.retrieve_publication_information(
            inspire_id)
        record_information = update_record(recid, updated_record_information)

        if not only_record_information:
            try:
                recid = self.load_submission(
                    record_information, output_location,
                    os.path.join(output_location, "submission.yaml"),
                    update=True)
                if recid is not None:
                    do_finalise(recid, publication_record=record_information,
                                force_finalise=True, send_tweet=send_tweet,
                                update=True)
            except FailedSubmission as fe:
                log.error(fe.message)
                fe.print_errors()
                remove_submission(fe.record_id)
        else:
            index_record_ids([record_information['recid']])
    else:
        log.error('Failed to load {0}'.format(inspire_id))


def load_file(inspire_id, send_tweet=False, convert=False,
              base_url='http://hepdata.cedar.ac.uk/view/{0}/yaml'):
    """Load a record from the old HepData site into this instance.

    Returns True on success and False on failure.
    """
    self = Migrator(base_url)
    output_location, oldsite_last_updated = self.prepare_files_for_submission(
        inspire_id)
    if output_location:
        record_information = create_record(
            self.retrieve_publication_information(inspire_id))
        try:
            recid = self.load_submission(
                record_information, output_location,
                os.path.join(output_location, "submission.yaml"))
            if recid is not None:
                do_finalise(recid, publication_record=record_information,
                            force_finalise=True, send_tweet=send_tweet,
                            convert=convert)
                return True
        except FailedSubmission as fe:
            log.error(fe.message)
            fe.print_errors()
            remove_submission(fe.record_id)
            return False
    else:
        log.error("Failed to load " + inspire_id)
        return False


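# A minimal usage sketch for load_file above (the INSPIRE id is illustrative,
# and a configured application context is assumed). It is shown as a comment
# so it does not run at import time:
#
#   if not load_file('ins1245023', send_tweet=False, convert=True):
#       log.error('Initial migration failed for ins1245023')

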
def update_file(inspire_id, recid, force=False, only_record_information=False,
                send_tweet=False, convert=False):
    """Update a record from the old HepData site.

    The data files are only replaced when the old site's copy is newer than
    ours, the coordinator is the default admin (ID 1) and only one version
    exists, unless force is True, which bypasses these checks.
    """
    self = Migrator()
    output_location, oldsite_last_updated = self.prepare_files_for_submission(
        inspire_id, force_retrieval=True)
    if output_location:
        updated_record_information = self.retrieve_publication_information(
            inspire_id)
        record_information = update_record(recid, updated_record_information)

        hep_submission = HEPSubmission.query.filter_by(
            publication_recid=recid).first()
        version_count = HEPSubmission.query.filter_by(
            publication_recid=recid).count()
        print('Old site last updated {}'.format(str(oldsite_last_updated)))
        print('New site last updated {}'.format(
            str(hep_submission.last_updated)))
        print('Coordinator ID is {}, version count is {}'.format(
            hep_submission.coordinator, version_count))
        allow_update = (hep_submission.last_updated < oldsite_last_updated
                        and hep_submission.coordinator == 1
                        and version_count == 1)

        if not only_record_information and (allow_update or force):
            try:
                recid = self.load_submission(
                    record_information, output_location,
                    os.path.join(output_location, "submission.yaml"),
                    update=True)
                print('Loaded record {}'.format(recid))
                if recid is not None:
                    do_finalise(recid, publication_record=record_information,
                                force_finalise=True, send_tweet=send_tweet,
                                update=True, convert=convert)
            except FailedSubmission as fe:
                log.error(fe.message)
                fe.print_errors()
                remove_submission(fe.record_id)
        elif not only_record_information:
            print('Not updating record {}'.format(recid))
        else:
            index_record_ids([record_information["recid"]])
    else:
        log.error("Failed to load {0}".format(inspire_id))


def load_file(inspire_id, send_tweet):
    self = Migrator()
    output_location = self.prepare_files_for_submission(inspire_id)
    if output_location:
        record_information = self.retrieve_publication_information(inspire_id)
        try:
            recid = self.load_submission(
                record_information, output_location,
                os.path.join(output_location, "submission.yaml"))
            if recid is not None:
                do_finalise(recid, publication_record=record_information,
                            force_finalise=True, send_tweet=send_tweet)
        except FailedSubmission as fe:
            log.error(fe.message)
            fe.print_errors()
            remove_submission(fe.record_id)
    else:
        log.error('Failed to load ' + inspire_id)


def update_file(inspire_id, recid, force=False, only_record_information=False,
                send_email=False, send_tweet=False, convert=False):
    """Update a record from the old HepData site.

    The data files are only replaced when the old site's copy is newer than
    ours (or force is True), the coordinator is the default admin (ID 1) and
    only one version exists. With only_record_information=True, just the
    publication metadata, DataCite DOIs and search index are refreshed.
    """
    self = Migrator()
    output_location, oldsite_last_updated = self.prepare_files_for_submission(
        inspire_id, force_retrieval=True)
    if output_location:
        updated_record_information, status = \
            self.retrieve_publication_information(inspire_id)
        if status == 'success':
            record_information = update_record(recid,
                                               updated_record_information)
        else:
            log.error("Failed to retrieve publication information "
                      "for {0}".format(inspire_id))
            return

        hep_submission = HEPSubmission.query.filter_by(
            publication_recid=recid).first()
        version_count = HEPSubmission.query.filter_by(
            publication_recid=recid).count()
        print('Old site last updated {}'.format(str(oldsite_last_updated)))
        print('New site last updated {}'.format(
            str(hep_submission.last_updated)))
        print('Coordinator ID is {}, version count is {}'.format(
            hep_submission.coordinator, version_count))
        allow_update = ((hep_submission.last_updated < oldsite_last_updated
                         or force)
                        and hep_submission.coordinator == 1
                        and version_count == 1)

        if not only_record_information and allow_update:
            try:
                recid = self.load_submission(
                    record_information, output_location,
                    os.path.join(output_location, "submission.yaml"),
                    update=True)
                print('Loaded record {}'.format(recid))
                if recid is not None:
                    do_finalise(recid, publication_record=record_information,
                                force_finalise=True, send_tweet=send_tweet,
                                update=True, convert=convert)
            except FailedSubmission as fe:
                log.error(fe.message)
                fe.print_errors()
                remove_submission(fe.record_id)
        elif not only_record_information:
            print('Not updating record {}'.format(recid))
        else:
            index_record_ids([record_information["recid"]])
            _cleaned_id = inspire_id.replace("ins", "")
            # Update metadata stored in DataCite.
            generate_dois_for_submission.delay(inspire_id=_cleaned_id)
            if send_email:
                # Send email to all participants.
                notify_publication_update(hep_submission, record_information)
    else:
        log.error("Failed to load {0}".format(inspire_id))


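# A usage sketch for update_file above (identifiers illustrative). With
# only_record_information=True the data files are left untouched and only the
# publication metadata, DataCite DOIs and search index are refreshed, while
# force=True skips the last-updated comparison:
#
#   update_file('ins1245023', recid=57066, force=True, send_email=True)

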
def mock_import_old_record(inspire_id=mock_inspire_ids[1], send_email=False):
    """Creates a submission but mimics the old migrated paths.

    (See hepdata master branch at ccd691b for old migrator module.)
    """
    if inspire_id not in mock_inspire_ids:
        raise ValueError('Invalid inspire id %s. Accepted values are: %s'
                         % (inspire_id, ', '.join(mock_inspire_ids)))

    # Use zipped test data for specific record(s)
    publication_information, status = get_inspire_record_information(
        inspire_id)
    publication_information["inspire_id"] = inspire_id

    # Create record
    if status == "success":
        record_information = create_record(publication_information)
    else:
        log.error("Failed to retrieve publication information for "
                  + inspire_id)
        return False

    # Unzip into correct data dir
    data_path = get_data_path_for_record(record_information['recid'])
    base_dir = os.path.dirname(os.path.realpath(__file__))
    zip_path = os.path.join(base_dir, 'old_hepdata_zips',
                            'ins%s.zip' % inspire_id)
    if os.path.isfile(zip_path):
        log.info('Unzipping %s to %s' % (zip_path, data_path))
        shutil.unpack_archive(zip_path, data_path)
        time_stamp = str(int(round(time.time())))
        yaml_path = os.path.join(data_path, time_stamp)
        sub_zip_path = os.path.join(data_path, 'ins%s.zip' % inspire_id)
        shutil.unpack_archive(sub_zip_path, yaml_path)
    else:
        log.error('Invalid path %s' % zip_path)
        return False

    # Create submission
    admin_user_id = 1

    # Consume data payload and store in db.
    get_or_create_hepsubmission(record_information["recid"], admin_user_id)

    errors = process_submission_directory(
        yaml_path, os.path.join(yaml_path, "submission.yaml"),
        record_information["recid"], old_submission_schema=True,
        old_data_schema=True)

    if errors:
        log.error("Submission failed for {0}: {1}".format(
            record_information["recid"], errors))
        return False

    do_finalise(record_information['recid'],
                publication_record=record_information, force_finalise=True,
                convert=False, send_email=send_email)


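# A test might drive the mock importer like this (a sketch; it assumes the
# zipped fixtures in old_hepdata_zips/ exist for the chosen id). The function
# returns False on failure and None once finalisation has been triggered:
#
#   assert mock_import_old_record(mock_inspire_ids[1]) is not False

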
def _import_record(inspire_id, update_existing=False,
                   base_url='https://hepdata.net', send_email=False):
    """Import a record, identified by its INSPIRE id, from another HEPData
    instance.

    Returns True on success and False on failure.
    """
    publication_information, status = get_inspire_record_information(
        inspire_id)
    if status != "success":
        log.error("Failed to retrieve publication information for "
                  + inspire_id)
        return False

    current_submission = get_latest_hepsubmission(inspire_id=inspire_id)

    if not current_submission:
        log.info("The record with id {0} does not exist in the database, "
                 "so we're loading it.".format(inspire_id))
        publication_information["inspire_id"] = inspire_id
        record_information = create_record(publication_information)
        recid = record_information['recid']
    else:
        log.info("The record with inspire id {0} already exists.".format(
            inspire_id))
        if update_existing:
            log.info("Updating instead")
            recid = current_submission.publication_recid
        else:
            log.info("Not updating as update_existing is False")
            return False

    try:
        download_path = _download_file(base_url, inspire_id)
        filename = os.path.basename(download_path)

        time_stamp = str(int(round(time.time())))
        file_save_directory = get_data_path_for_record(str(recid), time_stamp)
        if not os.path.exists(file_save_directory):
            os.makedirs(file_save_directory)

        file_path = os.path.join(file_save_directory, filename)
        log.info("Moving file to %s" % file_path)
        shutil.copy(download_path, file_path)

        # Create submission
        admin_user_id = 1
        hepsubmission = get_or_create_hepsubmission(recid, admin_user_id)
        db.session.add(hepsubmission)
        db.session.commit()

        # Then process the payload as for any other record
        errors = process_zip_archive(file_path, recid)
        if errors:
            log.info("Errors processing archive. Re-trying with old schema.")
            # Try again with old schema.
            # Need to clean up first to avoid errors:
            # first delete tables...
            cleanup_submission(recid, 1, [])
            # ...next remove remaining files.
            file_save_directory = os.path.dirname(file_path)
            submission_path = os.path.join(file_save_directory,
                                           remove_file_extension(filename))
            shutil.rmtree(submission_path)
            errors = process_zip_archive(file_path, recid,
                                         old_submission_schema=True,
                                         old_data_schema=True)
            if errors:
                log.error("Could not process zip archive: ")
                for file, file_errors in errors.items():
                    log.error("    %s:" % file)
                    for error in file_errors:
                        log.error("        %s" % error['message'])
                raise ValueError("Could not validate record.")

        # Delete any previous upload folders
        cleanup_old_files(hepsubmission)

        log.info("Finalising record %s" % recid)
        result_json = do_finalise(recid, force_finalise=True,
                                  update=(current_submission is not None),
                                  convert=False, send_email=send_email)
        result = json.loads(result_json)

        if result and result['success']:
            log.info("Imported record %s with %s submissions"
                     % (recid, result['data_count']))
            return True
        else:
            raise ValueError("Failed to finalise record.")
    except Exception as e:
        # Unload record
        unload_submission(recid)
        log.error(e)
        return False
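

# A usage sketch for _import_record above: a caller might wrap it to import a
# batch of records from the public site (ids illustrative):
#
#   for inspire_id in ['1245023', '1283842']:
#       if not _import_record(inspire_id, update_existing=True):
#           log.error('Import failed for %s' % inspire_id)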