def load_files(inspire_ids, send_tweet=False, synchronous=False):
    """
    Load each record into the system, or update it if it already exists.

    :param inspire_ids: array of inspire ids to load (in the format insXXX).
    :param send_tweet: whether or not to tweet this entry.
    :param synchronous: if True, run immediately instead of queueing a celery task.
    :return: None
    """
    migrator = Migrator()

    for inspire_id in inspire_ids:
        _cleaned_id = inspire_id.replace("ins", "")
        if not record_exists(inspire_id=_cleaned_id):
            print("The record with id {0} does not exist in the database, so we're loading it.".format(inspire_id))
            try:
                log.info('Loading {0}'.format(inspire_id))
                if synchronous:
                    migrator.load_file(inspire_id, send_tweet)
                else:
                    # .delay() queues the load as an asynchronous celery task.
                    migrator.load_file.delay(inspire_id, send_tweet)
            except socket.error as se:
                print('socket error...')
                # BaseException.message was removed in Python 3; str(se) is portable.
                log.error(str(se))
            except Exception as e:
                print('Failed to load {0}. {1} '.format(inspire_id, e))
                log.error('Failed to load {0}. {1} '.format(inspire_id, e))
        else:
            print('The record with inspire id {0} already exists. Updating instead.'.format(inspire_id))
            log.info('Updating {}'.format(inspire_id))
            if synchronous:
                update_submissions([inspire_id], send_tweet)
            else:
                update_submissions.delay([inspire_id], send_tweet)
def get_missing_records():
    """
    Compare the legacy system's inspire ids against this database and
    report the ones that have not been loaded yet.

    :return: list of inspire ids absent from the current system.
    """
    candidate_ids = get_all_ids_in_current_system(prepend_id_with="")

    missing_ids = [candidate for candidate in candidate_ids
                   if not record_exists(inspire_id=candidate)]

    print("Missing {} records.".format(len(missing_ids)))
    print(missing_ids)
    return missing_ids
def test_create_submission(app):
    """
    Test the whole submission pipeline in loading a file, ensuring the
    HEPSubmission object is created, all the files have been added, and
    the record has been indexed.
    :return:
    """
    with app.app_context():
        # test submission part works
        record = {'inspire_id': '19999999',
                  'title': 'HEPData Testing 1',
                  'reviewer': {'name': 'Testy McTester', 'email': '*****@*****.**'},
                  'uploader': {'name': 'Testy McTester', 'email': '*****@*****.**'},
                  'message': 'This is ready',
                  'user_id': 1}

        hepdata_submission = process_submission_payload(**record)

        # A freshly created submission starts at version 1, awaiting review.
        assert (hepdata_submission.version == 1)
        assert (hepdata_submission.overall_status == 'todo')

        # test upload works
        base_dir = os.path.dirname(os.path.realpath(__file__))
        directory = os.path.join(base_dir, 'test_data/test_submission')
        process_submission_directory(directory,
                                     os.path.join(directory, 'submission.yaml'),
                                     hepdata_submission.publication_recid)

        # The test fixture is expected to yield 8 data tables,
        # 4 resources and 4 participants.
        data_submissions = DataSubmission.query.filter_by(
            publication_recid=hepdata_submission.publication_recid).count()
        assert (data_submissions == 8)
        assert (len(hepdata_submission.resources) == 4)
        assert (len(hepdata_submission.participants) == 4)

        # Finalise the submission so it becomes a visible, indexed record.
        do_finalise(hepdata_submission.publication_recid, force_finalise=True)

        assert (record_exists(inspire_id=record['inspire_id']))

        # Test record is in index...
        index_records = get_records_matching_field('inspire_id',
                                                   record['inspire_id'],
                                                   doc_type='publication')
        print(index_records)
        assert (len(index_records['hits']['hits']) == 1)

        publication_record = get_record_contents(hepdata_submission.publication_recid)
        print(publication_record)
        assert (publication_record is not None)

        # Rendering context for the record page should reflect the submission.
        ctx = format_submission(hepdata_submission.publication_recid,
                                publication_record,
                                hepdata_submission.version, 1,
                                hepdata_submission)

        assert (ctx is not None)
        assert (ctx['version'] == 1)
        assert (ctx['recid'] == hepdata_submission.publication_recid)
def get_missing_records():
    """
    Finds all records that are missing in the new system (compared to the
    legacy environment) and returns the IDs as a list.

    :return: an array of missing IDs
    """
    inspire_ids = get_all_ids_in_current_system(prepend_id_with="")
    missing_ids = []
    for inspire_id in inspire_ids:
        # A record is "missing" when the legacy id is not in this database.
        if not record_exists(inspire_id=inspire_id):
            missing_ids.append(inspire_id)
    print("Missing {} records.".format(len(missing_ids)))
    print(missing_ids)
    return missing_ids
def load_files(inspire_ids, send_tweet=False, synchronous=False, convert=False,
               base_url='http://hepdata.cedar.ac.uk/view/{0}/yaml'):
    """
    Load each record into the system, or update it if it already exists.

    :param inspire_ids: array of inspire ids to load (in the format insXXX).
    :param send_tweet: whether or not to tweet this entry.
    :param synchronous: if True, run immediately instead of queueing a celery task.
    :param convert: whether to convert the downloaded file.
    :param base_url: override default base URL of the legacy system.
    :return: None
    """
    migrator = Migrator()

    for inspire_id in inspire_ids:
        _cleaned_id = inspire_id.replace("ins", "")
        if not record_exists(inspire_id=_cleaned_id):
            print(
                "The record with id {0} does not exist in the database, so we're loading it."
                .format(inspire_id))
            try:
                log.info("Loading {0}".format(inspire_id))
                if synchronous:
                    migrator.load_file(inspire_id, send_tweet,
                                       convert=convert, base_url=base_url)
                else:
                    # .delay() queues the load as an asynchronous celery task.
                    migrator.load_file.delay(inspire_id, send_tweet,
                                             convert=convert, base_url=base_url)
            except socket.error as se:
                print("socket error...")
                # BaseException.message was removed in Python 3; str(se) is portable.
                log.error(str(se))
            except Exception as e:
                print("Failed to load {0}. {1} ".format(inspire_id, e))
                log.error("Failed to load {0}. {1} ".format(inspire_id, e))
        else:
            print(
                "The record with inspire id {0} already exists. Updating instead."
                .format(inspire_id))
            log.info("Updating {}".format(inspire_id))
            if synchronous:
                update_submissions([inspire_id])
            else:
                update_submissions.delay([inspire_id])
def get_record_from_inspire():
    """
    Fetch record metadata from inspire for the ``id`` request argument
    and report whether the record already exists locally.

    :return: JSON response with the inspire query result and a status.
    """
    if 'id' not in request.args:
        return jsonify({'status': 'no inspire id provided'})

    inspire_id = request.args['id']
    content, status = get_inspire_record_information(inspire_id)

    # check that id is not present already.
    if record_exists(inspire_id=inspire_id):
        status = 'exists'

    return jsonify({'source': 'inspire',
                    'id': inspire_id,
                    'query': content,
                    'status': status})
def get_record_from_inspire():
    """
    Look up the ``id`` request argument on inspire, short-circuiting with
    an 'exists' status when the record is already in the local database.

    :return: JSON response with the inspire query result and a status.
    """
    record_id = request.args.get('id')
    if record_id is None:
        return jsonify({'status': 'no inspire id provided'})

    # check that id is not present already.
    if record_exists(inspire_id=record_id):
        # Already loaded locally: skip the remote inspire lookup entirely.
        return jsonify({'status': 'exists', 'id': record_id})

    content, status = get_inspire_record_information(record_id)
    return jsonify({'source': 'inspire',
                    'id': record_id,
                    'query': content,
                    'status': status})
def test_create_submission(app, admin_idx):
    """
    Test the whole submission pipeline in loading a file, ensuring the
    HEPSubmission object is created, all the files have been added, and
    the record has been indexed.
    :return:
    """
    with app.app_context():
        # Start from a clean admin index so later count assertions are deterministic.
        admin_idx.recreate_index()

        # test submission part works
        record = {
            'inspire_id': '19999999',
            'title': 'HEPData Testing 1',
            'reviewer': {
                'name': 'Testy McTester',
                'email': '*****@*****.**'
            },
            'uploader': {
                'name': 'Testy McTester',
                'email': '*****@*****.**'
            },
            'message': 'This is ready',
            'user_id': 1
        }

        hepdata_submission = process_submission_payload(**record)

        # A freshly created submission starts at version 1, awaiting review.
        assert (hepdata_submission.version == 1)
        assert (hepdata_submission.overall_status == 'todo')

        # test upload works
        base_dir = os.path.dirname(os.path.realpath(__file__))
        test_directory = os.path.join(base_dir, 'test_data/test_submission')
        time_stamp = str(int(round(time.time())))
        directory = get_data_path_for_record(
            hepdata_submission.publication_recid, time_stamp)
        # Copy the fixture into the record's data path, as a real upload would.
        shutil.copytree(test_directory, directory)
        assert (os.path.exists(directory))

        process_submission_directory(
            directory, os.path.join(directory, 'submission.yaml'),
            hepdata_submission.publication_recid)

        admin_idx_results = admin_idx.search(
            term=hepdata_submission.publication_recid, fields=['recid'])
        assert (admin_idx_results is not None)

        # The test fixture is expected to yield 8 data tables,
        # 4 resources and 4 participants.
        data_submissions = DataSubmission.query.filter_by(
            publication_recid=hepdata_submission.publication_recid).count()
        assert (data_submissions == 8)
        assert (len(hepdata_submission.resources) == 4)
        assert (len(hepdata_submission.participants) == 4)

        # Finalise the submission so it becomes a visible, indexed record.
        do_finalise(hepdata_submission.publication_recid,
                    force_finalise=True, convert=False)

        assert (record_exists(inspire_id=record['inspire_id']))

        # Test record is in index...
        index_records = get_records_matching_field('inspire_id',
                                                   record['inspire_id'],
                                                   doc_type='publication')
        assert (len(index_records['hits']['hits']) == 1)

        publication_record = get_record_contents(
            hepdata_submission.publication_recid)
        assert (publication_record is not None)

        # Rendering context for the record page should reflect the submission.
        ctx = format_submission(hepdata_submission.publication_recid,
                                publication_record,
                                hepdata_submission.version, 1,
                                hepdata_submission)

        assert (ctx is not None)
        assert (ctx['version'] == 1)
        assert (ctx['recid'] == hepdata_submission.publication_recid)

        # remove the submission and test that all is removed
        unload_submission(hepdata_submission.publication_recid)

        assert (not record_exists(inspire_id=record['inspire_id']))

        data_submissions = DataSubmission.query.filter_by(
            publication_recid=hepdata_submission.publication_recid).count()
        assert (data_submissions == 0)

        # NOTE(review): sleep before querying the index — presumably to let
        # the deletion propagate to the search backend; confirm necessity.
        sleep(2)

        admin_idx_results = admin_idx.search(
            term=hepdata_submission.publication_recid, fields=['recid'])
        assert (len(admin_idx_results) == 0)

        # Check file dir has been deleted
        assert (not os.path.exists(directory))