def delete_submission(recid):
    """
    Remove a submission record.

    Submissions can only be removed if they are not finalised, meaning
    they should never be in the index. Only the latest version of a
    submission is deleted; indexed information is removed only when
    that latest version is 1.

    :param recid: publication record id of the submission to delete.
    :return: JSON string reporting success or failure.
    """
    permitted = (has_role(current_user, 'admin')
                 or has_role(current_user, 'coordinator')
                 or check_is_sandbox_record(recid))
    if not permitted:
        return json.dumps({
            "success": False,
            "recid": recid,
            "errors": ["You do not have permission to delete this submission. "
                       "Only coordinators can do that."],
        })

    latest = get_latest_hepsubmission(publication_recid=recid)
    unload_submission(recid, latest.version)
    # Only a v1 submission can have indexed information to clean up.
    if latest.version == 1:
        AdminIndexer().find_and_delete('recid', recid)
    return json.dumps({
        "success": True,
        "recid": recid,
        "errors": ["Record successfully removed!"],
    })
def test_update_record_info(app):
    """Test update of publication information from INSPIRE."""
    # A None Inspire ID is rejected outright.
    assert update_record_info(None) == 'Inspire ID is None'

    # Exercise both a valid and an invalid Inspire ID.
    for inspire_id in ('1311487', '19999999'):
        # No HEPSubmission object exists yet for this ID.
        assert update_record_info(inspire_id) == 'No HEPData submission'

        new_submission = process_submission_payload(
            inspire_id=inspire_id,
            submitter_id=1,
            reviewer={'name': 'Reviewer', 'email': '*****@*****.**'},
            uploader={'name': 'Uploader', 'email': '*****@*****.**'},
            send_upload_email=False)

        # Process the files to create DataSubmission tables in the DB.
        source_dir = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'test_data/test_submission')
        work_dir = os.path.join(
            tempfile.mkdtemp(dir=CFG_TMPDIR), 'test_submission')
        shutil.copytree(source_dir, work_dir)
        process_submission_directory(
            work_dir,
            os.path.join(work_dir, 'submission.yaml'),
            new_submission.publication_recid)
        do_finalise(new_submission.publication_recid,
                    force_finalise=True, convert=False)

        if inspire_id == '19999999':
            assert update_record_info(inspire_id) == 'Invalid Inspire ID'
        else:
            # First change the publication information to that of a
            # different record.
            other_record_info, status = get_inspire_record_information(
                '1650066')
            assert status == 'success'
            current = get_latest_hepsubmission(inspire_id=inspire_id)
            assert current is not None
            update_record(current.publication_recid, other_record_info)

            # Then check that the update works and that a further update
            # is not required.
            assert update_record_info(inspire_id, send_email=True) == 'Success'
            # Information is now already current.
            assert update_record_info(inspire_id) == 'No update needed'

        unload_submission(new_submission.publication_recid)
def test_tables(app, live_server, env_browser):
    """E2E test to tables in a record."""
    browser = env_browser

    # Import record with non-default table names
    import_default_data(app, [{'hepdata_id': 'ins1206352'}])

    try:
        index_url = flask.url_for('hepdata_theme.index', _external=True)
        browser.get(index_url)
        assert index_url in browser.current_url

        record_link = browser.find_element_by_css_selector(
            '.latest-record .title')
        ActionChains(browser).move_to_element(record_link).perform()
        record_link.click()

        # Check current table name
        assert browser.find_element_by_id('table_name').text == 'Figure 8 panel (a)'

        # Check switching tables works as expected
        third_table = browser.find_elements_by_css_selector(
            '#table-list li h4')[2]
        assert third_table.text == "Figure 8 panel (c)"
        third_table.click()
        _check_table_links(browser, "Figure 8 panel (c)")

        # Get link to table from table page
        table_link = browser.find_element_by_css_selector(
            '#data_link_container button').get_attribute('data-clipboard-text')
        assert table_link.endswith('table=Figure%208%20panel%20(c)')
        _check_table_links(browser, "Figure 8 panel (c)", url=table_link)

        # Check a link to a table name with spaces removed
        _check_table_links(browser, "Figure 8 panel (c)",
                           url=table_link.replace('%20', ''))

        # Check a link to an invalid table
        _check_table_links(browser, "Figure 8 panel (a)",
                           url=table_link.replace('Figure%208%20panel%20(c)',
                                                  'NotARealTable'))
    finally:
        # Delete record and reindex so added record doesn't affect other tests
        submission = get_latest_hepsubmission(inspire_id='1206352')
        unload_submission(submission.publication_recid)
        reindex_all(recreate=True)
def delete_submission(recid):
    """
    Remove a submission record.

    Submissions can only be removed if they are not finalised, meaning
    they should never be in the index.

    :param recid: publication record id of the submission to delete.
    :return: JSON string reporting success or failure.
    """
    allowed = (has_role(current_user, 'admin')
               or has_role(current_user, 'coordinator')
               or check_is_sandbox_record(recid))
    if not allowed:
        return json.dumps({
            "success": False,
            "recid": recid,
            "errors": ["You do not have permission to delete this submission. "
                       "Only coordinators can do that."],
        })

    unload_submission(recid)

    # Rebuild the admin index from scratch so the removed record
    # no longer appears there.
    AdminIndexer().reindex(recreate=True)

    return json.dumps({
        "success": True,
        "recid": recid,
        "errors": ["Record successfully removed!"],
    })
def create_mock_migrated_record(inspire_id, send_email):
    """
    Populate the DB with a specific record which mimics a record migrated
    from hepdata.cedar.ac.uk. Accepts inspire ids 753951, 1299143, 1320775.

    Usage: ``hepdata utils create-mock-migrated-record``
    """
    # Guard against accidentally mutating a production database.
    if current_app.config.get('ENV') == 'production':
        click.confirm(
            'You are currently running in production mode on'
            ' %s. Are you sure you want to add a mock migrated record?'
            % current_app.config.get('SITE_URL'),
            abort=True)

    # Delete current record if it already exists (after confirmation).
    existing = get_latest_hepsubmission(inspire_id=inspire_id)
    if existing:
        click.confirm(
            'Inspire record %s already exists. Do you want to recreate it?'
            % inspire_id,
            abort=True)
        unload_submission(existing.publication_recid)

    mock_import_old_record(inspire_id, send_email=send_email)
def do_unload(records_to_unload):
    """Unload each submission whose record id is in *records_to_unload*."""
    for recid in records_to_unload:
        unload_submission(recid)
def test_create_submission(app, admin_idx):
    """
    Test the whole submission pipeline in loading a file, ensuring the
    HEPSubmission object is created, all the files have been added,
    and the record has been indexed.
    :return:
    """
    with app.app_context():
        # Start from a clean admin index so search results are deterministic.
        admin_idx.recreate_index()

        # test submission part works
        record = {'inspire_id': '19999999',
                  'title': 'HEPData Testing 1',
                  'reviewer': {'name': 'Testy McTester',
                               'email': '*****@*****.**'},
                  'uploader': {'name': 'Testy McTester',
                               'email': '*****@*****.**'},
                  'message': 'This is ready',
                  'user_id': 1}

        hepdata_submission = process_submission_payload(**record)

        # A fresh submission starts at version 1 with status 'todo'.
        assert (hepdata_submission.version == 1)
        assert (hepdata_submission.overall_status == 'todo')

        # test upload works
        base_dir = os.path.dirname(os.path.realpath(__file__))

        test_directory = os.path.join(base_dir, 'test_data/test_submission')
        time_stamp = str(int(round(time.time())))
        directory = get_data_path_for_record(
            hepdata_submission.publication_recid, time_stamp)
        shutil.copytree(test_directory, directory)
        assert (os.path.exists(directory))

        process_submission_directory(
            directory,
            os.path.join(directory, 'submission.yaml'),
            hepdata_submission.publication_recid)

        # Submission should now be findable in the admin index by recid.
        admin_idx_results = admin_idx.search(
            term=hepdata_submission.publication_recid, fields=['recid'])
        assert (admin_idx_results is not None)

        # The test submission archive contains 8 data tables,
        # 4 resources and 4 participants.
        data_submissions = DataSubmission.query.filter_by(
            publication_recid=hepdata_submission.publication_recid).count()
        assert (data_submissions == 8)
        assert (len(hepdata_submission.resources) == 4)
        assert (len(hepdata_submission.participants) == 4)

        do_finalise(hepdata_submission.publication_recid,
                    force_finalise=True, convert=False)

        assert (record_exists(inspire_id=record['inspire_id']))

        # Test record is in index...
        index_records = get_records_matching_field(
            'inspire_id', record['inspire_id'], doc_type='publication')
        assert (len(index_records['hits']['hits']) == 1)

        publication_record = get_record_contents(
            hepdata_submission.publication_recid)
        assert (publication_record is not None)

        # Render context for the record page should be buildable.
        ctx = format_submission(hepdata_submission.publication_recid,
                                publication_record,
                                hepdata_submission.version, 1,
                                hepdata_submission)

        assert (ctx is not None)
        assert (ctx['version'] == 1)
        assert (ctx['recid'] == hepdata_submission.publication_recid)

        # remove the submission and test that all is removed
        unload_submission(hepdata_submission.publication_recid)

        assert (not record_exists(inspire_id=record['inspire_id']))

        data_submissions = DataSubmission.query.filter_by(
            publication_recid=hepdata_submission.publication_recid).count()
        assert (data_submissions == 0)

        # Give the search index time to process the deletion.
        sleep(2)

        admin_idx_results = admin_idx.search(
            term=hepdata_submission.publication_recid, fields=['recid'])
        assert (len(admin_idx_results) == 0)

        # Check file dir has been deleted
        assert (not os.path.exists(directory))
def test_upload_valid_file(app):
    # Test uploading and processing a file for a record.
    # Runs the same flow for both a standard ('todo') submission and a
    # sandbox submission, which differ in how new uploads are versioned.
    with app.app_context():
        base_dir = os.path.dirname(os.path.realpath(__file__))
        for i, status in enumerate(["todo", "sandbox"]):
            user = User.query.first()
            login_user(user)

            recid = f'12345{i}'
            get_or_create_hepsubmission(recid, 1, status=status)

            hepdata_submission = HEPSubmission.query.filter_by(
                publication_recid=recid).first()
            # Freshly-created submission: no abstract yet, version 1.
            assert (hepdata_submission is not None)
            assert (hepdata_submission.data_abstract is None)
            assert (hepdata_submission.created < hepdata_submission.last_updated)
            assert (hepdata_submission.version == 1)
            assert (hepdata_submission.overall_status == status)

            with open(os.path.join(base_dir,
                                   'test_data/TestHEPSubmission.zip'),
                      "rb") as stream:
                test_file = FileStorage(stream=stream,
                                        filename="TestHEPSubmission.zip")
                response = process_payload(recid, test_file,
                                           '/test_redirect_url',
                                           synchronous=True)

            assert (response.json == {'url': '/test_redirect_url'})

            # Check the submission has been updated
            hepdata_submission = HEPSubmission.query.filter_by(
                publication_recid=recid).first()
            assert (hepdata_submission.data_abstract.startswith(
                'CERN-LHC. Measurements of the cross section for ZZ production'))
            assert (hepdata_submission.created < hepdata_submission.last_updated)
            assert (hepdata_submission.version == 1)
            assert (hepdata_submission.overall_status == status)

            # Set the status to finished and try again, to check versioning
            if status == "todo":
                hepdata_submission.overall_status = 'finished'
                db.session.add(hepdata_submission)

            # Sleep before uploading new version to avoid dir name conflict
            # (directory names are derived from a second-resolution timestamp).
            sleep(1)

            # Refresh user
            user = User.query.first()
            login_user(user)

            # Upload a new version
            with open(os.path.join(base_dir,
                                   'test_data/TestHEPSubmission.zip'),
                      "rb") as stream:
                test_file = FileStorage(stream=stream,
                                        filename="TestHEPSubmission.zip")
                process_payload(recid, test_file, '/test_redirect_url',
                                synchronous=True)

            # Check the submission has been updated (overridden for a sandbox;
            # new version for normal submission)
            expected_versions = 2 if status == "todo" else 1
            hepdata_submissions = HEPSubmission.query.filter_by(
                publication_recid=recid).order_by(
                HEPSubmission.last_updated).all()
            assert (len(hepdata_submissions) == expected_versions)
            assert (hepdata_submissions[0].version == 1)

            if status == "todo":
                assert (hepdata_submissions[0].overall_status == 'finished')

            assert (hepdata_submissions[-1].data_abstract.startswith(
                'CERN-LHC. Measurements of the cross section for ZZ production'))
            assert (hepdata_submissions[-1].version == expected_versions)
            assert (hepdata_submissions[-1].overall_status == status)

            # Check that there are the expected number of subdirectories and
            # zip files under the record's main path
            # For status = 'todo' (standard submission) there will be 1 file
            # and 1 dir for each of 2 versions; for the sandbox submission
            # there will just be 1 file and 1 dir.
            directory = get_data_path_for_record(
                hepdata_submission.publication_recid)
            assert (os.path.exists(directory))
            filepaths = os.listdir(directory)
            assert (len(filepaths) == 2 * expected_versions)

            dir_count = 0
            file_count = 0
            for path in filepaths:
                if os.path.isdir(os.path.join(directory, path)):
                    dir_count += 1
                    # Upload directories are named by 10-digit timestamp.
                    assert (re.match(r"\d{10}", path) is not None)
                else:
                    file_count += 1
                    assert (re.match(r"HEPData-%s-v[12]-yaml.zip" % recid,
                                     path) is not None)

            assert (dir_count == expected_versions)
            assert (file_count == expected_versions)

            if status == "todo":
                # Delete the v2 submission and check db and v2 files have been removed
                unload_submission(hepdata_submission.publication_recid,
                                  version=2)
                hepdata_submissions = HEPSubmission.query.filter_by(
                    publication_recid=recid).order_by(
                    HEPSubmission.last_updated).all()
                assert (len(hepdata_submissions) == 1)
                assert (hepdata_submissions[0].version == 1)
                assert (hepdata_submissions[0].overall_status == 'finished')
                filepaths = os.listdir(directory)
                assert (len(filepaths) == 2)
                assert (f"HEPData-12345{i}-v1-yaml.zip" in filepaths)

            # Delete the submission and check everything has been removed
            unload_submission(hepdata_submission.publication_recid, version=1)
            hepdata_submissions = HEPSubmission.query.filter_by(
                publication_recid=recid).order_by(
                HEPSubmission.last_updated).all()
            assert (len(hepdata_submissions) == 0)
            assert (not os.path.exists(directory))
def do_unload(records_to_unload):
    """Remove every submission listed in *records_to_unload* from the DB."""
    for record_id in iter(records_to_unload):
        unload_submission(record_id)
def _import_record(inspire_id, update_existing=False,
                   base_url='https://hepdata.net', send_email=False):
    """
    Import a record from another HEPData instance by its Inspire ID.

    :param inspire_id: Inspire ID of the record to import.
    :param update_existing: if True, re-import over an existing submission;
        if False and the record exists, do nothing.
    :param base_url: HEPData instance to download the archive from.
    :param send_email: forwarded to ``do_finalise``.
    :return: True on success, False on any failure (failures roll back by
        unloading the submission).
    """
    publication_information, status = get_inspire_record_information(inspire_id)
    if status != "success":
        log.error("Failed to retrieve publication information for " + inspire_id)
        return False

    current_submission = get_latest_hepsubmission(inspire_id=inspire_id)

    if not current_submission:
        log.info(
            "The record with id {0} does not exist in the database, so we're loading it."
            .format(inspire_id))
        publication_information["inspire_id"] = inspire_id
        record_information = create_record(publication_information)
        recid = record_information['recid']
    else:
        log.info("The record with inspire id {0} already exists.".format(
            inspire_id))
        if update_existing:
            log.info("Updating instead")
            recid = current_submission.publication_recid
        else:
            log.info("Not updating as update_existing is False")
            return False

    try:
        download_path = _download_file(base_url, inspire_id)
        filename = os.path.basename(download_path)

        # Timestamped directory keeps each upload distinct under the recid.
        time_stamp = str(int(round(time.time())))
        file_save_directory = get_data_path_for_record(str(recid), time_stamp)
        if not os.path.exists(file_save_directory):
            os.makedirs(file_save_directory)

        file_path = os.path.join(file_save_directory, filename)
        log.info("Moving file to %s" % file_path)
        shutil.copy(download_path, file_path)

        # Create submission
        admin_user_id = 1
        hepsubmission = get_or_create_hepsubmission(recid, admin_user_id)
        db.session.add(hepsubmission)
        db.session.commit()

        # Then process the payload as for any other record
        errors = process_zip_archive(file_path, recid)
        if errors:
            log.info("Errors processing archive. Re-trying with old schema.")
            # Try again with old schema
            # Need to clean up first to avoid errors
            # First delete tables
            cleanup_submission(recid, 1, [])
            # Next remove remaining files
            file_save_directory = os.path.dirname(file_path)
            submission_path = os.path.join(file_save_directory,
                                           remove_file_extension(filename))
            shutil.rmtree(submission_path)
            errors = process_zip_archive(file_path, recid,
                                         old_submission_schema=True,
                                         old_data_schema=True)
            if errors:
                # Both schema versions failed; log details and abort.
                log.error("Could not process zip archive: ")
                for file, file_errors in errors.items():
                    log.error(" %s:" % file)
                    for error in file_errors:
                        log.error(" %s" % error['message'])
                raise ValueError("Could not validate record.")

        # Delete any previous upload folders
        cleanup_old_files(hepsubmission)

        log.info("Finalising record %s" % recid)
        result_json = do_finalise(recid, force_finalise=True,
                                  update=(current_submission is not None),
                                  convert=False, send_email=send_email)
        result = json.loads(result_json)

        if result and result['success']:
            log.info("Imported record %s with %s submissions" %
                     (recid, result['data_count']))
            return True
        else:
            raise ValueError("Failed to finalise record.")
    except Exception as e:
        # Unload record so a failed import leaves no partial state behind.
        unload_submission(recid)
        log.error(e)
        return False