Example #1
def delete_submission(recid):
    """
    Submissions can only be removed if they are not finalised,
    meaning they should never be in the index.
    Only delete the latest version of a submission.
    Delete indexed information only if version = 1.

    :param recid:
    :return:
    """
    if has_role(current_user, 'admin') or has_role(current_user, 'coordinator') \
        or check_is_sandbox_record(recid):

        submission = get_latest_hepsubmission(publication_recid=recid)
        unload_submission(recid, submission.version)

        if submission.version == 1:
            admin_idx = AdminIndexer()
            admin_idx.find_and_delete('recid', recid)

        return json.dumps({"success": True,
                           "recid": recid,
                           "errors": [
                               "Record successfully removed!"]})
    else:
        return json.dumps(
            {"success": False, "recid": recid,
             "errors": [
                 "You do not have permission to delete this submission. "
                 "Only coordinators can do that."]})
Example #2
def test_update_record_info(app):
    """Test update of publication information from INSPIRE."""
    assert update_record_info(
        None) == 'Inspire ID is None'  # case where Inspire ID is None
    for inspire_id in (
            '1311487',
            '19999999'):  # check both a valid and invalid Inspire ID
        assert update_record_info(
            inspire_id
        ) == 'No HEPData submission'  # before creation of HEPSubmission object
        submission = process_submission_payload(inspire_id=inspire_id,
                                                submitter_id=1,
                                                reviewer={
                                                    'name': 'Reviewer',
                                                    'email':
                                                    '*****@*****.**'
                                                },
                                                uploader={
                                                    'name': 'Uploader',
                                                    'email':
                                                    '*****@*****.**'
                                                },
                                                send_upload_email=False)

        # Process the files to create DataSubmission tables in the DB.
        base_dir = os.path.dirname(os.path.realpath(__file__))
        directory = os.path.join(base_dir, 'test_data/test_submission')
        tmp_path = os.path.join(tempfile.mkdtemp(dir=CFG_TMPDIR),
                                'test_submission')
        shutil.copytree(directory, tmp_path)
        process_submission_directory(tmp_path,
                                     os.path.join(tmp_path, 'submission.yaml'),
                                     submission.publication_recid)
        do_finalise(submission.publication_recid,
                    force_finalise=True,
                    convert=False)

        if inspire_id == '19999999':
            assert update_record_info(inspire_id) == 'Invalid Inspire ID'
        else:

            # First change the publication information to that of a different record.
            different_inspire_record_information, status = get_inspire_record_information(
                '1650066')
            assert status == 'success'
            hep_submission = get_latest_hepsubmission(inspire_id=inspire_id)
            assert hep_submission is not None
            update_record(hep_submission.publication_recid,
                          different_inspire_record_information)

            # Then check that the update works and that a further update is not required.
            assert update_record_info(inspire_id, send_email=True) == 'Success'
            assert update_record_info(
                inspire_id
            ) == 'No update needed'  # check case where information already current

        unload_submission(submission.publication_recid)
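
A hedged sketch of how a caller might branch on the status strings this test asserts; the inspire id is one used in the test above:

status = update_record_info('1311487')
if status == 'Success':
    pass  # publication information was refreshed from INSPIRE
elif status == 'No update needed':
    pass  # stored information already matches INSPIRE
elif status in ('Inspire ID is None', 'No HEPData submission', 'Invalid Inspire ID'):
    pass  # nothing to update for this id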
Example #3
def test_tables(app, live_server, env_browser):
    """E2E test to tables in a record."""
    browser = env_browser

    # Import record with non-default table names
    import_default_data(app, [{'hepdata_id': 'ins1206352'}])

    try:
        browser.get(flask.url_for('hepdata_theme.index', _external=True))
        assert (flask.url_for('hepdata_theme.index', _external=True)
                in browser.current_url)

        latest_item = browser.find_element_by_css_selector(
            '.latest-record .title')
        actions = ActionChains(browser)
        actions.move_to_element(latest_item).perform()
        latest_item.click()

        # Check current table name
        assert (browser.find_element_by_id('table_name').text ==
                'Figure 8 panel (a)')

        # Check switching tables works as expected
        new_table = browser.find_elements_by_css_selector(
            '#table-list li h4')[2]
        assert (new_table.text == "Figure 8 panel (c)")
        new_table.click()
        _check_table_links(browser, "Figure 8 panel (c)")

        # Get link to table from table page
        table_link = browser.find_element_by_css_selector('#data_link_container button') \
            .get_attribute('data-clipboard-text')
        assert (table_link.endswith('table=Figure%208%20panel%20(c)'))
        _check_table_links(browser, "Figure 8 panel (c)", url=table_link)

        # Check a link to a table name with spaces removed
        short_table_link = table_link.replace('%20', '')
        _check_table_links(browser, "Figure 8 panel (c)", url=short_table_link)

        # Check a link to an invalid table
        invalid_table_link = table_link.replace('Figure%208%20panel%20(c)',
                                                'NotARealTable')
        _check_table_links(browser,
                           "Figure 8 panel (a)",
                           url=invalid_table_link)

    finally:
        # Delete the record and reindex so the added record doesn't affect other tests
        submission = get_latest_hepsubmission(inspire_id='1206352')
        unload_submission(submission.publication_recid)
        reindex_all(recreate=True)
Example #4
def delete_submission(recid):
    """
    Submissions can only be removed if they are not finalised,
    meaning they should never be in the index.
    :param recid:
    :return:
    """
    if has_role(current_user, 'admin') or has_role(current_user, 'coordinator') \
        or check_is_sandbox_record(recid):
        unload_submission(recid)

        admin_idx = AdminIndexer()
        admin_idx.reindex(recreate=True)
        return json.dumps({"success": True,
                           "recid": recid,
                           "errors": [
                               "Record successfully removed!"]})
    else:
        return json.dumps(
            {"success": False, "recid": recid,
             "errors": [
                 "You do not have permission to delete this submission. "
                 "Only coordinators can do that."]})
Example #5
def create_mock_migrated_record(inspire_id, send_email):
    """
    Populate the DB with a specific record which mimics a record migrated from
    hepdata.cedar.ac.uk. Accepts inspire ids 753951, 1299143, 1320775.

    Usage: ``hepdata utils create-mock-migrated-record``
    """
    if current_app.config.get('ENV') == 'production':
        click.confirm(
            'You are currently running in production mode on'
            ' %s. Are you sure you want to add a mock migrated record?' %
            current_app.config.get('SITE_URL'),
            abort=True)

    # Delete current record if it already exists
    current_submission = get_latest_hepsubmission(inspire_id=inspire_id)
    if current_submission:
        click.confirm(
            'Inspire record %s already exists. Do you want to recreate it?' %
            inspire_id,
            abort=True)
        unload_submission(current_submission.publication_recid)

    mock_import_old_record(inspire_id, send_email=send_email)
Example #6
def do_unload(records_to_unload):
    for record_id in records_to_unload:
        unload_submission(record_id)
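
A hedged usage sketch; the record ids are hypothetical:

records_to_unload = [12345, 12346]  # hypothetical record ids
do_unload(records_to_unload)        # each id is passed to unload_submission in turn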
Example #7
def test_create_submission(app, admin_idx):
    """
    Test the whole submission pipeline in loading a file, ensuring the HEPSubmission object is created,
    all the files have been added, and the record has been indexed.
    :return:
    """
    with app.app_context():

        admin_idx.recreate_index()

        # test submission part works

        record = {
            'inspire_id': '19999999',
            'title': 'HEPData Testing 1',
            'reviewer': {
                'name': 'Testy McTester',
                'email': '*****@*****.**'
            },
            'uploader': {
                'name': 'Testy McTester',
                'email': '*****@*****.**'
            },
            'message': 'This is ready',
            'user_id': 1
        }

        hepdata_submission = process_submission_payload(**record)

        assert (hepdata_submission.version == 1)
        assert (hepdata_submission.overall_status == 'todo')

        # test upload works
        base_dir = os.path.dirname(os.path.realpath(__file__))

        test_directory = os.path.join(base_dir, 'test_data/test_submission')
        time_stamp = str(int(round(time.time())))
        directory = get_data_path_for_record(
            hepdata_submission.publication_recid, time_stamp)
        shutil.copytree(test_directory, directory)
        assert (os.path.exists(directory))

        process_submission_directory(
            directory, os.path.join(directory, 'submission.yaml'),
            hepdata_submission.publication_recid)

        admin_idx_results = admin_idx.search(
            term=hepdata_submission.publication_recid, fields=['recid'])
        assert (admin_idx_results is not None)

        data_submissions = DataSubmission.query.filter_by(
            publication_recid=hepdata_submission.publication_recid).count()
        assert (data_submissions == 8)
        assert (len(hepdata_submission.resources) == 4)
        assert (len(hepdata_submission.participants) == 4)

        do_finalise(hepdata_submission.publication_recid,
                    force_finalise=True,
                    convert=False)

        assert (record_exists(inspire_id=record['inspire_id']))

        # Test record is in index...
        index_records = get_records_matching_field('inspire_id',
                                                   record['inspire_id'],
                                                   doc_type='publication')
        assert (len(index_records['hits']['hits']) == 1)

        publication_record = get_record_contents(
            hepdata_submission.publication_recid)

        assert (publication_record is not None)

        ctx = format_submission(hepdata_submission.publication_recid,
                                publication_record, hepdata_submission.version,
                                1, hepdata_submission)

        assert (ctx is not None)

        assert (ctx['version'] == 1)
        assert (ctx['recid'] == hepdata_submission.publication_recid)

        # remove the submission and test that everything is removed

        unload_submission(hepdata_submission.publication_recid)

        assert (not record_exists(inspire_id=record['inspire_id']))

        data_submissions = DataSubmission.query.filter_by(
            publication_recid=hepdata_submission.publication_recid).count()

        assert (data_submissions == 0)

        sleep(2)

        admin_idx_results = admin_idx.search(
            term=hepdata_submission.publication_recid, fields=['recid'])
        assert (len(admin_idx_results) == 0)

        # Check file dir has been deleted
        assert (not os.path.exists(directory))
Example #8
def test_upload_valid_file(app):
    # Test uploading and processing a file for a record
    with app.app_context():
        base_dir = os.path.dirname(os.path.realpath(__file__))

        for i, status in enumerate(["todo", "sandbox"]):
            user = User.query.first()
            login_user(user)

            recid = f'12345{i}'
            get_or_create_hepsubmission(recid, 1, status=status)

            hepdata_submission = HEPSubmission.query.filter_by(
                publication_recid=recid).first()
            assert (hepdata_submission is not None)
            assert (hepdata_submission.data_abstract is None)
            assert (hepdata_submission.created <
                    hepdata_submission.last_updated)
            assert (hepdata_submission.version == 1)
            assert (hepdata_submission.overall_status == status)

            with open(
                    os.path.join(base_dir, 'test_data/TestHEPSubmission.zip'),
                    "rb") as stream:
                test_file = FileStorage(stream=stream,
                                        filename="TestHEPSubmission.zip")
                response = process_payload(recid,
                                           test_file,
                                           '/test_redirect_url',
                                           synchronous=True)

            assert (response.json == {'url': '/test_redirect_url'})

            # Check the submission has been updated
            hepdata_submission = HEPSubmission.query.filter_by(
                publication_recid=recid).first()
            assert (hepdata_submission.data_abstract.startswith(
                'CERN-LHC.  Measurements of the cross section  for ZZ production'
            ))
            assert (hepdata_submission.created <
                    hepdata_submission.last_updated)
            assert (hepdata_submission.version == 1)
            assert (hepdata_submission.overall_status == status)

            # Set the status to finished and try again, to check versioning
            if status == "todo":
                hepdata_submission.overall_status = 'finished'
                db.session.add(hepdata_submission)

            # Sleep before uploading a new version to avoid a dir name conflict
            sleep(1)

            # Refresh user
            user = User.query.first()
            login_user(user)

            # Upload a new version
            with open(
                    os.path.join(base_dir, 'test_data/TestHEPSubmission.zip'),
                    "rb") as stream:
                test_file = FileStorage(stream=stream,
                                        filename="TestHEPSubmission.zip")
                process_payload(recid,
                                test_file,
                                '/test_redirect_url',
                                synchronous=True)

            # Check the submission has been updated (overridden for a sandbox;
            # new version for normal submission)
            expected_versions = 2 if status == "todo" else 1
            hepdata_submissions = HEPSubmission.query.filter_by(
                publication_recid=recid).order_by(
                    HEPSubmission.last_updated).all()
            assert (len(hepdata_submissions) == expected_versions)
            assert (hepdata_submissions[0].version == 1)

            if status == "todo":
                assert (hepdata_submissions[0].overall_status == 'finished')

            assert (hepdata_submissions[-1].data_abstract.startswith(
                'CERN-LHC.  Measurements of the cross section  for ZZ production'
            ))
            assert (hepdata_submissions[-1].version == expected_versions)
            assert (hepdata_submissions[-1].overall_status == status)

            # Check that there are the expected number of subdirectories and
            # zip files under the record's main path
            # For status = 'todo' (standard submission) there will be 1 file
            # and 1 dir for each of 2 versions; for the sandbox submission
            # there will just be 1 file and 1 dir.
            directory = get_data_path_for_record(
                hepdata_submission.publication_recid)
            assert (os.path.exists(directory))
            filepaths = os.listdir(directory)
            assert (len(filepaths) == 2 * expected_versions)

            dir_count = 0
            file_count = 0
            for path in filepaths:
                if os.path.isdir(os.path.join(directory, path)):
                    dir_count += 1
                    assert (re.match(r"\d{10}", path) is not None)
                else:
                    file_count += 1
                    assert (re.match(r"HEPData-%s-v[12]-yaml.zip" % recid,
                                     path) is not None)

            assert (dir_count == expected_versions)
            assert (file_count == expected_versions)

            if status == "todo":
                # Delete the v2 submission and check db and v2 files have been removed
                unload_submission(hepdata_submission.publication_recid,
                                  version=2)

                hepdata_submissions = HEPSubmission.query.filter_by(
                    publication_recid=recid).order_by(
                        HEPSubmission.last_updated).all()
                assert (len(hepdata_submissions) == 1)
                assert (hepdata_submissions[0].version == 1)
                assert (hepdata_submissions[0].overall_status == 'finished')

                filepaths = os.listdir(directory)
                assert (len(filepaths) == 2)
                assert (f"HEPData-12345{i}-v1-yaml.zip" in filepaths)

            # Delete the submission and check everything has been removed
            unload_submission(hepdata_submission.publication_recid, version=1)

            hepdata_submissions = HEPSubmission.query.filter_by(
                publication_recid=recid).order_by(
                    HEPSubmission.last_updated).all()
            assert (len(hepdata_submissions) == 0)

            assert (not os.path.exists(directory))
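
A hedged sketch of the version-specific unloading exercised at the end of this test; the record id is hypothetical:

recid = 123450  # hypothetical record id with two uploaded versions
unload_submission(recid, version=2)  # removes only the version-2 rows and files
unload_submission(recid, version=1)  # removes what remains of the submission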
Example #9
def do_unload(records_to_unload):
    for record_id in records_to_unload:
        unload_submission(record_id)
Example #10
def _import_record(inspire_id,
                   update_existing=False,
                   base_url='https://hepdata.net',
                   send_email=False):
    publication_information, status = get_inspire_record_information(
        inspire_id)
    if status != "success":
        log.error("Failed to retrieve publication information for " +
                  inspire_id)
        return False

    current_submission = get_latest_hepsubmission(inspire_id=inspire_id)

    if not current_submission:
        log.info(
            "The record with id {0} does not exist in the database, so we're loading it."
            .format(inspire_id))
        publication_information["inspire_id"] = inspire_id
        record_information = create_record(publication_information)
        recid = record_information['recid']
    else:
        log.info("The record with inspire id {0} already exists.".format(
            inspire_id))
        if update_existing:
            log.info("Updating instead")
            recid = current_submission.publication_recid
        else:
            log.info("Not updating as update_existing is False")
            return False

    try:
        download_path = _download_file(base_url, inspire_id)

        filename = os.path.basename(download_path)

        time_stamp = str(int(round(time.time())))
        file_save_directory = get_data_path_for_record(str(recid), time_stamp)
        if not os.path.exists(file_save_directory):
            os.makedirs(file_save_directory)

        file_path = os.path.join(file_save_directory, filename)
        log.info("Moving file to %s" % file_path)
        shutil.copy(download_path, file_path)

        # Create submission
        admin_user_id = 1
        hepsubmission = get_or_create_hepsubmission(recid, admin_user_id)
        db.session.add(hepsubmission)
        db.session.commit()

        # Then process the payload as for any other record
        errors = process_zip_archive(file_path, recid)
        if errors:
            log.info("Errors processing archive. Re-trying with old schema.")
            # Try again with old schema
            # Need to clean up first to avoid errors
            # First delete tables
            cleanup_submission(recid, 1, [])
            # Next remove remaining files
            file_save_directory = os.path.dirname(file_path)
            submission_path = os.path.join(file_save_directory,
                                           remove_file_extension(filename))
            shutil.rmtree(submission_path)

            errors = process_zip_archive(file_path,
                                         recid,
                                         old_submission_schema=True,
                                         old_data_schema=True)

            if errors:
                log.error("Could not process zip archive: ")
                for file, file_errors in errors.items():
                    log.error("    %s:" % file)
                    for error in file_errors:
                        log.error("        %s" % error['message'])

                raise ValueError("Could not validate record.")

        # Delete any previous upload folders
        cleanup_old_files(hepsubmission)

        log.info("Finalising record %s" % recid)

        result_json = do_finalise(recid,
                                  force_finalise=True,
                                  update=(current_submission is not None),
                                  convert=False,
                                  send_email=send_email)
        result = json.loads(result_json)

        if result and result['success']:
            log.info("Imported record %s with %s submissions" %
                     (recid, result['data_count']))
            return True
        else:
            raise ValueError("Failed to finalise record.")
    except Exception as e:
        # Unload record
        unload_submission(recid)
        log.error(e)
        return False
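
A hedged usage sketch of the importer; the inspire id is illustrative (it appears elsewhere in these examples) and the boolean return reflects whether finalisation succeeded, per the code above:

ok = _import_record('1650066',  # illustrative inspire id
                    update_existing=True,
                    send_email=False)
if not ok:
    print('Import failed; see the log output for the reason.')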