Esempio n. 1
0
def get_data_path(record_id=None, inspire_id=None):
    """Print the directories where a record's data files are stored.

    The record is identified by ``record_id`` or, when that is not given,
    by ``inspire_id``, which is mapped to a record id through the latest
    submission. All output goes through ``click.echo``; nothing is returned.

    :param record_id: publication record id
    :param inspire_id: Inspire ID, used only when ``record_id`` is falsy
    """
    if not record_id and not inspire_id:
        click.echo("Please provide either record-id or inspire-id.")
        return

    if record_id:
        # The lookup only confirms that the record exists.
        if get_latest_hepsubmission(publication_recid=record_id) is None:
            click.echo("No record with id %s" % record_id)
            return
    else:
        submission = get_latest_hepsubmission(inspire_id=inspire_id)
        if submission is None:
            click.echo("No record with inspire id %s" % inspire_id)
            return
        record_id = submission.publication_recid
        click.echo("Inspire ID %s maps to record id %s" %
                   (inspire_id, record_id))

    record_path = data_files.get_data_path_for_record(record_id)
    click.echo("Files for record %s are at:\t\t %s" %
               (record_id, record_path))
    converted_path = data_files.get_converted_directory_path(record_id)
    click.echo("Converted files for record %s are at:\t %s" %
               (record_id, converted_path))
Esempio n. 2
0
def send_cookie_email(submission_participant,
                      record_information,
                      message=None,
                      version=1):
    """
    Sends a participant the invitation email for a record.

    The body is rendered from the ``invite.html`` template and handed to
    ``create_send_email_task`` together with a subject naming the
    participant's role and record id.

    :param submission_participant: participant being invited
    :param record_information: mapping providing 'recid' and 'title'
    :param message: optional message passed to the template
    :param version: submission version passed to the template
    """
    latest_submission = get_latest_hepsubmission(
        publication_recid=record_information['recid'])
    coordinator = User.query.get(latest_submission.coordinator)
    collaboration = _get_collaboration(latest_submission.coordinator)

    template_context = dict(
        name=submission_participant.full_name,
        role=submission_participant.role,
        title=record_information['title'],
        site_url=current_app.config.get('SITE_URL', 'https://www.hepdata.net'),
        user_account=submission_participant.user_account,
        invite_token=submission_participant.invitation_cookie,
        status=submission_participant.status,
        recid=submission_participant.publication_recid,
        version=version,
        email=submission_participant.email,
        coordinator_email=coordinator.email,
        collaboration=collaboration,
        message=message,
    )
    message_body = render_template('hepdata_theme/email/invite.html',
                                   **template_context)

    recipient = submission_participant.email
    # "an Uploader" but "a Reviewer".
    article = "an" if submission_participant.role == "uploader" else "a"
    subject = "[HEPData] Invitation to be {0} {1} of record {2} in HEPData".format(
        article,
        submission_participant.role.capitalize(),
        submission_participant.publication_recid)

    create_send_email_task(recipient, subject, message_body)
Esempio n. 3
0
def get_pending_invitations_for_user(user):
    """
    Collects the invitations to upload or review that a user has not yet
    taken up.

    An invitation counts as pending when its email matches the user's
    (case-insensitively), its role is reviewer or uploader, its status is
    primary and no user account is attached to it.

    :param user: User object
    :return: list of dicts with 'title', 'invitation_cookie', 'role' and
        'coordinator'
    """
    same_email = (
        func.lower(SubmissionParticipant.email) == func.lower(user.email))
    invitable_role = or_(SubmissionParticipant.role == 'reviewer',
                         SubmissionParticipant.role == 'uploader')
    is_primary = SubmissionParticipant.status == 'primary'
    # SQLAlchemy turns the comparison with None into an IS NULL test.
    not_yet_accepted = SubmissionParticipant.user_account == None

    invitations = SubmissionParticipant.query.filter(
        same_email, invitable_role, is_primary, not_yet_accepted).all()

    def _summarise(invitation):
        record = get_record_by_id(invitation.publication_recid)
        submission = get_latest_hepsubmission(
            publication_recid=invitation.publication_recid)
        coordinator = get_user_from_id(submission.coordinator)
        return {
            'title': decode_string(record['title']),
            'invitation_cookie': invitation.invitation_cookie,
            'role': invitation.role,
            'coordinator': coordinator
        }

    return [_summarise(invitation) for invitation in invitations]
Esempio n. 4
0
def send_question_email(question):
    """
    Emails a question about a record to the people responsible for it.

    Recipients are the admin address, every participant of the record and
    the coordinator when the coordinator's id is greater than 1. Nothing is
    sent when the record has no submission.

    :param question: object providing ``publication_recid`` and ``question``
    """
    reply_to = current_user.email
    recid = question.publication_recid

    submission = get_latest_hepsubmission(publication_recid=recid)
    participants = get_submission_participants_for_record(recid)

    if not submission:
        return

    destinations = [current_app.config['ADMIN_EMAIL']]
    destinations.extend(participant.email for participant in participants)

    coordinator = User.query.get(submission.coordinator)
    if coordinator.id > 1:
        destinations.append(coordinator.email)

    if not destinations:
        return

    site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
    message_body = render_template('hepdata_theme/email/question.html',
                                   inspire_id=submission.inspire_id,
                                   user_email=reply_to,
                                   site_url=site_url,
                                   message=question.question)

    # The set removes addresses that appear more than once.
    recipients = ','.join(set(destinations))
    subject = "[HEPData] Question for record ins{0}".format(
        submission.inspire_id)
    create_send_email_task(destination=recipients,
                           subject=subject,
                           message=message_body,
                           reply_to_address=reply_to)
Esempio n. 5
0
def add_participant(recid):
    """
    Adds a participant to the latest submission of a record.

    The participant's name, email and role are read from the request form
    fields 'name', 'email' and 'type'.

    :param recid: publication record id the participant is added to
    :return: JSON string with 'success', 'recid' and 'message'
    """
    try:
        submission_record = get_latest_hepsubmission(publication_recid=recid)
        full_name = request.form['name']
        email = request.form['email']
        participant_type = request.form['type']

        new_record = SubmissionParticipant(publication_recid=recid,
                                           full_name=full_name,
                                           email=email, role=participant_type)
        submission_record.participants.append(new_record)
        db.session.commit()
        return json.dumps(
            {"success": True, "recid": recid,
             "message": "{0} {1} added.".format(full_name, participant_type)})

    except Exception:
        # Discard whatever was pending so the session can be used again
        # after a failed flush or commit.
        db.session.rollback()
        return json.dumps(
            {"success": False, "recid": recid,
             "message": 'Unable to add participant.'})
Esempio n. 6
0
def download_submission_with_inspire_id(*args, **kwargs):
    """
       Looks up a submission by Inspire ID and passes it to
       download_submission together with the requested file format.

       :param inspire_id: inspire id, with or without the 'ins' prefix
       :param version: version of submission to export. If absent, the latest is used.
       :param file_format: yaml, csv, ROOT, or YODA
       :return: the download response, or an error page when no submission matches
    """
    raw_id = kwargs.pop('inspire_id')
    inspire_id = raw_id.replace('ins', '') if 'ins' in raw_id else raw_id

    if 'version' in kwargs:
        matching = HEPSubmission.query.filter_by(
            inspire_id=inspire_id, version=kwargs.pop('version'))
        submission = matching.first()
    else:
        submission = get_latest_hepsubmission(inspire_id=inspire_id)

    if submission:
        return download_submission(submission, kwargs.pop('file_format'))

    return display_error(
        title="No submission found",
        description="A submission with INSPIRE id {0} does not exist".format(inspire_id)
    )
Esempio n. 7
0
def download_data_table_by_recid(*args, **kwargs):
    """
    Record ID download
    Downloads the latest data file given the url /download/submission/1231/Table 1/yaml or
    a particular version given /download/submission/1231/Table 1/1/yaml

    :param args:
    :param kwargs: recid, table_name, version (optional), and file_format
    :return: result of download_datatable for the matching table
    """
    recid = kwargs.pop('recid')
    table_name = kwargs.pop('table_name')

    # "Table1" in the URL stands for "Table 1".
    if ' ' not in table_name:
        table_name = table_name.replace('Table', 'Table ')

    if 'version' in kwargs:
        version = kwargs.pop('version')
    else:
        version = get_latest_hepsubmission(publication_recid=recid).version

    matching_tables = DataSubmission.query.filter_by(
        publication_recid=recid, version=version, name=table_name)
    datasubmission = matching_tables.one()

    file_format = kwargs.pop('file_format')
    return download_datatable(datasubmission,
                              file_format,
                              submission_id='{0}'.format(recid),
                              table_name=table_name)
Esempio n. 8
0
def send_question_email(question):
    """
    Emails a question about a record to the people responsible for it.

    Recipients are the admin address, every participant of the record and
    the coordinator when the coordinator's user id is greater than 1.
    Replies go to the current user. Nothing is sent when the record has no
    submission.

    :param question: object providing ``publication_recid`` and ``question``
    """
    reply_to = current_user.email

    submission = get_latest_hepsubmission(publication_recid=question.publication_recid)
    submission_participants = get_submission_participants_for_record(question.publication_recid)

    if submission:
        destinations = [current_app.config['ADMIN_EMAIL']]
        for submission_participant in submission_participants:
            destinations.append(submission_participant.email)

        # submission.coordinator is a user id, not a User object, so the
        # account has to be loaded before its email address can be read.
        coordinator = User.query.get(submission.coordinator)
        if coordinator is not None and coordinator.id > 1:
            destinations.append(coordinator.email)

        message_body = render_template(
            'hepdata_theme/email/question.html',
            inspire_id=submission.inspire_id,
            user_email=reply_to,
            site_url=current_app.config.get('SITE_URL', 'https://www.hepdata.net'),
            message=question.question)

        create_send_email_task(destination=','.join(destinations),
                               subject="[HEPData] Question for record ins{0}".format(submission.inspire_id),
                               message=message_body, reply_to_address=reply_to)
Esempio n. 9
0
def download_data_table_by_recid(*args, **kwargs):
    """
    Record ID download
    Downloads the latest data file given the url /download/submission/1231/Table 1/yaml or
    a particular version when ``version`` is supplied.

    :param args:
    :param kwargs: recid, table_name, version (optional), and file_format
    :return: result of download_datatable for the matching table
    """
    recid = kwargs.pop('recid')
    table_name = kwargs.pop('table_name')

    if 'version' not in kwargs:
        version = get_latest_hepsubmission(publication_recid=recid).version
    else:
        version = kwargs.pop('version')

    # Query.filter() takes column expressions only; keyword arguments are
    # accepted by filter_by(), not by filter().
    datasubmission = db.session.query(DataResource) \
        .join(DataSubmission) \
        .filter(DataSubmission.publication_recid == recid,
                DataSubmission.version == version,
                DataSubmission.name == table_name) \
        .one()

    return download_datatable(datasubmission, kwargs.pop('file_format'),
                              submission_id='{0}'.format(recid), table_name=table_name)
Esempio n. 10
0
def download_data_table_by_inspire_id(*args, **kwargs):
    """
    Downloads the latest data file given the url /download/submission/ins1283842/Table 1/yaml or
    a particular version when ``version`` is supplied.

    :param args:
    :param kwargs: inspire_id, table_name, version (optional), and file_format
    :return: result of download_datatable for the matching table
    """
    raw_id = kwargs.pop('inspire_id')
    table_name = kwargs.pop('table_name')

    # Accept the id with or without its 'ins' prefix.
    inspire_id = raw_id.replace('ins', '') if 'ins' in raw_id else raw_id

    if 'version' in kwargs:
        version = kwargs.pop('version')
    else:
        version = get_latest_hepsubmission(inspire_id=inspire_id).version

    table_query = (
        db.session.query(DataResource)
        .join(DataSubmission)
        .filter(DataSubmission.publication_inspire_id == inspire_id,
                DataSubmission.version == version,
                DataSubmission.name == table_name)
    )
    datasubmission = table_query.one()

    file_format = kwargs.pop('file_format')
    return download_datatable(datasubmission, file_format,
                              submission_id='ins{0}'.format(inspire_id),
                              table_name=table_name)
Esempio n. 11
0
def get_resources(recid, version):
    """
    Lists the resources of a publication version: first the resources
    attached to the submission itself, then those of each data table in
    ascending id order.

    :param recid: publication record id
    :param version: submission version
    :return: JSON string with a 'submission_items' list
    """
    common_resources = {'name': 'Common Resources', 'type': 'submission', 'version': version, 'id': recid,
                        'resources': []}

    submission = get_latest_hepsubmission(publication_recid=recid, version=version)
    if submission:
        common_resources['resources'].extend(
            process_resource(resource) for resource in submission.resources)

    submission_items = [common_resources]

    table_query = DataSubmission.query.filter_by(publication_recid=recid, version=version)
    for table in table_query.order_by(DataSubmission.id.asc()).all():
        submission_items.append({
            'name': table.name,
            'type': 'data',
            'id': table.id,
            'resources': [process_resource(resource) for resource in table.resources],
            'version': table.version,
        })

    return json.dumps({'submission_items': submission_items})
Esempio n. 12
0
def create_new_version(recid, user, notify_uploader=True, uploader_message=None):
    """
    Reopens a finished record by adding a new submission version.

    :param recid: publication record id
    :param user: not used by this function
    :param notify_uploader: whether to email the primary uploaders
    :param uploader_message: optional message included in that email
    :return: JSON response with the new version, or a JSON error message
        with status 400 when the latest version is not finished
    """
    latest = get_latest_hepsubmission(publication_recid=recid)

    if latest.overall_status != 'finished':
        return jsonify({"message": f"Rec id {recid} is not finished so cannot create a new version"}), 400

    # A revision is a fresh HEPSubmission carrying the next version number.
    revision = HEPSubmission(publication_recid=recid,
                             overall_status='todo',
                             inspire_id=latest.inspire_id,
                             coordinator=latest.coordinator,
                             version=latest.version + 1)
    db.session.add(revision)
    db.session.commit()

    if notify_uploader:
        primary_uploaders = SubmissionParticipant.query.filter_by(
            role='uploader', publication_recid=recid, status='primary')
        record_information = get_record_by_id(recid)
        for uploader in primary_uploaders:
            send_cookie_email(uploader,
                              record_information,
                              message=uploader_message,
                              version=revision.version)

    return jsonify({'success': True, 'version': revision.version})
Esempio n. 13
0
def add_participant(recid):
    """
    Adds a participant to the latest submission of a record.

    The participant's name, email and role are read from the request form
    fields 'name', 'email' and 'type'.

    :param recid: publication record id the participant is added to
    :return: JSON string with 'success', 'recid' and 'message'
    """
    try:
        submission_record = get_latest_hepsubmission(publication_recid=recid)
        full_name = request.form['name']
        email = request.form['email']
        participant_type = request.form['type']

        new_record = SubmissionParticipant(publication_recid=recid,
                                           full_name=full_name,
                                           email=email,
                                           role=participant_type)
        submission_record.participants.append(new_record)
        db.session.commit()
        return json.dumps({
            "success": True,
            "recid": recid,
            "message": "{0} {1} added.".format(full_name, participant_type)
        })

    except Exception:
        # Discard whatever was pending so the session can be used again
        # after a failed flush or commit.
        db.session.rollback()
        return json.dumps({
            "success": False,
            "recid": recid,
            "message": 'Unable to add participant.'
        })
Esempio n. 14
0
def get_resources(recid, version):
    """
    Maps "Common Resources" and each data table name of a publication
    version to a description of its resources.

    :param recid: publication record id
    :param version: submission version
    :return: JSON string of the ordered mapping
    """
    common = {"type": "submission", "version": version, "id": recid, "resources": []}
    result = OrderedDict()
    result["Common Resources"] = common

    submission = get_latest_hepsubmission(publication_recid=recid, version=version)
    if submission:
        common["resources"].extend(
            process_reference(reference) for reference in submission.resources)

    tables = DataSubmission.query.filter_by(publication_recid=recid, version=version).all()
    for table in tables:
        entry = {
            "type": "data",
            "id": table.id,
            "resources": [],
            "version": table.version,
        }
        result[table.name] = entry
        entry["resources"].extend(
            process_reference(reference) for reference in table.resources)

    return json.dumps(result)
Esempio n. 15
0
def promote_or_demote_participant(recid, action, demote_or_promote,
                                  participant_id):
    """
    Sets a participant's status to primary (promote) or reserve (anything
    else) and, on promotion, sends them the invitation email.

    :param recid: record id that the user will be promoted or demoted for
    :param action: upload or review; not used by this function
    :param demote_or_promote: 'promote' to promote, otherwise demote
    :param participant_id: id of user from the SubmissionParticipant table.
    :return: JSON string with 'success' and 'recid', plus 'error' on failure
    """
    try:
        participant = SubmissionParticipant.query.filter_by(
            id=participant_id).one()

        new_status = 'primary' if demote_or_promote == 'promote' else 'reserve'
        participant.status = new_status
        db.session.add(participant)
        db.session.commit()

        record = get_record_by_id(recid)

        # Only a promotion is announced to the participant.
        if new_status == 'primary':
            latest = get_latest_hepsubmission(publication_recid=recid)
            send_cookie_email(participant, record, version=latest.version)

        return json.dumps({"success": True, "recid": recid})
    except Exception as error:
        return json.dumps({"success": False, "recid": recid, "error": str(error)})
Esempio n. 16
0
def download_submission_with_inspire_id(*args, **kwargs):
    """
       Finds a submission by Inspire ID and hands it to download_submission
       with the requested file format.

       :param inspire_id: inspire id, with or without the 'ins' prefix
       :param version: version of submission to export. If absent, returns the latest.
       :param file_format: yaml, csv, root, or yoda
       :return: the download response, or an error page when nothing matches
    """
    requested_id = kwargs.pop('inspire_id')
    if 'ins' in requested_id:
        inspire_id = requested_id.replace('ins', '')
    else:
        inspire_id = requested_id

    if 'version' not in kwargs:
        submission = get_latest_hepsubmission(inspire_id=inspire_id)
    else:
        versioned = HEPSubmission.query.filter_by(
            inspire_id=inspire_id, version=kwargs.pop('version'))
        submission = versioned.first()

    if submission:
        return download_submission(submission, kwargs.pop('file_format'))

    description = "A submission with Inspire ID {0} does not exist".format(
        inspire_id)
    return display_error(title="No submission found",
                         description=description)
Esempio n. 17
0
def unload_submission(record_id, version=1):
    """
    Removes a version of a record, provided it is the latest version.

    When version 1 is unloaded, the record's data tables and the
    publication itself are also deleted from the search index.

    :param record_id: publication record id
    :param version: version to unload; ignored unless it is the latest
    """
    submission = get_latest_hepsubmission(publication_recid=record_id)

    if not submission:
        print('Record {0} not found'.format(record_id))
        return

    if version != submission.version:
        print('Not unloading record {0} version {1} (latest version {2})...'.format(record_id, version, submission.version))
        return

    print('Unloading record {0} version {1}...'.format(record_id, version))
    remove_submission(record_id, version)

    if version == 1:
        indexed_tables = get_records_matching_field("related_publication", record_id)
        for hit in indexed_tables["hits"]["hits"]:
            table_id = hit["_id"]
            print("\t Removed data table {0} from index".format(table_id))
            try:
                delete_item_from_index(doc_type=CFG_DATA_TYPE, id=table_id, parent=record_id)
            except Exception as error:
                logging.error("Unable to remove {0} from index. {1}".format(table_id, error))

        try:
            delete_item_from_index(doc_type=CFG_PUB_TYPE, id=record_id)
            print("Removed publication {0} from index".format(record_id))
        except NotFoundError as not_found:
            print(not_found)

    print('Finished unloading record {0} version {1}.'.format(record_id, version))
Esempio n. 18
0
def get_all_review_messages(publication_recid):
    """
    Gets the review messages for the latest version of a publication,
    grouped by data table name.

    :param publication_recid: publication record id
    :return: JSON string mapping each table name to its list of messages;
        an empty object when the publication has no submission
    """
    messages = OrderedDict()

    latest_submission = get_latest_hepsubmission(publication_recid=publication_recid)
    if latest_submission is None:
        return json.dumps(messages, default=default_time)

    reviews = DataReview.query.filter_by(
        publication_recid=publication_recid, version=latest_submission.version).order_by(
        DataReview.id.asc()).all()

    for data_review in reviews:
        data_submission_record = DataSubmission.query.filter_by(
            id=data_review.data_recid).one()

        # messages is keyed by table name, so the name (not the data record
        # id) decides whether a list already exists. This keeps messages
        # collected earlier for the same table.
        table_messages = messages.setdefault(data_submission_record.name, [])
        query_messages_for_data_review(data_review, table_messages)

    return json.dumps(messages, default=default_time)
Esempio n. 19
0
def delete_submission(recid):
    """
    Deletes the latest version of a submission.

    Allowed for admins, coordinators and sandbox records. Indexed
    information is deleted only when the latest version is 1.

    :param recid: publication record id
    :return: JSON string with 'success', 'recid' and 'errors'
    """
    may_delete = (has_role(current_user, 'admin')
                  or has_role(current_user, 'coordinator')
                  or check_is_sandbox_record(recid))

    if not may_delete:
        denial = ("You do not have permission to delete this submission. "
                  "Only coordinators can do that.")
        return json.dumps(
            {"success": False, "recid": recid, "errors": [denial]})

    submission = get_latest_hepsubmission(publication_recid=recid)
    unload_submission(recid, submission.version)

    if submission.version == 1:
        AdminIndexer().find_and_delete('recid', recid)

    return json.dumps({"success": True,
                       "recid": recid,
                       "errors": ["Record successfully removed!"]})
Esempio n. 20
0
def render_record(recid, record, version, output_format, light_mode=False):
    """
    Renders a publication record, or the publication that a data record
    belongs to, as JSON or HTML.

    :param recid: id of the requested record
    :param record: contents of the requested record
    :param version: submission version to show; -1 is replaced by the
        number of versions the current user may see
    :param output_format: "json" for a JSON response, anything else for HTML
    :param light_mode: passed to process_ctx when producing JSON
    :return: JSON response or rendered template; the 404 template when the
        related publication cannot be resolved
    """
    # Users without permission on the record only count finished versions.
    if user_allowed_to_perform_action(recid):
        version_count = HEPSubmission.query.filter_by(
            publication_recid=recid).count()
    else:
        version_count = HEPSubmission.query.filter_by(
            publication_recid=recid, overall_status='finished').count()

    if version == -1:
        version = version_count

    hepdata_submission = get_latest_hepsubmission(publication_recid=recid, version=version)

    if hepdata_submission is not None:
        ctx = format_submission(recid, record, version, version_count, hepdata_submission)
        increment(recid)
        if output_format == "json":
            ctx = process_ctx(ctx, light_mode)
            return jsonify(ctx)
        else:
            return render_template('hepdata_records/publication_record.html',
                                   ctx=ctx)

    else:  # this happens when we access an id of a data record
        # in which case, we find the related publication, and
        # make the front end focus on the relevant data table.
        try:
            publication_recid = int(record['related_publication'])
            publication_record = get_record_contents(publication_recid)

            # Submissions are looked up by publication_recid; a 'recid'
            # keyword does not match the field used by every other lookup.
            hepdata_submission = get_latest_hepsubmission(
                publication_recid=publication_recid)

            ctx = format_submission(publication_recid, publication_record,
                                    hepdata_submission.version, 1, hepdata_submission,
                                    data_table=record['title'])
            ctx['related_publication_id'] = publication_recid
            ctx['table_name'] = record['title']

            if output_format == "json":
                ctx = process_ctx(ctx, light_mode)

                return jsonify(ctx)
            else:
                return render_template('hepdata_records/data_record.html', ctx=ctx)
        except Exception:
            return render_template('hepdata_theme/404.html')
0
def create_record_for_dashboard(record_id,
                                submissions,
                                current_user,
                                coordinator=None,
                                user_role=None,
                                status="todo"):
    """
    Adds a record's dashboard entry to ``submissions``, or records an
    additional role on an entry that is already there.

    Nothing happens when the publication record cannot be found.

    :param record_id: publication record id, also the key in ``submissions``
    :param submissions: dict of dashboard entries, modified in place
    :param current_user: user viewing the dashboard
    :param coordinator: coordinator of the record, if any
    :param user_role: role(s) of the user; defaults to ["coordinator"]
    :param status: status stored on a newly created entry
    """
    if user_role is None:
        user_role = ["coordinator"]

    publication_record = get_record_by_id(int(record_id))
    if publication_record is None:
        return

    if record_id in submissions:
        # Already listed: the user has a second role for this submission,
        # so show that one as well.
        known_roles = submissions[record_id]["metadata"]["role"]
        if user_role not in known_roles:
            known_roles.append(user_role)
        return

    latest_submission = get_latest_hepsubmission(publication_recid=record_id)

    entry = {}
    submissions[record_id] = entry
    metadata = {
        "recid": record_id,
        "role": user_role,
        "start_date": publication_record.created
    }
    entry["metadata"] = metadata
    metadata["versions"] = latest_submission.version
    entry["status"] = status
    entry["stats"] = {"passed": 0, "attention": 0, "todo": 0}

    if coordinator:
        metadata["coordinator"] = {
            'id': coordinator.id,
            'name': coordinator.email,
            'email': coordinator.email
        }
        viewer_id = int(current_user.get_id())
        metadata["show_coord_view"] = viewer_id == coordinator.id
    else:
        metadata["coordinator"] = {'name': 'No coordinator'}

    if "title" in publication_record:
        metadata["title"] = publication_record['title']

    lacks_inspire_id = ("inspire_id" not in publication_record
                        or publication_record["inspire_id"] is None)
    if lacks_inspire_id:
        metadata["requires_inspire_id"] = True
Esempio n. 22
0
def update_analyses():
    """
    Attaches analyses published by the configured endpoints to the
    matching finished submissions.

    For each entry of the ANALYSES_ENDPOINTS setting that has an
    'endpoint_url', the URL is fetched and every listed analysis that is
    not yet a resource of the submission is added, after which the record
    is re-indexed.
    """
    endpoints = current_app.config["ANALYSES_ENDPOINTS"]

    for analysis_endpoint in endpoints:
        endpoint_config = endpoints[analysis_endpoint]

        if "endpoint_url" not in endpoint_config:
            log.debug(
                "No endpoint url configured for {0}".format(analysis_endpoint))
            continue

        log.info("Updating analyses from {0}...".format(analysis_endpoint))
        response = requests.get(endpoint_config["endpoint_url"])
        if not response:
            continue

        analyses = response.json()

        for record in analyses:
            submission = get_latest_hepsubmission(
                inspire_id=record, overall_status='finished')

            if not submission:
                log.debug(
                    "An analysis is available in {0} but with no equivalent in HEPData (ins{1})."
                    .format(analysis_endpoint, record))
                continue

            num_new_resources = 0
            for analysis in analyses[record]:
                resource_url = endpoint_config["url_template"].format(analysis)
                already_added = is_resource_added_to_submission(
                    submission.publication_recid, submission.version,
                    resource_url)
                if already_added:
                    continue

                print('Adding {} analysis to ins{} with URL {}'.format(
                    analysis_endpoint, record, resource_url))
                submission.resources.append(
                    DataResource(file_location=resource_url,
                                 file_type=analysis_endpoint))
                num_new_resources += 1

            if not num_new_resources:
                continue

            try:
                db.session.add(submission)
                db.session.commit()
                index_record_ids([submission.publication_recid])
            except Exception as error:
                db.session.rollback()
                log.error(error)
0
def test_update_record_info(app):
    """Check update_record_info for a missing, a valid and an invalid Inspire ID."""
    # case where Inspire ID is None
    assert update_record_info(None) == 'Inspire ID is None'

    valid_id = '1311487'
    invalid_id = '19999999'

    for inspire_id in (valid_id, invalid_id):
        # before creation of HEPSubmission object
        assert update_record_info(inspire_id) == 'No HEPData submission'

        reviewer = {'name': 'Reviewer', 'email': '*****@*****.**'}
        uploader = {'name': 'Uploader', 'email': '*****@*****.**'}
        submission = process_submission_payload(inspire_id=inspire_id,
                                                submitter_id=1,
                                                reviewer=reviewer,
                                                uploader=uploader,
                                                send_upload_email=False)

        # Process the files to create DataSubmission tables in the DB.
        base_dir = os.path.dirname(os.path.realpath(__file__))
        directory = os.path.join(base_dir, 'test_data/test_submission')
        tmp_path = os.path.join(tempfile.mkdtemp(dir=CFG_TMPDIR),
                                'test_submission')
        shutil.copytree(directory, tmp_path)
        submission_yaml = os.path.join(tmp_path, 'submission.yaml')
        process_submission_directory(tmp_path, submission_yaml,
                                     submission.publication_recid)
        do_finalise(submission.publication_recid,
                    force_finalise=True,
                    convert=False)

        if inspire_id != invalid_id:
            # First change the publication information to that of a
            # different record.
            other_record_information, status = get_inspire_record_information(
                '1650066')
            assert status == 'success'
            hep_submission = get_latest_hepsubmission(inspire_id=inspire_id)
            assert hep_submission is not None
            update_record(hep_submission.publication_recid,
                          other_record_information)

            # Then the update should succeed once, and a second call should
            # find the information already current.
            assert update_record_info(inspire_id, send_email=True) == 'Success'
            assert update_record_info(inspire_id) == 'No update needed'
        else:
            assert update_record_info(inspire_id) == 'Invalid Inspire ID'

        unload_submission(submission.publication_recid)
Esempio n. 24
0
def send_email(inspireids):
    """Send the "finalised" email for each finished record in ``inspireids``.

    Intended for cases where the announcement was not sent automatically.

    :param inspireids: string of Inspire IDs, split up by
        ``parse_inspireids_from_string``; every ``ins`` substring is removed
        from an ID before the lookup.
    """
    for raw_id in parse_inspireids_from_string(inspireids):
        finished = get_latest_hepsubmission(
            inspire_id=raw_id.replace("ins", ""),
            overall_status='finished')
        if not finished:
            # Report using the ID exactly as it was supplied.
            print("No records found for Inspire ID {}".format(raw_id))
            continue
        send_finalised_email(finished)
Esempio n. 25
0
def process_payload(recid, file, redirect_url, synchronous=False):
    """Save an uploaded file and hand it over for processing.

    :param recid: int
        The id of the record to update
    :param file: file
        The uploaded file to process
    :param redirect_url: string
        Redirect URL to record, for use if the upload fails or in synchronous mode
    :param synchronous: bool
        If False (default) processing is queued via ``process_saved_file.delay``;
        if True it runs immediately (only recommended for tests)
    :return: JSONResponse either containing 'url' (for success cases) or
             'message' (for error cases, which will give a 400 error).
    """
    # Reject missing files and unsupported extensions up front.
    if not (file and allowed_file(file.filename)):
        return jsonify({
            "message":
            "You must upload a .zip, .tar, .tar.gz or .tgz file" +
            " (or a .oldhepdata or single .yaml or .yaml.gz file)."
        }), 400

    saved_path = save_zip_file(file, recid)
    current = get_latest_hepsubmission(publication_recid=recid)

    if current.overall_status == 'finished':
        # An upload against a finished submission reopens it for revision:
        # a new HEPSubmission with the next version number takes over.
        revision = HEPSubmission(
            publication_recid=recid,
            overall_status='todo',
            inspire_id=current.inspire_id,
            coordinator=current.coordinator,
            version=current.version + 1)
        db.session.add(revision)
        current = revision

    # Remember the status before flagging the submission as being processed.
    previous_status = current.overall_status
    if previous_status == 'sandbox':
        current.overall_status = 'sandbox_processing'
    else:
        current.overall_status = 'processing'
    db.session.add(current)
    db.session.commit()

    task_args = (saved_path, recid, current_user.get_id(), redirect_url,
                 previous_status)
    if synchronous:
        process_saved_file(*task_args)
    else:
        process_saved_file.delay(*task_args)
        flash(
            'File saved. You will receive an email when the file has been processed.',
            'info')

    return jsonify({'url': redirect_url.format(recid)})
Esempio n. 26
0
def download_submission_with_inspire_id(*args, **kwargs):
    """
    Look up a submission by Inspire ID and pass it to ``download_submission``
    in the requested format.  Routes:\n
    ``/submission/<inspire_id>/<file_format>``\n
    ``/submission/<inspire_id>/<int:version>/<file_format>``\n
    ``/submission/<inspire_id>/<int:version>/<file_format>/<rivet>``

    :param inspire_id: inspire id (any ``ins`` substring is removed)
    :param version: version of submission to export. If absent, the latest
        allowed version is used (or 1 if there are no allowed versions).
    :param file_format: yaml, csv, root, or yoda
    :param rivet: Rivet analysis name to override default written in YODA export
    :return: result of ``download_submission``, or an error page when no
        matching submission exists
    """
    inspire_id = kwargs.pop('inspire_id')
    if 'ins' in inspire_id:
        inspire_id = inspire_id.replace('ins', '')

    submission = get_latest_hepsubmission(inspire_id=inspire_id)
    if not submission:
        message = "A submission with Inspire ID {0} does not exist".format(
            inspire_id)
        return display_error(title="No submission found",
                             description=message)

    version_count, version_count_all = get_version_count(
        submission.publication_recid)

    if 'version' in kwargs:
        version = kwargs.pop('version')
    elif version_count:
        version = version_count
    else:
        version = 1

    if version_count < version_count_all and version == version_count_all:
        # The newest version exists but is not among the versions this user
        # is allowed to see.
        abort(403)
    elif version < version_count_all:
        # An older version was requested: fetch that exact one instead.
        submission = HEPSubmission.query.filter_by(
            inspire_id=inspire_id, version=version).first()

    if not submission:
        message = \
            "A submission with Inspire ID {0} and version {1} does not exist".format(
                inspire_id, version)
        return display_error(title="No submission found",
                             description=message)

    file_format = kwargs.pop('file_format')
    rivet_name = kwargs.pop('rivet', '')
    return download_submission(submission,
                               file_format,
                               rivet_analysis_name=rivet_name)
Esempio n. 27
0
def has_upload_permissions(recid, user, is_sandbox=False):
    """Return whether ``user`` may upload files for record ``recid``.

    Access is granted when any of the following holds:

    * the user has the ``admin`` role;
    * ``is_sandbox`` is true and the user is the coordinator of the latest
      submission for the record with status ``sandbox``;
    * ``is_sandbox`` is false and the user is a primary uploader of the record.

    :param recid: publication record id
    :param user: user object (``user.id`` is compared against the stored ids)
    :param is_sandbox: check sandbox ownership instead of participant roles
    :return: bool
    """
    if has_role(user, 'admin'):
        return True

    if is_sandbox:
        hepsubmission_record = get_latest_hepsubmission(
            publication_recid=recid, overall_status='sandbox')
        return (hepsubmission_record is not None
                and hepsubmission_record.coordinator == user.id)

    participant = SubmissionParticipant.query.filter_by(
        user_account=user.id, role='uploader',
        publication_recid=recid, status='primary').first()

    # Always answer with a real bool: the function used to fall off the end
    # and return None when no matching participant was found.
    return participant is not None
Esempio n. 28
0
def test_create_new_version(app, load_default_data, identifiers, mocker):
    """create_new_version() succeeds on finished record 1, then is refused."""
    assert get_latest_hepsubmission(publication_recid=1).version == 1

    # Register a primary uploader for the record
    primary_uploader = SubmissionParticipant(publication_recid=1,
                                             email='*****@*****.**',
                                             role='uploader',
                                             status='primary')
    db.session.add(primary_uploader)
    db.session.commit()

    acting_user = User.query.first()

    # Replace `send_cookie_email` with a mock so the call can be inspected
    email_mock = mocker.patch(
        'hepdata.modules.records.api.send_cookie_email')

    # First attempt: the record is finished, so a new version is created
    response = create_new_version(1, acting_user, uploader_message="Hello!")
    assert response.status_code == 200
    assert response.json == {'success': True, 'version': 2}

    # The latest submission is now version 2, waiting in 'todo'
    latest = get_latest_hepsubmission(publication_recid=1)
    assert latest.version == 2
    assert latest.overall_status == 'todo'

    # The uploader should have been emailed about the new version
    email_mock.assert_called_with(primary_uploader,
                                  get_record_by_id(1),
                                  message="Hello!",
                                  version=2)

    # Second attempt: refused because the latest version is still 'todo'
    response, status_code = create_new_version(1, acting_user)
    assert status_code == 400
    assert response.json == {
        'message': 'Rec id 1 is not finished so cannot create a new version'
    }
Esempio n. 29
0
def send_tweet(inspireids):
    """Tweet about each finished record in ``inspireids``.

    Intended for cases where the announcement was not sent automatically.

    :param inspireids: string of Inspire IDs, split up by
        ``parse_inspireids_from_string``
    """
    for raw_id in parse_inspireids_from_string(inspireids):
        finished = get_latest_hepsubmission(
            inspire_id=raw_id.replace("ins", ""),
            overall_status='finished')
        if not finished:
            print("No records found for Inspire ID {}".format(raw_id))
            continue

        record = get_record_by_id(finished.publication_recid)
        # The record link is built from the configured site URL.
        base_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
        record_url = base_url + '/record/ins{0}'.format(record['inspire_id'])
        tweet(record['title'], record['collaborations'], record_url,
              record['version'])
Esempio n. 30
0
def test_tables(app, live_server, env_browser):
    """E2E test of table selection and table links on a record page.

    Imports record ins1206352, opens it from the home page, switches between
    tables, follows table links (exact, with spaces removed, and invalid),
    and finally removes the record again.
    """
    browser = env_browser

    # Import record with non-default table names
    import_default_data(app, [{'hepdata_id': 'ins1206352'}])

    try:
        # Start from the home page and make sure we actually landed there.
        browser.get(flask.url_for('hepdata_theme.index', _external=True))
        assert (flask.url_for('hepdata_theme.index', _external=True)
                in browser.current_url)

        # Open the record via the "latest record" entry; the pointer is moved
        # over the element before clicking it.
        latest_item = browser.find_element_by_css_selector(
            '.latest-record .title')
        actions = ActionChains(browser)
        actions.move_to_element(latest_item).perform()
        latest_item.click()

        # Check current table name
        assert (browser.find_element_by_id('table_name').text ==
                'Figure 8 panel (a)')

        # Check switching tables works as expected
        # (index 2 is the third entry in the table list)
        new_table = browser.find_elements_by_css_selector(
            '#table-list li h4')[2]
        assert (new_table.text == "Figure 8 panel (c)")
        new_table.click()
        # NOTE(review): _check_table_links is defined elsewhere; presumably it
        # verifies the page shows the named table - confirm against the helper.
        _check_table_links(browser, "Figure 8 panel (c)")

        # Get link to table from table page
        # (read from the copy-to-clipboard button's data attribute)
        table_link = browser.find_element_by_css_selector('#data_link_container button') \
            .get_attribute('data-clipboard-text')
        assert (table_link.endswith('table=Figure%208%20panel%20(c)'))
        _check_table_links(browser, "Figure 8 panel (c)", url=table_link)

        # Check a link to a table name with spaces removed
        short_table_link = table_link.replace('%20', '')
        _check_table_links(browser, "Figure 8 panel (c)", url=short_table_link)

        # Check a link to an invalid table
        # (the expected table name passed here is the first one, panel (a))
        invalid_table_link = table_link.replace('Figure%208%20panel%20(c)',
                                                'NotARealTable')
        _check_table_links(browser,
                           "Figure 8 panel (a)",
                           url=invalid_table_link)

    finally:
        # Delete record and reindex so added record doesn't affect other tests
        submission = get_latest_hepsubmission(inspire_id='1206352')
        unload_submission(submission.publication_recid)
        reindex_all(recreate=True)
Esempio n. 31
0
def convert_and_store(inspire_id, file_format, force):
    """
    Convert the latest submission for an Inspire ID to ``file_format`` by
    calling ``download_submission`` in offline mode.

    :param inspire_id: Inspire ID used to look up the submission
    :param file_format: target format for the conversion
    :param force: passed through to ``download_submission``
    :return: None
    """
    submission = get_latest_hepsubmission(inspire_id=inspire_id)
    if not submission:
        print("Unable to find a matching submission for {0}".format(inspire_id))
        return

    print("Creating {0} conversion for ins{1}".format(file_format, inspire_id))
    download_submission(submission, file_format, offline=True, force=force)
Esempio n. 32
0
def create_record_for_dashboard(record_id, submissions, primary_uploader=None,
                                primary_reviewer=None, coordinator=None,
                                user_role=None,
                                status="todo"):
    """Add (or extend) the dashboard entry for ``record_id`` in ``submissions``.

    Nothing happens when the publication record cannot be found.  For a new
    record an entry with ``metadata``, ``status`` and ``stats`` is created;
    for a record already present only ``user_role`` is added to its roles.

    :param record_id: id of the publication record (also the key used in
        ``submissions``)
    :param submissions: mapping of record id to dashboard entry; modified
        in place
    :param primary_uploader: not used by this function
    :param primary_reviewer: not used by this function
    :param coordinator: coordinator user object, if any
    :param user_role: role(s) of the current user; defaults to
        ``["coordinator"]``
    :param status: value stored as the entry's ``status``
    :return: None
    """
    if user_role is None:
        user_role = ["coordinator"]

    publication_record = get_record_by_id(int(record_id))
    if publication_record is None:
        return

    if record_id in submissions:
        # Already listed: the user holds a second role for this submission,
        # so record that role as well.
        known_roles = submissions[record_id]["metadata"]["role"]
        if user_role not in known_roles:
            known_roles.append(user_role)
        return

    latest_submission = get_latest_hepsubmission(publication_recid=record_id)

    entry = submissions[record_id] = {}
    metadata = entry["metadata"] = {"recid": record_id,
                                    "role": user_role,
                                    "start_date": publication_record.created}
    metadata["versions"] = latest_submission.version
    entry["status"] = status
    entry["stats"] = {"passed": 0, "attention": 0, "todo": 0}

    if coordinator:
        metadata["coordinator"] = {'id': coordinator.id,
                                   'name': coordinator.email,
                                   'email': coordinator.email}
        # True when the logged-in user is the coordinator.
        metadata["show_coord_view"] = \
            int(current_user.get_id()) == coordinator.id
    else:
        metadata["coordinator"] = {'name': 'No coordinator'}

    if "title" in publication_record:
        metadata["title"] = publication_record['title']

    if "inspire_id" not in publication_record \
            or publication_record["inspire_id"] is None:
        metadata["requires_inspire_id"] = True
Esempio n. 33
0
def convert_and_store(inspire_id, file_format, force):
    """
    Run an offline conversion of the latest submission for ``inspire_id``.

    The submission is handed to ``download_submission`` with ``offline=True``;
    a message is printed either way.

    :param inspire_id: Inspire ID used to look up the submission
    :param file_format: target format for the conversion
    :param force: passed through to ``download_submission``
    :return: None
    """
    latest = get_latest_hepsubmission(inspire_id=inspire_id)

    if latest:
        progress = "Creating {0} conversion for ins{1}".format(
            file_format, inspire_id)
        print(progress)
        download_submission(latest, file_format, offline=True, force=force)
        return

    failure = "Unable to find a matching submission for {0}".format(inspire_id)
    print(failure)
Esempio n. 34
0
def add_analyses(doc):
    """
    Attach the analysis resources of the latest submission to ``doc``.

    ``doc["analyses"]`` is set to a list with one ``{'type', 'analysis'}``
    entry per resource whose file type is a key of the ``ANALYSES_ENDPOINTS``
    setting.  ``doc`` is left untouched when no submission is found.

    TODO: Generalise for badges other than Rivet

    :param doc: document with a ``recid`` key; modified in place
    :return: None
    """
    submission = get_latest_hepsubmission(publication_recid=doc['recid'])
    if not submission:
        return

    doc["analyses"] = []
    for resource in submission.resources:
        if resource.file_type not in current_app.config['ANALYSES_ENDPOINTS']:
            continue
        doc["analyses"].append({
            'type': resource.file_type,
            'analysis': resource.file_location
        })
Esempio n. 35
0
File: cli.py Progetto: ruphy/hepdata
def send_tweet(inspireids):
    """
    Tweet about each record in ``inspireids`` (for cases where the
    announcement was not sent automatically).

    :param inspireids: record IDs to tweet, as accepted by
        ``parse_inspireids_from_string``
    """
    for raw_id in parse_inspireids_from_string(inspireids):
        found = get_latest_hepsubmission(inspire_id=raw_id.replace("ins", ""))
        if not found:
            print("No records found for Inspire ID {}".format(raw_id))
            continue

        record = get_record_by_id(found.publication_recid)
        record_url = "http://www.hepdata.net/record/ins{0}".format(
            record['inspire_id'])
        tweet(record['title'], record['collaborations'], record_url,
              record['version'])
Esempio n. 36
0
def process_general_submission_info(basepath, submission_info_document, recid):
    """
    Copy the top-level submission information onto the latest HEPSubmission
    of a record and commit it.

    Handles the data abstract (``comment``), the last-updated date
    (``dateupdated``), the modification history (``modifications``) and the
    additional resources (``additional_resources``).

    :param basepath: the path the submission has been loaded to
    :param submission_info_document: the data document
    :param recid: publication record id
    :return: None
    """
    info = submission_info_document
    submission = get_latest_hepsubmission(publication_recid=recid)

    if "comment" in info:
        submission.data_abstract = encode_string(info['comment'])

    # Use the supplied date when it parses; otherwise fall back to "now".
    if "dateupdated" in info:
        try:
            updated_at = parse(info['dateupdated'], dayfirst=True)
        except ValueError:
            updated_at = datetime.now()
    else:
        updated_at = datetime.now()
    submission.last_updated = updated_at

    if "modifications" in info:
        parse_modifications(submission, recid, info)

    if 'additional_resources' in info:
        # Existing resources are deleted and replaced by the ones listed in
        # the document.
        for stale in submission.resources:
            db.session.delete(stale)

        for fresh in parse_additional_resources(basepath, recid, info):
            submission.resources.append(fresh)

    db.session.add(submission)
    db.session.commit()
Esempio n. 37
0
def add_analyses(doc):
    """
    Attach the analysis resources of the latest submission to ``doc``.

    ``doc["analyses"]`` is set to a list with one ``{'type', 'analysis'}``
    entry per resource whose file type is a key of the ``ANALYSES_ENDPOINTS``
    setting.  ``doc`` is left untouched when no submission is found.

    TODO: Generalise for badges other than Rivet

    :param doc: document with a ``recid`` key; modified in place
    :return: None
    """
    # Author's outstanding notes (not implemented here): look analyses up
    # from http://rivet.hepforge.org/list_of_analyses.json for HEPforge with
    # a single lookup, refreshed once per day and cached in REDIS.

    submission = get_latest_hepsubmission(publication_recid=doc['recid'])
    if not submission:
        return

    analyses = doc["analyses"] = []
    for resource in submission.resources:
        if resource.file_type not in current_app.config['ANALYSES_ENDPOINTS']:
            continue
        analyses.append({'type': resource.file_type,
                         'analysis': resource.file_location})
Esempio n. 38
0
def delete_all_files(rec_id, check_old_data_paths=True):
    """
    Remove the data directories of a record (covering ALL of its versions).

    :param rec_id: record id whose data directory is removed
    :param check_old_data_paths: also remove the old-style data paths, keyed
        by the record id and (when known) by ``ins<inspire_id>``
    :return: None
    """
    candidate_dirs = [get_data_path_for_record(rec_id)]

    if check_old_data_paths:
        candidate_dirs.append(get_old_data_path_for_record(rec_id))
        latest = get_latest_hepsubmission(publication_recid=rec_id)
        if latest and latest.inspire_id is not None:
            legacy_key = 'ins%s' % latest.inspire_id
            candidate_dirs.append(get_old_data_path_for_record(legacy_key))

    for directory in candidate_dirs:
        log.debug("Scanning directory: %s" % directory)

        # Paths that do not exist (or are not directories) are skipped.
        if not os.path.isdir(directory):
            continue

        log.debug("Removing %s" % directory)
        shutil.rmtree(directory)
Esempio n. 39
0
def get_coordinator_view(recid):
    """Return a JSON string listing the reviewers and uploaders of a record.

    Participants of the latest submission are grouped by role
    (``reviewer``/``uploader``) and status (``primary``/``reserve``).

    :param recid: publication record id
    :return: JSON string with ``recid`` and the four participant lists
    """
    # there should only ever be one rev
    latest = get_latest_hepsubmission(publication_recid=recid)

    grouped = {}
    for role in ("reviewer", "uploader"):
        grouped[role] = {"reserve": [], "primary": []}

    for person in latest.participants:
        bucket = grouped[person.role][person.status]
        bucket.append({"full_name": person.full_name,
                       "email": person.email,
                       "id": person.id})

    reviewers = grouped["reviewer"]
    uploaders = grouped["uploader"]
    payload = {
        "recid": recid,
        "primary-reviewers": reviewers["primary"],
        "reserve-reviewers": reviewers["reserve"],
        "primary-uploaders": uploaders["primary"],
        "reserve-uploaders": uploaders["reserve"],
    }
    return json.dumps(payload)
Esempio n. 40
0
def send_coordinator_notification_email(recid, version, user, message=None):
    """
    Email the coordinator of a record that the submission is ready to be
    finalised.

    :param recid: publication record id
    :param version: version number passed to the email template
    :param user: user object; its email is shown as the actor
    :param message: message to send
    :raises NoParticipantsException: if the coordinator cannot be found
    :return: None
    """
    latest = get_latest_hepsubmission(publication_recid=recid)
    coordinator_id = latest.coordinator

    coordinator = get_user_from_id(coordinator_id)
    if not coordinator:
        raise NoParticipantsException()

    site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
    record = get_record_by_id(recid)

    # Address the coordinator by profile name when a profile exists,
    # otherwise by email address.
    profile = UserProfile.get_by_userid(coordinator_id)
    display_name = profile.full_name if profile else coordinator.email

    collaboration = _get_collaboration(coordinator_id)

    template_context = dict(
        name=display_name,
        actor=user.email,
        collaboration=collaboration,
        article=recid,
        version=version,
        message=message,
        title=record['title'],
        site_url=site_url,
        link=site_url + "/record/{0}".format(recid),
        dashboard_link=site_url + "/dashboard",
    )
    message_body = render_template('hepdata_theme/email/passed_review.html',
                                   **template_context)

    subject = '[HEPData] Submission {0} is ready to be finalised'.format(recid)
    create_send_email_task(coordinator.email, subject, message_body)
Esempio n. 41
0
def get_pending_invitations_for_user(user):
    """List reviewer/uploader invitations for ``user`` that are not yet
    linked to a user account.

    :param user: user object; invitations are matched on ``user.email``
    :return: list of dicts with ``title``, ``invitation_cookie``, ``role``
        and ``coordinator``
    """
    is_reviewer_or_uploader = or_(SubmissionParticipant.role == 'reviewer',
                                  SubmissionParticipant.role == 'uploader')
    # `== None` is intentional: it builds the SQL expression for the filter.
    unclaimed = SubmissionParticipant.user_account == None
    invitations = SubmissionParticipant.query.filter(
        SubmissionParticipant.email == user.email,
        is_reviewer_or_uploader,
        unclaimed
    ).all()

    summaries = []
    for invitation in invitations:
        recid = invitation.publication_recid
        publication = get_record_by_id(recid)
        latest = get_latest_hepsubmission(publication_recid=recid)

        summaries.append({
            'title': encode_string(publication['title'], 'utf-8'),
            'invitation_cookie': invitation.invitation_cookie,
            'role': invitation.role,
            'coordinator': get_user_from_id(latest.coordinator),
        })

    return summaries
Esempio n. 42
0
def process_general_submission_info(basepath, submission_info_document, recid):
    """
    Processes the top level information about a submission,
    extracting the information about the data abstract,
    additional resources for the submission (files, links,
    and html inserts) and historical modification information.

    Nothing is done unless the document contains at least one of the keys
    ``comment``, ``modifications`` or ``record_ids``.

    :param basepath: the path the submission has been loaded to
    :param submission_info_document: the data document
    :param recid: publication record id
    :return: None
    """

    if 'comment' in submission_info_document \
        or 'modifications' in submission_info_document \
        or 'record_ids' in submission_info_document:

        hepsubmission = get_latest_hepsubmission(publication_recid=recid)

        # The outer check also admits documents without a 'comment', so only
        # read it when present instead of raising KeyError.
        if 'comment' in submission_info_document:
            hepsubmission.data_abstract = encode_string(
                submission_info_document['comment'])

        if "dateupdated" in submission_info_document:
            try:
                hepsubmission.last_updated = parse(
                    submission_info_document['dateupdated'], dayfirst=True)
            except ValueError:
                # Unparseable date: fall back to the current time.
                hepsubmission.last_updated = datetime.now()

        if "modifications" in submission_info_document:
            parse_modifications(hepsubmission, recid, submission_info_document)

        if 'additional_resources' in submission_info_document:

            # Existing resources are deleted and replaced by the ones listed
            # in the document.
            for reference in hepsubmission.resources:
                db.session.delete(reference)

            resources = parse_additional_resources(
                basepath, recid, hepsubmission.version,
                submission_info_document)
            for resource in resources:
                hepsubmission.resources.append(resource)

        db.session.add(hepsubmission)
        db.session.commit()
Esempio n. 43
0
def update_analyses():
    """Attach analysis resources listed by the configured endpoints.

    For every entry of the ``ANALYSES_ENDPOINTS`` setting that has an
    ``endpoint_url``, the URL is fetched and its JSON is read as a mapping of
    Inspire ID to analyses.  Each analysis whose resource URL is not yet
    attached to the latest submission for that Inspire ID is added as a
    ``DataResource``.  Database errors are logged and rolled back.
    """
    endpoints = current_app.config['ANALYSES_ENDPOINTS']

    for endpoint_name in endpoints:
        endpoint = endpoints[endpoint_name]

        if 'endpoint_url' not in endpoint:
            log.debug('No endpoint url configured for {0}'.format(endpoint_name))
            continue

        log.info('Updating analyses from {0}...'.format(endpoint_name))

        response = requests.get(endpoint['endpoint_url'])
        if not response:
            continue

        analyses = response.json()

        for inspire_id in analyses:
            submission = get_latest_hepsubmission(inspire_id=inspire_id)

            if not submission:
                log.debug('An analysis is available in {0} but with no equivalent in HEPData (ins{1}).'.format(endpoint_name, inspire_id))
                continue

            for analysis in analyses[inspire_id]:
                resource_url = endpoint['url_template'].format(analysis)
                already_added = is_resource_added_to_submission(
                    submission.publication_recid, submission.version,
                    resource_url)
                if already_added:
                    continue

                submission.resources.append(
                    DataResource(file_location=resource_url,
                                 file_type=endpoint_name))

            # One commit per submission; a failure is logged and rolled back.
            try:
                db.session.add(submission)
                db.session.commit()
            except Exception as e:
                db.session.rollback()
                log.error(e)
Esempio n. 44
0
def test_load_file(app, migrator):
    """Loading ins1487726 creates the submission, index entry and tables."""
    with app.app_context():
        assert migrator.load_file('ins1487726')

        loaded = get_latest_hepsubmission(inspire_id='1487726')
        assert loaded is not None
        assert loaded.inspire_id == '1487726'

        # Exactly one publication document should be indexed for this ID.
        search_result = get_records_matching_field('inspire_id',
                                                   loaded.inspire_id,
                                                   doc_type='publication')
        hits = search_result['hits']['hits']
        assert len(hits) == 1

        recid = loaded.publication_recid
        assert get_record_contents(recid) is not None

        # The record is expected to come with five data submissions.
        table_count = DataSubmission.query.filter_by(
            publication_recid=recid).count()
        assert table_count == 5
Esempio n. 45
0
def download_submission_with_recid(*args, **kwargs):
    """
        Gets the submission file and either serves it back directly from YAML, or converts it
        for other formats.
        :param recid: submissions recid
        :param version: version of submission to export. If absent, returns the latest.
        :param file_format: yaml, csv, ROOT, or YODA
        :return: result of ``download_submission``, or an error page when no
            matching submission exists
    """
    recid = kwargs.pop('recid')
    if 'version' in kwargs:
        submission = HEPSubmission.query.filter_by(publication_recid=recid, version=kwargs.pop('version')) \
            .first()
    else:
        submission = get_latest_hepsubmission(publication_recid=recid)

    if not submission:
        # 'recid' has already been popped from kwargs above; popping it a
        # second time raised KeyError instead of showing this error page.
        return display_error(
            title="No submission found",
            description="A submission with record id {0} does not exist".format(recid)
        )

    return download_submission(submission, kwargs.pop('file_format'))
Esempio n. 46
0
def process_submission_directory(basepath, submission_file_path, recid, update=False, *args, **kwargs):
    """
    Goes through an entire submission directory and processes the
    files within to create DataSubmissions
    with the files and related material attached as DataResources.

    :param basepath: directory that each document's ``data_file`` entry is
        joined onto to locate the data file.
    :param submission_file_path: path of the submission YAML file, or None
        when no such file was found.
    :param recid: publication record id the submission belongs to.
    :param update: when True, a submission whose status is 'finished' is
        processed in place instead of a new version being created.
    :param kwargs: may contain ``user_id``; it is used as the coordinator if a
        HEPSubmission has to be created because none exists for ``recid``.
        Otherwise the id of ``current_user`` is used.
    :return: a dict of validation errors; empty when nothing went wrong.
    """
    if submission_file_path is None:
        # return an error
        return {"submission.yaml": [
            {"level": "error",
             "message": "No submission.yaml file found in submission."}
        ]}

    added_file_names = []
    errors = {}

    # The YAML documents are parsed lazily while being iterated, so the file
    # must stay open until the loop over the documents has finished.
    with open(submission_file_path, 'r') as submission_file:
        submission_file_validator = SubmissionFileValidator()
        is_valid_submission_file = submission_file_validator.validate(
            file_path=submission_file_path)

        data_file_validator = DataFileValidator()

        if not is_valid_submission_file:
            errors = process_validation_errors_for_display(
                submission_file_validator.get_messages())

            submission_file_validator.clear_messages()
            data_file_validator.clear_messages()
            return errors

        # Use the libyaml-backed safe loader when PyYAML provides it and fall
        # back to the pure-Python safe loader otherwise.
        yaml_loader = getattr(yaml, 'CSafeLoader', yaml.SafeLoader)
        submission_processed = yaml.load_all(submission_file, Loader=yaml_loader)

        # process file, extracting contents, and linking
        # the data record with the parent publication
        hepsubmission = get_latest_hepsubmission(publication_recid=recid)
        if hepsubmission is None:
            # There is no earlier submission to copy values from, so create
            # the first one for this record and keep working with it.
            # NOTE(review): version=1 and an unset inspire_id are assumed to
            # be the right starting values - confirm against the model.
            if 'user_id' in kwargs:
                coordinator = kwargs.get('user_id')
            else:
                coordinator = int(current_user.get_id())
            hepsubmission = HEPSubmission(publication_recid=recid,
                                          overall_status='todo',
                                          coordinator=coordinator,
                                          version=1)
            db.session.add(hepsubmission)

        # On a new upload, we reset the flag to notify reviewers
        hepsubmission.reviewers_notified = False

        # if it is finished and we receive an update,
        # then we need to reopen the submission to allow for revisions.
        if hepsubmission.overall_status == 'finished' and not update:
            # we create a new HEPSubmission object
            _rev_hepsubmission = HEPSubmission(publication_recid=recid,
                                               overall_status='todo',
                                               inspire_id=hepsubmission.inspire_id,
                                               coordinator=hepsubmission.coordinator,
                                               version=hepsubmission.version + 1)
            db.session.add(_rev_hepsubmission)
            hepsubmission = _rev_hepsubmission

        reserve_doi_for_hepsubmission(hepsubmission)

        for yaml_document in submission_processed:
            if 'record_ids' in yaml_document or 'comment' in yaml_document or 'modifications' in yaml_document:
                # comments are only present in the general submission
                # information document.
                process_general_submission_info(basepath, yaml_document, recid)
                continue

            table_name = encode_string(yaml_document["name"])
            existing_datasubmission_query = DataSubmission.query \
                .filter_by(name=table_name,
                           publication_recid=recid,
                           version=hepsubmission.version)

            added_file_names.append(yaml_document["name"])

            if existing_datasubmission_query.count() == 0:
                datasubmission = DataSubmission(
                    publication_recid=recid,
                    name=table_name,
                    description=encode_string(
                        yaml_document["description"]),
                    version=hepsubmission.version)
            else:
                datasubmission = existing_datasubmission_query.one()
                datasubmission.description = encode_string(
                    yaml_document["description"])

            db.session.add(datasubmission)

            main_file_path = os.path.join(basepath,
                                          yaml_document["data_file"])

            if data_file_validator.validate(file_path=main_file_path):
                process_data_file(recid, hepsubmission.version, basepath, yaml_document,
                                  datasubmission, main_file_path)
            else:
                # Merge instead of overwrite so that problems found in
                # earlier data files are not lost.
                # NOTE(review): assumes the helper returns a dict - confirm.
                errors.update(process_validation_errors_for_display(
                    data_file_validator.get_messages()))

                data_file_validator.clear_messages()

    cleanup_submission(recid, hepsubmission.version,
                       added_file_names)

    db.session.commit()

    if not errors:
        package_submission(basepath, recid, hepsubmission)
        reserve_dois_for_data_submissions(recid, hepsubmission.version)

        admin_indexer = AdminIndexer()
        admin_indexer.index_submission(hepsubmission)

    # we return all the errors collectively.
    # This makes more sense than returning errors as
    # soon as problems are found on one file.
    return errors