Python encode_string Examples, hepdata.modules.records.utils.common.encode_string Python Examples

Example #1

0

Show file

File: api.py Project: HEPData/hepdata3

def send_cookie_email(submission_participant,
                      record_information, message=None):
    """
    Render the invitation email for a participant and hand it to
    create_send_email_task.

    The record title is passed through encode_string. If a
    UnicodeDecodeError is raised while encoding or rendering, the email is
    rendered again with the title set to None.

    :param submission_participant: object providing full_name, role,
        invitation_cookie, email and publication_recid
    :param record_information: mapping with a 'title' entry
    :param message: optional message forwarded to the template
    """

    def render_invite(title):
        # Single place that knows the template and its context variables.
        return render_template(
            'hepdata_theme/email/invite.html',
            name=submission_participant.full_name,
            role=submission_participant.role,
            title=title,
            site_url=current_app.config.get('SITE_URL', 'https://www.hepdata.net'),
            invite_token=submission_participant.invitation_cookie,
            message=message)

    try:
        message_body = render_invite(
            encode_string(record_information['title'], 'utf-8'))
    except UnicodeDecodeError:
        # Fall back to an invitation without a title.
        message_body = render_invite(None)

    subject = "[HEPData] Invitation to be a {0} of record {1} in HEPData".format(
        submission_participant.role,
        submission_participant.publication_recid)

    create_send_email_task(submission_participant.email, subject, message_body)

Example #2

0

Show file

File: api.py Project: drjova/hepdata

def send_cookie_email(submission_participant,
                      record_information,
                      message=None):
    """
    Build the invitation email body from the invite template and pass it,
    with the recipient and subject, to create_send_email_task.

    When a UnicodeDecodeError is raised while encoding the title or
    rendering the template, the body is rendered a second time with
    title=None.

    :param submission_participant: object providing full_name, role,
        invitation_cookie, email and publication_recid
    :param record_information: mapping with a 'title' entry
    :param message: optional message forwarded to the template
    """
    template_name = 'hepdata_theme/email/invite.html'

    # Everything the template needs apart from the title.
    shared_context = {
        'name': submission_participant.full_name,
        'role': submission_participant.role,
        'site_url': current_app.config.get('SITE_URL',
                                           'https://www.hepdata.net'),
        'invite_token': submission_participant.invitation_cookie,
        'message': message,
    }

    try:
        encoded_title = encode_string(record_information['title'])
        message_body = render_template(
            template_name, title=encoded_title, **shared_context)
    except UnicodeDecodeError:
        message_body = render_template(
            template_name, title=None, **shared_context)

    recipient = submission_participant.email
    subject = "[HEPData] Invitation to be a {0} of record {1} in HEPData".format(
        submission_participant.role,
        submission_participant.publication_recid)

    create_send_email_task(recipient, subject, message_body)

Example #3

0

Show file

File: api.py Project: ruphy/hepdata

def get_pending_invitations_for_user(user):
    """
    Returns the reviewer/uploader invitations addressed to the user's email
    that have no user account attached.

    :param user: User object; only its email attribute is read
    :return: list of dicts with the keys 'title', 'invitation_cookie',
        'role' and 'coordinator'
    """
    invited_role = or_(SubmissionParticipant.role == 'reviewer',
                       SubmissionParticipant.role == 'uploader')

    # '== None' is kept on purpose: it is part of the query filter
    # expression, not a plain Python comparison.
    unclaimed = SubmissionParticipant.query.filter(
        SubmissionParticipant.email == user.email,
        invited_role,
        SubmissionParticipant.user_account == None).all()

    invitations = []

    for participant in unclaimed:
        recid = participant.publication_recid
        record = get_record_by_id(recid)
        submission = get_latest_hepsubmission(publication_recid=recid)
        coordinator = get_user_from_id(submission.coordinator)

        entry = {}
        entry['title'] = encode_string(record['title'], 'utf-8')
        entry['invitation_cookie'] = participant.invitation_cookie
        entry['role'] = participant.role
        entry['coordinator'] = coordinator
        invitations.append(entry)

    return invitations

Example #4

0

Show file

def add_data_review_messsage(publication_recid, data_recid):
    """
    Adds a new review message for a data submission.

    The message text and the version are read from the current request
    form and the author is the current user. If no DataReview can be
    fetched for (data_recid, version), one is created first. After the
    commit the participant action is updated and a notification email is
    sent.

    :param publication_recid: record id of the publication
    :param data_recid: record id of the data submission being reviewed
    :return: JSON string with the review status and the stored message
    :raises Exception: anything raised while storing or notifying is
        re-raised after the database session has been rolled back
    """
    message = encode_string(request.form.get('message', ''))
    version = request.form['version']
    userid = current_user.get_id()

    try:
        datareview_query = DataReview.query.filter_by(data_recid=data_recid,
                                                      version=version)

        # if the data review is not already created, create one.
        try:
            data_review_record = datareview_query.one()
        except Exception:
            # NOTE(review): still broad - this probably only needs to
            # handle the "no row found" error raised by .one(); narrow it
            # once the right exception class is imported in this module.
            # Unlike the previous bare except, SystemExit and
            # KeyboardInterrupt are no longer swallowed.
            data_review_record = create_data_review(data_recid,
                                                    publication_recid)

        data_review_message = Message(user=userid, message=message)
        data_review_record.messages.append(data_review_message)

        db.session.commit()

        current_user_obj = get_user_from_id(userid)

        update_action_for_submission_participant(publication_recid, userid,
                                                 'reviewer')
        send_new_review_message_email(data_review_record, data_review_message,
                                      current_user_obj)

        return json.dumps(
            {
                "publication_recid": data_review_record.publication_recid,
                "data_recid": data_review_record.data_recid,
                "status": data_review_record.status,
                "message": decode_string(data_review_message.message),
                "post_time": data_review_message.creation_date,
                'user': current_user_obj.email
            },
            default=default_time)
    except Exception:
        db.session.rollback()
        # Bare raise keeps the original exception and traceback intact.
        raise

Example #5

0

Show file

def send_email(destination, subject, message, reply_to_address=None):
    """
    Send an HTML email to the given destination(s) and to the admin address.

    :param destination: recipient address, or several separated by commas
    :param subject: subject line; passed through encode_string
    :param message: HTML body; passed through encode_string
    :param reply_to_address: value for the 'From' header; when falsy the
        configured MAIL_DEFAULT_SENDER is used instead
    :raises Exception: any error other than SMTPRecipientsRefused is
        re-raised after a short message has been printed
    """
    connection = None
    try:
        connection = connect()
        mmp_msg = MIMEMultipart('alternative')
        mmp_msg['Subject'] = encode_string(subject)
        mmp_msg['From'] = (reply_to_address or
                           current_app.config['MAIL_DEFAULT_SENDER'])
        mmp_msg['To'] = destination

        part1 = MIMEText(encode_string(message), 'html')
        mmp_msg.attach(part1)

        # The admin address always receives a copy.
        recipients = destination.split(',')
        recipients.append(current_app.config['ADMIN_EMAIL'])

        connection.sendmail(current_app.config['MAIL_DEFAULT_SENDER'],
                            recipients, mmp_msg.as_string())
        connection.quit()
        # Closed cleanly; nothing left for the cleanup below to do.
        connection = None
    except SMTPRecipientsRefused as smtp_error:
        send_error_mail(smtp_error)
    except Exception:
        print('Exception occurred.')
        raise
    finally:
        if connection is not None:
            # A failure above used to leave the connection open. Closing
            # is best-effort so it never masks the original error.
            try:
                connection.quit()
            except Exception:
                pass

Example #6

0

Show file

def process_general_submission_info(basepath, submission_info_document, recid):
    """
    Applies the top-level submission document to the latest HEPSubmission
    of a record: the data abstract (taken from 'comment'), the
    last-updated date, the modification history and the additional
    resources (files, links and html inserts).

    :param basepath: the path the submission has been loaded to
    :param submission_info_document: the data document
    :param recid: publication record id
    :return: None; the updated submission is committed to the session
    """
    document = submission_info_document
    submission = get_latest_hepsubmission(publication_recid=recid)

    if "comment" in document:
        submission.data_abstract = encode_string(document['comment'])

    def resolve_last_updated():
        # A missing or unparsable 'dateupdated' falls back to the
        # current time.
        if "dateupdated" not in document:
            return datetime.now()
        try:
            return parse(document['dateupdated'], dayfirst=True)
        except ValueError:
            return datetime.now()

    submission.last_updated = resolve_last_updated()

    if "modifications" in document:
        parse_modifications(submission, recid, document)

    if 'additional_resources' in document:
        # Existing resources are dropped before the new ones are attached.
        for stale_resource in submission.resources:
            db.session.delete(stale_resource)

        new_resources = parse_additional_resources(basepath, recid, document)
        for new_resource in new_resources:
            submission.resources.append(new_resource)

    db.session.add(submission)
    db.session.commit()

Example #7

0

Show file

File: api.py Project: HEPData/hepdata3

def get_pending_invitations_for_user(user):
    """
    Returns the reviewer/uploader invitations addressed to the user's email
    that have no user account attached.

    :param user: User object; only its email attribute is read
    :return: list of dicts with the keys 'title', 'invitation_cookie',
        'role' and 'coordinator'
    """

    def describe(invite):
        # Look up the record and its coordinator for a single invitation.
        publication_record = get_record_by_id(invite.publication_recid)
        hepsubmission = get_latest_hepsubmission(
            publication_recid=invite.publication_recid)
        coordinator = get_user_from_id(hepsubmission.coordinator)
        return {
            'title': encode_string(publication_record['title'], 'utf-8'),
            'invitation_cookie': invite.invitation_cookie,
            'role': invite.role,
            'coordinator': coordinator,
        }

    # '== None' is kept on purpose: it is part of the query filter
    # expression, not a plain Python comparison.
    pending_query = SubmissionParticipant.query.filter(
        SubmissionParticipant.email == user.email,
        or_(SubmissionParticipant.role == 'reviewer',
            SubmissionParticipant.role == 'uploader'),
        SubmissionParticipant.user_account == None
    )

    return [describe(invite) for invite in pending_query.all()]

Example #8

0

Show file

File: submission.py Project: HEPData/hepdata3

def process_general_submission_info(basepath, submission_info_document, recid):
    """
    Processes the top level information about a submission,
    extracting the information about the data abstract,
    additional resources for the submission (files, links,
    and html inserts) and historical modification information.

    Nothing is done unless the document contains at least one of the keys
    'comment', 'modifications' or 'record_ids'.

    :param basepath: the path the submission has been loaded to
    :param submission_info_document: the data document
    :param recid: publication record id
    :return: None; the updated submission is committed to the session
    """
    general_info_keys = ('comment', 'modifications', 'record_ids')
    if not any(key in submission_info_document for key in general_info_keys):
        return

    hepsubmission = get_latest_hepsubmission(publication_recid=recid)

    # 'comment' is optional: a document may qualify through
    # 'modifications' or 'record_ids' alone, so it must not be read
    # unconditionally (that raised KeyError before).
    if 'comment' in submission_info_document:
        hepsubmission.data_abstract = encode_string(
            submission_info_document['comment'])

    if "dateupdated" in submission_info_document:
        try:
            hepsubmission.last_updated = parse(
                submission_info_document['dateupdated'], dayfirst=True)
        except ValueError:
            # Unparsable date: fall back to the current time.
            hepsubmission.last_updated = datetime.now()

    if "modifications" in submission_info_document:
        parse_modifications(hepsubmission, recid, submission_info_document)

    if 'additional_resources' in submission_info_document:
        # Existing resources are dropped before the new ones are attached.
        for reference in hepsubmission.resources:
            db.session.delete(reference)

        resources = parse_additional_resources(
            basepath, recid, hepsubmission.version, submission_info_document)
        for resource in resources:
            hepsubmission.resources.append(resource)

    db.session.add(hepsubmission)
    db.session.commit()

Example #9

0

Show file

File: twitter.py Project: HEPData/hepdata3

def tweet(title, collaborations, url):
    """
    Post an "Added ... data" status for a record when USE_TWITTER is set.

    Missing credentials and failures while posting are printed rather
    than raised.

    :param title: paper title; cleaned of LaTeX, encoded and truncated
    :param collaborations: passed to get_collaboration_string
    :param url: record URL included in the status
    :return: None
    """
    if not USE_TWITTER:
        return

    credentials = (OAUTH_TOKEN, OAUTH_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
    if not all(credentials):
        # log this error
        print("Twitter credentials must be supplied!")
        return

    twitter = Twitter(auth=OAuth(*credentials))
    try:
        status = "Added{0} data on \"{1}\" to {2}".format(
            get_collaboration_string(collaborations),
            truncate_string(encode_string(cleanup_latex(title)), 10),
            url)

        twitter.statuses.update(status=status)
    except Exception as e:
        print(str(e))
        # It would be nice to get a stack trace here
        print("(P) Failed to post tweet for record {0}".format(url))

Example #10

0

Show file

File: twitter.py Project: ruphy/hepdata

def tweet(title, collaborations, url, version=1):
    """
    Post a status announcing new (version 1) or revised data for a record.

    Does nothing when USE_TWITTER is falsy. The paper title is truncated
    to 10 words. When the post is rejected with HTTP 403 and API error
    code 186 (status too long), the title is shortened by one word and the
    post is retried. Any other failure stops the attempts. Failures while
    posting are printed rather than raised.

    :param title: paper title; cleaned of LaTeX, encoded and truncated
    :param collaborations: passed to get_collaboration_string
    :param url: record URL included in the status
    :param version: 1 for a new record; any other value is reported as a
        revision and appended to the URL
    :return: None
    """
    if not USE_TWITTER:
        return

    OAUTH_TOKEN = current_app.config['OAUTH_TOKEN']
    OAUTH_SECRET = current_app.config['OAUTH_SECRET']
    CONSUMER_KEY = current_app.config['CONSUMER_KEY']
    CONSUMER_SECRET = current_app.config['CONSUMER_SECRET']

    if not OAUTH_TOKEN or not OAUTH_SECRET or not CONSUMER_KEY or not CONSUMER_SECRET:
        # log this error
        print("Twitter credentials must be supplied!")
        return

    twitter = Twitter(auth=OAuth(OAUTH_TOKEN, OAUTH_SECRET,
                                 CONSUMER_KEY, CONSUMER_SECRET))

    # Try to tweet with paper title truncated to 10 words.
    # If tweet exceeds 140 characters, keep trying with one less word each time.
    words = 10
    tweeted = False
    # The exception name bound by "except ... as e" is unbound again when
    # the handler ends, so the last failure is kept here for the report
    # after the loop.
    last_error = None

    while words and not tweeted:
        try:
            if version == 1:
                status = "Added{0} data on \"{1}\" to {2}".format(
                    get_collaboration_string(collaborations),
                    truncate_string(
                        encode_string(cleanup_latex(title)), words),
                    url)
            else:
                status = "Revised{0} data on \"{1}\" at {2}?version={3}".format(
                    get_collaboration_string(collaborations),
                    truncate_string(
                        encode_string(cleanup_latex(title)), words),
                    url, version)

            twitter.statuses.update(status=status)
            tweeted = True
            print("Tweeted: {}".format(status))

        except Exception as e:
            last_error = e

            # Only HTTP errors carry ".e.code" / ".response_data"; any
            # other exception must not blow up inside this handler.
            too_long = False
            if getattr(getattr(e, 'e', None), 'code', None) == 403:
                try:
                    error = json.loads(e.response_data.decode('utf8'))
                    # Code 186: status is over 140 characters.
                    too_long = error["errors"][0]["code"] == 186
                except (AttributeError, LookupError, TypeError, ValueError):
                    too_long = False

            if not too_long:
                break
            words = words - 1  # Try again with one less word.

    if not tweeted:
        print(str(last_error))
        print("(P) Failed to post tweet for record {0}".format(url))

Example #11

0

Show file

def process_submission_directory(basepath,
                                 submission_file_path,
                                 recid,
                                 update=False,
                                 *args,
                                 **kwargs):
    """
    Goes through an entire submission directory and processes the
    files within to create DataSubmissions
    with the files and related material attached as DataResources.

    :param basepath: directory the 'data_file' entries are resolved against
    :param submission_file_path: path of the submission file, or None when
        the upload did not contain one
    :param recid: publication record id
    :param update: forwarded to reserve_doi_for_hepsubmission; when False,
        a 'finished' submission is reopened as a new version
    :param kwargs: may contain 'user_id', used as coordinator when a
        submission has to be created
    :return: dict of errors (empty when nothing went wrong); a missing
        submission file is reported under the key 'submission.yaml'
    """
    added_file_names = []
    errors = {}

    if submission_file_path is not None:
        submission_file_validator = SubmissionFileValidator()
        is_valid_submission_file = submission_file_validator.validate(
            file_path=submission_file_path)

        data_file_validator = DataFileValidator()

        if is_valid_submission_file:
            # Use the C-accelerated safe loader when this PyYAML build
            # provides it, otherwise the pure-Python safe loader.
            yaml_loader = getattr(yaml, 'CSafeLoader', yaml.SafeLoader)

            # yaml.load_all is lazy, so the documents are read inside the
            # with-block to let the file be closed straight away.
            with open(submission_file_path, 'r') as submission_file:
                submission_processed = list(
                    yaml.load_all(submission_file, Loader=yaml_loader))

            # process file, extracting contents, and linking
            # the data record with the parent publication
            hepsubmission = get_latest_hepsubmission(publication_recid=recid)
            if hepsubmission is None:
                # No submission exists for this record yet: start one at
                # version 1. The previous code read attributes of the
                # missing submission here and discarded the new object.
                hepsubmission = HEPSubmission(
                    publication_recid=recid,
                    overall_status='todo',
                    coordinator=kwargs.get('user_id') if 'user_id'
                    in kwargs else int(current_user.get_id()),
                    version=1)
                db.session.add(hepsubmission)

            # On a new upload, we reset the flag to notify reviewers
            hepsubmission.reviewers_notified = False

            # if it is finished and we receive an update,
            # then we need to reopen the submission to allow for revisions.
            if hepsubmission.overall_status == 'finished' and not update:
                # we create a new HEPSubmission object
                _rev_hepsubmission = HEPSubmission(
                    publication_recid=recid,
                    overall_status='todo',
                    inspire_id=hepsubmission.inspire_id,
                    coordinator=hepsubmission.coordinator,
                    version=hepsubmission.version + 1)
                db.session.add(_rev_hepsubmission)
                hepsubmission = _rev_hepsubmission

            reserve_doi_for_hepsubmission(hepsubmission, update)

            for yaml_document in submission_processed:
                if not yaml_document:
                    # Empty YAML documents (e.g. a trailing '---') carry
                    # nothing to process.
                    continue

                if 'record_ids' in yaml_document or 'comment' in yaml_document or 'modifications' in yaml_document:
                    # comments are only present in the general submission
                    # information document.
                    process_general_submission_info(basepath, yaml_document,
                                                    recid)
                else:
                    existing_datasubmission_query = DataSubmission.query \
                        .filter_by(name=encode_string(yaml_document["name"]),
                                   publication_recid=recid,
                                   version=hepsubmission.version)

                    added_file_names.append(yaml_document["name"])

                    if existing_datasubmission_query.count() == 0:
                        datasubmission = DataSubmission(
                            publication_recid=recid,
                            name=encode_string(yaml_document["name"]),
                            description=encode_string(
                                yaml_document["description"]),
                            version=hepsubmission.version)

                    else:
                        datasubmission = existing_datasubmission_query.one()
                        datasubmission.description = encode_string(
                            yaml_document["description"])

                    db.session.add(datasubmission)

                    main_file_path = os.path.join(basepath,
                                                  yaml_document["data_file"])

                    if data_file_validator.validate(file_path=main_file_path):
                        process_data_file(recid, hepsubmission.version,
                                          basepath, yaml_document,
                                          datasubmission, main_file_path)
                    else:
                        # NOTE(review): this replaces errors collected for
                        # earlier data files - confirm whether they should
                        # be merged instead.
                        errors = process_validation_errors_for_display(
                            data_file_validator.get_messages())

                        data_file_validator.clear_messages()

            cleanup_submission(recid, hepsubmission.version, added_file_names)

            db.session.commit()

            if not errors:
                package_submission(basepath, recid, hepsubmission)
                reserve_dois_for_data_submissions(recid, hepsubmission.version)

                admin_indexer = AdminIndexer()
                admin_indexer.index_submission(hepsubmission)
        else:
            errors = process_validation_errors_for_display(
                submission_file_validator.get_messages())

            submission_file_validator.clear_messages()
            data_file_validator.clear_messages()
    else:
        # return an error
        errors = {
            "submission.yaml": [{
                "level":
                "error",
                "message":
                "No submission.yaml file found in submission."
            }]
        }
        return errors

    # we return all the errors collectively.
    # This makes more sense that returning errors as
    # soon as problems are found on one file.
    return errors

Example #12

0

Show file

def process_submission_directory(basepath,
                                 submission_file_path,
                                 recid,
                                 update=False,
                                 *args,
                                 **kwargs):
    """
    Goes through an entire submission directory and processes the
    files within to create DataSubmissions
    with the files and related material attached as DataResources.

    :param basepath: directory the 'data_file' and local
        'additional_resources' entries are resolved against
    :param submission_file_path: path of the submission file, or None when
        the upload did not contain one
    :param recid: publication record id
    :param update: forwarded to reserve_doi_for_hepsubmission; when False,
        a 'finished' submission is reopened as a new version
    :param kwargs: may contain 'user_id', used as coordinator when a
        submission has to be created
    :return: the collected errors keyed by file name, or, when none were
        collected, whatever package_submission returns; a missing
        submission file is reported under the key 'submission.yaml'
    """
    added_file_names = []
    errors = {}

    if submission_file_path is not None:

        submission_file_validator = SubmissionFileValidator()
        is_valid_submission_file = submission_file_validator.validate(
            file_path=submission_file_path)

        if is_valid_submission_file:

            # yaml.load_all is lazy, so the documents are read inside the
            # with-block; the file is then closed even if processing
            # below raises.
            with open(submission_file_path, 'r') as submission_file:
                submission_processed = list(
                    yaml.load_all(submission_file, Loader=Loader))

            # process file, extracting contents, and linking
            # the data record with the parent publication
            hepsubmission = get_latest_hepsubmission(publication_recid=recid)
            if hepsubmission is None:
                # No submission exists for this record yet: start one at
                # version 1. The previous code read attributes of the
                # missing submission here and discarded the new object.
                hepsubmission = HEPSubmission(
                    publication_recid=recid,
                    overall_status='todo',
                    coordinator=kwargs.get('user_id') if 'user_id'
                    in kwargs else int(current_user.get_id()),
                    version=1)
                db.session.add(hepsubmission)

            # On a new upload, we reset the flag to notify reviewers
            hepsubmission.reviewers_notified = False

            # if it is finished and we receive an update,
            # then we need to reopen the submission to allow for revisions.
            if hepsubmission.overall_status == 'finished' and not update:
                # we create a new HEPSubmission object
                _rev_hepsubmission = HEPSubmission(
                    publication_recid=recid,
                    overall_status='todo',
                    inspire_id=hepsubmission.inspire_id,
                    coordinator=hepsubmission.coordinator,
                    version=hepsubmission.version + 1)
                db.session.add(_rev_hepsubmission)
                hepsubmission = _rev_hepsubmission

            reserve_doi_for_hepsubmission(hepsubmission, update)

            no_general_submission_info = True

            data_file_validator = DataFileValidator()

            # Delete all data records associated with this submission.
            # Fixes problems with ordering where the table names are changed between uploads.
            # See https://github.com/HEPData/hepdata/issues/112
            # Side effect that reviews will be deleted between uploads.
            cleanup_submission(recid, hepsubmission.version, added_file_names)

            for yaml_document_index, yaml_document in enumerate(
                    submission_processed):
                if not yaml_document:
                    continue

                # Check for presence of local files given as additional_resources.
                if 'additional_resources' in yaml_document:
                    for resource in yaml_document['additional_resources']:
                        location = os.path.join(basepath, resource['location'])
                        if not resource['location'].startswith(
                            ('http', '/resource/')):
                            if not os.path.isfile(location):
                                errors[resource['location']] = [{
                                    "level":
                                    "error",
                                    "message":
                                    "Missing 'additional_resources' file from uploaded archive."
                                }]
                            elif '/' in resource['location']:
                                errors[resource['location']] = [{
                                    "level":
                                    "error",
                                    "message":
                                    "Location of 'additional_resources' file should not contain '/'."
                                }]

                # Only the first document may be the general submission
                # information; it is recognised by having no 'name'.
                if not yaml_document_index and 'name' not in yaml_document:

                    no_general_submission_info = False
                    process_general_submission_info(basepath, yaml_document,
                                                    recid)

                elif not all(k in yaml_document
                             for k in ('name', 'description', 'keywords',
                                       'data_file')):

                    errors["submission.yaml"] = [{
                        "level":
                        "error",
                        "message":
                        "YAML document with index {} ".format(
                            yaml_document_index) +
                        "missing one or more required keys (name, description, keywords, data_file)."
                    }]

                else:

                    existing_datasubmission_query = DataSubmission.query \
                        .filter_by(name=encode_string(yaml_document["name"]),
                                   publication_recid=recid,
                                   version=hepsubmission.version)

                    added_file_names.append(yaml_document["name"])

                    try:
                        if existing_datasubmission_query.count() == 0:
                            datasubmission = DataSubmission(
                                publication_recid=recid,
                                name=encode_string(yaml_document["name"]),
                                description=encode_string(
                                    yaml_document["description"]),
                                version=hepsubmission.version)
                        else:
                            datasubmission = existing_datasubmission_query.one(
                            )
                            datasubmission.description = encode_string(
                                yaml_document["description"])
                        db.session.add(datasubmission)
                    except SQLAlchemyError as sqlex:
                        errors[yaml_document["data_file"]] = [{
                            "level":
                            "error",
                            "message":
                            str(sqlex)
                        }]
                        db.session.rollback()
                        continue

                    main_file_path = os.path.join(basepath,
                                                  yaml_document["data_file"])

                    data, ex = _eos_fix_read_data(main_file_path)

                    if not data or ex is not None:

                        errors[yaml_document["data_file"]] = \
                            [{"level": "error", "message": "There was a problem parsing the file.\n" + str(ex)}]

                    elif '/' in yaml_document["data_file"]:

                        errors[yaml_document["data_file"]] = \
                            [{"level": "error", "message": "Name of data_file should not contain '/'.\n"}]

                    else:

                        if data_file_validator.validate(
                                file_path=main_file_path, data=data):
                            try:
                                process_data_file(recid, hepsubmission.version,
                                                  basepath, yaml_document,
                                                  datasubmission,
                                                  main_file_path)
                            except SQLAlchemyError as sqlex:
                                errors[yaml_document["data_file"]] = [{
                                    "level":
                                    "error",
                                    "message":
                                    "There was a problem processing the file.\n"
                                    + str(sqlex)
                                }]
                                db.session.rollback()
                        else:
                            # NOTE(review): this replaces errors collected
                            # for earlier documents - confirm whether they
                            # should be merged instead.
                            errors = process_validation_errors_for_display(
                                data_file_validator.get_messages())
                            data_file_validator.clear_messages()

                        if yaml_document["data_file"] not in errors:
                            # Check that the length of the 'values' list is consistent
                            # for each of the independent_variables and dependent_variables.
                            indep_count = [
                                len(indep['values'])
                                for indep in data['independent_variables']
                            ]
                            dep_count = [
                                len(dep['values'])
                                for dep in data['dependent_variables']
                            ]
                            if len(set(indep_count + dep_count)
                                   ) > 1:  # if more than one unique count
                                errors.setdefault(
                                    yaml_document["data_file"], []
                                ).append({
                                    "level":
                                    "error",
                                    "message":
                                    "Inconsistent length of 'values' list:\n" +
                                    "independent_variables{}, dependent_variables{}"
                                    .format(str(indep_count), str(dep_count))
                                })

            if no_general_submission_info:
                hepsubmission.last_updated = datetime.now()
                db.session.add(hepsubmission)
                db.session.commit()

            # The line below is commented out since it does not preserve the order of tables.
            # Delete all tables above instead: side effect of deleting reviews between uploads.
            #cleanup_submission(recid, hepsubmission.version, added_file_names)

            db.session.commit()

            if not errors:
                errors = package_submission(basepath, recid, hepsubmission)
                reserve_dois_for_data_submissions(
                    publication_recid=recid, version=hepsubmission.version)

                admin_indexer = AdminIndexer()
                admin_indexer.index_submission(hepsubmission)

            else:  # delete all tables if errors
                cleanup_submission(recid, hepsubmission.version, {})

        else:

            errors = process_validation_errors_for_display(
                submission_file_validator.get_messages())
            submission_file_validator.clear_messages()

    else:
        # return an error
        errors = {
            "submission.yaml": [{
                "level":
                "error",
                "message":
                "No submission.yaml file found in submission."
            }]
        }
        return errors

    # we return all the errors collectively.
    # This makes more sense that returning errors as
    # soon as problems are found on one file.
    return errors

Example #13

0

Show file

File: submission.py Project: HEPData/hepdata3

def process_submission_directory(basepath, submission_file_path, recid, update=False, *args, **kwargs):
    """
    Goes through an entire submission directory and processes the
    files within to create DataSubmissions
    with the files and related material attached as DataResources.

    :param basepath: directory against which each document's ``data_file``
        entry is resolved.
    :param submission_file_path: path to the submission.yaml file, or None
        when no such file was found in the submission.
    :param recid: publication record id the data submissions belong to.
    :param update: when True, a submission whose status is 'finished' is
        modified in place instead of a new version being opened.
    :param kwargs: may carry ``user_id``, used as the coordinator when a
        submission has to be created; otherwise ``current_user`` is used.
    :return: dict of errors to display (empty when nothing went wrong).
    """
    if submission_file_path is None:
        # return an error
        return {"submission.yaml": [
            {"level": "error",
             "message": "No submission.yaml file found in submission."}
        ]}

    added_file_names = []
    errors = {}

    # yaml.load_all is lazy: documents are only parsed while the loop below
    # iterates, so the file must stay open for the whole processing step.
    # The context manager guarantees it is closed on every exit path.
    with open(submission_file_path, 'r') as submission_file:
        submission_file_validator = SubmissionFileValidator()
        is_valid_submission_file = submission_file_validator.validate(
            file_path=submission_file_path)

        data_file_validator = DataFileValidator()

        if not is_valid_submission_file:
            errors = process_validation_errors_for_display(
                submission_file_validator.get_messages())

            submission_file_validator.clear_messages()
            data_file_validator.clear_messages()
            return errors

        # Prefer the C-accelerated safe loader; it only exists when PyYAML
        # was built against libyaml, so fall back to the pure-Python one.
        yaml_loader = getattr(yaml, 'CSafeLoader', yaml.SafeLoader)
        submission_processed = yaml.load_all(submission_file, Loader=yaml_loader)

        # process file, extracting contents, and linking
        # the data record with the parent publication
        hepsubmission = get_latest_hepsubmission(publication_recid=recid)
        if hepsubmission is None:
            # No submission exists yet for this record, so there is nothing
            # to copy inspire_id/version from: start a fresh one at version 1.
            if 'user_id' in kwargs:
                coordinator = kwargs['user_id']
            else:
                coordinator = int(current_user.get_id())
            hepsubmission = HEPSubmission(publication_recid=recid,
                                          overall_status='todo',
                                          coordinator=coordinator,
                                          version=1)
            db.session.add(hepsubmission)

        # On a new upload, we reset the flag to notify reviewers
        hepsubmission.reviewers_notified = False

        # if it is finished and we receive an update,
        # then we need to reopen the submission to allow for revisions.
        if hepsubmission.overall_status == 'finished' and not update:
            # we create a new HEPSubmission object
            _rev_hepsubmission = HEPSubmission(publication_recid=recid,
                                               overall_status='todo',
                                               inspire_id=hepsubmission.inspire_id,
                                               coordinator=hepsubmission.coordinator,
                                               version=hepsubmission.version + 1)
            db.session.add(_rev_hepsubmission)
            hepsubmission = _rev_hepsubmission

        reserve_doi_for_hepsubmission(hepsubmission)

        for yaml_document in submission_processed:
            if 'record_ids' in yaml_document or 'comment' in yaml_document or 'modifications' in yaml_document:
                # comments are only present in the general submission
                # information document.
                process_general_submission_info(basepath, yaml_document, recid)
                continue

            table_name = encode_string(yaml_document["name"])
            existing_datasubmission_query = DataSubmission.query \
                .filter_by(name=table_name,
                           publication_recid=recid,
                           version=hepsubmission.version)

            added_file_names.append(yaml_document["name"])

            if existing_datasubmission_query.count() == 0:
                datasubmission = DataSubmission(
                    publication_recid=recid,
                    name=table_name,
                    description=encode_string(
                        yaml_document["description"]),
                    version=hepsubmission.version)
            else:
                datasubmission = existing_datasubmission_query.one()
                datasubmission.description = encode_string(
                    yaml_document["description"])

            db.session.add(datasubmission)

            main_file_path = os.path.join(basepath,
                                          yaml_document["data_file"])

            if data_file_validator.validate(file_path=main_file_path):
                process_data_file(recid, hepsubmission.version, basepath, yaml_document,
                                  datasubmission, main_file_path)
            else:
                # Merge rather than overwrite, so that errors from an
                # earlier data file are not lost when a later one also fails.
                # NOTE(review): assumes the helper returns a dict like the
                # other error values in this function - confirm.
                errors.update(process_validation_errors_for_display(
                    data_file_validator.get_messages()))

                data_file_validator.clear_messages()

    cleanup_submission(recid, hepsubmission.version,
                       added_file_names)

    db.session.commit()

    if not errors:
        package_submission(basepath, recid, hepsubmission)
        reserve_dois_for_data_submissions(recid, hepsubmission.version)

        admin_indexer = AdminIndexer()
        admin_indexer.index_submission(hepsubmission)

    # we return all the errors collectively.
    # This makes more sense than returning errors as
    # soon as problems are found on one file.
    return errors