Esempio n. 1
0
    def get_file(self, path: str, root: bool = True) -> BinaryIO:
        """ Open the current on-disk copy of a file stored for this entry.

        When `root` is true the file is looked up directly inside the entry
        directory; otherwise it is looked up below the entry's 'data_files'
        folder, inside the sub-directory derived from `path`. The handle is
        opened in binary read mode and closing it is up to the caller.

        Raises RequestError if secure_full_path() yields no file name for
        `path`, or if the file cannot be opened. """

        folder, name = secure_full_path(path)
        if not name:
            raise RequestError('Cannot access directories, just files.')

        # Work out where the file lives before touching the disk
        if root:
            location = os.path.join(self._entry_dir, name)
        else:
            location = os.path.join(self._entry_dir, 'data_files', folder,
                                    name)

        try:
            return open(location, "rb")
        except IOError:
            raise RequestError('No file with that name saved for this entry.')
Esempio n. 2
0
def get_schema(version: str,
               schema_format: str = "json") -> Union[dict, TextIO]:
    """ Load one version of the schema from the schema directory on disk.

    For the "json" format the zlib-compressed '<version>.json.zlib' file is
    decompressed and parsed, and the resulting object is returned. For the
    "xml" format an open text-mode handle on '<version>.xml' is returned; the
    caller is responsible for closing it.

    Raises IOError if no schema directory can be found, ServerError for an
    unknown `schema_format`, and RequestError if the schema file cannot be
    opened or read. """

    # Two layouts are probed, the second one only if the first is missing
    primary_dir = os.path.join(root_dir, '..', 'schema', 'schema_data')
    fallback_dir = os.path.join(root_dir, '..', 'schema_data')
    if os.path.exists(primary_dir):
        schema_dir = primary_dir
    elif os.path.exists(fallback_dir):
        schema_dir = fallback_dir
    else:
        raise IOError("No schema directory found: %s" % fallback_dir)

    try:
        if schema_format == "xml":
            return open(os.path.join(schema_dir, version + '.xml'), 'r')
        if schema_format != "json":
            raise ServerError('Attempted to load invalid schema type.')

        compressed_path = os.path.join(schema_dir, version + '.json.zlib')
        with open(compressed_path, 'rb') as compressed_file:
            compressed = compressed_file.read()
        return json.loads(zlib.decompress(compressed).decode())
    except IOError:
        raise RequestError("Invalid schema version.")
Esempio n. 3
0
def secure_filename(filename: str) -> str:
    """ Sanitize a file name with werkzeug's secure_filename.

    Returns the sanitized name, or raises RequestError when sanitizing
    leaves nothing behind. """

    sanitized = werkzeug.utils.secure_filename(filename)
    if sanitized:
        return sanitized
    raise RequestError(
        'Invalid upload file name. Please rename the file and try again.')
Esempio n. 4
0
    def raise_write_errors(self):
        """ Refuse writes to a deposition that may not be modified.

        Raises RequestError when the repository is not being initialized and
        its metadata marks the entry as already deposited. Raises ServerError
        when the repository was opened read-only. Returns nothing otherwise. """

        # The metadata is only consulted when not initializing
        already_deposited = (not self._initialize
                             and self.metadata['entry_deposited'])
        if already_deposited:
            raise RequestError('Entry already deposited, no changes allowed.')

        if self._read_only:
            raise ServerError('Cannot write to a deposition opened read-only!')
Esempio n. 5
0
def deposit_entry(uuid) -> Response:
    """ Complete the deposition.

    Parses the NMR-STAR text submitted in the 'deposition_contents' form
    field, deposits it through the deposition repository, e-mails the contact
    persons a copy of the deposited entry and - unless the server runs in
    debug mode - notifies the annotators.

    Returns a JSON response holding the repository's last commit.
    Raises RequestError if no deposition contents were submitted. """

    if 'deposition_contents' not in request.form or not request.form[
            'deposition_contents']:
        raise RequestError('No deposition submitted.')
    final_entry: pynmrstar.Entry = pynmrstar.Entry.from_string(
        request.form['deposition_contents'])

    with depositions.DepositionRepo(uuid) as repo:
        bmrb_num = repo.deposit(final_entry)

        # Send out the e-mails
        contact_loop = final_entry.get_loops_by_category("_Contact_Person")[0]
        contact_emails: List[str] = contact_loop.get_tag(['Email_address'])
        contact_full = [
            "%s %s <%s>" % tuple(x) for x in contact_loop.get_tag(
                ['Given_name', 'Family_name', 'Email_address'])
        ]
        message = Message("Your entry has been deposited!",
                          recipients=contact_emails,
                          reply_to=configuration['smtp']['reply_to_address'])
        message.html = 'Thank you for your deposition! Your assigned BMRB ID is %s. We have attached a copy of the ' \
                       'deposition contents for reference. You may also use this file to start a new deposition. ' \
                       'You will hear from our annotators in the next few days. Please note that any data files that ' \
                       'you uploaded will be manually integrated into the final NMR-STAR file by the BMRB annotators ' \
                       '- their contents are not included in the NMR-STAR file attached to this e-mail.<br><br>' \
                       'Deposited data files: %s' % (bmrb_num, repo.get_data_file_list())
        message.attach("%s.str" % uuid, "text/plain", str(final_entry))
        mail.send(message)

        # Send a message to the annotators
        if not configuration['debug']:
            if isinstance(configuration['smtp']['annotator_address'], list):
                send_to = configuration['smtp']['annotator_address']
            else:
                send_to = [configuration['smtp']['annotator_address']]
            message = Message("BMRBdep: BMRB entry %s has been deposited." %
                              bmrb_num,
                              recipients=send_to)
            message.body = '''The following new entry has been deposited via BMRBdep:

restart id:            %s
bmrb accession number: %s

title: %s

contact persons: %s
''' % (uuid, bmrb_num, final_entry['entry_information_1']['Title'][0],
            contact_full)
            # Send only inside this branch: in debug mode no annotator message
            # exists, and sending here would re-send the depositor's e-mail.
            mail.send(message)

    return jsonify({'commit': repo.last_commit})
Esempio n. 6
0
    def __init__(self,
                 uuid,
                 initialize: bool = False,
                 read_only: bool = False):
        """ Set up access to the on-disk repository of one deposition.

        The entry lives at <repo_path>/<1st char>/<2nd char>/<uuid>, where the
        characters are taken from str(uuid). With `initialize` set, a missing
        entry directory is created (including its '.git' and 'data_files'
        folders) and a new git repository is initialized in it. Without it, a
        missing entry directory raises RequestError with status code 404.

        An existing repository is only opened here when neither `initialize`
        nor `read_only` is set. """

        self._repo: Repo
        self._uuid = uuid
        self._initialize: bool = initialize
        self._read_only: bool = read_only
        self._modified_files: bool = False
        self._live_metadata: dict = {}
        self._original_metadata: dict = {}

        # Build the nested path one level at a time
        uuid_text = str(uuid)
        first_level = os.path.join(configuration['repo_path'], uuid_text[0])
        second_level = os.path.join(first_level, uuid_text[1])
        self._entry_dir: str = os.path.join(second_level, uuid_text)
        self._lock_path: str = os.path.join(self._entry_dir, '.git',
                                            'api.lock')

        # Make sure the entry ID is valid, or throw an exception
        if not os.path.exists(self._entry_dir):
            if not self._initialize:
                raise RequestError('No deposition with that ID exists!',
                                   status_code=404)

            # Parent folders may be shared with other entries already
            for parent in (first_level, second_level):
                if not os.path.exists(parent):
                    os.mkdir(parent)

            # The entry folders themselves must all be created fresh
            for fresh_dir in (self._entry_dir,
                              os.path.join(self._entry_dir, '.git'),
                              os.path.join(self._entry_dir, 'data_files')):
                os.mkdir(fresh_dir)

            self._repo = Repo.init(self._entry_dir)
            with self._repo.config_writer() as config:
                config.set_value("user", "name", "BMRBDep")
                config.set_value("user", "email", "*****@*****.**")

        # Create the lock object
        self._lock_object: FileLock = FileLock(self._lock_path, timeout=360)

        if not (self._initialize or self._read_only):
            self._repo = Repo(self._entry_dir)
Esempio n. 7
0
def validate_user(token: str):
    """ Validate a user's e-mail from a signed token, then redirect to the entry loader URL.

    The token is decoded with a serializer keyed on the application's
    SECRET_KEY and must carry a 'deposition_id'. If that deposition's e-mail
    is not yet marked as validated, it is marked and the change committed.

    Raises RequestError when the token cannot be decoded or lacks the ID. """

    serializer = URLSafeSerializer(application.config['SECRET_KEY'])
    try:
        deposition_id = serializer.loads(token)['deposition_id']
    except (BadData, KeyError, TypeError):
        raise RequestError(
            'Invalid e-mail validation token. Please request a new e-mail validation message.'
        )

    with depositions.DepositionRepo(deposition_id) as repo:
        already_validated = repo.metadata['email_validated']
        if not already_validated:
            repo.metadata['email_validated'] = True
            repo.commit("E-mail validated.")

    target_url = '/entry/load/%s' % deposition_id
    return redirect(target_url, code=302)
Esempio n. 8
0
def store_file(uuid) -> Response:
    """ Store an uploaded data file in the deposition identified by uuid.

    The multipart form is parsed with uploads streamed into temporary files
    in a scratch directory. The first part named 'file' is written into the
    repository and committed. The JSON response reports the stored file name,
    whether the commit call reported a change, and the last commit.

    Raises RequestError if the request has no part named 'file'. """

    with depositions.DepositionRepo(uuid) as repo:

        scratch_root = configuration.get('temporary_directory', None)
        with tempfile.TemporaryDirectory(dir=scratch_root) as upload_dir:

            def custom_stream_factory(total_content_length,
                                      filename,
                                      content_type,
                                      content_length=None):
                # Every upload is spooled to its own file in the scratch dir
                return tempfile.NamedTemporaryFile('wb+',
                                                   prefix='flaskapp',
                                                   dir=upload_dir)

            _stream, _form, files = werkzeug.formparser.parse_form_data(
                request.environ, stream_factory=custom_stream_factory)

            for upload in files.values():
                if upload.name != 'file':
                    continue

                stored_name = repo.write_file(upload.filename,
                                              source_path=upload.stream.name)
                upload.close()

                # Commit first so that last_commit below reflects the result
                changed = bool(
                    repo.commit("User uploaded file: %s" % upload.filename))
                return jsonify({
                    'filename': stored_name,
                    'changed': changed,
                    'commit': repo.last_commit
                })

            raise RequestError(
                'No file uploaded, or file uploaded with the wrong parameter name!'
            )
Esempio n. 9
0
    def delete_data_file(self, path: str) -> bool:
        """ Delete a data file, or an empty directory, from the entry's 'data_files' folder.

        Returns True if something was removed (the repository is then flagged
        as having modified files) and False if nothing exists at that path.

        Raises RequestError if the removal fails with an OSError, and whatever
        raise_write_errors() raises when the entry may not be edited. """

        self.raise_write_errors()

        secured_path, secured_filename = secure_full_path(path)
        data_file_path = os.path.join(self._entry_dir, 'data_files',
                                      secured_path, secured_filename)

        try:
            if os.path.isfile(data_file_path):
                os.unlink(data_file_path)
            elif os.path.isdir(data_file_path):
                os.rmdir(data_file_path)
            else:
                # Nothing to delete: do not report success or flag the
                # repository as modified.
                return False
        except FileNotFoundError:
            # The path disappeared between the check and the removal
            return False
        except OSError:
            raise RequestError(
                'You must first remove any files in a directory before removing the directory itself.'
            )
        self._modified_files = True
        return True
Esempio n. 10
0
    def deposit(self, final_entry: pynmrstar.Entry) -> int:
        """ Deposits an entry into ETS. """

        self.raise_write_errors()
        if not self.metadata['email_validated']:
            raise RequestError(
                'You must validate your e-mail before deposition.')
        contact_emails: List[str] = final_entry.get_loops_by_category(
            "_Contact_Person")[0].get_tag(['Email_address'])
        if self.metadata['author_email'] not in contact_emails:
            raise RequestError(
                'At least one contact person must have the email of the original deposition creator.'
            )
        existing_entry_id = self.get_entry().entry_id

        if existing_entry_id != final_entry.entry_id:
            raise RequestError(
                'Invalid deposited entry. The ID must match that of this deposition.'
            )

        logging.info('Depositing deposition %s' % final_entry.entry_id)

        # Determine which schema version the entry is using
        schema: pynmrstar.Schema = pynmrstar.Schema(
            get_schema(self.metadata['schema_version'], schema_format='xml'))

        # Add tags stripped by the deposition interface
        final_entry.add_missing_tags(schema=schema)

        # We'll use this to assign Experiment_name tags later
        experiment_names: dict = {}
        try:
            experiment_names = dict(
                final_entry.get_loops_by_category('_Experiment')[0].get_tag(
                    ['id', 'name']))
        except IndexError:
            pass

        # Assign the PubMed ID
        for citation in final_entry.get_saveframes_by_category('citations'):
            if citation['PubMed_ID'] and citation['PubMed_ID'] != ".":
                update_citation_with_pubmed(citation, schema=schema)

        # Generate any necessary entities from chemcomps
        upgrade_chemcomps_and_create_entities_where_needed(final_entry,
                                                           schema=schema)

        for saveframe in final_entry:
            # Remove all unicode from the entry
            for tag in saveframe.tag_iterator():
                if isinstance(tag[1], str):
                    tag[1] = unidecode.unidecode(tag[1])
                    # In case only non-convertible unicode characters were there
                    if tag[1] == '':
                        tag[1] = None
            for loop in saveframe.loops:
                for row in loop.data:
                    for pos in range(0, len(row)):
                        if isinstance(row[pos], str):
                            row[pos] = unidecode.unidecode(row[pos])
                            # In case only non-convertible unicode characters were there
                            if row[pos] == '':
                                row[pos] = None

                # Set the "Experiment_name" tag from the "Experiment_ID" tag
                if 'Experiment_ID' in loop.tags:
                    name_tag_index = loop.tag_index('Experiment_name')
                    if name_tag_index is None:
                        loop.add_tag('Experiment_name', update_data=True)
                        name_tag_index = loop.tag_index('Experiment_name')
                    id_tag_index = loop.tag_index('Experiment_ID')
                    for row in loop.data:
                        if row[id_tag_index] in experiment_names:
                            row[name_tag_index] = experiment_names[
                                row[id_tag_index]]

            # Calculate the tag _Assembly.Number_of_components
            if saveframe.category == 'assembly':
                saveframe.add_tag('_Assembly.Number_of_components',
                                  len(saveframe['_Entity_assembly'].data),
                                  update=True)

        # Tweak the middle initials
        for loop_cat in [
                final_entry.get_loops_by_category(x) for x in
            ['_Contact_person', '_Entry_author', '_Citation_author']
        ]:
            for loop in loop_cat:
                middle_initial_index = loop.tag_index('Middle_initials')
                first_initial_index = loop.tag_index('First_initial')
                for row in loop.data:
                    if middle_initial_index and row[middle_initial_index]:
                        row[middle_initial_index] = ".".join(
                            row[middle_initial_index].replace(".", "")) + '.'
                    if first_initial_index and row[middle_initial_index]:
                        row[middle_initial_index] = ".".join(
                            row[middle_initial_index].replace(".", "")) + '.'

        # Delete the chemcomps if there is no ligand
        try:
            organic_count = int(
                final_entry.get_tag('Assembly.Organic_ligands')[0])
        except (ValueError, IndexError, TypeError):
            organic_count = 1
        try:
            metal_count = int(final_entry.get_tag('Assembly.Metal_ions')[0])
        except (ValueError, IndexError, TypeError):
            metal_count = 1
        if metal_count + organic_count == 0:
            for saveframe in final_entry.get_saveframes_by_category(
                    'chem_comp'):
                del final_entry[saveframe]

        # Insert the loops for residue sequences
        for entity in final_entry.get_saveframes_by_category('entity'):
            polymer_code: str = entity['Polymer_seq_one_letter_code'][0]
            polymer_type: str = entity['Polymer_type'][0]
            if polymer_code and polymer_code != '.':
                polymer_code = polymer_code.strip().upper().replace(
                    ' ', '').replace('\n', '')
                comp_loop = pynmrstar.Loop.from_scratch('_Entity_comp_index')
                comp_loop.add_tag([
                    '_Entity_comp_index.ID', '_Entity_comp_index.Auth_seq_ID',
                    '_Entity_comp_index.Comp_ID',
                    '_Entity_comp_index.Comp_label',
                    '_Entity_comp_index.Entry_ID',
                    '_Entity_comp_index.Entity_ID'
                ])

                # For simple DNA, RNA, and proteins
                if polymer_type in residue_mappings:
                    for x, residue in enumerate(polymer_code):
                        comp_loop.data.append([
                            x + 1, None,
                            residue_mappings[polymer_type].get(residue, 'X'),
                            None, None, None
                        ])

                # If it is something else, it needs to be manually annotated
                else:
                    for x, residue in enumerate(polymer_code):
                        comp_loop.data.append(
                            [x + 1, None, 'X', None, None, None])

                entity.add_loop(comp_loop)

                polymer_loop = pynmrstar.Loop.from_scratch('_Entity_poly_seq')
                polymer_loop.add_tag([
                    '_Entity_poly_seq.Hetero', '_Entity_poly_seq.Mon_ID',
                    '_Entity_poly_seq.Num', '_Entity_poly_seq.Comp_index_ID',
                    '_Entity_poly_seq.Entry_ID', '_Entity_poly_seq.Entity_ID'
                ])
                # For simple DNA, RNA, and proteins
                if polymer_type in residue_mappings:
                    for x, residue in enumerate(polymer_code):
                        polymer_loop.data.append([
                            None,
                            residue_mappings[polymer_type].get(residue, 'X'),
                            x + 1, x + 1, None, None
                        ])
                # If it is something else, it needs to be manually annotated
                else:
                    for x, residue in enumerate(polymer_code):
                        polymer_loop.data.append(
                            [x + 1, None, 'X', None, None, None])
                entity.add_loop(polymer_loop)

        # Calculate the values needed to insert into ETS
        today_str: str = date.today().isoformat()
        today_date: datetime = datetime.now()

        # Set the accession and submission date
        entry_saveframe: pynmrstar.saveframe = final_entry.get_saveframes_by_category(
            'entry_information')[0]
        entry_saveframe['Submission_date'] = today_str
        entry_saveframe['Accession_date'] = today_str

        # Do final entry normalization
        final_entry.normalize(schema=schema)

        params = {
            'source': 'Author',
            'submit_type': 'Dep',
            'status': 'nd',
            'lit_search_required': 'N',
            'submission_date': today_str,
            'accession_date': today_str,
            'last_updated': today_str,
            'molecular_system': final_entry['entry_information_1']['Title'][0],
            'onhold_status': 'Pub',
            'restart_id': final_entry.entry_id
        }

        # Dep_release_code_nmr_exptl was wrongly used in place of Release_request in dictionary versions < 3.2.8.1
        try:
            release_status: str = final_entry['entry_information_1'][
                'Dep_release_code_nmr_exptl'][0].upper()
        except (KeyError, ValueError):
            release_status = final_entry['entry_information_1'][
                'Release_request'][0].upper()

        if release_status == 'RELEASE NOW':
            params['onhold_status'] = today_date.strftime("%m/%d/%y")
        elif release_status == 'HOLD FOR 4 WEEKS':
            params['onhold_status'] = (
                today_date + relativedelta(weeks=4)).strftime("%m/%d/%y")
        elif release_status == 'HOLD FOR 8 WEEKS':
            params['onhold_status'] = (
                today_date + relativedelta(weeks=+8)).strftime("%m/%d/%y")
        elif release_status == 'HOLD FOR 6 MONTHS':
            params['onhold_status'] = (
                today_date + relativedelta(months=+6)).strftime("%m/%d/%y")
        elif release_status == 'HOLD FOR 1 YEAR':
            params['onhold_status'] = (
                today_date + relativedelta(years=+1)).strftime("%m/%d/%y")
        elif release_status == 'HOLD FOR PUBLICATION':
            params['onhold_status'] = 'Pub'
        else:
            raise ServerError('Invalid release code.')

        contact_loop: pynmrstar.Loop = final_entry.get_loops_by_category(
            "_Contact_Person")[0]
        params['author_email'] = ",".join(
            contact_loop.get_tag(['Email_address']))
        contact_people = [
            ', '.join(x)
            for x in contact_loop.get_tag(['Family_name', 'Given_name'])
        ]
        params['contact_person1'] = contact_people[0]
        params['contact_person2'] = contact_people[1]

        ranges = configuration['ets']['deposition_ranges']
        if len(ranges) == 0:
            raise ServerError('Server configuration error.')

        # If they have already deposited, just keep the same BMRB ID
        bmrbnum = self.metadata.get('bmrbnum', None)
        if configuration['debug'] and configuration['ets'][
                'host'] == 'CHANGE_ME' and not bmrbnum:
            bmrbnum = 999999
        if bmrbnum:
            params['bmrbnum'] = bmrbnum
        else:
            try:
                conn = psycopg2.connect(
                    user=configuration['ets']['user'],
                    host=configuration['ets']['host'],
                    database=configuration['ets']['database'])
                cur = conn.cursor()
            except psycopg2.OperationalError:
                logging.exception(
                    'Could not connect to ETS database. Is the server down, or the configuration wrong?'
                )
                raise ServerError(
                    'Could not connect to entry tracking system. Please contact us.'
                )

            try:
                # Determine which bmrbnum to use - one range at a time
                bmrbnum: Optional[int] = None
                for id_range in ranges:
                    # Get the existing IDs from ETS
                    bmrb_sql: str = 'SELECT bmrbnum FROM entrylog WHERE bmrbnum >= %s AND bmrbnum <= %s;'
                    cur.execute(bmrb_sql, [id_range[0], id_range[1]])

                    # Calculate the list of valid IDs
                    existing_ids: set = set([_[0] for _ in cur.fetchall()])
                    ids_in_range: set = set(range(id_range[0], id_range[1]))
                    assignable_ids = sorted(
                        list(ids_in_range.difference(existing_ids)))

                    # A valid ID has been found in this range
                    if len(assignable_ids) > 0:
                        bmrbnum = assignable_ids[0]
                        break
                    else:
                        logging.warning(
                            'No valid IDs found in range %d to %d. Continuing to next range...'
                            % (id_range[0], id_range[1]))

                if not bmrbnum:
                    logging.exception(
                        'No valid IDs remaining in any of the ranges!')
                    raise ServerError(
                        'Could not find a valid BMRB ID to assign. Please contact us.'
                    )

                params['bmrbnum'] = bmrbnum

                # Create the deposition record
                insert_query = """
INSERT INTO entrylog (depnum, bmrbnum, status, submission_date, accession_date, onhold_status, molecular_system,
                      contact_person1, contact_person2, submit_type, source, lit_search_required, author_email,
                      restart_id, last_updated, nmr_dep_code)
  VALUES (nextval('depnum_seq'), %(bmrbnum)s, %(status)s, %(submission_date)s, %(accession_date)s, %(onhold_status)s,
                             %(molecular_system)s, %(contact_person1)s, %(contact_person2)s, %(submit_type)s,
                             %(source)s, %(lit_search_required)s, %(author_email)s, %(restart_id)s, %(last_updated)s,
                             %(restart_id)s)"""
                cur.execute(insert_query, params)
                log_sql = """
INSERT INTO logtable (logid,depnum,actdesc,newstatus,statuslevel,logdate,login)
  VALUES (nextval('logid_seq'),currval('depnum_seq'),'NEW DEPOSITION','nd',1,now(),'')"""
                cur.execute(log_sql)
                conn.commit()
            except psycopg2.IntegrityError:
                logging.exception(
                    'Could not assign the chosen BMRB ID - it was already assigned.'
                )
                conn.rollback()
                raise ServerError(
                    'Could not create deposition. Please try again.')

        # Assign the BMRB ID in all the appropriate places in the entry
        final_entry.entry_id = bmrbnum

        # Write the final deposition to disk
        self.write_file('deposition.str', str(final_entry).encode(), root=True)
        self.metadata['entry_deposited'] = True
        self.metadata['deposition_date'] = datetime.utcnow().strftime(
            "%I:%M %p on %B %d, %Y")
        self.metadata['bmrbnum'] = bmrbnum
        self.metadata['server_version_at_deposition'] = get_release()
        self.commit('Deposition submitted!')

        # Return the assigned BMRB ID
        return bmrbnum
Esempio n. 11
0
def fetch_or_store_deposition(uuid):
    """ Fetches or stores an entry based on uuid.

    PUT: parses the uploaded JSON as an NMR-STAR entry and writes it to the
    deposition. Nothing is written when the entry is unchanged. If the
    repository's last commit is not among the commits the client sent and the
    client did not send 'force', {'error': 'reload'} is returned instead.
    Otherwise the response carries the repository's last commit.

    GET: returns the entry as JSON together with its schema, data file list,
    selected metadata values and the last commit.

    Raises RequestError for JSON that is not a valid entry, for a failing
    comparison with the stored entry, or for an entry with a different ID.
    Raises ServerError if the entry's schema version is not on the server. """

    # Store an entry
    if request.method == "PUT":
        entry_json: dict = request.get_json()
        try:
            entry: pynmrstar.Entry = pynmrstar.Entry.from_json(entry_json)
        except ValueError:
            raise RequestError(
                "Invalid JSON uploaded. The JSON was not a valid NMR-STAR entry."
            )

        with depositions.DepositionRepo(uuid) as repo:
            existing_entry: pynmrstar.Entry = repo.get_entry()

            # If they aren't making any changes
            try:
                if existing_entry == entry:
                    return jsonify({'commit': repo.last_commit})
            except ValueError as err:
                raise RequestError(repr(err))

            if existing_entry.entry_id != entry.entry_id:
                raise RequestError(
                    "Refusing to overwrite entry with entry of different ID.")

            # Next two lines can be removed after clients upgrade (06/01/2020)
            if isinstance(entry_json['commit'], str):
                entry_json['commit'] = [entry_json['commit']]

            # The client is working from a stale copy unless it forces the write
            if repo.last_commit not in entry_json[
                    'commit'] and 'force' not in entry_json:
                # Not inside an exception handler, so log a plain error
                # instead of logging.exception().
                logging.error('An entry changed on the server!')
                return jsonify({'error': 'reload'})

            # Update the entry data
            repo.write_entry(entry)
            repo.commit("Entry updated.")

            return jsonify({'commit': repo.last_commit})

    # Load an entry
    elif request.method == "GET":

        # Read everything needed while the repository is open
        with depositions.DepositionRepo(uuid) as repo:
            entry: pynmrstar.Entry = repo.get_entry()
            schema_version: str = repo.metadata['schema_version']
            data_files: List[str] = repo.get_data_file_list()
            email_validated: bool = repo.metadata['email_validated']
            entry_deposited: bool = repo.metadata['entry_deposited']
            deposition_nickname: str = repo.metadata['deposition_nickname']
            commit: str = repo.last_commit
        try:
            schema: dict = get_schema(schema_version)
        except RequestError:
            raise ServerError(
                "Entry specifies schema that doesn't exist on the server: %s" %
                schema_version)

        entry: dict = entry.get_json(serialize=False)
        entry['schema'] = schema
        entry['data_files'] = data_files
        entry['email_validated'] = email_validated
        entry['entry_deposited'] = entry_deposited
        entry['deposition_nickname'] = deposition_nickname
        entry['commit'] = [commit]

        return jsonify(entry)
Esempio n. 12
0
def new_deposition() -> Response:
    """ Starts a new deposition.

    Builds a template entry for the configured schema, optionally merges in
    an uploaded NMR-STAR file or an existing entry, pre-populates a few
    loops (sample conditions, entry authors, contact persons), writes the
    result into a newly initialized deposition repository and sends the
    validation e-mail.

    Form fields read from the request:
      * email - required. Lower-cased and stored as the first contact person.
      * deposition_nickname - required.
      * skip_validation - optional. Its mere presence skips the e-mail checks.
      * bootstrapID - optional. ID of an existing entry to start from
        (the literal string 'null' is treated as "not provided").
      * orcid - optional. Stored on the first contact person and used to
        look up the given/family name from the ORCID API when credentials
        are configured.
      * deposition_type - optional. "small molecule" selects the "-sm"
        variant of the configured schema.

    Uploaded files read from the request:
      * nmrstar_file - optional NMR-STAR file to start the deposition from.

    :return: JSON response of the form {'deposition_id': <new deposition ID>}.
    :raises RequestError: if a required field is missing, the uploaded file
                          cannot be decoded or parsed, both an upload and a
                          bootstrap ID are given, the bootstrap ID cannot be
                          loaded, the e-mail fails validation, or the ORCID
                          API answers 404.
    :raises ServerError: if the ORCID API cannot be reached, times out,
                         answers with a non-404 error status, or returns a
                         body that is not valid JSON.
    """

    # Seconds to wait on the ORCID API before giving up. Without a timeout a
    # stalled ORCID server would block this request indefinitely.
    orcid_timeout: int = 10

    request_info: Dict[str, Any] = request.form

    if not request_info or 'email' not in request_info:
        raise RequestError("Must specify user e-mail to start a session.")

    if 'deposition_nickname' not in request_info:
        raise RequestError("Must specify a nickname for the deposition.")

    # Only the presence of the field matters, not its value
    skip_email_validation: bool = 'skip_validation' in request_info

    uploaded_entry: Optional[pynmrstar.Entry] = None
    entry_bootstrap: bool = False
    nmrstar_upload = request.files.get('nmrstar_file')
    if nmrstar_upload and nmrstar_upload.filename:
        try:
            uploaded_entry = pynmrstar.Entry.from_string(
                nmrstar_upload.read().decode())
        except pynmrstar.exceptions.ParsingError as e:
            raise RequestError("Invalid NMR-STAR file: %s" % repr(e))
        except UnicodeDecodeError:
            raise RequestError(
                "Invalid uploaded file. It is not an ASCII file.")
    # Check if they are bootstrapping from an existing entry - if so, make sure they didn't also upload a file
    if 'bootstrapID' in request_info and request_info['bootstrapID'] != 'null':
        if uploaded_entry:
            raise RequestError(
                'Cannot create an entry from an uploaded file and existing entry.'
            )
        try:
            uploaded_entry = pynmrstar.Entry.from_database(
                request_info['bootstrapID'])
        except IOError:
            raise RequestError(
                'Invalid entry ID specified. No such entry exists, or is released.'
            )
        entry_bootstrap = True

    author_email: str = request_info.get('email', '').lower()
    # Normalize an empty ORCID to None
    author_orcid: Optional[str] = request_info.get('orcid') or None

    # Check the e-mail
    if not skip_email_validation:
        # Everything after the first "@"; only used in error messages, which are
        # only reached after the first validate_email() call did not reject the address.
        email_domain: str = author_email.partition("@")[2]
        try:
            if not validate_email(author_email):
                raise RequestError(
                    "The e-mail you provided is not a valid e-mail. Please check the e-mail you "
                    "provided for typos.")
            elif not validate_email(
                    author_email, check_mx=True, smtp_timeout=3):
                raise RequestError(
                    "The e-mail you provided is invalid. There is no e-mail server at '%s'. (Do you "
                    "have a typo in the part of your e-mail after the @?) If you are certain"
                    " that your e-mail is correct, please select the 'My e-mail is correct' checkbox "
                    "and click to start a new deposition again." %
                    email_domain)
        except Timeout:
            raise RequestError(
                "The e-mail you provided is invalid. There was no response when attempting to connect "
                "to the server at %s. If you are certain that your e-mail is correct, please select the"
                " 'My e-mail is correct' checkbox and click to start a new deposition again."
                % email_domain)
        except NXDOMAIN:
            raise RequestError(
                "The e-mail you provided is invalid. The domain '%s' is not a valid domain."
                % email_domain)

    # Create the deposition
    deposition_id = str(uuid4())
    schema_name = configuration['schema_version']
    if request_info.get('deposition_type',
                        'macromolecule') == "small molecule":
        schema_name += "-sm"
    schema: pynmrstar.Schema = pynmrstar.Schema(
        get_schema(schema_name, schema_format='xml'))
    json_schema: dict = get_schema(schema_name)
    entry_template: pynmrstar.Entry = pynmrstar.Entry.from_template(
        entry_id=deposition_id,
        all_tags=True,
        default_values=True,
        schema=schema)

    # Merge the entries
    if uploaded_entry:
        merge_entries(entry_template, uploaded_entry, schema)

    # Delete the large data loops after merging, if the entry was uploaded and may have them
    if uploaded_entry:
        for saveframe in entry_template:
            for loop in saveframe:
                if loop.category in [
                        '_Atom_chem_shift', '_Atom_site',
                        '_Gen_dist_constraint'
                ]:
                    loop.data = []

    # Calculate the uploaded file types, if they upload a file
    if uploaded_entry and not entry_bootstrap:
        data_file_loop: pynmrstar.Loop = pynmrstar.Loop.from_scratch()
        data_file_loop.add_tag([
            '_Upload_data.Data_file_ID',
            '_Upload_data.Deposited_data_files_ID',
            '_Upload_data.Data_file_name',
            '_Upload_data.Data_file_content_type',
            '_Upload_data.Data_file_Sf_category'
        ])
        upload_filename: str = secure_filename(nmrstar_upload.filename)

        # Get the categories types which are "data types".
        # Each record is indexed as [content type, [saveframe categories], ...].
        legal_data_categories: dict = dict()
        for data_upload_record in json_schema['file_upload_types']:
            for one_data_type in data_upload_record[1]:
                legal_data_categories[one_data_type] = data_upload_record[0]

        # If this entry has categories that are valid data types, add them
        pos: int = 1
        for data_type in uploaded_entry.category_list:
            if data_type not in legal_data_categories:
                continue
            # These two categories are never recorded as uploaded data
            if data_type not in ('chem_comp', 'experiment_list'):
                data_file_loop.add_data([
                    pos, 1, upload_filename,
                    legal_data_categories[data_type], data_type
                ])
                pos += 1
        data_file_loop.add_missing_tags(all_tags=True, schema=schema)
        entry_template.get_saveframes_by_category(
            'deposited_data_files')[0]['_Upload_data'] = data_file_loop

    entry_template.normalize(schema=schema)

    # Set the entry information tags
    entry_saveframe: pynmrstar.Saveframe = entry_template.get_saveframes_by_category(
        'entry_information')[0]
    entry_saveframe['NMR_STAR_version'] = schema.version
    entry_saveframe['Original_NMR_STAR_version'] = schema.version

    # Suggest some default sample conditions
    sample_conditions: pynmrstar.Loop = entry_template.get_loops_by_category(
        '_Sample_condition_variable')[0]
    if sample_conditions.empty:
        # Four blank rows, then fill in the type/value/unit columns
        sample_conditions.data = [[
            None for _ in range(len(sample_conditions.tags))
        ] for _ in range(4)]
        sample_conditions['Type'] = [
            'temperature', 'pH', 'pressure', 'ionic strength'
        ]
        sample_conditions['Val'] = [None, None, '1', None]
        sample_conditions['Val_units'] = ['K', 'pH', 'atm', 'M']

    # Just add a single row to the entry author loop
    author_loop: pynmrstar.Loop = entry_saveframe['_Entry_author']
    author_loop.data.insert(0, ['.'] * len(author_loop.tags))

    # Modify the contact_loop as needed
    contact_loop: pynmrstar.Loop = entry_saveframe['_Contact_person']

    # Make sure that whoever started the deposition is locked as the first contact person
    contact_emails: List[str] = contact_loop.get_tag('email_address')
    if author_email in contact_emails:
        # They are already there, move their data to the first row and update it if necessary
        contact_loop.data.insert(
            0, contact_loop.data.pop(contact_emails.index(author_email)))
    else:
        # They are not yet present in the contact persons
        contact_loop.data.insert(0, ['.'] * len(contact_loop.tags))
        contact_loop.data[0][contact_loop.tag_index(
            'Email_address')] = author_email
    # Need to be 2 contact authors
    if len(contact_loop.data) < 2:
        contact_loop.data.append(['.'] * len(contact_loop.tags))
    contact_loop.renumber_rows('ID')

    # Look up information based on the ORCID
    if author_orcid:
        contact_loop.data[0][contact_loop.tag_index('ORCID')] = author_orcid
        if 'orcid' not in configuration or configuration['orcid'][
                'bearer'] == 'CHANGEME':
            logging.warning(
                'Please specify your ORCID API credentials, or else auto-filling from ORCID will fail.'
            )
        else:
            try:
                r = requests.get(configuration['orcid']['url'] % author_orcid,
                                 headers={
                                     "Accept":
                                     "application/json",
                                     'Authorization':
                                     'Bearer %s' %
                                     configuration['orcid']['bearer']
                                 },
                                 timeout=orcid_timeout)
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout):
                # Timeout covers both connect and read timeouts
                raise ServerError(
                    'An error occurred while contacting the ORCID server.')
            if not r.ok:
                if r.status_code == 404:
                    raise RequestError('Invalid ORCID!')
                else:
                    raise ServerError(
                        'An error occurred while contacting the ORCID server.')
            else:
                try:
                    orcid_json = r.json()
                except ValueError:
                    # A successful status with a body that is not JSON
                    raise ServerError(
                        'An error occurred while contacting the ORCID server.')
                # Either name may be absent (or null) in the ORCID record
                try:
                    author_given = orcid_json['person']['name']['given-names'][
                        'value']
                except (TypeError, KeyError):
                    author_given = None
                try:
                    author_family = orcid_json['person']['name'][
                        'family-name']['value']
                except (TypeError, KeyError):
                    author_family = None
                contact_loop.data[0][contact_loop.tag_index(
                    'Given_name')] = author_given
                contact_loop.data[0][contact_loop.tag_index(
                    'Family_name')] = author_family

    # Set the loops to have at least one row of data
    for saveframe in entry_template:

        # Add a "deleted" tag to use to track deletion status
        saveframe.add_tag('_Deleted', 'no', update=True)

        for loop in saveframe:
            if not loop.data:
                loop.data = []

                # Experiment-related loops and sample components start with three rows
                iterations: int = 1
                if "Experiment_ID" in loop.tags or loop.category == '_Sample_component':
                    iterations = 3

                for x in range(1, iterations + 1):
                    row_data = []
                    for tag in loop.tags:
                        fqtn = (loop.category + '.' + tag).lower()
                        default_value = schema.schema[fqtn]['default value']
                        if tag == "ID":
                            row_data.append(x)
                        elif default_value not in ["?", '']:
                            row_data.append(default_value)
                        else:
                            row_data.append('.')
                    loop.data.append(row_data)

    # Set the entry_interview tags
    entry_interview: pynmrstar.Saveframe = entry_template.get_saveframes_by_category(
        'entry_interview')[0]
    for tag in json_schema['file_upload_types']:
        entry_interview[tag[2]] = "no"
    entry_interview['PDB_deposition'] = "no"
    entry_interview['BMRB_deposition'] = "yes"
    # Set the tag to store that this entry was bootstrapped
    if entry_bootstrap:
        entry_interview['Previous_BMRB_entry_used'] = request_info[
            'bootstrapID']

    entry_meta: dict = {
        'deposition_id':
        deposition_id,
        'author_email':
        author_email,
        'author_orcid':
        author_orcid,
        'last_ip':
        request.environ['REMOTE_ADDR'],
        'deposition_origination': {
            'request': dict(request.headers),
            'ip': request.environ['REMOTE_ADDR']
        },
        # The e-mail starts out as validated only when the server runs in debug mode
        'email_validated':
        configuration['debug'],
        'schema_version':
        schema.version,
        'entry_deposited':
        False,
        'server_version_at_creation':
        get_release(),
        'creation_date':
        datetime.datetime.now(
            datetime.timezone.utc).strftime("%I:%M %p on %B %d, %Y"),
        'deposition_nickname':
        request_info['deposition_nickname'],
        'deposition_from_file':
        bool(uploaded_entry)
    }

    # Initialize the repo
    with depositions.DepositionRepo(deposition_id, initialize=True) as repo:
        # Manually set the metadata during object creation - never should be done this way elsewhere
        # (entry_meta is assigned by reference, so the keys added below are part of the repo metadata too)
        repo._live_metadata = entry_meta
        repo.write_entry(entry_template)
        repo.write_file('schema.json',
                        data=json.dumps(json_schema).encode(),
                        root=True)
        if uploaded_entry:
            if entry_bootstrap:
                entry_meta['bootstrap_entry'] = request_info['bootstrapID']
                repo.write_file('bootstrap_entry.str',
                                data=str(uploaded_entry).encode(),
                                root=True)
            else:
                # The upload stream was already consumed while parsing; rewind to store the raw bytes
                nmrstar_upload.seek(0)
                repo.write_file('bootstrap_entry.str',
                                data=nmrstar_upload.read(),
                                root=True)
                entry_meta['bootstrap_filename'] = repo.write_file(
                    nmrstar_upload.filename,
                    data=str(uploaded_entry).encode())
        repo.commit("Entry created.")

        # Send the validation e-mail
        send_validation_email(deposition_id, repo)

    return jsonify({'deposition_id': deposition_id})