def get_file(self, path: str, root: bool = True) -> BinaryIO:
    """ Open a file stored for this entry and return the binary handle.

    :param path: Requested path. It is passed through secure_full_path()
                 before it is used.
    :param root: When true, the file is looked up directly in the entry
                 directory. Otherwise it is looked up under the entry's
                 'data_files' folder, inside the secured sub-path.
    :return: The file opened in 'rb' mode. It is not closed here, so the
             caller is responsible for closing it.
    :raises RequestError: If secure_full_path() yields an empty file name,
                          or if the file cannot be opened.
    """

    sub_path, file_name = secure_full_path(path)
    if not file_name:
        raise RequestError('Cannot access directories, just files.')

    # Work out where the file lives before touching the disk
    if root:
        location = os.path.join(self._entry_dir, file_name)
    else:
        location = os.path.join(self._entry_dir, 'data_files', sub_path,
                                file_name)

    try:
        return open(location, "rb")
    except IOError:
        raise RequestError('No file with that name saved for this entry.')
def get_schema(version: str, schema_format: str = "json") -> Union[dict, TextIO]:
    """ Load one version of the schema from the schema directory on disk.

    :param version: Schema version; used as the base name of the file.
    :param schema_format: "json" to get the decoded contents of
                          '<version>.json.zlib', or "xml" to get an open text
                          handle on '<version>.xml' (the caller closes it).
    :return: The parsed JSON schema, or the open XML file.
    :raises IOError: If neither candidate schema directory exists.
    :raises ServerError: If schema_format is neither "json" nor "xml".
    :raises RequestError: If the schema file cannot be opened or read.
    """

    # The first location is the one used when running locally
    candidate_dirs = (
        os.path.join(root_dir, '..', 'schema', 'schema_data'),
        os.path.join(root_dir, '..', 'schema_data'),
    )
    for schema_dir in candidate_dirs:
        if os.path.exists(schema_dir):
            break
    else:
        # schema_dir still holds the last candidate that was tried
        raise IOError("No schema directory found: %s" % schema_dir)

    try:
        if schema_format == "xml":
            return open(os.path.join(schema_dir, version + '.xml'), 'r')
        if schema_format != "json":
            raise ServerError('Attempted to load invalid schema type.')

        compressed_path = os.path.join(schema_dir, version + '.json.zlib')
        with open(compressed_path, 'rb') as compressed_file:
            compressed = compressed_file.read()
        return json.loads(zlib.decompress(compressed).decode())
    except IOError:
        raise RequestError("Invalid schema version.")
def secure_filename(filename: str) -> str:
    """ Sanitize a file name with werkzeug, refusing names that end up empty.

    :param filename: The file name to sanitize.
    :return: The result of werkzeug.utils.secure_filename().
    :raises RequestError: If the sanitized name is empty.
    """

    sanitized = werkzeug.utils.secure_filename(filename)
    if sanitized:
        return sanitized
    raise RequestError(
        'Invalid upload file name. Please rename the file and try again.')
def raise_write_errors(self):
    """ Raise if this deposition may not be modified.

    :raises RequestError: If the repo was not opened for initialization and
                          its metadata marks the entry as deposited.
    :raises ServerError: If the repo was opened read-only.
    """

    # The metadata is only consulted when the repo is not being initialized
    already_deposited = (not self._initialize
                         and self.metadata['entry_deposited'])
    if already_deposited:
        raise RequestError('Entry already deposited, no changes allowed.')

    if self._read_only:
        raise ServerError('Cannot write to a deposition opened read-only!')
def deposit_entry(uuid) -> Response:
    """ Complete the deposition.

    Parses the NMR-STAR text sent in the 'deposition_contents' form field,
    deposits it through the deposition repository, e-mails a confirmation
    (with the entry attached) to the contact persons and, unless the server
    runs in debug mode, notifies the annotators.

    :param uuid: ID of the deposition to complete.
    :return: JSON response holding the repository's last commit.
    :raises RequestError: If no deposition contents were submitted, or if
                          they cannot be parsed as NMR-STAR.
    """

    deposition_contents = request.form.get('deposition_contents')
    if not deposition_contents:
        raise RequestError('No deposition submitted.')

    # The contents come straight from the client, so a parse failure is a
    # problem with the request rather than with the server.
    try:
        final_entry: pynmrstar.Entry = pynmrstar.Entry.from_string(
            deposition_contents)
    except pynmrstar.exceptions.ParsingError as err:
        raise RequestError("Invalid NMR-STAR file: %s" % repr(err))

    with depositions.DepositionRepo(uuid) as repo:
        bmrb_num = repo.deposit(final_entry)

        # Send out the e-mails
        contact_loop = final_entry.get_loops_by_category("_Contact_Person")[0]
        contact_emails: List[str] = contact_loop.get_tag(['Email_address'])
        contact_full = [
            "%s %s <%s>" % tuple(x) for x in contact_loop.get_tag(
                ['Given_name', 'Family_name', 'Email_address'])
        ]

        message = Message("Your entry has been deposited!",
                          recipients=contact_emails,
                          reply_to=configuration['smtp']['reply_to_address'])
        message.html = (
            'Thank you for your deposition! Your assigned BMRB ID is %s. We have attached a copy of the '
            'deposition contents for reference. You may also use this file to start a new deposition. '
            'You will hear from our annotators in the next few days. Please note that any data files that '
            'you uploaded will be manually integrated into the final NMR-STAR file by the BMRB annotators '
            '- their contents are not included in the NMR-STAR file attached to this e-mail.<br><br>'
            'Deposited data files: %s' % (bmrb_num, repo.get_data_file_list()))
        message.attach("%s.str" % uuid, "text/plain", str(final_entry))
        mail.send(message)

        # Send a message to the annotators
        if not configuration['debug']:
            # The configured address may be a single address or a list
            annotator_address = configuration['smtp']['annotator_address']
            if isinstance(annotator_address, list):
                send_to = annotator_address
            else:
                send_to = [annotator_address]

            message = Message("BMRBdep: BMRB entry %s has been deposited."
                              % bmrb_num, recipients=send_to)
            # NOTE(review): the body text is kept exactly as found (a single
            # line); confirm whether one field per line was intended.
            message.body = '''The following new entry has been deposited via BMRBdep: restart id: %s bmrb accession number: %s title: %s contact persons: %s ''' % (
                uuid, bmrb_num,
                final_entry['entry_information_1']['Title'][0], contact_full)
            mail.send(message)

        return jsonify({'commit': repo.last_commit})
def __init__(self, uuid, initialize: bool = False, read_only: bool = False):
    """ Set up access to the on-disk repository of one deposition.

    The entry lives in <repo_path>/<1st char of id>/<2nd char of id>/<id>.
    When initializing and that directory does not exist yet, the directory
    tree and a new git repository are created.

    :param uuid: ID of the deposition.
    :param initialize: True when a new deposition is being created.
    :param read_only: True when the deposition will not be written to; no
                      existing git repository is opened in that case.
    :raises RequestError: (status 404) If the entry directory does not exist
                          and the repo is not being initialized.
    """

    self._repo: Repo
    self._uuid = uuid
    self._initialize: bool = initialize
    self._read_only: bool = read_only
    self._modified_files: bool = False
    self._live_metadata: dict = {}
    self._original_metadata: dict = {}

    uuids = str(uuid)
    self._lock_path: str = os.path.join(configuration['repo_path'],
                                        uuids[0], uuids[1], uuids, '.git',
                                        'api.lock')
    self._entry_dir: str = os.path.join(configuration['repo_path'],
                                        uuids[0], uuids[1], uuids)

    # Make sure the entry ID is valid, or throw an exception
    if not os.path.exists(self._entry_dir):
        if not self._initialize:
            raise RequestError('No deposition with that ID exists!',
                               status_code=404)

        # Create the parent folders where needed. They are shared between
        # depositions, so attempt the mkdir and tolerate "already exists"
        # rather than checking first: a concurrent request may create the
        # same folder between a check and the creation.
        first_parent = os.path.join(configuration['repo_path'], uuids[0])
        second_parent = os.path.join(configuration['repo_path'], uuids[0],
                                     uuids[1])
        for parent in (first_parent, second_parent):
            try:
                os.mkdir(parent)
            except FileExistsError:
                pass

        # The entry directory itself must be new, so these are not guarded
        os.mkdir(self._entry_dir)
        os.mkdir(os.path.join(self._entry_dir, '.git'))
        os.mkdir(os.path.join(self._entry_dir, 'data_files'))

        self._repo = Repo.init(self._entry_dir)
        with self._repo.config_writer() as config:
            config.set_value("user", "name", "BMRBDep")
            config.set_value("user", "email", "*****@*****.**")

    # Create the lock object
    self._lock_object: FileLock = FileLock(self._lock_path, timeout=360)

    # A freshly initialized repo was already opened above
    if not self._initialize and not self._read_only:
        self._repo = Repo(self._entry_dir)
def validate_user(token: str):
    """ Mark the depositor's e-mail as validated, then send them to the entry.

    :param token: Signed token that carries the 'deposition_id'.
    :return: A 302 redirect to '/entry/load/<deposition_id>'.
    :raises RequestError: If the token cannot be loaded or does not contain
                          a deposition ID.
    """

    token_reader = URLSafeSerializer(application.config['SECRET_KEY'])
    try:
        deposition_id = token_reader.loads(token)['deposition_id']
    except (BadData, KeyError, TypeError):
        raise RequestError(
            'Invalid e-mail validation token. Please request a new e-mail validation message.'
        )

    with depositions.DepositionRepo(deposition_id) as repo:
        already_validated = repo.metadata['email_validated']
        if not already_validated:
            repo.metadata['email_validated'] = True
            repo.commit("E-mail validated.")

    target_url = '/entry/load/%s' % deposition_id
    return redirect(target_url, code=302)
def store_file(uuid) -> Response:
    """ Store an uploaded data file in the deposition identified by uuid.

    The upload is streamed into a temporary directory, and the first form
    part named 'file' is written into the repository and committed.

    :param uuid: ID of the deposition that receives the file.
    :return: JSON with the stored file name, whether the commit changed
             anything, and the repository's last commit.
    :raises RequestError: If the request holds no part named 'file'.
    """

    with depositions.DepositionRepo(uuid) as repo:
        scratch_root = configuration.get('temporary_directory', None)
        with tempfile.TemporaryDirectory(dir=scratch_root) as upload_dir:

            # Parameter names are kept as they are, since the form parser
            # may pass them by keyword.
            def spool_to_upload_dir(total_content_length, filename,
                                    content_type, content_length=None):
                return tempfile.NamedTemporaryFile('wb+', prefix='flaskapp',
                                                   dir=upload_dir)

            _, _, files = werkzeug.formparser.parse_form_data(
                request.environ, stream_factory=spool_to_upload_dir)

            for upload in files.values():
                if upload.name != 'file':
                    continue

                stored_name = repo.write_file(upload.filename,
                                              source_path=upload.stream.name)
                upload.close()

                # Update the entry data
                commit_message = "User uploaded file: %s" % upload.filename
                changed = True if repo.commit(commit_message) else False
                return jsonify({
                    'filename': stored_name,
                    'changed': changed,
                    'commit': repo.last_commit
                })

    raise RequestError(
        'No file uploaded, or file uploaded with the wrong parameter name!'
    )
def delete_data_file(self, path: str) -> bool:
    """ Delete a data file (or an empty data directory) by name.

    :param path: Path below the entry's 'data_files' folder. It is passed
                 through secure_full_path() before it is used.
    :return: True if something was removed, False if there was nothing at
             that path to remove.
    :raises RequestError: If the removal fails with an OSError other than
                          "not found", for example a non-empty directory.
    Also raises whatever raise_write_errors() raises when the deposition may
    not be modified.
    """

    self.raise_write_errors()

    secured_path, secured_filename = secure_full_path(path)
    data_file_path = os.path.join(self._entry_dir, 'data_files',
                                  secured_path, secured_filename)

    try:
        if os.path.isfile(data_file_path):
            os.unlink(data_file_path)
        elif os.path.isdir(data_file_path):
            os.rmdir(data_file_path)
        else:
            # Nothing exists at that path, so nothing changed: do not report
            # success and do not flag the repository as modified.
            return False
    except FileNotFoundError:
        # It vanished between the check and the removal
        return False
    except OSError:
        raise RequestError(
            'You must first remove any files in a directory before removing the directory itself.'
        )

    self._modified_files = True
    return True
def deposit(self, final_entry: pynmrstar.Entry) -> int:
    """ Deposits an entry into ETS.

    The entry is checked against this deposition, then cleaned up: missing
    tags are added, values are transliterated with unidecode, experiment
    names and initials are filled in, residue loops are generated, and the
    dates are set. Unless a BMRB ID is already available, an ID is reserved
    and a record is inserted in the ETS database. Finally the entry is
    written to 'deposition.str' and committed.

    :param final_entry: The entry as submitted. It is modified in place.
    :return: The BMRB ID assigned to the deposition.
    :raises RequestError: If the e-mail is not validated, the original
                          author's e-mail is not among the contact persons,
                          or the entry ID does not match this deposition.
    :raises ServerError: On an invalid release code, an empty list of ID
                         ranges in the configuration, or a failure to reach
                         ETS or to reserve an ID there.
    Also raises whatever raise_write_errors() raises when the deposition may
    not be modified.
    """

    def dotted_initials(initials: str) -> str:
        # "AB", "A.B" and "A.B." all become "A.B."
        return ".".join(initials.replace(".", "")) + '.'

    self.raise_write_errors()

    if not self.metadata['email_validated']:
        raise RequestError(
            'You must validate your e-mail before deposition.')

    contact_emails: List[str] = final_entry.get_loops_by_category(
        "_Contact_Person")[0].get_tag(['Email_address'])
    if self.metadata['author_email'] not in contact_emails:
        raise RequestError(
            'At least one contact person must have the email of the original deposition creator.'
        )

    existing_entry_id = self.get_entry().entry_id
    if existing_entry_id != final_entry.entry_id:
        raise RequestError(
            'Invalid deposited entry. The ID must match that of this deposition.'
        )

    logging.info('Depositing deposition %s' % final_entry.entry_id)

    # Determine which schema version the entry is using
    schema: pynmrstar.Schema = pynmrstar.Schema(
        get_schema(self.metadata['schema_version'], schema_format='xml'))

    # Add tags stripped by the deposition interface
    final_entry.add_missing_tags(schema=schema)

    # We'll use this to assign Experiment_name tags later
    experiment_names: dict = {}
    try:
        experiment_names = dict(
            final_entry.get_loops_by_category('_Experiment')[0].get_tag(
                ['id', 'name']))
    except IndexError:
        # No experiment loop in the entry, so there are no names to assign
        pass

    # Assign the PubMed ID
    for citation in final_entry.get_saveframes_by_category('citations'):
        if citation['PubMed_ID'] and citation['PubMed_ID'] != ".":
            update_citation_with_pubmed(citation, schema=schema)

    # Generate any necessary entities from chemcomps
    upgrade_chemcomps_and_create_entities_where_needed(final_entry,
                                                       schema=schema)

    for saveframe in final_entry:
        # Remove all unicode from the entry. A value that consisted only of
        # non-convertible characters comes back empty and is stored as None.
        for tag in saveframe.tag_iterator():
            if isinstance(tag[1], str):
                tag[1] = unidecode.unidecode(tag[1]) or None

        for loop in saveframe.loops:
            for row in loop.data:
                for pos, value in enumerate(row):
                    if isinstance(value, str):
                        row[pos] = unidecode.unidecode(value) or None

            # Set the "Experiment_name" tag from the "Experiment_ID" tag
            if 'Experiment_ID' in loop.tags:
                name_tag_index = loop.tag_index('Experiment_name')
                if name_tag_index is None:
                    loop.add_tag('Experiment_name', update_data=True)
                    name_tag_index = loop.tag_index('Experiment_name')
                id_tag_index = loop.tag_index('Experiment_ID')
                for row in loop.data:
                    if row[id_tag_index] in experiment_names:
                        row[name_tag_index] = experiment_names[
                            row[id_tag_index]]

        # Calculate the tag _Assembly.Number_of_components
        if saveframe.category == 'assembly':
            saveframe.add_tag('_Assembly.Number_of_components',
                              len(saveframe['_Entity_assembly'].data),
                              update=True)

    # Tweak the middle and first initials. Each tag is handled on its own
    # and only when the loop has it ("is not None", so that a tag in
    # column 0 is not mistaken for a missing tag).
    for category in ['_Contact_person', '_Entry_author', '_Citation_author']:
        for loop in final_entry.get_loops_by_category(category):
            initial_indexes = [
                index for index in (loop.tag_index('Middle_initials'),
                                    loop.tag_index('First_initial'))
                if index is not None
            ]
            for row in loop.data:
                for index in initial_indexes:
                    if row[index]:
                        row[index] = dotted_initials(row[index])

    # Delete the chemcomps if there is no ligand. A count that is missing or
    # unreadable is treated as 1, which keeps the chemcomps.
    try:
        organic_count = int(
            final_entry.get_tag('Assembly.Organic_ligands')[0])
    except (ValueError, IndexError, TypeError):
        organic_count = 1
    try:
        metal_count = int(final_entry.get_tag('Assembly.Metal_ions')[0])
    except (ValueError, IndexError, TypeError):
        metal_count = 1
    if metal_count + organic_count == 0:
        for saveframe in final_entry.get_saveframes_by_category('chem_comp'):
            del final_entry[saveframe]

    # Insert the loops for residue sequences
    for entity in final_entry.get_saveframes_by_category('entity'):
        polymer_code: str = entity['Polymer_seq_one_letter_code'][0]
        polymer_type: str = entity['Polymer_type'][0]
        if polymer_code and polymer_code != '.':
            polymer_code = polymer_code.strip().upper().replace(
                ' ', '').replace('\n', '')

            # For simple DNA, RNA, and proteins the residues can be mapped.
            # If it is something else, it needs to be manually annotated,
            # so every residue is entered as 'X'.
            if polymer_type in residue_mappings:
                type_mapping = residue_mappings[polymer_type]
                comp_ids = [type_mapping.get(residue, 'X')
                            for residue in polymer_code]
            else:
                comp_ids = ['X'] * len(polymer_code)

            comp_loop = pynmrstar.Loop.from_scratch('_Entity_comp_index')
            comp_loop.add_tag([
                '_Entity_comp_index.ID', '_Entity_comp_index.Auth_seq_ID',
                '_Entity_comp_index.Comp_ID',
                '_Entity_comp_index.Comp_label',
                '_Entity_comp_index.Entry_ID',
                '_Entity_comp_index.Entity_ID'
            ])
            for position, comp_id in enumerate(comp_ids, start=1):
                comp_loop.data.append(
                    [position, None, comp_id, None, None, None])
            entity.add_loop(comp_loop)

            polymer_loop = pynmrstar.Loop.from_scratch('_Entity_poly_seq')
            polymer_loop.add_tag([
                '_Entity_poly_seq.Hetero', '_Entity_poly_seq.Mon_ID',
                '_Entity_poly_seq.Num', '_Entity_poly_seq.Comp_index_ID',
                '_Entity_poly_seq.Entry_ID', '_Entity_poly_seq.Entity_ID'
            ])
            # Same column order for mapped and unmapped residues:
            # Hetero, Mon_ID, Num, Comp_index_ID, Entry_ID, Entity_ID
            for position, comp_id in enumerate(comp_ids, start=1):
                polymer_loop.data.append(
                    [None, comp_id, position, position, None, None])
            entity.add_loop(polymer_loop)

    # Calculate the values needed to insert into ETS
    today_str: str = date.today().isoformat()
    today_date: datetime = datetime.now()

    # Set the accession and submission date
    entry_saveframe: pynmrstar.Saveframe = final_entry.get_saveframes_by_category(
        'entry_information')[0]
    entry_saveframe['Submission_date'] = today_str
    entry_saveframe['Accession_date'] = today_str

    # Do final entry normalization
    final_entry.normalize(schema=schema)

    params = {
        'source': 'Author',
        'submit_type': 'Dep',
        'status': 'nd',
        'lit_search_required': 'N',
        'submission_date': today_str,
        'accession_date': today_str,
        'last_updated': today_str,
        'molecular_system': final_entry['entry_information_1']['Title'][0],
        'onhold_status': 'Pub',
        'restart_id': final_entry.entry_id
    }

    # Dep_release_code_nmr_exptl was wrongly used in place of Release_request in dictionary versions < 3.2.8.1
    try:
        release_status: str = final_entry['entry_information_1'][
            'Dep_release_code_nmr_exptl'][0].upper()
    except (KeyError, ValueError):
        release_status = final_entry['entry_information_1'][
            'Release_request'][0].upper()

    if release_status == 'RELEASE NOW':
        params['onhold_status'] = today_date.strftime("%m/%d/%y")
    elif release_status == 'HOLD FOR 4 WEEKS':
        params['onhold_status'] = (
            today_date + relativedelta(weeks=4)).strftime("%m/%d/%y")
    elif release_status == 'HOLD FOR 8 WEEKS':
        params['onhold_status'] = (
            today_date + relativedelta(weeks=+8)).strftime("%m/%d/%y")
    elif release_status == 'HOLD FOR 6 MONTHS':
        params['onhold_status'] = (
            today_date + relativedelta(months=+6)).strftime("%m/%d/%y")
    elif release_status == 'HOLD FOR 1 YEAR':
        params['onhold_status'] = (
            today_date + relativedelta(years=+1)).strftime("%m/%d/%y")
    elif release_status == 'HOLD FOR PUBLICATION':
        params['onhold_status'] = 'Pub'
    else:
        raise ServerError('Invalid release code.')

    contact_loop: pynmrstar.Loop = final_entry.get_loops_by_category(
        "_Contact_Person")[0]
    params['author_email'] = ",".join(
        contact_loop.get_tag(['Email_address']))
    contact_people = [
        ', '.join(x)
        for x in contact_loop.get_tag(['Family_name', 'Given_name'])
    ]
    # NOTE(review): this requires at least two contact persons - confirm
    # that the clients always send two.
    params['contact_person1'] = contact_people[0]
    params['contact_person2'] = contact_people[1]

    ranges = configuration['ets']['deposition_ranges']
    if len(ranges) == 0:
        raise ServerError('Server configuration error.')

    # If they have already deposited, just keep the same BMRB ID
    bmrbnum = self.metadata.get('bmrbnum', None)
    # A debug server without a configured ETS host uses a placeholder ID
    if configuration['debug'] and configuration['ets'][
            'host'] == 'CHANGE_ME' and not bmrbnum:
        bmrbnum = 999999

    if bmrbnum:
        params['bmrbnum'] = bmrbnum
    else:
        try:
            conn = psycopg2.connect(
                user=configuration['ets']['user'],
                host=configuration['ets']['host'],
                database=configuration['ets']['database'])
            cur = conn.cursor()
        except psycopg2.OperationalError:
            logging.exception(
                'Could not connect to ETS database. Is the server down, or the configuration wrong?'
            )
            raise ServerError(
                'Could not connect to entry tracking system. Please contact us.'
            )

        try:
            # Determine which bmrbnum to use - one range at a time
            bmrbnum = None
            for id_range in ranges:
                # Get the existing IDs from ETS
                bmrb_sql: str = 'SELECT bmrbnum FROM entrylog WHERE bmrbnum >= %s AND bmrbnum <= %s;'
                cur.execute(bmrb_sql, [id_range[0], id_range[1]])

                # Calculate the list of valid IDs
                # NOTE(review): the query includes the upper bound of the
                # range but range() excludes it, so the last ID of a range
                # is never assigned - confirm which one is intended.
                existing_ids: set = {record[0] for record in cur.fetchall()}
                ids_in_range: set = set(range(id_range[0], id_range[1]))
                assignable_ids = sorted(
                    ids_in_range.difference(existing_ids))

                # A valid ID has been found in this range
                if assignable_ids:
                    bmrbnum = assignable_ids[0]
                    break
                else:
                    logging.warning(
                        'No valid IDs found in range %d to %d. Continuing to next range...'
                        % (id_range[0], id_range[1]))

            if not bmrbnum:
                # Not inside an exception handler, so there is no traceback
                # to log: use error() rather than exception().
                logging.error(
                    'No valid IDs remaining in any of the ranges!')
                raise ServerError(
                    'Could not find a valid BMRB ID to assign. Please contact us.'
                )
            params['bmrbnum'] = bmrbnum

            # Create the deposition record
            insert_query = (
                " INSERT INTO entrylog (depnum, bmrbnum, status, submission_date, accession_date, "
                "onhold_status, molecular_system, contact_person1, contact_person2, submit_type, source, "
                "lit_search_required, author_email, restart_id, last_updated, nmr_dep_code) "
                "VALUES (nextval('depnum_seq'), %(bmrbnum)s, %(status)s, %(submission_date)s, "
                "%(accession_date)s, %(onhold_status)s, %(molecular_system)s, %(contact_person1)s, "
                "%(contact_person2)s, %(submit_type)s, %(source)s, %(lit_search_required)s, "
                "%(author_email)s, %(restart_id)s, %(last_updated)s, %(restart_id)s)")
            cur.execute(insert_query, params)
            log_sql = (
                " INSERT INTO logtable (logid,depnum,actdesc,newstatus,statuslevel,logdate,login) "
                "VALUES (nextval('logid_seq'),currval('depnum_seq'),'NEW DEPOSITION','nd',1,now(),'')")
            cur.execute(log_sql)
            conn.commit()
        except psycopg2.IntegrityError:
            logging.exception(
                'Could not assign the chosen BMRB ID - it was already assigned.'
            )
            conn.rollback()
            raise ServerError(
                'Could not create deposition. Please try again.')
        finally:
            # Release the ETS connection on every path out of this block
            conn.close()

    # Assign the BMRB ID in all the appropriate places in the entry
    final_entry.entry_id = bmrbnum

    # Write the final deposition to disk
    self.write_file('deposition.str', str(final_entry).encode(), root=True)
    self.metadata['entry_deposited'] = True
    self.metadata['deposition_date'] = datetime.utcnow().strftime(
        "%I:%M %p on %B %d, %Y")
    self.metadata['bmrbnum'] = bmrbnum
    self.metadata['server_version_at_deposition'] = get_release()
    self.commit('Deposition submitted!')

    # Return the assigned BMRB ID
    return bmrbnum
def fetch_or_store_deposition(uuid):
    """ Fetches or stores an entry based on uuid.

    PUT: parses the uploaded JSON as an NMR-STAR entry and stores it, unless
    it equals the stored entry. If the client's known commit(s) do not
    include the repository's last commit and 'force' is not present in the
    JSON, nothing is stored and {'error': 'reload'} is returned.

    GET: returns the stored entry as JSON together with its schema, data
    file list, validation/deposition flags, nickname and last commit.

    :param uuid: ID of the deposition.
    :raises RequestError: If the JSON is not a valid entry, the comparison
                          with the stored entry fails, or the entry IDs
                          differ.
    :raises ServerError: If the schema version recorded for the entry cannot
                         be loaded.
    """

    # Store an entry
    if request.method == "PUT":
        entry_json: dict = request.get_json()
        try:
            entry: pynmrstar.Entry = pynmrstar.Entry.from_json(entry_json)
        except ValueError:
            raise RequestError(
                "Invalid JSON uploaded. The JSON was not a valid NMR-STAR entry."
            )

        with depositions.DepositionRepo(uuid) as repo:
            existing_entry: pynmrstar.Entry = repo.get_entry()

            # If they aren't making any changes
            try:
                if existing_entry == entry:
                    return jsonify({'commit': repo.last_commit})
            except ValueError as err:
                raise RequestError(repr(err))

            if existing_entry.entry_id != entry.entry_id:
                raise RequestError(
                    "Refusing to overwrite entry with entry of different ID.")

            # Next two lines can be removed after clients upgrade (06/01/2020)
            if isinstance(entry_json['commit'], str):
                entry_json['commit'] = [entry_json['commit']]

            # The client is working from a state the server has moved past
            if repo.last_commit not in entry_json['commit']:
                if 'force' not in entry_json:
                    # Not inside an exception handler, so there is no
                    # traceback to log: use error() rather than exception().
                    logging.error('An entry changed on the server!')
                    return jsonify({'error': 'reload'})

            # Update the entry data
            repo.write_entry(entry)
            repo.commit("Entry updated.")

            return jsonify({'commit': repo.last_commit})

    # Load an entry
    elif request.method == "GET":
        # Copy what is needed out of the repo so that it can be closed
        # before the schema is loaded and the response is built.
        with depositions.DepositionRepo(uuid) as repo:
            entry: pynmrstar.Entry = repo.get_entry()
            schema_version: str = repo.metadata['schema_version']
            data_files: List[str] = repo.get_data_file_list()
            email_validated: bool = repo.metadata['email_validated']
            entry_deposited: bool = repo.metadata['entry_deposited']
            deposition_nickname: str = repo.metadata['deposition_nickname']
            commit: str = repo.last_commit

        try:
            schema: dict = get_schema(schema_version)
        except RequestError:
            # The version comes from the stored metadata, not from the
            # client, so a missing schema is a server-side problem.
            raise ServerError(
                "Entry specifies schema that doesn't exist on the server: %s"
                % schema_version)

        entry: dict = entry.get_json(serialize=False)
        entry['schema'] = schema
        entry['data_files'] = data_files
        entry['email_validated'] = email_validated
        entry['entry_deposited'] = entry_deposited
        entry['deposition_nickname'] = deposition_nickname
        entry['commit'] = [commit]

        return jsonify(entry)
def new_deposition() -> Response:
    """ Starts a new deposition.

    Reads the request form ('email', 'deposition_nickname', and optionally
    'skip_validation', 'bootstrapID', 'orcid', 'deposition_type') plus an
    optional uploaded 'nmrstar_file'. It builds an entry from the schema
    template, merges in the uploaded or bootstrapped entry if there is one,
    pre-populates contact/author/sample-condition data, creates the
    deposition repository, and sends the validation e-mail.

    :return: JSON response holding the new 'deposition_id'.
    :raises RequestError: On missing form fields, an invalid uploaded file
                          or bootstrap ID, an e-mail address that fails
                          validation, or an ORCID the ORCID server reports
                          as not found.
    :raises ServerError: If the ORCID server cannot be reached, does not
                         answer in time, or returns another error status.
    """

    request_info: Dict[str, Any] = request.form

    if not request_info or 'email' not in request_info:
        raise RequestError("Must specify user e-mail to start a session.")

    if 'deposition_nickname' not in request_info:
        raise RequestError("Must specify a nickname for the deposition.")

    skip_email_validation = False
    if 'skip_validation' in request_info:
        skip_email_validation = True

    uploaded_entry: Optional[pynmrstar.Entry] = None
    entry_bootstrap: bool = False
    if 'nmrstar_file' in request.files and request.files[
            'nmrstar_file'] and request.files['nmrstar_file'].filename:
        try:
            uploaded_entry = pynmrstar.Entry.from_string(
                request.files['nmrstar_file'].read().decode())
        except pynmrstar.exceptions.ParsingError as e:
            raise RequestError("Invalid NMR-STAR file: %s" % repr(e))
        except UnicodeDecodeError:
            raise RequestError(
                "Invalid uploaded file. It is not an ASCII file.")

    # Check if they are bootstrapping from an existing entry - if so, make sure they didn't also upload a file
    if 'bootstrapID' in request_info and request_info['bootstrapID'] != 'null':
        if uploaded_entry:
            raise RequestError(
                'Cannot create an entry from an uploaded file and existing entry.'
            )

        try:
            uploaded_entry = pynmrstar.Entry.from_database(
                request_info['bootstrapID'])
        except IOError:
            raise RequestError(
                'Invalid entry ID specified. No such entry exists, or is released.'
            )
        entry_bootstrap = True

    author_email: str = request_info.get('email', '').lower()
    author_orcid: Optional[str] = request_info.get('orcid')
    if not author_orcid:
        # Normalize an empty value to None
        author_orcid = None

    # Check the e-mail
    if not skip_email_validation:
        try:
            if not validate_email(author_email):
                raise RequestError(
                    "The e-mail you provided is not a valid e-mail. Please check the e-mail you "
                    "provided for typos.")
            elif not validate_email(
                    author_email, check_mx=True, smtp_timeout=3):
                raise RequestError(
                    "The e-mail you provided is invalid. There is no e-mail server at '%s'. (Do you "
                    "have a typo in the part of your e-mail after the @?) If you are certain"
                    " that your e-mail is correct, please select the 'My e-mail is correct' checkbox "
                    "and click to start a new deposition again." %
                    (author_email[author_email.index("@") + 1:]))
        except Timeout:
            raise RequestError(
                "The e-mail you provided is invalid. There was no response when attempting to connect "
                "to the server at %s. If you are certain that your e-mail is correct, please select the"
                " 'My e-mail is correct' checkbox and click to start a new deposition again."
                % author_email[author_email.index("@") + 1:])
        except NXDOMAIN:
            raise RequestError(
                "The e-mail you provided is invalid. The domain '%s' is not a valid domain."
                % author_email[author_email.index("@") + 1:])

    # Create the deposition
    deposition_id = str(uuid4())
    schema_name = configuration['schema_version']
    if request_info.get('deposition_type',
                        'macromolecule') == "small molecule":
        schema_name += "-sm"
    schema: pynmrstar.Schema = pynmrstar.Schema(
        get_schema(schema_name, schema_format='xml'))
    json_schema: dict = get_schema(schema_name)
    entry_template: pynmrstar.Entry = pynmrstar.Entry.from_template(
        entry_id=deposition_id,
        all_tags=True,
        default_values=True,
        schema=schema)

    if uploaded_entry:
        # Merge the entries
        merge_entries(entry_template, uploaded_entry, schema)

        # Delete the large data loops after merging, if the entry was uploaded and may have them
        for saveframe in entry_template:
            for loop in saveframe:
                if loop.category in [
                        '_Atom_chem_shift', '_Atom_site',
                        '_Gen_dist_constraint'
                ]:
                    loop.data = []

    # Calculate the uploaded file types, if they upload a file
    if uploaded_entry and not entry_bootstrap:
        data_file_loop: pynmrstar.Loop = pynmrstar.Loop.from_scratch()
        data_file_loop.add_tag([
            '_Upload_data.Data_file_ID',
            '_Upload_data.Deposited_data_files_ID',
            '_Upload_data.Data_file_name',
            '_Upload_data.Data_file_content_type',
            '_Upload_data.Data_file_Sf_category'
        ])
        upload_filename: str = secure_filename(
            request.files['nmrstar_file'].filename)

        # Get the categories types which are "data types"
        legal_data_categories: dict = dict()
        for data_upload_record in json_schema['file_upload_types']:
            for one_data_type in data_upload_record[1]:
                legal_data_categories[one_data_type] = data_upload_record[0]

        # If this entry has categories that are valid data types, add them
        pos: int = 1
        for data_type in uploaded_entry.category_list:
            if data_type in legal_data_categories:
                if data_type != 'chem_comp' and data_type != 'experiment_list':
                    data_file_loop.add_data([
                        pos, 1, upload_filename,
                        legal_data_categories[data_type], data_type
                    ])
                    pos += 1
        data_file_loop.add_missing_tags(all_tags=True, schema=schema)
        entry_template.get_saveframes_by_category(
            'deposited_data_files')[0]['_Upload_data'] = data_file_loop

    entry_template.normalize(schema=schema)

    # Set the entry information tags
    entry_saveframe: pynmrstar.Saveframe = entry_template.get_saveframes_by_category(
        'entry_information')[0]
    entry_saveframe['NMR_STAR_version'] = schema.version
    entry_saveframe['Original_NMR_STAR_version'] = schema.version

    # Suggest some default sample conditions
    sample_conditions: pynmrstar.Loop = entry_template.get_loops_by_category(
        '_Sample_condition_variable')[0]
    if sample_conditions.empty:
        sample_conditions.data = [[
            None for _ in range(len(sample_conditions.tags))
        ] for _ in range(4)]
        sample_conditions['Type'] = [
            'temperature', 'pH', 'pressure', 'ionic strength'
        ]
        sample_conditions['Val'] = [None, None, '1', None]
        sample_conditions['Val_units'] = ['K', 'pH', 'atm', 'M']

    # Just add a single row to the entry author loop
    author_loop: pynmrstar.Loop = entry_saveframe['_Entry_author']
    author_loop.data.insert(0, ['.'] * len(author_loop.tags))

    # Modify the contact_loop as needed
    contact_loop: pynmrstar.Loop = entry_saveframe['_Contact_person']

    # Make sure that whoever started the deposition is locked as the first contact person
    contact_emails: List[str] = contact_loop.get_tag('email_address')
    if author_email in contact_emails:
        # They are already there, move their data to the first row and update it if necessary
        contact_loop.data.insert(
            0, contact_loop.data.pop(contact_emails.index(author_email)))
    else:
        # They are not yet present in the contact persons
        contact_loop.data.insert(0, ['.'] * len(contact_loop.tags))
        contact_loop.data[0][contact_loop.tag_index(
            'Email_address')] = author_email

    # Need to be 2 contact authors
    if len(contact_loop.data) < 2:
        contact_loop.data.append(['.'] * len(contact_loop.tags))
    contact_loop.renumber_rows('ID')

    # Look up information based on the ORCID
    if author_orcid:
        contact_loop.data[0][contact_loop.tag_index('ORCID')] = author_orcid
        if 'orcid' not in configuration or configuration['orcid'][
                'bearer'] == 'CHANGEME':
            logging.warning(
                'Please specify your ORCID API credentials, or else auto-filling from ORCID will fail.'
            )
        else:
            try:
                # Without a timeout the request could block this view
                # indefinitely if the ORCID server stops responding.
                r = requests.get(configuration['orcid']['url'] % author_orcid,
                                 headers={
                                     "Accept": "application/json",
                                     'Authorization': 'Bearer %s' %
                                     configuration['orcid']['bearer']
                                 },
                                 timeout=10)
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout):
                raise ServerError(
                    'An error occurred while contacting the ORCID server.')
            if not r.ok:
                if r.status_code == 404:
                    raise RequestError('Invalid ORCID!')
                else:
                    raise ServerError(
                        'An error occurred while contacting the ORCID server.')
            else:
                orcid_json = r.json()
                # Either name may be absent from the record; use None then
                try:
                    author_given = orcid_json['person']['name']['given-names'][
                        'value']
                except (TypeError, KeyError):
                    author_given = None
                try:
                    author_family = orcid_json['person']['name'][
                        'family-name']['value']
                except (TypeError, KeyError):
                    author_family = None
                contact_loop.data[0][contact_loop.tag_index(
                    'Given_name')] = author_given
                contact_loop.data[0][contact_loop.tag_index(
                    'Family_name')] = author_family

    # Set the loops to have at least one row of data
    for saveframe in entry_template:
        # Add a "deleted" tag to use to track deletion status
        saveframe.add_tag('_Deleted', 'no', update=True)

        for loop in saveframe:
            if not loop.data:
                loop.data = []

                # Loops that refer to experiments, and the sample component
                # loop, start with three rows instead of one
                iterations: int = 1
                if "Experiment_ID" in loop.tags or loop.category == '_Sample_component':
                    iterations = 3

                for x in range(1, iterations + 1):
                    row_data = []
                    for tag in loop.tags:
                        fqtn = (loop.category + '.' + tag).lower()
                        if tag == "ID":
                            row_data.append(x)
                        elif schema.schema[fqtn]['default value'] not in [
                                "?", ''
                        ]:
                            row_data.append(
                                schema.schema[fqtn]['default value'])
                        else:
                            row_data.append('.')
                    loop.data.append(row_data)

    # Set the entry_interview tags
    entry_interview: pynmrstar.Saveframe = entry_template.get_saveframes_by_category(
        'entry_interview')[0]
    for tag in json_schema['file_upload_types']:
        entry_interview[tag[2]] = "no"
    entry_interview['PDB_deposition'] = "no"
    entry_interview['BMRB_deposition'] = "yes"
    # Set the tag to store that this entry was bootstrapped
    if entry_bootstrap:
        entry_interview['Previous_BMRB_entry_used'] = request_info[
            'bootstrapID']

    entry_meta: dict = {
        'deposition_id': deposition_id,
        'author_email': author_email,
        'author_orcid': author_orcid,
        'last_ip': request.environ['REMOTE_ADDR'],
        'deposition_origination': {
            'request': dict(request.headers),
            'ip': request.environ['REMOTE_ADDR']
        },
        'email_validated': configuration['debug'],
        'schema_version': schema.version,
        'entry_deposited': False,
        'server_version_at_creation': get_release(),
        'creation_date':
        datetime.datetime.utcnow().strftime("%I:%M %p on %B %d, %Y"),
        'deposition_nickname': request_info['deposition_nickname'],
        'deposition_from_file': True if uploaded_entry else False
    }

    # Initialize the repo
    with depositions.DepositionRepo(deposition_id, initialize=True) as repo:
        # Manually set the metadata during object creation - never should be done this way elsewhere
        repo._live_metadata = entry_meta
        repo.write_entry(entry_template)
        repo.write_file('schema.json',
                        data=json.dumps(json_schema).encode(),
                        root=True)
        if uploaded_entry:
            if entry_bootstrap:
                entry_meta['bootstrap_entry'] = request_info['bootstrapID']
                repo.write_file('bootstrap_entry.str',
                                data=str(uploaded_entry).encode(),
                                root=True)
            else:
                # The upload was already read once for parsing, so rewind
                # before storing the bytes exactly as they were sent
                request.files['nmrstar_file'].seek(0)
                repo.write_file('bootstrap_entry.str',
                                data=request.files['nmrstar_file'].read(),
                                root=True)
                entry_meta['bootstrap_filename'] = repo.write_file(
                    request.files['nmrstar_file'].filename,
                    data=str(uploaded_entry).encode())
        repo.commit("Entry created.")

        # Send the validation e-mail
        send_validation_email(deposition_id, repo)

    return jsonify({'deposition_id': deposition_id})