def __init__(self, enzyme_type, hidden_edges=True, log_level=0):
    """Store the enzyme type, its database document and the drawing defaults.

    Args:
        enzyme_type: Value matched against ``EnzymeType.enzyme_type`` to find
            the document kept in ``self.enzyme_type_obj``.
        hidden_edges: Kept unchanged on ``self.hidden_edges``.
        log_level: Kept unchanged on ``self.log_level``.
    """
    # Data handles. The metadata lookup is deliberately run only after the
    # enzyme type attributes exist, matching the original statement order.
    self.enzyme_type = enzyme_type
    matching_types = EnzymeType.objects(enzyme_type=enzyme_type)
    self.enzyme_type_obj = matching_types[0]  # first match; fails if there is none
    self.node_metadata = self._find_uniref_metadata()

    # Options supplied by the caller.
    self.hidden_edges = hidden_edges
    self.log_level = log_level

    # Edge appearance.
    self.edge_colour = {'color': 'black'}
    self.edge_width = 4

    # Border appearance for the two node sources and for selected nodes.
    self.uniref_border_width = 1
    self.uniref_border_colour = 'black'
    self.biocatdb_border_width = 3
    self.biocatdb_border_colour = 'darkred'
    self.border_width_selected = 4

    # Node appearance; the colour string embeds the opacity set just above it.
    self.opacity = 0.9
    self.luminosity = 'bright'
    self.node_colour = f'rgba(5, 5, 168, {self.opacity})'
    self.node_size = 100
    self.node_shape = 'dot'

    self.cluster_positioner = ClusterPositioner()
def merge_enzyme_types():
    """Merge one enzyme type into another, using names from the request form.

    Reads ``to_merge`` and ``merge_with`` from ``request.form``. When they
    differ, the ``to_merge`` document is passed to ``change_enzyme_type_name``
    together with the ``merge_with`` name and is then deleted.

    Returns:
        A ``jsonify`` response wrapping a ``result`` dict with ``status``,
        ``msg`` and ``issues`` keys.
    """
    source_name = request.form['to_merge']
    target_name = request.form['merge_with']

    # Guard: merging a type into itself is rejected up front.
    if source_name == target_name:
        return jsonify(result={
            'status': 'danger',
            'msg': "Can't merge with self",
            'issues': [],
        })

    source_type = EnzymeType.objects(enzyme_type=source_name)[0]
    change_enzyme_type_name(source_type, target_name)
    source_type.delete()

    return jsonify(result={
        'status': 'success',
        'msg': 'Enzyme type merged',
        'issues': [],
    })
def check_blast_status(enzyme_type):
    """Update an enzyme type's bioinformatics status from its sequences' BLAST state.

    Considers the sequences of ``enzyme_type`` that are reviewed and not
    flagged ``bioinformatics_ignore``:

    * If any of them has ``blast is None`` the enzyme type is saved with the
      status ``'Queued for update'``.
    * Otherwise, if the status is not already ``'Complete'``, it is set to
      ``'Complete'`` and, when exactly one ``SSN_record`` exists for the
      enzyme type, that record is marked ``'Queued for update'``.

    Args:
        enzyme_type: Value matched against ``enzyme_type`` on both
            ``Sequence`` and ``EnzymeType``.
    """
    seqs = Sequence.objects(
        db.Q(enzyme_type=enzyme_type)
        & db.Q(bioinformatics_ignore__ne=True)
        & db.Q(reviewed=True))
    enz_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]

    # One missing BLAST result is enough to decide the outcome, so the status
    # is written once instead of once per un-BLASTed sequence.
    if any(seq.blast is None for seq in seqs):
        enz_type_obj.bioinformatics_status = 'Queued for update'
        enz_type_obj.save()
        return

    if enz_type_obj.bioinformatics_status == 'Complete':
        return

    enz_type_obj.bioinformatics_status = 'Complete'
    enz_type_obj.save()

    # NOTE(review): the SSN re-queue is placed inside the "status just became
    # Complete" branch; nesting was reconstructed from flattened source -
    # confirm against the original layout.
    ssn_q = SSN_record.objects(enzyme_type=enz_type_obj)
    if len(ssn_q) == 1:
        # Reuse the queryset rather than issuing the identical query again.
        ssn_record = ssn_q[0]
        ssn_record.status = 'Queued for update'
        ssn_record.save()
def parse(self, output, seq_obj):
    """Process the alignments of a BLAST record for one query sequence.

    Each alignment passing ``self._alignment_filters`` is looked up in
    ``UniRef50`` by identifier and enzyme type. Known entries are handed to
    ``self._add_result_of_blasts_for``; unknown ones have their sequence
    fetched and, if it passes ``self._sequence_filters``, are added through
    ``self._add_uniref``.

    Args:
        output: Record exposing an ``alignments`` iterable.
        seq_obj: Query sequence object; its ``sequence`` and ``enzyme_type``
            attributes are read.
    """
    query_length = len(seq_obj.sequence)
    enzyme_type_obj = EnzymeType.objects(enzyme_type=seq_obj.enzyme_type)[0]

    for alignment in output.alignments:
        identifier = alignment.hit_id.replace(self.identifier_head, '')
        if not self._alignment_filters(alignment, query_length):
            continue

        known_entries = UniRef50.objects(
            db.Q(enzyme_name=identifier)
            & db.Q(enzyme_type=enzyme_type_obj)).select_related()

        if len(known_entries) != 0:
            # Already stored: only record this BLAST hit against it.
            self._add_result_of_blasts_for(seq_obj, known_entries[0])
            continue

        protein_sequence = self._get_sequence(identifier)
        if self._sequence_filters(protein_sequence, query_length):
            self.log(f"Adding sequence for {identifier}")
            self._add_uniref(alignment, identifier, protein_sequence,
                             enzyme_type_obj, seq_obj)
def ssn_object():
    """Return SSN status, sequence counts and precalculated choices as JSON.

    Reads ``enzyme_type`` from ``request.form``, loads the matching
    ``EnzymeType`` and its first ``SSN_record``, counts the ``Sequence`` and
    ``UniRef50`` documents for that type, and builds one descriptive string
    per key of ``num_at_alignment_score``.

    Returns:
        A ``jsonify`` response wrapping a ``result`` dict with the keys
        ``status``, ``num_biocatdb``, ``num_uniref`` and ``precalculated``.
    """
    enzyme_type = request.form['enzyme_type']
    enzyme_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]
    ssn_obj = SSN_record.objects(enzyme_type=enzyme_type_obj)[0]

    # Sequence is filtered by the name, UniRef50 by the document.
    biocatdb_total = Sequence.objects(enzyme_type=enzyme_type).count()
    uniref_total = UniRef50.objects(enzyme_type=enzyme_type_obj).count()

    def _describe(score):
        """Build the display text for one alignment score."""
        clusters = ssn_obj.num_at_alignment_score[score]
        idt = ssn_obj.identity_at_alignment_score[score]
        return f"{score}, {clusters} clusters, avg identity {idt[0]} ± {idt[1]}"

    precalculated = {
        score: _describe(score)
        for score in ssn_obj.num_at_alignment_score
    }

    return jsonify(result={
        'status': ssn_obj.status,
        'num_biocatdb': biocatdb_total,
        'num_uniref': uniref_total,
        'precalculated': precalculated,
    })
def show_sequences():
    """Render the sequence editing table, filtered by the URL query arguments.

    Recognised query arguments (tested by presence only for ``reviewed``):

    * ``reviewed`` - restrict to reviewed sequences; when absent the title
      notes that unreviewed sequences are included.
    * ``enzyme_type`` - restrict to that enzyme type.
    * ``paper_id`` - restrict to sequences of that paper; the paper's
      ``short_citation`` is added to the title.

    Returns:
        The rendered ``edit_tables/edit_sequences.html`` template.
    """
    args = request.args.to_dict()
    title = "Enzyme sequences"

    # Each filter defaults to an empty Q so the three can always be AND-ed.
    review_filter = db.Q()
    type_filter = db.Q()
    paper_filter = db.Q()

    if 'reviewed' in args:
        review_filter = db.Q(reviewed=True)
    else:
        title += " (including not reviewed)"

    if 'enzyme_type' in args:
        type_filter = db.Q(enzyme_type=args['enzyme_type'])
        title += f" for {args['enzyme_type']} enzymes"

    if 'paper_id' in args:
        paper_filter = db.Q(papers=args['paper_id'])
        paper = Paper.objects(id=args['paper_id'])[0]
        title += f" in {paper.short_citation}"

    enzyme_data = sequence_table.get_enzyme_data(
        type_filter & paper_filter & review_filter)
    enzyme_types = sorted(EnzymeType.objects().distinct("enzyme_type"))

    return render_template(
        'edit_tables/edit_sequences.html',
        seq_data=enzyme_data,
        seq_button_columns=[],
        seq_table_height='80vh',
        enzyme_types=enzyme_types,
        show_header_filters=True,
        include_owner=True,
        lock_enz_type='false',
        title=title,
        row_click_modal=True,
    )
def check_random_uniref(num_to_check=25):
    """Spot-check stored UniRef50 entries and run a full check on mismatch.

    For every enzyme type that has UniRef50 entries, up to ``num_to_check``
    entries are drawn with ``random.choice`` (so the same entry may be drawn
    more than once). Each drawn entry is loaded into a fresh
    ``UniRef_Parser`` and tested with ``check_id_match``. On the first
    mismatch sampling stops and ``full_uniref_check`` is run for that enzyme
    type.

    Args:
        num_to_check: Maximum number of random draws per enzyme type.
    """
    for enzyme_type in EnzymeType.objects():
        unirefs = UniRef50.objects(enzyme_type=enzyme_type)
        if len(unirefs) == 0:
            continue

        mismatch_found = False
        for _ in range(num_to_check):
            name = random.choice(unirefs).enzyme_name
            ref_parser = UniRef_Parser()
            ref_parser.load_xml(name)
            time.sleep(0.2)  # pause between successive lookups
            # Explicit comparison kept on purpose: only a result equal to
            # False counts as a mismatch (a None result would not).
            if ref_parser.check_id_match(name) == False:
                mismatch_found = True
                # One mismatch already triggers the full check, so further
                # lookups and sleeps would be wasted work.
                break

        if mismatch_found:
            print(
                f'Identified mismatches with online uniref entries.. full uniref check for {enzyme_type.enzyme_type}'
            )
            full_uniref_check(enzyme_type)

    print('Uniref checks complete ')
def is_type_taken(form, field):
    """Reject ``field.data`` if it equals the name of an existing enzyme type.

    Args:
        form: Unused here; part of the validator signature.
        field: Object whose ``data`` attribute holds the candidate name.

    Raises:
        ValidationError: If any ``EnzymeType`` already has that name.
    """
    candidate = field.data
    already_used = any(
        candidate == existing.enzyme_type for existing in EnzymeType.objects())
    if already_used:
        raise ValidationError(
            f'{field.data} is already an enzyme type in the database')
def get_enzymes():
    """Return the distinct ``enzyme_type`` values of all enzyme types, sorted."""
    return sorted(EnzymeType.objects().distinct('enzyme_type'))
def set_bioinformatics_status(enzyme_type, status):
    """Store ``status`` as the bioinformatics status of one enzyme type.

    Args:
        enzyme_type: Value matched against ``EnzymeType.enzyme_type``; the
            first matching document is updated.
        status: New value for ``bioinformatics_status``.
    """
    matching_types = EnzymeType.objects(enzyme_type=enzyme_type)
    target = matching_types[0]
    target.bioinformatics_status = status
    target.save()
def df_to_db(spec_df):
    """Save every row of ``spec_df`` to the database as an ``Activity``.

    Per row: a ``Paper`` is looked up by DOI or created; an ``EnzymeType``
    and a ``Sequence`` are created if the row names ones that do not exist
    yet (an existing sequence gets the paper appended instead); then one
    ``Activity`` document is built from the row and saved.

    Args:
        spec_df: Object providing ``iterrows()`` whose rows are indexable by
            the column names used below (presumably a pandas DataFrame -
            confirm against the caller).
    """
    print('Saving biocatdb_2 excel to mongodb..')

    # URL prefixes removed from the html DOI to obtain the bare DOI.
    doi_url_prefixes = (
        'https://doi.org/', 'http://doi.org/', 'http://dx.doi.org/'
    )

    for _, row in spec_df.iterrows():
        html_doi = str(row['html_doi'])
        added_by_string = str(row['added_by'])

        doi = html_doi
        for prefix in doi_url_prefixes:
            if prefix in doi:
                doi = html_doi.replace(prefix, '')

        # Find the paper for this DOI, creating it on first sight.
        if len(Paper.objects(doi=doi)) != 0:
            paper = Paper.objects(doi=doi)[0]
        else:
            paper = Paper(short_citation=str(row['short_citation']),
                          html=html_doi,
                          doi=doi).save()
            print(f"{row['short_citation']} added")

        # Exact-type comparison kept as in the original: only plain ``str``
        # cell values count as a usable name.
        type_value = row['enzyme_type']
        if type_value is not None and type_value != '' and type(type_value) == str:
            if len(EnzymeType.objects(enzyme_type=type_value)) == 0:
                EnzymeType(enzyme_type=type_value, description='').save()

        name_value = row['enzyme_name']
        if name_value is not None and name_value != '' and type(name_value) == str:
            if len(Sequence.objects(enzyme_name=name_value)) == 0:
                Sequence(enzyme_name=check_is_nan(name_value),
                         enzyme_type=check_is_nan(row['enzyme_type']),
                         papers=[paper]).save()
            else:
                known_seq = Sequence.objects(enzyme_name=name_value)[0]
                # NOTE(review): save placed inside the membership test;
                # nesting reconstructed from flattened source - confirm.
                if paper not in known_seq.papers:
                    known_seq.papers.append(paper)
                    known_seq.save()

        # The spreadsheet flags are 1 for true; anything else means false.
        binary = bool(row['binary'] == 1)
        auto_gen = bool(row['auto_generated'] == 1)

        Activity(
            enzyme_type=check_is_nan(row['enzyme_type']),
            enzyme_name=check_is_nan(row['enzyme_name']),
            reaction=check_is_nan(row['reaction']),
            short_citation=check_is_nan(row['short_citation']),
            html_doi=check_is_nan(row['html_doi']),
            added_by_string=added_by_string,
            paper=paper,
            cascade_num=check_is_nan(row['cascade_num']),
            substrate_1_smiles=get_smile(row['substrate_1_smiles']),
            substrate_2_smiles=get_smile(row['substrate_2_smiles']),
            product_1_smiles=get_smile(row['product_1_smiles']),
            temperature=check_is_nan(row['temperature']),
            ph=check_is_nan(row['ph']),
            solvent=check_is_nan(row['solvent']),
            other_conditions=check_is_nan(row['other_conditions']),
            notes=check_is_nan(row['notes']),
            reaction_vol=check_is_nan(row['reaction_vol']),
            formulation=check_is_nan(row['formulation']),
            biocat_conc=check_is_nan(row['biocat_conc']),
            kcat=check_is_float(row['kcat']),
            km=check_is_float(row['km']),
            mw=check_is_float(row['mw']),
            substrate_1_conc=check_is_nan(row['substrate_1_conc']),
            substrate_2_conc=check_is_nan(row['substrate_2_conc']),
            specific_activity=check_is_float(row['specific_activity']),
            conversion=check_is_float(row['conversion']),
            conversion_time=check_is_float(row['conversion_time']),
            categorical=check_is_nan(row['categorical']),
            binary=binary,
            selectivity=check_is_nan(row['selectivity']),
            auto_generated=auto_gen,
        ).save()

    print('..done')
def save_or_add_seqs(data_list, paper):
    """Create or update sequences from uploaded rows and attach them to a paper.

    Used by the excel upload. For each dict in ``data_list``:

    * Empty ``sequence_unavailable`` / ``structure`` values become
      ``'False'`` and newlines/spaces are stripped from ``sequence`` (the
      dict is modified in place).
    * A row without ``enzyme_name`` is reported as an issue.
    * If no sequence with that name exists, one is created - provided the
      enzyme type exists and ``sequence_check`` does not return ``False``.
    * Otherwise the paper is added to the existing sequence, and its empty
      fields are filled in only when the current user owns it or it has no
      owner.

    Args:
        data_list: Iterable of dicts, one per uploaded row.
        paper: Paper document to associate with each sequence.

    Returns:
        list[str]: Human-readable issues found while processing.
    """

    def _split_names(raw):
        """Split a ``', '``-separated string, dropping empty entries."""
        # ''.split(', ') yields [''], which would otherwise be stored as a
        # blank name.
        return [part for part in raw.split(', ') if part != '']

    user = user_datastore.get_user(current_user.id)
    issues = []
    enzyme_types = EnzymeType.objects().distinct('enzyme_type')

    for seq_dict in data_list:
        # --- normalise the incoming row -----------------------------------
        if seq_dict.get('sequence_unavailable') == '':
            seq_dict['sequence_unavailable'] = 'False'
        if seq_dict.get('structure') == '':
            seq_dict['structure'] = 'False'
        if 'sequence' in seq_dict:
            seq_dict['sequence'] = (
                seq_dict['sequence'].replace('\n', '').replace(' ', ''))

        enzyme_name = seq_dict.get('enzyme_name', '')
        if enzyme_name == '':
            issues.append("Sequence must have a name")
            continue

        # Queried once and reused for both the existence test and the fetch.
        existing = Sequence.objects(enzyme_name=enzyme_name)

        # --- new sequence ---------------------------------------------------
        if len(existing) == 0:
            enzyme_type = seq_dict.get('enzyme_type', '')
            if enzyme_type not in enzyme_types:
                message = f"Enzyme type {enzyme_type} does not exist"
                print(message)
                issues.append(message)
            # Explicit comparison kept: only a result equal to False rejects.
            elif sequence_check(seq_dict.get('sequence', '')) == False:
                message = (
                    f"Amino acid sequence for {enzyme_name} uses incorrect amino acid characters"
                )
                print(message)
                issues.append(message)
            else:
                print('Creating new sequence..')
                seq = Sequence(
                    enzyme_name=enzyme_name,
                    enzyme_type=enzyme_type,
                    other_names=_split_names(seq_dict.get('other_names', '')),
                    sequence=seq_dict.get('sequence', ''),
                    n_tag=seq_dict.get('n_tag', ''),
                    c_tag=seq_dict.get('c_tag', ''),
                    sequence_unavailable=strtobool(
                        seq_dict.get('sequence_unavailable', 'False')),
                    accession=seq_dict.get('accession', ''),
                    # Previously read from 'other_names', which copied the
                    # alternative names into the identifiers field.
                    other_identifiers=_split_names(
                        seq_dict.get('other_identifiers', '')),
                    pdb=seq_dict.get('pdb', ''),
                    mutant_of=seq_dict.get('mutant_of', ''),
                    notes=seq_dict.get('notes', ''),
                    papers=[paper],
                    owner=user)
                seq.save()
            continue

        # --- existing sequence ----------------------------------------------
        seq = existing[0]
        if paper not in seq.papers:
            seq.papers.append(paper)

        # NOTE(review): the ownership check is treated as independent of the
        # paper check above; nesting reconstructed from flattened source -
        # confirm.
        if seq.owner == user or seq.owner is None:
            seq.owner = user

            # Add only names not already stored, so that re-uploading the
            # same sheet does not keep growing the list with duplicates.
            for other_name in _split_names(seq_dict.get('other_names', '')):
                if other_name not in seq.other_names:
                    seq.other_names.append(other_name)

            # Existing values win; uploaded data only fills in blanks (pdb
            # and sequence_unavailable are the exceptions below).
            if seq.sequence is None or seq.sequence == '':
                seq.sequence = seq_dict.get('sequence', '')
            if strtobool(seq_dict.get('sequence_unavailable', 'False')):
                seq.sequence_unavailable = True
            if seq.accession is None or seq.accession == '':
                seq.accession = seq_dict.get('accession', '')
            if seq_dict.get('pdb', '') != '':
                seq.pdb = seq_dict.get('pdb', '')
            if seq.mutant_of is None or seq.mutant_of == '':
                seq.mutant_of = seq_dict.get('mutant_of', '')
            if seq.notes is None or seq.notes == '':
                seq.notes = seq_dict.get('notes', '')
        else:
            message = (
                'Sequence already exists but owned by another user - added to paper, but no data updated'
            )
            print(message)
            issues.append(message)

        seq.save()

    return issues
def load_sequence_data():
    """Return the data of one sequence, plus edit/ownership flags, as JSON.

    Reads ``name`` from ``request.form``. An empty name yields an empty
    result. Otherwise the sequence with that ``enzyme_name`` is loaded
    (without its ``papers`` field) and returned together with the sorted
    names of all sequences of the same enzyme type, the full name of its
    enzyme type, and flags describing what the current user may do with it.

    Returns:
        A ``jsonify`` response wrapping the ``result`` dict.
    """
    name = request.form['name']
    if name == '':
        return jsonify(result={})

    seq = Sequence.objects(
        enzyme_name=name).exclude('papers')[0].select_related()

    # Name -> name mapping of every sequence sharing this enzyme type.
    names_same_type = sorted(
        Sequence.objects(enzyme_type=seq.enzyme_type).distinct('enzyme_name'))
    seq_array = {seq_name: seq_name for seq_name in names_same_type}

    # All flags stay False for users who are not logged in.
    can_edit = False
    self_assigned = False
    other_user = False
    if current_user.is_authenticated:
        user = user_datastore.get_user(current_user.id)
        if check_permission.check_seq_permissions(current_user.id, seq):
            can_edit = True
        # NOTE(review): ownership flags are evaluated independently of the
        # permission check; nesting reconstructed from flattened source -
        # confirm.
        if seq.owner == user:
            self_assigned = True
        elif seq.owner != '' and seq.owner is not None:
            other_user = True

    owner = (
        ''
        if seq.owner is None
        else f"{seq.owner.first_name} {seq.owner.last_name}, {seq.owner.affiliation}"
    )

    other_names = ', '.join(seq.other_names)
    other_identifiers = ', '.join(seq.other_identifiers)

    enzyme_type_full = EnzymeType.objects(
        enzyme_type=seq.enzyme_type)[0].full_name

    # Missing tags / pdb are sent as empty strings rather than None.
    n_tag = '' if seq.n_tag is None else seq.n_tag
    c_tag = '' if seq.c_tag is None else seq.c_tag
    pdb = '' if seq.pdb is None else seq.pdb

    result = {
        'enzyme_type': seq.enzyme_type,
        'enzyme_name': seq.enzyme_name,
        'sequence': seq.sequence,
        'sequence_unavailable': seq.sequence_unavailable,
        'n_tag': n_tag,
        'c_tag': c_tag,
        'accession': seq.accession,
        'other_identifiers': other_identifiers,
        'pdb': pdb,
        'mutant_of': seq.mutant_of,
        'sequences': seq_array,
        'notes': seq.notes,
        'bioinformatics_ignore': seq.bioinformatics_ignore,
        'can_edit': can_edit,
        'self_assigned': self_assigned,
        'owner_is_another_user': other_user,
        'other_names': other_names,
        'owner': owner,
        'enzyme_type_full': enzyme_type_full,
    }
    return jsonify(result=result)