def delete_sequence():
    """Delete the sequence named in the ``to_delete`` form field, if unused.

    The sequence is deleted only when no Activity record carries its enzyme
    name and no other Sequence lists it as ``mutant_of``.  Otherwise every
    blocking reference is reported in ``issues`` and nothing is deleted.

    Returns:
        The ``jsonify`` response wrapping ``{'status', 'msg', 'issues'}``.
    """
    to_delete = request.form['to_delete']

    # .first() yields None for an unknown name instead of raising IndexError.
    seq = Sequence.objects(enzyme_name=to_delete).first()
    if seq is None:
        result = {
            'status': 'danger',
            'msg': 'Could not delete',
            'issues': [f"No sequence named {to_delete} was found"],
        }
        return jsonify(result=result)

    issues = []

    # One issue per distinct citation, in first-seen order.
    cited_papers = []
    for act in Activity.objects(enzyme_name=to_delete):
        if act.short_citation not in cited_papers:
            cited_papers.append(act.short_citation)
    issues.extend(
        f"Sequence is recorded in activity data for {paper}"
        for paper in cited_papers
    )

    issues.extend(
        f"Sequence is a parent of mutant {mut.enzyme_name}"
        for mut in Sequence.objects(mutant_of=to_delete)
    )

    if issues:
        status, msg = 'danger', 'Could not delete'
    else:
        seq.delete()
        status, msg = 'success', 'Sequence deleted'

    result = {'status': status, 'msg': msg, 'issues': issues}
    return jsonify(result=result)
def merge_sequences():
    """Merge the ``to_merge`` sequence into the ``merge_with`` sequence.

    Both names are read from the request form.  On success the papers of
    ``to_merge`` are added to ``merge_with``, activities recorded under
    ``to_merge`` are renamed, ``to_merge`` is deleted and its name is kept
    in ``other_names`` of the surviving sequence.

    Returns:
        The ``jsonify`` response wrapping ``{'status', 'msg', 'issues'}``.
    """
    to_merge = request.form['to_merge']
    merge_with = request.form['merge_with']

    def _failure(issue):
        result = {
            'status': 'danger',
            'msg': 'Could not merge sequences',
            'issues': [issue],
        }
        return jsonify(result=result)

    if to_merge == merge_with:
        return _failure('Cannot merge with self')

    # .first() yields None for an unknown name instead of raising IndexError.
    seq = Sequence.objects(enzyme_name=to_merge).first()
    seq_merge = Sequence.objects(enzyme_name=merge_with).first()
    for name, found in ((to_merge, seq), (merge_with, seq_merge)):
        if found is None:
            return _failure(f"No sequence named {name} was found")

    if seq.enzyme_type != seq_merge.enzyme_type:
        return _failure('Enzyme types must be the same')

    # Skip papers the target already lists so the merge adds no duplicates.
    for paper in seq.papers:
        if paper not in seq_merge.papers:
            seq_merge.papers.append(paper)

    for act in Activity.objects(enzyme_name=to_merge):
        act.enzyme_name = seq_merge.enzyme_name
        act.save()

    seq.delete()

    if to_merge not in seq_merge.other_names:
        seq_merge.other_names.append(to_merge)
    seq_merge.save()
    update_seq_papers_status(merge_with)

    result = {'status': 'success', 'msg': 'Sequences merged', 'issues': []}
    return jsonify(result=result)
def inject_login_mode():
    """Build the dictionary of login and notification values to inject.

    ``login_mode`` and the (initially empty) notification entries are always
    present.  For an authenticated user the dictionary is extended with the
    user's enzyme teams / champion enzyme types (when the matching role is
    held), the contributor's outstanding paper and sequence counts, and the
    per-enzyme-type notification counts.

    Returns:
        dict: the values to inject.
    """
    inject_dict = {
        'login_mode': app.config['USE_EMAIL_CONFIRMATION'],
        'total_team_notifications': 0,
        'team_notifications': {},
        'champ_seq_notifications': {},
        'champ_notifications': {},
    }

    if not current_user.is_authenticated:
        return inject_dict

    def _sequence_missing():
        # No sequence stored, and not flagged as unavailable.
        return (Q(sequence=None) | Q(sequence='')) & Q(sequence_unavailable__ne=True)

    user = User.objects(id=current_user.id).select_related()[0]

    if user.has_role('enzyme_teams') and user.enzyme_teams is not None:
        inject_dict['enzyme_teams'] = [
            enz_type_obj.enzyme_type for enz_type_obj in user.enzyme_teams
        ]

    if user.has_role('enzyme_champion') and user.enzyme_champion is not None:
        inject_dict['enzyme_champion'] = [
            enz_type_obj.enzyme_type for enz_type_obj in user.enzyme_champion
        ]

    # .count() asks the database for the number of matches; len() on a
    # QuerySet would first load every matching document.
    if user.has_role('contributor'):
        inject_dict['user_papers_need_data'] = Paper.objects(
            Q(owner=user)
            & (Q(status='Data required')
               | Q(status='Enzymes need protein sequences')
               | Q(status='Issues need to be resolved'))
        ).count()
        inject_dict['user_seqs_need_data'] = Sequence.objects(
            Q(owner=user) & _sequence_missing()
        ).count()

    for enz_type in inject_dict.get('enzyme_teams', []):
        num_papers = Paper.objects(
            Q(tags=enz_type)
            & Q(owner=None)
            & (Q(status='Data required')
               | Q(status='Enzymes need protein sequences'))
        ).count()
        inject_dict['team_notifications'][enz_type] = num_papers
        inject_dict['total_team_notifications'] += num_papers

    for enz_type in inject_dict.get('enzyme_champion', []):
        num_papers = Paper.objects(
            Q(tags=enz_type) & Q(status='Complete - Awaiting review')
        ).count()
        num_seqs = Sequence.objects(
            Q(enzyme_type=enz_type) & _sequence_missing()
        ).count()
        inject_dict['champ_notifications'][enz_type] = num_papers
        inject_dict['champ_seq_notifications'][enz_type] = num_seqs
        inject_dict['total_team_notifications'] += num_papers + num_seqs

    return inject_dict
def clear_databases():
    """Drop the EnzymeType, Sequence, Paper, Reaction, Activity and Molecule collections."""
    collections = (EnzymeType, Sequence, Paper, Reaction, Activity, Molecule)
    for collection in collections:
        collection.drop_collection()
def set_choices(self):
    """Fill the enzyme type and enzyme name choices from the stored sequences.

    Each choice is a ``(value, label)`` pair with identical members, and
    'All' is always the first option.
    """
    type_options = ['All'] + list(Sequence.objects().distinct('enzyme_type'))
    self.enzyme_type.choices = list(zip(type_options, type_options))

    name_options = ['All'] + list(Sequence.objects().distinct('enzyme_name'))
    self.enzyme_name.choices = list(zip(name_options, name_options))
def task_search_for_orphan_enzymes():
    """Create a Sequence entry for every activity enzyme name that has none.

    The new entry takes its enzyme type from the first Activity found with
    that enzyme name.
    """
    for name in set(Activity.objects().distinct('enzyme_name')):
        if len(Sequence.objects(enzyme_name=name)) != 0:
            continue

        enzyme_type = Activity.objects(enzyme_name=name)[0].enzyme_type
        Sequence(enzyme_name=name, enzyme_type=enzyme_type).save()
        print(f"found orphan enzyme, added sequence entry for {name} - {enzyme_type}")
def seqs_of_type(enzyme_type):
    """Return the sorted enzyme names of an enzyme type, or of all types for 'All'.

    Returns:
        dict: ``{'sequences': {name: name, ...}}`` with names in sorted order.
    """
    if enzyme_type == 'All':
        matching = Sequence.objects()
    else:
        matching = Sequence.objects(enzyme_type=enzyme_type)

    names = sorted(matching.distinct('enzyme_name'))
    return {'sequences': {name: name for name in names}}
def submission_main_page(paper_id):
    """Render the data submission page for one paper.

    Redirects (with a flashed message) to the add-paper page when the paper
    does not exist or the current user may not edit it.
    """
    user = user_datastore.get_user(current_user.id)

    matches = Paper.objects(id=paper_id).select_related()
    if len(matches) == 0:
        flash('Paper has not been added yet, please add to the database first', 'fail')
        return redirect(url_for("biocatdb.launch_add_paper"))

    paper = matches[0]
    if not check_permission.check_paper_permission(current_user.id, paper):
        flash('No access to edit this entry', 'fail')
        return redirect(url_for("biocatdb.launch_add_paper"))

    paper_data = get_paper_data(paper, user)
    activity_data = get_activity_data(paper)

    reactions = list(Reaction.objects().distinct('name'))
    enzyme_names = list(Sequence.objects(papers=paper).distinct('enzyme_name'))
    enzyme_types = list(EnzymeType.objects().distinct('enzyme_type'))
    enzyme_data = sequence_table.get_enzyme_data(db.Q(papers=paper))

    enzyme_types_in_paper = list(Sequence.objects(papers=paper).distinct('enzyme_type'))
    reactions_in_paper = list(
        Reaction.objects(enzyme_types__in=enzyme_types_in_paper).distinct('name'))
    reactions_in_activity = list(Activity.objects(paper=paper).distinct('reaction'))

    status_dict = get_status(paper, user)
    comments = get_comments(paper, user)
    paper_molecules = get_paper_molecules(paper)

    admin_panel = bool(current_user.has_role('admin'))
    admin_dict = get_admin_dict(paper) if admin_panel else {}

    # Reactions already used in the paper's activity come first, then the
    # reactions of the paper's enzyme types, then every remaining reaction.
    reactions_ordered = reactions_in_activity + [
        name for name in reactions_in_paper if name not in reactions_in_activity
    ]
    reactions_ordered.extend(
        [name for name in reactions_in_paper if name not in reactions_ordered])
    reactions_ordered.extend(
        [name for name in reactions if name not in reactions_ordered])

    template_context = {
        'paper': paper_data,
        'activity_data': activity_data,
        'seq_data': enzyme_data,
        'seq_button_columns': ['edit', 'remove', 'papers'],
        'status': status_dict,
        'seq_table_height': '60vh',
        'enzyme_types': enzyme_types,
        'show_header_filters': False,
        'include_owner': True,
        'lock_enz_type': 'false',
        'reactions': reactions_ordered,
        'enzyme_names': enzyme_names + ['Chemical'],
        'doi': paper.doi,
        'comments': comments,
        'paper_molecules': paper_molecules,
        'admin_panel': admin_panel,
        'admin_dict': admin_dict,
        'enzyme_reactions': reactions_in_paper,
    }
    return render_template('data_submission/submission_main_page.html',
                           **template_context)
def load_sequence_papers():
    """Return the papers linked to the sequence named in the ``name`` form field.

    Each paper is described by its id, short citation, doi, title and a
    ``can_edit`` flag given as the string "True" or "False".
    """
    enzyme_name = request.form['name']
    seq = Sequence.objects(enzyme_name=enzyme_name).select_related()[0]

    def _can_edit():
        # Anonymous users never get edit rights; the permission helper is
        # only consulted for authenticated users.
        if current_user.is_authenticated and check_permission.check_seq_permissions(current_user.id, seq):
            return "True"
        return "False"

    papers_list = [
        {
            '_id': str(paper.id),
            'short_citation': paper.short_citation,
            'doi': paper.doi,
            'title': paper.title,
            'can_edit': _can_edit(),
        }
        for paper in seq.papers
    ]

    return jsonify(result={'papers': papers_list})
def clear_all_bioinformatics_data():
    """Reset all bioinformatics state in the database and on disk.

    Every enzyme type is set to 'Idle', every sequence has ``blast`` and
    ``alignments_made`` cleared, the UniRef50, SSN_record, UniRef90,
    Alignment and SeqSimNet collections are dropped, and the ``ssn`` and
    ``all_by_all_blast`` analysis directories are emptied.

    Returns:
        The ``jsonify`` response wrapping a success result.
    """
    for enz in EnzymeType.objects():
        enz.bioinformatics_status = 'Idle'
        enz.save()

    for seq in Sequence.objects():
        seq.blast = None
        seq.alignments_made = None
        seq.save()

    for collection in (UniRef50, SSN_record, UniRef90, Alignment, SeqSimNet):
        collection.drop_collection()

    project_root = str(Path(__file__).parents[3])
    analysis_dirs = [
        project_root + '/analysis/analysis_data/ssn',
        project_root + '/analysis/analysis_data/all_by_all_blast',
    ]

    # A directory may not exist yet (e.g. on a fresh install): only remove
    # what is there, then recreate it together with any missing parents.
    for directory in analysis_dirs:
        if os.path.isdir(directory):
            shutil.rmtree(directory)
    for directory in analysis_dirs:
        os.makedirs(directory, exist_ok=True)

    print('ALL BIOINFORMATICS DATA DELETED')

    result = {'status': 'success', 'msg': "Done", 'issues': []}
    return jsonify(result=result)
def get_enzyme_data(query):
    """Return table-ready dictionaries for the sequences matching ``query``.

    The raw pymongo dictionaries are post-processed in place: ``_id`` becomes
    a string, ``sequence_unavailable`` becomes a string with 'False' blanked,
    ``papers`` becomes the number of papers, ``owner`` becomes
    "first_name last_name" (or '' when there is none) and sequences longer
    than 50 characters are truncated with a trailing "...".

    Args:
        query: the query passed to ``Sequence.objects``.

    Returns:
        list[dict]: one dictionary per sequence, ordered by enzyme type.
    """
    seq_fields = [
        'id', 'enzyme_type', 'enzyme_name', 'sequence', 'sequence_unavailable',
        'accession', 'pdb', 'mutant_of', 'notes', 'papers', 'owner',
        'other_names'
    ]
    enzyme_data = list(
        Sequence.objects(query).only(*seq_fields)
        .order_by('enzyme_type').as_pymongo())

    # Raw documents only contain the keys actually stored, so every field is
    # read with .get() rather than assumed to be present.
    owners_dict = {}  # owner id (str) -> display name, one user lookup each
    for data in enzyme_data:
        data['_id'] = str(data['_id'])
        data['sequence_unavailable'] = str(
            data.get('sequence_unavailable', False)).replace('False', '')
        data['papers'] = len(data.get('papers') or [])

        owner_ref = data.get('owner')
        if owner_ref is None:
            data['owner'] = ''
        else:
            owner_id = str(owner_ref)
            if owner_id not in owners_dict:
                # A dangling reference shows as an empty owner instead of
                # raising IndexError.
                owner = User.objects(id=owner_ref).first()
                if owner is None:
                    owners_dict[owner_id] = ''
                else:
                    owners_dict[owner_id] = f"{owner.first_name} {owner.last_name}"
            data['owner'] = owners_dict[owner_id]

        sequence = data.get('sequence')
        if sequence is not None and len(sequence) > 50:
            data['sequence'] = sequence[0:50] + "..."

    return enzyme_data
def papers_with_orhpan_sequences():
    """Render the papers table restricted to papers with orphan sequences.

    A paper qualifies when one of its activities uses an enzyme name for
    which no Sequence entry exists.
    """
    title = "Papers with orphan sequences"

    paper_ids = []
    for name in set(Activity.objects().distinct('enzyme_name')):
        if len(Sequence.objects(enzyme_name=name)) != 0:
            continue
        first_activity = Activity.objects(enzyme_name=name)[0]
        paper_ids.append(first_activity.paper)

    paper_query = Paper.objects(id__in=paper_ids)
    paper_query = paper_query.only(*papers_table.PAPERS_TABLE_FIELDS)
    papers_data = list(paper_query.order_by('-status').as_pymongo())
    papers_data = papers_table.process_papers_dict(papers_data, show_owner=False)

    return render_template(
        'edit_tables/edit_papers.html',
        papers_data=papers_data,
        papers_table_height='80vh',
        papers_button_columns=['delete', 'edit'],
        show_owner=True,
        title=title,
        row_click_modal=False,
    )
def nodes_not_present(self, only_biocatdb=False, max_num=None):
    """ Return a list of enzymes which are not in the ssn

    Args:
        only_biocatdb: when True, only Sequence objects are considered;
            otherwise the UniRef50 entries of the enzyme type are included.
        max_num: when not None, the returned list is cut to ``[:max_num]``.

    Returns:
        list: sequence objects whose ``enzyme_name`` is not a graph node and
        whose sequence is longer than 12 characters.
    """
    t0 = time.time()

    # Get a list of all sequence objects of enzyme type
    sequences = Sequence.objects(
        db.Q(enzyme_type=self.enzyme_type)
        & db.Q(sequence__ne="")
        & db.Q(sequence__ne=None)
        & db.Q(sequence_unavailable__ne=True))
    seq_objects = list(sequences)
    if only_biocatdb is not True:
        seq_objects += list(UniRef50.objects(enzyme_type=self.enzyme_type_obj))

    # Snapshot the node names once; rebuilding a list of all nodes for every
    # sequence made this loop quadratic.
    existing_nodes = set(self.graph.nodes)

    # Get sequences not in nodes
    not_in_nodes = [
        seq_obj for seq_obj in seq_objects
        if seq_obj.enzyme_name not in existing_nodes
        and seq_obj.sequence is not None
        and len(seq_obj.sequence) > 12
    ]

    # Return only up to the maximum number of sequences
    if max_num is not None:
        not_in_nodes = not_in_nodes[0:max_num]

    t1 = time.time()
    self.log(
        f"Identified {len(not_in_nodes)} {self.enzyme_type} proteins which need adding, in {round(t1 - t0, 1)} seconds"
    )
    return not_in_nodes
def find_tags():
    """Strip known N- and C-terminal tags from every stored sequence.

    Tags are tried longest first.  A matching N-terminal tag is recorded in
    ``n_tag`` and removed, and an 'M' is prepended when the remainder does
    not start with one.  A matching C-terminal tag is recorded in ``c_tag``
    and removed.  Sequences with no sequence text are left untouched.

    Returns:
        The ``jsonify`` response wrapping a success result.
    """
    # Empty or missing tag strings cannot be matched (and None breaks the
    # length sort), so they are dropped up front.
    n_tags = sorted(
        (tag for tag in Tag.objects(n_term=True).distinct('seq') if tag),
        key=len, reverse=True)
    c_tags = sorted(
        (tag for tag in Tag.objects(c_term=True).distinct('seq') if tag),
        key=len, reverse=True)
    print(n_tags)

    for seq in Sequence.objects():
        if not seq.sequence:
            # Nothing to strip; slicing None would raise TypeError.
            continue

        for n_tag in n_tags:
            if seq.sequence.startswith(n_tag):
                seq.n_tag = n_tag
                seq.sequence = seq.sequence[len(n_tag):]
                # startswith is also safe when the tag was the whole sequence.
                if not seq.sequence.startswith('M'):
                    seq.sequence = 'M' + seq.sequence
                print(f"Found N term: {n_tag}")
                print(f"Removed from seq: {seq.sequence}")

        for c_tag in c_tags:
            if seq.sequence.endswith(c_tag):
                seq.c_tag = c_tag
                seq.sequence = seq.sequence[:-len(c_tag)]
                print(f"Found C term: {c_tag}")
                print(f"Removed from seq: {seq.sequence}")

        seq.save()

    result = {'status': 'success', 'msg': 'Searching for tags', 'issues': []}
    return jsonify(result=result)
def task_check_ssn_status():
    """Remove duplicate SSN records and queue SSN jobs that are due.

    Steps:
      1. For every enzyme type, delete all but the first SSN record.
      2. If any of the blast, process-blasts or alignment queues has jobs,
         print the queue lengths and stop.
      3. Queue ``task_expand_ssn`` for every incomplete SSN record whose
         enzyme type has completed bioinformatics and has UniRef50 entries.
      4. Queue ``task_expand_ssn`` for every completed enzyme type that has
         no SSN record yet but does have sequences to put in one.
    """
    for enzyme_type in EnzymeType.objects():
        ssn_query = list(SSN_record.objects(enzyme_type=enzyme_type))
        if len(ssn_query) > 1:
            print(
                f'Warning - multiple ssn records for {enzyme_type} - deleting extras'
            )
            for extra_record in ssn_query[1:]:
                extra_record.delete()

    # Read each queue once so the report below shows the same numbers the
    # decision was based on.
    num_blast = len(current_app.blast_queue.jobs)
    num_process = len(current_app.process_blasts_queue.jobs)
    num_alignment = len(current_app.alignment_queue.jobs)

    if num_blast + num_process + num_alignment != 0:
        print(f"Length blast queue = {num_blast}")
        print(f"Length process blast queue = {num_process}")
        print(f"Length alignment queue = {num_alignment}")
        return

    print('Checking ssn status')

    for ssn_r in SSN_record.objects().select_related():
        if ssn_r.status != 'Complete' and ssn_r.enzyme_type.bioinformatics_status == 'Complete':
            if len(UniRef50.objects(enzyme_type=ssn_r.enzyme_type)) != 0:
                enzyme_type = ssn_r.enzyme_type.enzyme_type
                job_name = f"{enzyme_type}_expand_ssn"
                current_app.alignment_queue.enqueue(
                    ssn_tasks.task_expand_ssn, enzyme_type, job_id=job_name)
                print(f'Queued SSN job for {enzyme_type}')

    # The loop below creates no SSN records itself, so this lookup is
    # loop-invariant and is done once instead of once per enzyme type.
    types_with_ssn = SSN_record.objects().distinct('enzyme_type')

    for enz_type_obj in EnzymeType.objects():
        if enz_type_obj.bioinformatics_status != 'Complete':
            continue
        if enz_type_obj in types_with_ssn:
            continue

        unirefs = UniRef50.objects(enzyme_type=enz_type_obj)
        biocatdb_seqs = [
            seq for seq in Sequence.objects(
                db.Q(enzyme_type=enz_type_obj.enzyme_type)
                & db.Q(bioinformatics_ignore__ne=True))
            if seq.sequence != '' and seq.sequence is not None
        ]

        if len(unirefs) + len(biocatdb_seqs) != 0:
            print(
                f"No SSN for {enz_type_obj.enzyme_type}, but blasts are complete and sequences present.. creating SSN."
            )
            job_name = f"{enz_type_obj.enzyme_type}_expand_ssn"
            current_app.alignment_queue.enqueue(
                ssn_tasks.task_expand_ssn,
                enz_type_obj.enzyme_type,
                job_id=job_name)
def set_up_blast_job(enzyme_name):
    """Run a blast for the named sequence and queue the parsing of its output.

    Nothing is run or queued when the sequence already has a ``blast`` value.
    """
    current_app.app_context().push()

    seq = Sequence.objects(db.Q(enzyme_name=enzyme_name))[0]
    if seq.blast is not None:
        return

    print(f'Starting blast for sequence: {seq.enzyme_name}')
    blast_output = BlastRunner().run(seq.sequence)
    current_app.process_blasts_queue.enqueue(
        parse_blast_results, enzyme_name, blast_output)
def task_delete_sequences_no_paper():
    """Delete every sequence that has no papers and print how many were removed."""
    print('Deleting sequences with no papers')

    deleted = 0
    for seq in Sequence.objects():
        if len(seq.papers) > 0:
            continue
        seq.delete()
        deleted += 1

    print(f"Deleted {deleted} sequences")
def bioinformatics_admin_page():
    """Render the bioinformatics admin page.

    Per enzyme type the page receives the bioinformatics status, the status
    of its SSN record ('None' when there is no record), the number of
    Sequence and UniRef50 entries, and the percentage of sequences that have
    a ``blast`` value.  The count of the blast queue's started-job registry
    is passed as ``num_jobs``.
    """
    enzyme_types = EnzymeType.objects().order_by('enzyme_type')

    biostat, ssn = {}, {}
    for enz_type_obj in enzyme_types:
        type_name = enz_type_obj.enzyme_type
        biostat[type_name] = enz_type_obj.bioinformatics_status
        records = SSN_record.objects(enzyme_type=enz_type_obj)
        ssn[type_name] = records[0].status if len(records) != 0 else 'None'

    enzyme_numbers = {}
    for enz_type_obj in enzyme_types:
        type_name = enz_type_obj.enzyme_type
        enzyme_numbers[type_name] = {
            'biocatdb': len(Sequence.objects(enzyme_type=type_name)),
            'uniref': len(UniRef50.objects(enzyme_type=enz_type_obj)),
        }

    blasted_percent = {}
    for enz_type_obj in enzyme_types:
        type_name = enz_type_obj.enzyme_type
        seqs = Sequence.objects(enzyme_type=type_name)
        num_blasted = sum(1 for seq in seqs if seq.blast is not None)
        if num_blasted != 0:
            blasted_percent[type_name] = round((num_blasted / len(seqs)) * 100, 0)
        else:
            blasted_percent[type_name] = 0

    num_jobs = StartedJobRegistry(queue=current_app.blast_queue).count

    return render_template(
        'bioinformatics/bioinformatics_admin.html',
        blasted_enz_types=blasted_percent,
        biostat=biostat,
        ssn=ssn,
        num_jobs=num_jobs,
        enzyme_numbers=enzyme_numbers,
    )
def parse_blast_results(enzyme_name, output):
    """Parse a blast output for the named sequence and queue a status check.

    When ``output`` is not None it is parsed and the sequence's ``blast``
    field is set to the current time.  A ``check_blast_status`` job for the
    sequence's enzyme type is queued either way.
    """
    current_app.app_context().push()
    seq = Sequence.objects(db.Q(enzyme_name=enzyme_name))[0]

    has_output = output is not None
    if has_output:
        BlastParser().parse(output, seq)
        seq.blast = datetime.datetime.now()
        seq.save()
        print(f'Finished blast of sequence {seq.enzyme_name}')

    current_app.task_queue.enqueue(check_blast_status, seq.enzyme_type)
def unassign_seqs_in_paper(user, paper):
    """Clear the owner of the paper's sequences unless ``user`` owns one of their papers.

    For each sequence linked to ``paper``, the owner is set to None only
    when none of the sequence's papers is owned by ``user``.
    """
    for seq in Sequence.objects(papers=paper).select_related():
        if any(seq_paper.owner == user for seq_paper in seq.papers):
            continue
        seq.owner = None
        seq.save()
def filter_out_mutants(self):
    """Remove the graph nodes named after mutant sequences of this enzyme type.

    A sequence counts as a mutant when ``mutant_of`` is neither '' nor None.
    The elapsed time is written to the log.
    """
    started = time.time()

    has_parent = db.Q(mutant_of__ne='') & db.Q(mutant_of__ne=None)
    mutant_names = Sequence.objects(
        db.Q(enzyme_type=self.enzyme_type) & has_parent
    ).distinct('enzyme_name')

    for name in list(mutant_names):
        if name not in self.graph.nodes:
            continue
        self.graph.remove_node(name)

    finished = time.time()
    self.log(f'Filtered mutants from graph in {round(finished - started, 1)} seconds')
def update_seq_papers_status(enzyme_name):
    """Re-tag and recompute the status of every paper of the named sequence."""
    seq = Sequence.objects(enzyme_name=enzyme_name).select_related()[0]

    for paper in seq.papers:
        papers_functions.tag_paper_with_enzyme_types(paper)

        # Each helper returns (text, progress); only the progress value
        # feeds into the overall status.
        _, paper_progress = paper_status.paper_metadata_status(paper)
        _, sequence_progress = paper_status.sequences_status(paper)
        _, activity_progress = paper_status.activity_status(paper)

        new_status, _ = paper_status.get_status(
            paper_progress, sequence_progress, activity_progress, paper)

        paper.status = new_status
        paper.save()
def admin_all_seqs_to_owner():
    """Give every sequence of the paper in ``paper_id`` (form field) the paper's owner."""
    paper = Paper.objects(id=request.form['paper_id']).select_related()[0]

    for seq in Sequence.objects(db.Q(papers=paper)):
        seq.owner = paper.owner
        seq.save()

    return jsonify(result={
        'status': 'success',
        'msg': 'All sequences assigned to paper owner',
        'issues': [],
    })
def _make_db_fasta(self):
    """ Write a fasta file holding all the sequences of an enzyme type

    Stored sequences (non-empty and not marked unavailable) come first,
    followed by the UniRef50 entries of the enzyme type.  Newlines inside a
    sequence are removed before it is written.
    """
    biocatdb_seqs = Sequence.objects(
        db.Q(enzyme_type=self.enzyme_type)
        & db.Q(sequence__ne="")
        & db.Q(sequence__ne=None)
        & db.Q(sequence_unavailable__ne=True))
    uniref_seqs = UniRef50.objects(db.Q(enzyme_type=self.enzyme_type_obj))

    fasta_path = f"{self.directory}/{self.enz_type_dir_name}.fasta"
    with open(fasta_path, 'w') as file:
        for record in list(biocatdb_seqs) + list(uniref_seqs):
            header = record.enzyme_name
            residues = record.sequence.replace('\n', '')
            file.write(f'>{header}\n')
            file.write(f"{residues}\n")
def clear_empty_ssns():
    """Delete every SSN record whose enzyme type has no sequences at all.

    A record is kept when its enzyme type has at least one UniRef50 entry or
    one stored sequence that is non-empty and not marked unavailable.
    """
    for ssn_r in SSN_record.objects().select_related():
        enzyme_type_obj = ssn_r.enzyme_type

        num_unirefs = len(UniRef50.objects(enzyme_type=enzyme_type_obj))
        num_biocat_seqs = len(Sequence.objects(
            db.Q(enzyme_type=enzyme_type_obj.enzyme_type)
            & db.Q(sequence__ne="")
            & db.Q(sequence__ne=None)
            & db.Q(sequence_unavailable__ne=True)))

        if num_unirefs + num_biocat_seqs == 0:
            ssn_r.delete()

    return jsonify(result={
        'status': 'success',
        'msg': 'Empty SSNs removed',
        'issues': [],
    })
def convert_to_pdb_schema():
    """Move the accession of structure-flagged sequences into the ``pdb`` field.

    For every sequence with ``structure == True`` the accession is copied to
    ``pdb``, the accession is blanked and the structure flag is cleared.
    """
    for seq in Sequence.objects():
        if seq.structure == True:
            seq.pdb, seq.accession = seq.accession, ''
            seq.structure = None
            seq.save()

    return jsonify(result={
        'status': 'success',
        'msg': 'Coverting to pdb schema',
        'issues': [],
    })
def task_add_sequence_data(df):
    """Copy sequence text and ownership from a DataFrame onto stored sequences.

    For every row whose ``enzyme_name`` matches a stored Sequence, the row's
    ``sequence`` is stored when it holds a value, and ``added_by`` / ``owner``
    are set to each user whose usernames match the row's ``added_by`` (the
    last matching user wins).

    Args:
        df: DataFrame with ``enzyme_name``, ``sequence`` and ``added_by``
            columns.
    """
    import pandas as pd

    # Usernames do not depend on the row, so work them out once per user.
    users_with_names = [(user, get_usernames(user)) for user in User.objects()]

    for _, row in df.iterrows():
        seq = Sequence.objects(enzyme_name=row['enzyme_name']).first()
        if seq is None:
            continue

        # Missing cells arrive as NaN, which is neither '' nor None and
        # would otherwise be stored as the string 'nan'.
        raw_sequence = row['sequence']
        if not pd.isna(raw_sequence) and raw_sequence != '':
            seq.sequence = str(raw_sequence)

        for user, usernames in users_with_names:
            if does_username_match(usernames, row['added_by']):
                seq.added_by = user
                seq.owner = user

        seq.save()
def set_all_seqs_to_reblast():
    """Clear ``blast`` on every sequence and mark every enzyme type 'Queued for update'."""
    for seq in Sequence.objects():
        seq.blast = None
        seq.save()

    for enz_type_obj in EnzymeType.objects():
        enz_type_obj.bioinformatics_status = 'Queued for update'
        enz_type_obj.save()

    return jsonify(result={
        'status': 'success',
        'msg': 'Bioinformatics status reset',
        'issues': [],
    })
def change_sequence_assign():
    """Assign or unassign the current user as owner of a sequence.

    Reads ``original_name`` and ``self_assigned`` from the request form.  An
    owned sequence is released when the current user owns it and
    ``self_assigned`` is false; an unowned sequence is claimed when
    ``self_assigned`` is true.  Any other combination leaves it unchanged.
    """
    original_name = request.form['original_name']
    self_assigned = bool(strtobool(request.form['self_assigned']))
    print(self_assigned)

    user = user_datastore.get_user(current_user.id)
    seq = Sequence.objects(enzyme_name=original_name)[0]

    releasing = seq.owner == user and not self_assigned
    claiming = not releasing and seq.owner is None and self_assigned

    if releasing or claiming:
        seq.owner = user if claiming else None
        seq.save()

    return jsonify(result={
        'status': 'success',
        'msg': 'Sequence assigned',
        'issues': [],
    })
def change_enzyme_type_name(enz_type, new_name):
    """Rename an enzyme type on every sequence, reaction and activity using it.

    The ``enz_type`` object itself is not modified here; only documents that
    store the old name as a string are updated.

    Args:
        enz_type: object whose ``enzyme_type`` attribute is the current name.
        new_name: the name to replace it with.
    """
    print("Updating enzyme types..")
    old_name = enz_type.enzyme_type

    for seq in Sequence.objects(enzyme_type=old_name):
        seq.enzyme_type = new_name
        seq.save()

    for reaction in Reaction.objects(enzyme_types=old_name):
        reaction.enzyme_types.remove(old_name)
        # Do not list the new name twice if the reaction already had it.
        if new_name not in reaction.enzyme_types:
            reaction.enzyme_types.append(new_name)

        # A reaction may have no cofactor entry for this enzyme type; only
        # move the entry when there is one, instead of raising KeyError.
        if reaction.cofactors and old_name in reaction.cofactors:
            reaction.cofactors[new_name] = reaction.cofactors.pop(old_name)

        reaction.save()

    for activity in Activity.objects(enzyme_type=old_name):
        activity.enzyme_type = new_name
        activity.save()

    print('..done')