def save_enzyme_type_changes():
    """Persist edits to an enzyme type; renames only when the new name is free."""
    original_name = request.form['original_name']
    new_name = request.form['new_name']
    description = request.form['description']
    full_name = request.form['full_name']
    other_abbreviations = request.form['other_abbreviations']

    # An empty abbreviations field is stored as None, otherwise split on ', '.
    other_abbreviations = other_abbreviations.split(', ') if other_abbreviations != '' else None

    enz_type = EnzymeType.objects(enzyme_type=original_name)[0]
    enz_type.description = description
    enz_type.full_name = full_name
    enz_type.other_abbreviations = other_abbreviations

    if new_name != original_name:
        existing_names = list(EnzymeType.objects().distinct('enzyme_type'))
        if new_name in existing_names:
            # Abort before saving anything - the rename would collide.
            return jsonify(result={'status': 'danger',
                                   'msg': 'New name is already taken',
                                   'issues': []})
        change_enzyme_type_name(enz_type, new_name)
        enz_type.enzyme_type = new_name

    enz_type.save()
    return jsonify(result={'status': 'success', 'msg': 'Changes saved', 'issues': []})
def task_check_ssn_status():
    """Scheduled task: tidy duplicate SSN records, then (only when all work
    queues are idle) queue SSN expansion jobs for enzyme types that need them.
    """
    # Pass 1: each enzyme type should have at most one SSN record - delete extras.
    for enzyme_type in EnzymeType.objects():
        ssn_query = list(SSN_record.objects(enzyme_type=enzyme_type))
        if len(ssn_query) > 1:
            print(
                f'Warning - multiple ssn records for {enzyme_type} - deleting extras'
            )
            for i in range(1, len(ssn_query)):
                ssn_query[i].delete()

    # Only queue new SSN work when the blast/process/alignment queues are all empty.
    if len(current_app.blast_queue.jobs) + len(
            current_app.process_blasts_queue.jobs) + len(
                current_app.alignment_queue.jobs) == 0:
        print('Checking ssn status')
        ssn_records = SSN_record.objects().select_related()

        # Existing SSN records that are stale but whose blasts are done get re-expanded.
        for ssn_r in ssn_records:
            if ssn_r.status != 'Complete' and ssn_r.enzyme_type.bioinformatics_status == 'Complete':
                # Only worth expanding when there are UniRef50 members to work from.
                if len(UniRef50.objects(enzyme_type=ssn_r.enzyme_type)) != 0:
                    enzyme_type = ssn_r.enzyme_type.enzyme_type
                    job_name = f"{enzyme_type}_expand_ssn"
                    current_app.alignment_queue.enqueue(
                        ssn_tasks.task_expand_ssn, enzyme_type, job_id=job_name)
                    print(f'Queued SSN job for {enzyme_type}')

        # Enzyme types with finished blasts but no SSN record at all get a fresh SSN,
        # provided they have at least one usable sequence.
        for enz_type_obj in EnzymeType.objects():
            if enz_type_obj.bioinformatics_status == 'Complete':
                if enz_type_obj not in SSN_record.objects().distinct(
                        'enzyme_type'):
                    unirefs = UniRef50.objects(enzyme_type=enz_type_obj)
                    biocatdb_seqs = list(
                        Sequence.objects(
                            db.Q(enzyme_type=enz_type_obj.enzyme_type)
                            & db.Q(bioinformatics_ignore__ne=True)))
                    # Keep only sequences that actually have sequence data.
                    biocatdb_seqs = [
                        seq for seq in biocatdb_seqs
                        if seq.sequence != '' and seq.sequence is not None
                    ]
                    if len(unirefs) + len(biocatdb_seqs) != 0:
                        print(
                            f"No SSN for {enz_type_obj.enzyme_type}, but blasts are complete and sequences present.. creating SSN."
                        )
                        job_name = f"{enz_type_obj.enzyme_type}_expand_ssn"
                        current_app.alignment_queue.enqueue(
                            ssn_tasks.task_expand_ssn,
                            enz_type_obj.enzyme_type,
                            job_id=job_name)
    else:
        # Queues still busy - just report their sizes.
        print(f"Length blast queue = {len(current_app.blast_queue.jobs)}")
        print(
            f"Length process blast queue = {len(current_app.process_blasts_queue.jobs)}"
        )
        print(
            f"Length alignment queue = {len(current_app.alignment_queue.jobs)}"
        )
def load_into_mongo(yaml_dict):
    """Create EnzymeType and Reaction documents from a parsed reactions yaml dict."""
    for rxn_name in yaml_dict:
        rxn_spec = yaml_dict[rxn_name]

        # Ensure every enzyme type referenced by this reaction exists.
        enzymes = []
        for enz in list(rxn_spec['enzymes'].keys()):
            enzymes.append(enz)
            if len(EnzymeType.objects(enzyme_type=enz)) == 0:
                EnzymeType(enzyme_type=enz, description='').save()

        # Only create the reaction if it is not already in the database.
        if len(Reaction.objects(name=rxn_name)) == 0:
            reaction = Reaction(name=rxn_name,
                                smarts=rxn_spec['smarts'],
                                enzyme_types=enzymes,
                                cofactors=rxn_spec['enzymes'],
                                positive_tests=rxn_spec['positive_tests'],
                                negative_tests=rxn_spec['negative_tests'],
                                type=rxn_spec['type'])
            # Optional flags.
            if 'experimental' in rxn_spec:
                reaction.experimental = bool(rxn_spec['experimental'])
            if 'two_step' in rxn_spec:
                reaction.two_step = bool(rxn_spec['two_step'])
            reaction.save()
def high_importance_papers():
    """Render the high-importance papers page, with paper tables grouped by tag."""
    hi_papers = Paper.objects(high_importance=True).select_related()
    enzyme_types = EnzymeType.objects()

    # Collect the unique tags as strings.
    # Fix: the original tested membership with the raw tag but appended
    # str(tag), which could produce duplicate entries for non-str tags.
    tags = set()
    for paper in hi_papers:
        for tag in paper.tags:
            tags.add(str(tag))
    tags = sorted(tags)

    # One processed papers table per tag.
    data_by_tag = {}
    for tag in tags:
        hi_q = db.Q(high_importance=True)
        tag_q = db.Q(tags=tag)
        papers_data = list(
            Paper.objects(hi_q & tag_q).only(
                *papers_table.PAPERS_TABLE_FIELDS).order_by(
                    '-status').as_pymongo())
        papers_data = papers_table.process_papers_dict(papers_data, show_owner=False)
        data_by_tag[tag] = papers_data

    # Abbreviation -> full name, for display.
    enzyme_full_names = {}
    for enz_type in enzyme_types:
        enzyme_full_names[enz_type.enzyme_type] = enz_type.full_name

    return render_template('edit_tables/high_importance_papers.html',
                           data_by_tag=data_by_tag,
                           tags=tags,
                           enzyme_full_names=enzyme_full_names)
def _test_cofactors_dict(self, cofactors):
    """Validate a cofactors yaml string, recording any problems.

    Sets self.state to 'danger' and appends a message to self.issues for each
    problem found: unparsable yaml, an empty document, unknown enzyme types,
    or wrongly structured cofactor entries.
    """
    try:
        cofactors_dict = yaml.load(cofactors, Loader=yaml.FullLoader)
    except Exception:
        # Fix: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit.
        self.state = 'danger'
        self.issues.append('Could not load cofactors yaml')
        return

    if cofactors_dict is None:
        self.state = 'danger'
        self.issues.append(
            'At least one entry must be made for enzymes/cofactors')
        return

    # Hoisted out of the loop - the set of known enzyme types doesn't change.
    known_types = list(EnzymeType.objects().distinct('enzyme_type'))

    for enz in list(cofactors_dict.keys()):
        if enz not in known_types:
            self.state = 'danger'
            self.issues.append(f'Enzyme type {enz} is not defined')
        elif not isinstance(cofactors_dict[enz], dict):
            self.state = 'danger'
            self.issues.append(
                f'Cofactors for {enz} are not structured as a dictionary')
        elif 'cofactors_plus' not in cofactors_dict[enz] or 'cofactors_minus' not in cofactors_dict[enz]:
            self.state = 'danger'
            self.issues.append(f'Cofactors must be defined')
        elif not isinstance(cofactors_dict[enz]['cofactors_plus'], list) or not isinstance(
                cofactors_dict[enz]['cofactors_minus'], list):
            self.state = 'danger'
            self.issues.append(f'Cofactors must be given as lists')
def clear_all_bioinformatics_data():
    """Wipe all bioinformatics state: statuses, blast results, derived collections and files."""
    # Reset per-enzyme-type and per-sequence bioinformatics fields.
    for enz in EnzymeType.objects():
        enz.bioinformatics_status = 'Idle'
        enz.save()
    for seq in Sequence.objects():
        seq.blast = None
        seq.alignments_made = None
        seq.save()

    # Drop the derived collections outright.
    for collection in (UniRef50, SSN_record, UniRef90, Alignment, SeqSimNet):
        collection.drop_collection()

    # Recreate the on-disk analysis folders empty.
    analysis_root = str(Path(__file__).parents[3])
    analysis_data_ssn = analysis_root + f'/analysis/analysis_data/ssn'
    analysis_data_aba = analysis_root + f'/analysis/analysis_data/all_by_all_blast'
    shutil.rmtree(analysis_data_ssn)
    shutil.rmtree(analysis_data_aba)
    os.mkdir(analysis_data_ssn)
    os.mkdir(analysis_data_aba)

    print('ALL BIOINFORMATICS DATA DELETED')
    return jsonify(result={'status': 'success', 'msg': f"Done", 'issues': []})
def __init__(self, enzyme_type, aba_blaster=None, log_level=0):
    """Set up an SSN for one enzyme type, creating its analysis folder if missing.

    Args:
        enzyme_type: abbreviation of the enzyme type this SSN covers.
        aba_blaster: optional pre-built AllByAllBlaster; a new one is made if None.
        log_level: verbosity for logging (0 = quiet).
    """
    self.enzyme_type = enzyme_type
    self.enzyme_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]
    self.graph = nx.Graph()
    self.min_score = 40
    self.node_metadata = {}
    self.log_level = log_level

    # Reuse the supplied blaster when given, otherwise build one for this type.
    self.aba_blaster = (aba_blaster if aba_blaster is not None
                        else AllByAllBlaster(enzyme_type, log_level=log_level))

    parent_dir = str(Path(__file__).parents[0])
    self.save_path = parent_dir + f'/analysis_data/ssn/{self.enzyme_type}'
    if not os.path.exists(self.save_path):
        os.mkdir(self.save_path)

    self.db_object = self._get_db_object()
    self.log(f"SSN object initialised for {enzyme_type}")
def enzyme_champion_seq(enzyme_type):
    """Sequence-editing table for an enzyme champion; non-champions are redirected."""
    user = user_datastore.get_user(current_user.id)
    enzyme_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]

    # Only champions of this enzyme type may enter.
    if enzyme_type_obj not in user.enzyme_champion:
        flash('No access', 'danger')
        return redirect(url_for('main_site.home'))

    enzyme_data = sequence_table.get_enzyme_data(db.Q(enzyme_type=enzyme_type))
    all_types = sorted(list(EnzymeType.objects().distinct("enzyme_type")))

    return render_template('edit_tables/edit_sequences.html',
                           seq_data=enzyme_data,
                           seq_button_columns=['edit', 'merge', 'delete', 'papers'],
                           seq_table_height='80vh',
                           enzyme_types=all_types,
                           show_header_filters=True,
                           include_owner=True,
                           lock_enz_type='true',
                           title=f"Enzyme champion for {enzyme_type} sequences",
                           row_click_modal=False)
def edit_enzyme_types():
    """Admin table of all enzyme types, with the number of reaction rules per type."""
    headings = ['Name', 'Full name', 'Other abbreviations', 'Description', 'Num rules']
    enzyme_types = EnzymeType.objects().distinct("enzyme_type")
    enzyme_types.sort()

    renamed_enz_type_dict_list = []
    for enz_type_dict in EnzymeType.objects().order_by('enzyme_type').as_pymongo():
        enz_type = enz_type_dict.get('enzyme_type')
        renamed_enz_type_dict_list.append({
            'Name': enz_type,
            'Full name': enz_type_dict.get('full_name', ''),
            'Other abbreviations': enz_type_dict.get('other_abbreviations', ''),
            'Description': enz_type_dict.get('description', ''),
            # Fix: count() is evaluated server-side; len(QuerySet) fetched
            # every Reaction document just to count them.
            'Num rules': Reaction.objects(enzyme_types=enz_type).count(),
        })

    return render_template('enzyme_type/edit_enzyme_types.html',
                           headings=headings,
                           rows=renamed_enz_type_dict_list,
                           enzyme_types=enzyme_types)
def edit_sequences():
    """Super-contributor view over every sequence in the database."""
    # An empty query matches all sequences.
    enzyme_data = sequence_table.get_enzyme_data(db.Q())
    enzyme_types = sorted(list(EnzymeType.objects().distinct("enzyme_type")))

    return render_template('edit_tables/edit_sequences.html',
                           seq_data=enzyme_data,
                           seq_button_columns=['edit', 'merge', 'delete', 'papers'],
                           seq_table_height='80vh',
                           enzyme_types=enzyme_types,
                           show_header_filters=True,
                           include_owner=True,
                           lock_enz_type='false',
                           title="Super contributor access to all sequences",
                           row_click_modal=False)
def load_enzyme_type_data():
    """Return the editable fields of one enzyme type as JSON."""
    enz_type = EnzymeType.objects(enzyme_type=request.form['enzyme_type'])[0]
    return jsonify(result={'name': enz_type.enzyme_type,
                           'description': enz_type.description,
                           'full_name': enz_type.full_name,
                           'other_abbreviations': enz_type.other_abbreviations})
def find_allhomologs():
    """Queue blast jobs for every enzyme type in the database."""
    for enzyme_type in EnzymeType.objects().distinct('enzyme_type'):
        embl_restfull.set_blast_jobs(enzyme_type)

    result = {'status': 'success',
              'msg': f"Started job to blast all enzyme_type's",
              'issues': []}
    return jsonify(result=result)
def my_sequences():
    """Table of the sequences owned by the logged-in user."""
    user = user_datastore.get_user(current_user.id)
    enzyme_data = sequence_table.get_enzyme_data(db.Q(owner=user))
    enzyme_types = sorted(list(EnzymeType.objects().distinct("enzyme_type")))

    return render_template('edit_tables/edit_sequences.html',
                           seq_data=enzyme_data,
                           seq_button_columns=['edit', 'delete', 'papers'],
                           seq_table_height='80vh',
                           enzyme_types=enzyme_types,
                           show_header_filters=True,
                           include_owner=True,
                           lock_enz_type='false',
                           title=f"Enzyme sequences assigned to {user.first_name} {user.last_name}",
                           row_click_modal=False)
def submission_main_page(paper_id):
    """Render the data-submission page for one paper.

    Redirects to the add-paper page when the paper does not exist, or when the
    current user has no permission to edit it.
    """
    user = user_datastore.get_user(current_user.id)
    paper_query = Paper.objects(id=paper_id).select_related()
    if len(paper_query) == 0:
        flash('Paper has not been added yet, please add to the database first', 'fail')
        return redirect(url_for("biocatdb.launch_add_paper"))
    paper = paper_query[0]
    if not check_permission.check_paper_permission(current_user.id, paper):
        flash('No access to edit this entry', 'fail')
        return redirect(url_for("biocatdb.launch_add_paper"))

    # Data for the various page panels.
    paper_data = get_paper_data(paper, user)
    activity_data = get_activity_data(paper)
    reactions = list(Reaction.objects().distinct('name'))
    enzyme_names = list(Sequence.objects(papers=paper).distinct('enzyme_name'))
    enzyme_types = list(EnzymeType.objects().distinct('enzyme_type'))
    enzyme_data = sequence_table.get_enzyme_data(db.Q(papers=paper))
    enzyme_types_in_paper = list(Sequence.objects(papers=paper).distinct('enzyme_type'))
    reactions_in_paper = list(Reaction.objects(enzyme_types__in=enzyme_types_in_paper).distinct('name'))
    reactions_in_activity = list(Activity.objects(paper=paper).distinct('reaction'))
    status_dict = get_status(paper, user)
    comments = get_comments(paper, user)
    paper_molecules = get_paper_molecules(paper)

    # Admin-only extras.
    admin_panel = False
    admin_dict = {}
    if current_user.has_role('admin'):
        admin_panel = True
        admin_dict = get_admin_dict(paper)

    # Order reactions: those with activity data first, then those matching the
    # paper's enzyme types, then everything else.
    reactions_ordered = reactions_in_activity + [r for r in reactions_in_paper if r not in reactions_in_activity]
    # NOTE(review): this second pass adds nothing - every element of
    # reactions_in_paper is already in reactions_ordered at this point.
    reactions_ordered += [r for r in reactions_in_paper if r not in reactions_ordered]
    reactions_ordered += [r for r in reactions if r not in reactions_ordered]

    return render_template('data_submission/submission_main_page.html',
                           paper=paper_data,
                           activity_data=activity_data,
                           seq_data=enzyme_data,
                           seq_button_columns=['edit', 'remove', 'papers'],
                           status=status_dict,
                           seq_table_height='60vh',
                           enzyme_types=enzyme_types,
                           show_header_filters=False,
                           include_owner=True,
                           lock_enz_type='false',
                           reactions=reactions_ordered,
                           enzyme_names=enzyme_names+['Chemical'],
                           doi=paper.doi,
                           comments=comments,
                           paper_molecules=paper_molecules,
                           admin_panel=admin_panel,
                           admin_dict=admin_dict,
                           enzyme_reactions=reactions_in_paper)
def enzyme_champion_papers(enzyme_type):
    """Paper-editing table for an enzyme champion; non-champions are redirected."""
    user = user_datastore.get_user(current_user.id)
    enzyme_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]

    # Only champions of this enzyme type may enter.
    if enzyme_type_obj not in user.enzyme_champion:
        flash('No access', 'danger')
        return redirect(url_for('main_site.home'))

    raw_papers = Paper.objects(tags=enzyme_type).only(
        *papers_table.PAPERS_TABLE_FIELDS).order_by('-status').as_pymongo()
    papers_data = papers_table.process_papers_dict(list(raw_papers))

    return render_template('edit_tables/edit_papers.html',
                           papers_data=papers_data,
                           papers_table_height='80vh',
                           papers_button_columns=['delete', 'edit'],
                           show_owner=True,
                           title=f"Enzyme champion for {enzyme_type} papers",
                           row_click_modal=False)
def set_all_seqs_to_reblast():
    """Clear every sequence's blast record and queue all enzyme types for update."""
    for seq in Sequence.objects():
        seq.blast = None
        seq.save()

    for enz_type_obj in EnzymeType.objects():
        enz_type_obj.bioinformatics_status = 'Queued for update'
        enz_type_obj.save()

    return jsonify(result={'status': 'success',
                           'msg': f'Bioinformatics status reset',
                           'issues': []})
def mark_not_aligned():
    """Flag all sequences of an enzyme type for re-alignment and queue its SSN for update."""
    enzyme_type = request.form['enzyme_type']

    for seq in Sequence.objects(enzyme_type=enzyme_type):
        seq.alignments_made = False
        seq.save()

    enz_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]
    # Fix: the original indexed [0] unconditionally, raising IndexError when
    # the enzyme type had no SSN record yet; .first() returns None instead.
    ssn_record = SSN_record.objects(enzyme_type=enz_type_obj).first()
    if ssn_record is not None:
        ssn_record.status = 'Queued for update'
        ssn_record.save()

    return jsonify(result={'status': 'success', 'msg': f"Done", 'issues': []})
def set_choices(self):
    """Populate the enzyme_type select with types that have an SSN, labelled with full names."""
    types_with_ssn = SSN_record.objects().distinct('enzyme_type')

    # Map abbreviation -> display label for every type that has an SSN record.
    descriptions = {}
    for enz_type in EnzymeType.objects():
        if enz_type in types_with_ssn:
            descriptions[enz_type.enzyme_type] = f"{enz_type.enzyme_type} - {enz_type.full_name}"

    self.enzyme_type.choices = []
    for key in sorted(descriptions):
        self.enzyme_type.choices.append((key, descriptions[key]))
def __init__(self, enzyme_type, log_level=0, num_threads=2):
    """Configure paths and filter thresholds for an all-by-all blast of one enzyme type.

    Args:
        enzyme_type: abbreviation of the enzyme type to blast.
        log_level: verbosity for logging (0 = quiet).
        num_threads: number of threads for the blast run.
    """
    self.enzyme_type = enzyme_type
    self.enzyme_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]
    # Spaces are unsafe in folder names, so the directory name uses underscores.
    self.enz_type_dir_name = enzyme_type.replace(' ', '_')
    self.all_by_all_blast_folder = str(
        Path(__file__).parents[0]) + '/analysis_data/all_by_all_blast'
    self.directory = f"{self.all_by_all_blast_folder}/{self.enz_type_dir_name}"
    self.database = f"{self.directory}/{self.enz_type_dir_name}.fasta"
    self.cdhit_output = f"{self.directory}/cd_hit"
    self.num_threads = num_threads
    # Thresholds applied when filtering blast hits.
    self.max_alignments = 10000
    self.max_e = 5
    self.min_coverage = 0.8
    self.min_identity = 0.3
    self.log_level = log_level
def load_uniref_data():
    """Look up a UniRef50 cluster and return its representative-sequence details as JSON."""
    name = request.form['name']
    enzyme_type = request.form['enzyme_type']
    enzyme_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]

    et = db.Q(enzyme_type=enzyme_type_obj)
    nq = db.Q(enzyme_name=name)
    query = UniRef50.objects(et & nq)
    seq = query[0]

    protein_name = seq.protein_name
    organism = seq.tax
    uniprot_id = retrieve_uniref_info.strip_uniref_name(name)
    # Ids starting 'UP' are skipped (presumably UniParc accessions, which the
    # UniProt parser below cannot handle) - TODO confirm.
    if uniprot_id[0:2] == 'UP':
        uniprot_id = ""

    # Membership counts come from the UniRef cluster XML.
    ref_parser = retrieve_uniref_info.UniRef_Parser()
    ref_parser.load_xml(name)
    uni90, uni100, uniprot = ref_parser.get_uniref_members()
    cluster_id = ref_parser.get_cluster_name()
    num_uni90 = len(uni90)
    num_uni100 = len(uni100)
    num_uniprot = len(list(uniprot.keys()))

    # Pfam domains are only retrievable via a UniProt entry.
    if uniprot_id != "":
        prot_parser = retrieve_uniref_info.UniProt_Parser()
        prot_parser.load_xml(uniprot_id)
        pfams = prot_parser.get_pfams()
    else:
        pfams = []

    result = {
        'rep_seq_name': protein_name,
        'rep_seq_organism': organism,
        'rep_seq_uniprot_id': uniprot_id,
        'cluster_id': cluster_id,
        'num_uni90': num_uni90,
        'num_uni100': num_uni100,
        'num_uniprot': num_uniprot,
        'pfam_object': pfams
    }
    return jsonify(result=result)
def task_check_blast_status():
    """Scheduled task: when all work queues are idle, refresh blast statuses and queue new blasts."""
    busy = (len(current_app.blast_queue.jobs)
            + len(current_app.process_blasts_queue.jobs)
            + len(current_app.alignment_queue.jobs)) != 0

    if not busy:
        print('Checking blast status')
        for enz_type in EnzymeType.objects():
            embl_restfull.check_blast_status(enz_type.enzyme_type)
            # Anything not yet complete gets (re-)queued for blasting.
            if enz_type.bioinformatics_status != 'Complete':
                embl_restfull.set_blast_jobs(enz_type.enzyme_type)
    else:
        # Queues still busy - just report their sizes.
        print(f"Length blast queue = {len(current_app.blast_queue.jobs)}")
        print(f"Length process blast queue = {len(current_app.process_blasts_queue.jobs)}")
        print(f"Length alignment queue = {len(current_app.alignment_queue.jobs)}")
def delete_enzyme_types():
    """Delete an enzyme type, but only when nothing in the database still references it."""
    to_delete = request.form['to_delete']
    enz_type = EnzymeType.objects(enzyme_type=to_delete)[0]
    seqs = Sequence.objects(enzyme_type=to_delete)
    reacs = Reaction.objects(enzyme_types=to_delete)
    acts = Activity.objects(enzyme_type=to_delete)

    # Gather every reference that blocks deletion.
    issues = []
    for seq in seqs:
        issues.append(f'Enzyme type is present in sequence: {seq.enzyme_name}')
    for reac in reacs:
        issues.append(f'Enzyme type is present in reaction: {reac.name}')
    papers = []
    for act in acts:
        if act.short_citation not in papers:
            papers.append(act.short_citation)
    for paper in papers:
        issues.append(f"Enzyme type is recorded in activity data for {paper}")

    if not issues:
        status = 'success'
        msg = 'Enzyme type deleted'
        enz_type.delete()
    else:
        status = 'danger'
        msg = 'Could not delete'

    return jsonify(result={'status': status, 'msg': msg, 'issues': issues})
def bioinformatics_admin_page():
    """Admin overview: per-enzyme-type blast status, SSN status, counts and blast progress."""
    enzyme_types = EnzymeType.objects().order_by('enzyme_type')

    biostat = {}
    ssn = {}
    enzyme_numbers = {}
    enz_type_dict = {}

    # Single pass over the enzyme types (the original iterated the same
    # queryset three times to fill the same per-type dicts).
    for enz_type_obj in enzyme_types:
        enz_type = enz_type_obj.enzyme_type

        biostat[enz_type] = enz_type_obj.bioinformatics_status

        q = SSN_record.objects(enzyme_type=enz_type_obj)
        ssn[enz_type] = q[0].status if len(q) != 0 else 'None'

        # Fix: count() is evaluated server-side; len(QuerySet) loaded every
        # document just to count it.
        enzyme_numbers[enz_type] = {
            'biocatdb': Sequence.objects(enzyme_type=enz_type).count(),
            'uniref': UniRef50.objects(enzyme_type=enz_type_obj).count(),
        }

        # Percentage of this type's sequences that already have a blast record.
        seqs = Sequence.objects(enzyme_type=enz_type)
        num_blasted = sum(1 for seq in seqs if seq.blast is not None)
        if num_blasted != 0:
            num_blasted = round((num_blasted / len(seqs)) * 100, 0)
        enz_type_dict[enz_type] = num_blasted

    registry = StartedJobRegistry(queue=current_app.blast_queue)
    num_jobs = registry.count

    return render_template('bioinformatics/bioinformatics_admin.html',
                           blasted_enz_types=enz_type_dict,
                           biostat=biostat,
                           ssn=ssn,
                           num_jobs=num_jobs,
                           enzyme_numbers=enzyme_numbers)
def get_nodes_and_edges(enzyme_type, identity):
    """Build visualisation nodes (and edges) for an enzyme type's sequence network.

    Nodes come from the type's UniRef90 entries plus its non-empty sequences.
    NOTE(review): `alignments` is always the empty list here, so the edge loop
    never runs and `edges` is always [] - alignment loading appears to be
    missing or removed; confirm intent.
    """
    enz_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]
    alignments = []
    sequences = Sequence.objects(
        db.Q(sequence__ne=None) & db.Q(sequence__ne='')
        & db.Q(enzyme_type=enzyme_type))
    unirefs = UniRef90.objects(enzyme_type=enz_type_obj)

    nodes = []
    for seq_obj in list(unirefs) + list(sequences):
        nodes.append(new_node(seq_obj))

    edges = []
    for ali_obj in alignments:
        # Keep alignments above the identity cutoff that link two different proteins.
        if ali_obj.identity >= identity and ali_obj.proteins[
                0].enzyme_name != ali_obj.proteins[1].enzyme_name:
            edges.append(new_edge(ali_obj))

    return nodes, edges
def get_possible_enzymes():
    """Return the enzyme choices for a reaction node of a network stored in redis."""
    reaction_node = request.form['reaction_node']
    network_id = request.form['network_id']

    data = json.loads(current_app.redis.get(network_id))
    attr_dict = json.loads(data['attr_dict'])
    node_attrs = attr_dict[reaction_node]
    enzyme = node_attrs['selected_enzyme']
    possible_enzymes = node_attrs['possible_enzymes']

    # Drop the blank placeholder option if present.
    if '' in possible_enzymes:
        possible_enzymes.remove('')

    choices = []
    for enz in possible_enzymes:
        enz_full = EnzymeType.objects(enzyme_type=enz)[0].full_name
        choices.append((f"{enz}", f"{enz} - {enz_full}"))

    return jsonify(result={'possible_enzymes': choices, 'selected_enzyme': enzyme})
def ssn_object():
    """Summarise an enzyme type's SSN record (status, counts, precalculated scores) as JSON."""
    enzyme_type = request.form['enzyme_type']
    enzyme_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]
    ssn_obj = SSN_record.objects(enzyme_type=enzyme_type_obj)[0]

    num_biocatdb = Sequence.objects(enzyme_type=enzyme_type).count()
    num_uniref = UniRef50.objects(enzyme_type=enzyme_type_obj).count()

    # One display string per precalculated alignment score.
    precalc_choices = {}
    for score, clusters in ssn_obj.num_at_alignment_score.items():
        idt = ssn_obj.identity_at_alignment_score[score]
        precalc_choices[score] = f"{score}, {clusters} clusters, avg identity {idt[0]} ± {idt[1]}"

    return jsonify(result={'status': ssn_obj.status,
                           'num_biocatdb': num_biocatdb,
                           'num_uniref': num_uniref,
                           'precalculated': precalc_choices})
def merge_enzyme_types():
    """Merge one enzyme type into another, deleting the merged-from type."""
    to_merge = request.form['to_merge']
    merge_with = request.form['merge_with']

    if to_merge == merge_with:
        # Merging a type into itself is rejected.
        result = {'status': 'danger',
                  'msg': "Can't merge with self",
                  'issues': []}
    else:
        enz_type = EnzymeType.objects(enzyme_type=to_merge)[0]
        change_enzyme_type_name(enz_type, merge_with)
        enz_type.delete()
        result = {'status': 'success',
                  'msg': 'Enzyme type merged',
                  'issues': []}

    return jsonify(result=result)
def __init__(self, enzyme_type, hidden_edges=True, log_level=0):
    """Set up styling options for visualising an enzyme type's SSN.

    Args:
        enzyme_type: abbreviation of the enzyme type to visualise.
        hidden_edges: whether edges start hidden in the rendered network.
        log_level: verbosity for logging (0 = quiet).
    """
    self.enzyme_type = enzyme_type
    self.enzyme_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]
    self.node_metadata = self._find_uniref_metadata()

    # Edge styling.
    self.edge_colour = {'color': 'grey'}
    self.edge_width = 4
    self.hidden_edges = hidden_edges

    # Border styling distinguishes uniref nodes from biocatdb nodes.
    self.uniref_border_width = 1
    self.uniref_border_colour = 'black'
    self.biocatdb_border_width = 3
    self.biocatdb_border_colour = 'darkred'
    self.border_width_selected = 4

    # Node appearance.
    self.opacity = 0.9
    self.luminosity = 'bright'
    self.node_colour = f'rgba(5, 5, 168, {self.opacity})'
    self.node_size = 100
    self.node_shape = 'dot'

    self.log_level = log_level
    self.cluster_positioner = ClusterPositioner()
def check_blast_status(enzyme_type):
    """Update an enzyme type's bioinformatics status from the blast state of its sequences.

    Marks the type 'Queued for update' if any reviewed, non-ignored sequence
    lacks a blast record; otherwise marks it 'Complete' and queues its SSN
    record (if there is exactly one) for update.
    """
    seqs = Sequence.objects(
        db.Q(enzyme_type=enzyme_type) & db.Q(bioinformatics_ignore__ne=True)
        & db.Q(reviewed=True))
    enz_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]

    all_complete = True
    for seq in seqs:
        if seq.blast is None:
            # One unblasted sequence is enough - the original re-saved the
            # enzyme type document once per remaining unblasted sequence.
            all_complete = False
            enz_type_obj.bioinformatics_status = 'Queued for update'
            enz_type_obj.save()
            break

    if all_complete and enz_type_obj.bioinformatics_status != 'Complete':
        enz_type_obj.bioinformatics_status = 'Complete'
        enz_type_obj.save()

        ssn_q = SSN_record.objects(enzyme_type=enz_type_obj)
        if len(ssn_q) == 1:
            # Reuse the already-fetched record rather than querying again.
            ssn_record = ssn_q[0]
            ssn_record.status = 'Queued for update'
            ssn_record.save()
def parse(self, output, seq_obj):
    """Process a blast result: store new UniRef50 hits and link existing ones to seq_obj.

    Args:
        output: a parsed blast record exposing an `.alignments` iterable.
        seq_obj: the Sequence document the blast was run for.
    """
    blast_record = output
    query_length = len(seq_obj.sequence)
    enzyme_type_obj = EnzymeType.objects(
        enzyme_type=seq_obj.enzyme_type)[0]

    for alignment in blast_record.alignments:
        # Hit ids carry a fixed prefix (self.identifier_head) that is stripped off.
        identifier = alignment.hit_id.replace(self.identifier_head, '')
        if self._alignment_filters(alignment, query_length):
            db_query = UniRef50.objects(
                db.Q(enzyme_name=identifier)
                & db.Q(enzyme_type=enzyme_type_obj)).select_related()
            if len(db_query) == 0:
                # New hit: fetch its sequence and add it if it passes the filters.
                protein_sequence = self._get_sequence(identifier)
                if self._sequence_filters(protein_sequence, query_length):
                    self.log(f"Adding sequence for {identifier}")
                    self._add_uniref(alignment, identifier, protein_sequence,
                                     enzyme_type_obj, seq_obj)
            else:
                # Known hit: just record that this blast also found it.
                uniref_obj = db_query[0]
                self._add_result_of_blasts_for(seq_obj, uniref_obj)