예제 #1
0
def full_uniref_check(enzyme_type_obj):
    """Re-validate the UniRef50 clusters of an enzyme type and queue it for update.

    For every UniRef50 document linked to ``enzyme_type_obj`` the cluster is
    loaded through ``UniRef_Parser`` and its id is compared with the stored
    ``enzyme_name``.  A cluster whose id does not match is deleted, after the
    ``blast`` field of every sequence in its ``result_of_blasts_for`` list has
    been reset to ``None``.  Finally the enzyme type, and its SSN record when
    one exists, are marked ``'Queued for update'``.

    Args:
        enzyme_type_obj: EnzymeType document whose UniRef50 entries are checked.
    """
    # Looping over an empty result is a no-op, so no separate emptiness check.
    for ur in UniRef50.objects(enzyme_type=enzyme_type_obj).select_related():
        print(f'Checking {ur.enzyme_name}..')
        ref_parser = UniRef_Parser()
        ref_parser.load_xml(ur.enzyme_name)
        # NOTE(review): presumably throttles the lookups done by load_xml - confirm.
        time.sleep(0.2)

        # Explicit comparison kept on purpose: only a result equal to False
        # triggers deletion (a None result, if the helper can return one,
        # must not) - TODO confirm the helper's return type.
        if ref_parser.check_id_match(ur.enzyme_name) == False:  # noqa: E712
            print(
                f"{ur.enzyme_name} doesnt match cluster id online, deleting.."
            )
            for seq in ur.result_of_blasts_for:
                seq.blast = None
                seq.save()
            ur.delete()

    # Single lookup: first() yields None when the enzyme type has no SSN record.
    ssn_record = SSN_record.objects(enzyme_type=enzyme_type_obj).first()
    if ssn_record is not None:
        ssn_record.status = 'Queued for update'
        ssn_record.save()

    enzyme_type_obj.bioinformatics_status = 'Queued for update'
    enzyme_type_obj.save()

    print(f"Full UniRef50 update complete for {enzyme_type_obj.enzyme_type}")
예제 #2
0
def clear_all_bioinformatics_data():
    """Wipe every piece of stored bioinformatics data and reset the statuses.

    Steps, in order:
      1. set ``bioinformatics_status`` of every EnzymeType to ``'Idle'``;
      2. reset ``blast`` and ``alignments_made`` on every Sequence to ``None``;
      3. drop the UniRef50, SSN_record, UniRef90, Alignment and SeqSimNet
         collections;
      4. empty the ``ssn`` and ``all_by_all_blast`` folders under
         ``analysis/analysis_data``.

    Returns:
        The ``jsonify`` response wrapping a ``result`` dict with the keys
        ``status``, ``msg`` and ``issues``.
    """
    for enz in EnzymeType.objects():
        enz.bioinformatics_status = 'Idle'
        enz.save()

    for seq in Sequence.objects():
        seq.blast = None
        seq.alignments_made = None
        seq.save()

    UniRef50.drop_collection()
    SSN_record.drop_collection()

    UniRef90.drop_collection()
    Alignment.drop_collection()
    SeqSimNet.drop_collection()

    analysis_data = Path(__file__).parents[3] / 'analysis' / 'analysis_data'
    for folder in (analysis_data / 'ssn', analysis_data / 'all_by_all_blast'):
        # A folder that was never created (or was already removed) must not
        # abort the reset after the collections have been dropped.
        if folder.is_dir():
            shutil.rmtree(folder)
        folder.mkdir(parents=True, exist_ok=True)

    print('ALL BIOINFORMATICS DATA DELETED')

    result = {'status': 'success',
              'msg': "Done",
              'issues': []}

    return jsonify(result=result)
예제 #3
0
def task_check_ssn_status():
    """Remove duplicate SSN records and queue SSN jobs for enzyme types that need one.

    First, for every enzyme type with more than one SSN_record, all records
    except the first returned by the query are deleted.

    Then, only when the blast, process-blasts and alignment queues are all
    empty, ``ssn_tasks.task_expand_ssn`` is enqueued on the alignment queue for:
      * every SSN record that is not ``'Complete'`` while its enzyme type is
        ``'Complete'`` and has at least one UniRef50 entry;
      * every ``'Complete'`` enzyme type without any SSN record that has at
        least one UniRef50 entry or one non-ignored sequence with a non-empty
        ``sequence`` value.

    When any queue still holds jobs, the three queue lengths are printed and
    nothing is enqueued.
    """
    for enzyme_type in EnzymeType.objects():
        ssn_query = list(SSN_record.objects(enzyme_type=enzyme_type))
        if len(ssn_query) > 1:
            print(
                f'Warning - multiple ssn records for {enzyme_type} - deleting extras'
            )
            # Keep the first record, drop the rest.
            for extra_record in ssn_query[1:]:
                extra_record.delete()

    # Read each queue once so the decision and the report use the same numbers.
    num_blast_jobs = len(current_app.blast_queue.jobs)
    num_process_jobs = len(current_app.process_blasts_queue.jobs)
    num_alignment_jobs = len(current_app.alignment_queue.jobs)

    if num_blast_jobs + num_process_jobs + num_alignment_jobs != 0:
        print(f"Length blast queue = {num_blast_jobs}")
        print(
            f"Length process blast queue = {num_process_jobs}"
        )
        print(
            f"Length alignment queue = {num_alignment_jobs}"
        )
        return

    print('Checking ssn status')

    for ssn_r in SSN_record.objects().select_related():
        if ssn_r.status != 'Complete' and ssn_r.enzyme_type.bioinformatics_status == 'Complete':
            # count() answers "is there any?" without loading the documents.
            if UniRef50.objects(enzyme_type=ssn_r.enzyme_type).count() != 0:
                enzyme_type = ssn_r.enzyme_type.enzyme_type
                job_name = f"{enzyme_type}_expand_ssn"
                current_app.alignment_queue.enqueue(
                    ssn_tasks.task_expand_ssn,
                    enzyme_type,
                    job_id=job_name)
                print(f'Queued SSN job for {enzyme_type}')

    # Fetched once instead of once per enzyme type: the loop below only
    # enqueues jobs and never writes SSN records itself.
    types_with_ssn = SSN_record.objects().distinct('enzyme_type')

    for enz_type_obj in EnzymeType.objects():
        if enz_type_obj.bioinformatics_status != 'Complete':
            continue
        if enz_type_obj in types_with_ssn:
            continue

        num_unirefs = UniRef50.objects(enzyme_type=enz_type_obj).count()
        biocatdb_seqs = [
            seq for seq in Sequence.objects(
                db.Q(enzyme_type=enz_type_obj.enzyme_type)
                & db.Q(bioinformatics_ignore__ne=True))
            if seq.sequence != '' and seq.sequence is not None
        ]

        if num_unirefs + len(biocatdb_seqs) != 0:
            print(
                f"No SSN for {enz_type_obj.enzyme_type}, but blasts are complete and sequences present..  creating SSN."
            )
            job_name = f"{enz_type_obj.enzyme_type}_expand_ssn"
            current_app.alignment_queue.enqueue(
                ssn_tasks.task_expand_ssn,
                enz_type_obj.enzyme_type,
                job_id=job_name)
예제 #4
0
    def _get_db_object(self):
        """Return the SSN_record of this enzyme type.

        The first stored record for ``self.enzyme_type_obj`` is returned when
        one exists; otherwise a new SSN_record instance is built for it (this
        method does not save it).
        """
        existing = SSN_record.objects(enzyme_type=self.enzyme_type_obj)
        if len(existing) != 0:
            return existing[0]

        return SSN_record(enzyme_type=self.enzyme_type_obj)
예제 #5
0
def check_blast_status(enzyme_type):
    """Update the bioinformatics status of an enzyme type from its sequences' blasts.

    Only reviewed sequences of the enzyme type that are not flagged
    ``bioinformatics_ignore`` are considered.

    * If any of them has ``blast`` set to ``None``, the enzyme type is saved
      with status ``'Queued for update'``.
    * Otherwise (including when there are no such sequences), an enzyme type
      that is not yet ``'Complete'`` is saved as ``'Complete'`` and, when it
      has exactly one SSN record, that record is saved with status
      ``'Queued for update'``.

    Args:
        enzyme_type: Name of the enzyme type (value of ``EnzymeType.enzyme_type``).

    Raises:
        IndexError: If no EnzymeType with that name exists.
    """
    seqs = Sequence.objects(
        db.Q(enzyme_type=enzyme_type) & db.Q(bioinformatics_ignore__ne=True)
        & db.Q(reviewed=True))
    enz_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]

    # One missing blast is enough: stop scanning and save the status once,
    # rather than once per un-blasted sequence.
    if any(seq.blast is None for seq in seqs):
        enz_type_obj.bioinformatics_status = 'Queued for update'
        enz_type_obj.save()
        return

    if enz_type_obj.bioinformatics_status == 'Complete':
        return

    enz_type_obj.bioinformatics_status = 'Complete'
    enz_type_obj.save()

    # The SSN record is only touched when it is unambiguous (exactly one).
    ssn_records = list(SSN_record.objects(enzyme_type=enz_type_obj))
    if len(ssn_records) == 1:
        ssn_record = ssn_records[0]
        ssn_record.status = 'Queued for update'
        ssn_record.save()
예제 #6
0
def clear_empty_ssns():
    """Delete every SSN record whose enzyme type has nothing to put in a network.

    A record is deleted when its enzyme type has no UniRef50 entries and no
    Sequence whose ``sequence`` is neither ``""`` nor ``None`` and whose
    ``sequence_unavailable`` flag is not ``True``.

    Returns:
        The ``jsonify`` response wrapping a ``result`` dict with the keys
        ``status``, ``msg`` and ``issues``.
    """
    for record in SSN_record.objects().select_related():
        type_doc = record.enzyme_type

        uniref_hits = UniRef50.objects(enzyme_type=type_doc)

        usable_filter = (db.Q(enzyme_type=type_doc.enzyme_type)
                         & db.Q(sequence__ne="") & db.Q(sequence__ne=None)
                         & db.Q(sequence_unavailable__ne=True))
        usable_seqs = Sequence.objects(usable_filter)

        total_members = len(uniref_hits) + len(usable_seqs)
        if total_members == 0:
            record.delete()

    return jsonify(result={
        'status': 'success',
        'msg': 'Empty SSNs removed',
        'issues': [],
    })
예제 #7
0
def mark_not_aligned():
    """Flag all sequences of an enzyme type as not aligned and queue its SSN for update.

    The enzyme type name is read from the ``enzyme_type`` field of the
    submitted form.  Every Sequence of that type is saved with
    ``alignments_made = False``; the SSN record of the type, when one exists,
    is saved with status ``'Queued for update'``.

    Returns:
        The ``jsonify`` response wrapping a ``result`` dict with the keys
        ``status``, ``msg`` and ``issues``.

    Raises:
        IndexError: If no EnzymeType with the submitted name exists.
    """
    enzyme_type = request.form['enzyme_type']

    for seq in Sequence.objects(enzyme_type=enzyme_type):
        seq.alignments_made = False
        seq.save()

    enz_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]

    # An enzyme type may not have an SSN record yet; first() returns None in
    # that case, so the request no longer fails after the sequences were
    # already updated.
    ssn_record = SSN_record.objects(enzyme_type=enz_type_obj).first()
    if ssn_record is not None:
        ssn_record.status = 'Queued for update'
        ssn_record.save()

    result = {'status': 'success',
              'msg': "Done",
              'issues': []}

    return jsonify(result=result)
예제 #8
0
    def set_choices(self):
        """Populate ``self.enzyme_type.choices`` with the enzyme types that have an SSN record.

        Each choice is a ``(enzyme_type, "<enzyme_type> - <full_name>")``
        tuple; the choices are sorted by enzyme type name.
        """
        types_with_ssn = SSN_record.objects().distinct('enzyme_type')

        # Enzyme types referenced by at least one SSN record.
        selectable = [
            type_doc for type_doc in EnzymeType.objects()
            if type_doc in types_with_ssn
        ]

        labels = {
            type_doc.enzyme_type: f"{type_doc.enzyme_type} - {type_doc.full_name}"
            for type_doc in selectable
        }
        ordered_names = sorted(type_doc.enzyme_type for type_doc in selectable)

        self.enzyme_type.choices = [(name, labels[name]) for name in ordered_names]
예제 #9
0
def bioinformatics_admin_page():
    """Render the bioinformatics admin page with per-enzyme-type statistics.

    For every enzyme type (ordered by name) the template receives:
      * ``biostat``: its ``bioinformatics_status``;
      * ``ssn``: the status of its first SSN record, or the string ``'None'``
        when it has no record;
      * ``enzyme_numbers``: the number of Sequence (``'biocatdb'``) and
        UniRef50 (``'uniref'``) documents;
      * ``blasted_enz_types``: the percentage of its sequences whose ``blast``
        is not ``None``, rounded to zero decimals (``0`` when none are).

    ``num_jobs`` is the ``count`` of the StartedJobRegistry of the blast queue.

    Returns:
        The rendered ``bioinformatics/bioinformatics_admin.html`` template.
    """
    biostat = {}
    ssn = {}
    enzyme_numbers = {}
    enz_type_dict = {}

    # Single pass: each enzyme type needs one query per collection.
    for enz_type_obj in EnzymeType.objects().order_by('enzyme_type'):
        enz_type = enz_type_obj.enzyme_type
        biostat[enz_type] = enz_type_obj.bioinformatics_status

        ssn_record = SSN_record.objects(enzyme_type=enz_type_obj).first()
        ssn[enz_type] = ssn_record.status if ssn_record is not None else 'None'

        # Loaded once and reused for both the total and the blasted count.
        seqs = list(Sequence.objects(enzyme_type=enz_type))
        num_seqs = len(seqs)
        num_blasted = sum(1 for seq in seqs if seq.blast is not None)

        enzyme_numbers[enz_type] = {
            'biocatdb': num_seqs,
            'uniref': UniRef50.objects(enzyme_type=enz_type_obj).count(),
        }

        # num_blasted > 0 implies num_seqs > 0, so the division is safe.
        if num_blasted != 0:
            enz_type_dict[enz_type] = round((num_blasted / num_seqs) * 100, 0)
        else:
            enz_type_dict[enz_type] = 0

    registry = StartedJobRegistry(queue=current_app.blast_queue)
    num_jobs = registry.count

    return render_template('bioinformatics/bioinformatics_admin.html',
                           blasted_enz_types=enz_type_dict,
                           biostat=biostat,
                           ssn=ssn,
                           num_jobs=num_jobs,
                           enzyme_numbers=enzyme_numbers)
예제 #10
0
def ssn_object():
    """Return a JSON summary of the SSN record of the submitted enzyme type.

    The enzyme type name is read from the ``enzyme_type`` field of the
    submitted form.  The response's ``result`` holds the record's ``status``,
    the number of Sequence (``num_biocatdb``) and UniRef50 (``num_uniref``)
    documents of the type, and ``precalculated``: one descriptive text per
    alignment score found in the record's ``num_at_alignment_score``.

    Raises:
        IndexError: If the enzyme type or its SSN record does not exist.
    """
    requested_type = request.form['enzyme_type']
    type_doc = EnzymeType.objects(enzyme_type=requested_type)[0]
    record = SSN_record.objects(enzyme_type=type_doc)[0]

    biocatdb_total = Sequence.objects(enzyme_type=requested_type).count()
    uniref_total = UniRef50.objects(enzyme_type=type_doc).count()

    clusters_at = record.num_at_alignment_score
    identity_at = record.identity_at_alignment_score

    # identity_at[score] is indexed at 0 and 1 for the two numbers shown
    # around the "±" sign.
    precalculated = {
        score: (f"{score}, {clusters_at[score]} clusters, "
                f"avg identity {identity_at[score][0]} ± {identity_at[score][1]}")
        for score in clusters_at
    }

    return jsonify(result={
        'status': record.status,
        'num_biocatdb': biocatdb_total,
        'num_uniref': uniref_total,
        'precalculated': precalculated,
    })