예제 #1
0
def task_get_ssn(enzyme_type, score, hide_mutants, only_biocatdb):
    """Assemble the node payload for an SSN view, reporting job progress.

    Progress is written to ``meta['progress']`` of the current job. A node
    layout stored under ``str(score)`` in the precalculated visualisations
    is reused when ``only_biocatdb`` is exactly ``False``; otherwise the
    network is loaded and laid out with ``SSN_Visualiser``.

    Args:
        enzyme_type: Passed to ``SSN`` and ``SSN_Visualiser``.
        score: Alignment score; used as ``str(score)`` for the
            precalculated lookup and passed as-is to ``visualise``.
        hide_mutants: When truthy the network is loaded without mutants.
        only_biocatdb: Forwarded to ``ssn.load``; any value other than the
            literal ``False`` bypasses the precalculated layouts.

    Returns:
        A dict with the keys ``nodes``, ``edges``, ``alignment_score``,
        ``enzyme_type``, ``hide_mutants`` and ``only_biocatdb``. ``edges``
        is always an empty list.
    """
    job = get_current_job()

    def report(stage):
        # Persist the stage label on the job's metadata.
        job.meta['progress'] = stage
        job.save_meta()

    report('started')
    ssn = SSN(enzyme_type)
    report('ssn loaded')

    score_key = str(score)
    if only_biocatdb is not False or score_key not in ssn.db_object.precalculated_vis:
        ssn.load(include_mutants=not hide_mutants, only_biocatdb=only_biocatdb)
        visualiser = SSN_Visualiser(enzyme_type, log_level=1)
        # The edges computed here are not used; the payload carries none.
        nodes, _unused_edges = visualiser.visualise(ssn, score)
    else:
        # NOTE: hide_mutants is not applied on this path; mutants still
        # need to be filtered out of the precalculated nodes.
        nodes = ssn.db_object.precalculated_vis[score_key]

    return {
        'nodes': nodes,
        'edges': [],
        'alignment_score': score,
        'enzyme_type': enzyme_type,
        'hide_mutants': hide_mutants,
        'only_biocatdb': only_biocatdb,
    }
예제 #2
0
def get_clusters():
    """Return the clusters of an SSN at a given alignment score as JSON.

    Reads ``enzyme_type``, ``alignment_score``, ``only_biocatdb`` and
    ``hide_mutants`` from the submitted form, loads the matching SSN and
    asks it for its clusters at that score.

    Returns:
        The ``jsonify`` response whose ``result`` entry holds the
        ``with_uniref`` and ``without_uniref`` cluster collections.
    """
    form = request.form
    enzyme_type = form['enzyme_type']
    alignment_score = int(form['alignment_score'])
    # The boolean flags arrive as strings and are parsed with strtobool.
    only_biocatdb = bool(strtobool(form['only_biocatdb']))
    include_mutants = not bool(strtobool(form['hide_mutants']))

    network = SSN(enzyme_type)
    network.load(include_mutants=include_mutants, only_biocatdb=only_biocatdb)
    # get_clusters yields the pair in (without_uniref, with_uniref) order.
    clusters_without, clusters_with = network.get_clusters(alignment_score)

    return jsonify(result={'with_uniref': clusters_with,
                           'without_uniref': clusters_without})
예제 #3
0
def precalculate_job(enzyme_type):
    """Precalculate one batch of SSN visualisations and queue the next step.

    The SSN is loaded without mutants and with ``only_biocatdb=False``. The
    batch size passed to the precalculator depends on the number of nodes.
    If the batch produces nothing, ``task_expand_ssn`` is enqueued on the
    alignment queue; otherwise the results are merged into the database
    object, saved, and this job is enqueued again on the preprocess queue.

    Args:
        enzyme_type: Passed to ``SSN`` and to the follow-up jobs.
    """
    ssn = SSN(enzyme_type)
    ssn.load(include_mutants=False, only_biocatdb=False)

    ssn_precalc = SSN_Cluster_Precalculator(ssn)

    # Larger networks get a smaller ``num`` per job.
    node_count = len(ssn.graph.nodes)
    if node_count <= 1000:
        num = 20
    elif node_count <= 3000:
        num = 5
    else:
        num = 1

    db = ssn.db_object
    if len(db.precalculated_vis.keys()) != 0:
        # Resume 5 above the highest alignment score already recorded.
        # NOTE(review): the emptiness check reads precalculated_vis while
        # the resume point reads the two other maps; presumably all three
        # are kept in step -- confirm.
        recorded_scores = [int(key) for key in db.identity_at_alignment_score.keys()]
        current_num_clusters = max(db.num_at_alignment_score.values())
        ssn_precalc.start = max(recorded_scores) + 5
    else:
        print('No existing % identity data, starting at alignment score 40')
        ssn_precalc.start = 40
        current_num_clusters = 0

    print(f"Start = {ssn_precalc.start}")
    batch = ssn_precalc.precalulate(num=num,
                                    current_num_clusters=current_num_clusters)
    precalculated_nodes, cluster_numbers, identity_at_score = batch

    if len(precalculated_nodes) != 0:
        record = ssn.db_object
        record.precalculated_vis.update(precalculated_nodes)
        record.num_at_alignment_score.update(cluster_numbers)
        record.identity_at_alignment_score.update(identity_at_score)
        record.save()
        # More scores may remain, so run another batch.
        current_app.preprocess_queue.enqueue(precalculate_job, enzyme_type)
        return

    print('Precalc complete - checking SSN again')
    current_app.alignment_queue.enqueue(task_expand_ssn, enzyme_type)
예제 #4
0
def remove_sequence(enzyme_type, enzyme_name):
    """Remove a sequence from an enzyme type's SSN and re-check the network.

    Nothing happens when ``enzyme_name`` is not among the graph's nodes.
    Otherwise the node is removed, the SSN is saved and ``task_expand_ssn``
    is enqueued on the alignment queue.

    Args:
        enzyme_type: Passed to ``SSN`` and to the follow-up job.
        enzyme_name: Name of the node to remove.
    """
    ssn = SSN(enzyme_type)
    ssn.load()

    # An empty graph cannot contain the name, so one membership test
    # covers both the "no nodes" and the "not present" cases.
    if enzyme_name not in list(ssn.graph.nodes):
        return

    # NOTE(review): if ssn.graph is a networkx graph, its node view has no
    # remove() method and ssn.graph.remove_node(enzyme_name) would be the
    # call to use -- confirm against the SSN class.
    ssn.graph.nodes.remove(enzyme_name)
    ssn.save()
    current_app.alignment_queue.enqueue(task_expand_ssn, enzyme_type)
예제 #5
0
def new_expand_ssn_job(enzyme_type):
    """Enqueue an SSN expansion unless the SSN's status is 'Complete'.

    Args:
        enzyme_type: Passed to ``SSN`` and to ``task_expand_ssn``.
    """
    if SSN(enzyme_type).db_object.status == 'Complete':
        return
    current_app.alignment_queue.enqueue(task_expand_ssn, enzyme_type)
예제 #6
0
def task_expand_ssn(enzyme_type, log_level=1, max_num=200):
    """Run one step of SSN construction for an enzyme type.

    After rebuilding the BLAST database and pruning the SSN, the first
    stage below that finds work is processed, a follow-up
    ``new_expand_ssn_job`` is enqueued, and the function returns:

    1. nodes not present, restricted with ``only_biocatdb=True``;
    2. nodes that need alignments;
    3. nodes not present, without that restriction.

    If no stage finds work, a visualisation precalculation is enqueued when
    none is stored and the graph has at least 20 nodes; otherwise the SSN
    is marked 'Complete' and its database object is saved.

    Args:
        enzyme_type: Passed to ``AllByAllBlaster``, ``SSN`` and the
            follow-up jobs.
        log_level: Forwarded to ``AllByAllBlaster`` and ``SSN``.
        max_num: Forwarded as ``max_num`` to each node query.
    """
    current_app.app_context().push()

    aba_blaster = AllByAllBlaster(enzyme_type, log_level=log_level)
    aba_blaster.make_blast_db()

    ssn = SSN(enzyme_type, aba_blaster=aba_blaster, log_level=log_level)
    ssn.load()
    ssn.set_status('Checking SSN')
    ssn.remove_nonexisting_seqs()
    ssn.remove_seqs_marked_with_no_alignments()

    # Each query is wrapped in a lambda so that a later stage is only
    # evaluated once every earlier stage has come back empty.
    stages = (
        (lambda: ssn.nodes_not_present(only_biocatdb=True, max_num=max_num),
         'Adding and aligning BioCatDB sequences'),
        (lambda: ssn.nodes_need_alignments(max_num=max_num),
         'Aligning sequences in SSN'),
        (lambda: ssn.nodes_not_present(max_num=max_num),
         'Adding UniRef sequences which are not yet present'),
    )
    for find_pending, status in stages:
        pending = find_pending()
        if len(pending) == 0:
            continue
        ssn.clear_position_information()
        ssn.set_status(status)
        ssn.add_multiple_proteins(pending)
        ssn.save()
        # Only one stage runs per job; the follow-up job picks up the rest.
        current_app.alignment_queue.enqueue(new_expand_ssn_job, enzyme_type)
        return

    needs_precalc = (ssn.db_object.precalculated_vis == {}
                     and len(ssn.graph.nodes) >= 20)
    if not needs_precalc:
        ssn.set_status('Complete')
        print(f'- SSN CONSTRUCTION FOR {enzyme_type} IS COMPLETE -')
        ssn.db_object.save()
        return

    ssn.set_status('Precalculating visualisations')
    current_app.preprocess_queue.enqueue(precalculate_job, enzyme_type)