def get_cigar(s1, s2):
    """Align ``s1`` against ``s2`` with parasail and return the CIGAR as pairs.

    The alignment flavour is chosen by ``kind``, which is NOT a parameter of
    this function: it is read from the enclosing (or module) scope and must be
    one of ``"local"``, ``"semi-global"`` or ``"global"``.

    All three alignments use the ``parasail.pam100`` matrix with the numeric
    arguments ``101`` and ``10`` (gap open / gap extend in parasail's
    positional order -- confirm against the parasail documentation).

    :param s1: first sequence handed to parasail
    :param s2: second sequence handed to parasail
    :return: list of ``(operation, length)`` tuples, one per CIGAR element, in
        alignment order; ``operation`` is the decoded one-character string
    :raises ValueError: if ``kind`` is not one of the supported values
    """
    import parasail

    if kind == "local":
        result = parasail.sw_trace(s1, s2, 101, 10, parasail.pam100)
    elif kind == "semi-global":
        result = parasail.sg_trace(s1, s2, 101, 10, parasail.pam100)
    elif kind == "global":
        result = parasail.nw_trace(s1, s2, 101, 10, parasail.pam100)
    else:
        raise ValueError(
            "The kind of alignment must be global, semi-global, or local.")

    cigar = result.cigar
    # Canonical (type, length) ordering, as used e.g. by pysam for SAM files.
    # decode_op() returns bytes, hence the extra .decode() to get a str.
    return [(cigar.decode_op(element).decode(), cigar.decode_len(element))
            for element in cigar.seq]
Example #2
0
def test1():
    """Print selected fields from several parasail alignments of "asdf" with itself.

    Exercises, in order: the SSW profile interface, ``sw_trace`` CIGAR access,
    the striped 8-bit profile trace, and the query/ref stored on a trace result.
    """
    # SSW interface: profile fields, then the result's CIGAR length and first entry.
    profile = parasail.ssw_init("asdf", parasail.blosum62, 1)
    result = parasail.ssw_profile(profile, "asdf", 10, 1)
    print(profile.s1)
    print(profile.s1Len)
    print(result.cigarLen)
    print(result.cigar[0])

    # Traced alignment: CIGAR object fields.
    result = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
    cigar = result.cigar
    print(cigar.len)
    print(cigar.seq[0])
    print(cigar.decode)

    # Same alignment through an 8-bit profile.
    profile = parasail.profile_create_8("asdf", parasail.blosum62)
    result = parasail.sw_trace_striped_profile_8(profile, "asdf", 10, 1)
    cigar = result.cigar
    print(cigar.len)
    print(cigar.seq[0])

    # The trace result keeps the sequences it was built from.
    result = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
    print(result.query)
    print(result.ref)
Example #3
0
    def test1(self):
        """Print fields of SSW, traced and profile-based alignments of "asdf" vs "asdf"."""
        # --- SSW profile and result ---
        prof = parasail.ssw_init("asdf", parasail.blosum62, 1)
        res = parasail.ssw_profile(prof, "asdf", 10, 1)
        print(prof.s1)
        print(prof.s1Len)
        print(res.cigarLen)
        print(res.cigar[0])

        # --- CIGAR of a plain traced alignment ---
        res = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
        cig = res.cigar
        print(cig.len)
        print(cig.seq[0])
        print(cig.decode)

        # --- CIGAR of a striped 8-bit profile alignment ---
        prof = parasail.profile_create_8("asdf", parasail.blosum62)
        res = parasail.sw_trace_striped_profile_8(prof, "asdf", 10, 1)
        cig = res.cigar
        print(cig.len)
        print(cig.seq[0])

        # --- sequences recorded on the traced result ---
        res = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
        print(res.query)
        print(res.ref)
Example #4
0
def _get_hit_parasail(vj,
                      all_genes,
                      organism,
                      ab,
                      blast_seq,
                      user_matrix=None):
    """
    Align ``blast_seq`` against every candidate reference sequence with
    parasail and package the best-scoring hit like the blast-based code does.

    The workhorse function that can be inserted into the VJ loop in parse_unpaired_dna_sequence_blastn()
    It returns objects that mimic the returns of functions:
        parse_blast_alignments()
        get_all_hits_with_evalues_and_scores()

    :param vj: forwarded as ``region`` to _get_ids_by_org_chain_region()
    :param all_genes: forwarded as ``d`` to the id and sequence lookup helpers
    :param organism: forwarded as ``organism`` to the lookup helpers
    :param ab: forwarded as ``chain`` to _get_ids_by_org_chain_region()
    :param blast_seq: query sequence aligned against each reference sequence
    :param user_matrix: parasail substitution matrix to align with; when None
        (the default) ``ps.matrix_create("ACGT", 1, -3)`` is used

    :return: 2-part dictionary containing "hits" and "hits_scores"
        {"hits" : {"tmp": ParasailMatch.instance}, "hits_scores" : [(id,score,evalue),((id,score,evalue))]}
            "hits" returns a ParasailMatch instance that mimics the attributes of a BlastMatch instance
            "hits_scores" returns a list of 3-part tuples with, 0: hit_id, 1: alignment score, 2: evalue approximation
    :raises IndexError: if the lookup helpers yield no reference sequences
        (there is then no best hit to report)
    """

    ids = _get_ids_by_org_chain_region(organism=organism,
                                       chain=ab,
                                       region=vj,
                                       d=all_genes)

    # seqs is a list of tuples
    # 0: hit_id,
    # 1: hit_seq (full length reference seq)
    # 2: strand (-1 = rev comp)
    seqs = _get_sequence_tuples_from_ids(ids=ids,
                                         organism=organism,
                                         d=all_genes)

    # Build the fallback matrix lazily so that a caller-supplied matrix is
    # honoured and nothing is constructed at function-definition time.
    if user_matrix is None:
        user_matrix = ps.matrix_create("ACGT", 1, -3)

    scores = []
    for seq in seqs:
        # smith-waterman alignment implemented using parasail
        s = ps.sw_trace(
            s1=blast_seq,
            s2=seq[1],
            extend=2,
            open=5,
            matrix=user_matrix)
        scores.append({
            'score': s.score,
            'parasail_result': s,
            'query_seq': blast_seq,
            'hit_id': seq[0],
            'hit_seq': seq[1],
            'h_strand': seq[2],
            'q_strand': 1
        })

    # sort parasail results from highest to lowest alignment score
    # (stable sort: ties keep their original relative order)
    scores.sort(key=lambda x: x['score'], reverse=True)
    id_score_evalue = [(s['hit_id'], s['score'],
                        _evalue_aproximation(s['score'])) for s in scores]

    # add alignment start positions for the highest scoring alignment
    best = scores[0]
    # Use the object returned by get_traceback() instead of reaching into the
    # result's private ``_traceback`` attribute.
    traceback = best["parasail_result"].get_traceback()
    best["q_seq"] = traceback.query
    best["h_seq"] = traceback.ref
    best["comp"] = traceback.comp
    best["q_start"] = _q_start(q_seq=best["q_seq"], query_seq=blast_seq)
    best["q_stop"] = _q_stop(q_seq=best["q_seq"], query_seq=blast_seq)
    best["h_start"] = _h_start(h_seq=best["h_seq"],
                               hit_seq=best["hit_seq"],
                               h_strand=best["h_strand"])
    best["h_stop"] = _h_stop(h_seq=best["h_seq"],
                             hit_seq=best["hit_seq"],
                             h_strand=best["h_strand"])
    best["identities"] = _identities(best["comp"])

    # add q2hmap for the highest scoring alignment
    q2hmap = _create_q2hmap(q_seq=best["q_seq"],
                            h_seq=best["h_seq"],
                            q_start=best['q_start'],
                            h_start=best['h_start'],
                            q_strand=best['q_strand'],
                            h_strand=best['h_strand'])

    phony_evalue_must_update_function = _evalue_aproximation(best['score'])

    # bm2 is going to replace the BlastMatch instance passed by parse_blast_alignments()
    # we only produce it for the top scoring hit scores[0], but the code is written,
    # so that we could produce ParasailMatches in a loop
    bm2 = ParasailMatch(query_id="tmp", hit_id=best['hit_id'])
    bm2.evalue = phony_evalue_must_update_function  #! SHOULD UPDATE
    bm2.identities = best["identities"]  # percent identities out of 100
    bm2.h_start = best['h_start']  # 0-indexed
    bm2.h_stop = best['h_stop']
    bm2.h_strand = best['h_strand']
    bm2.h_align = best['h_seq']
    # Query coordinates come from the query-side values; previously the hit
    # coordinates were copied here by mistake.
    bm2.q_start = best['q_start']
    bm2.q_stop = best['q_stop']
    bm2.q_strand = best['q_strand']
    bm2.q_align = best['q_seq']
    bm2.middleseq = best['comp']
    bm2.q2hmap = q2hmap  # q2hmap ## 0-indexed numbering wrt to fullseq
    # NOTE(review): this is the string "True", not the boolean -- kept as-is;
    # confirm what downstream consumers of ``valid`` expect.
    bm2.valid = "True"  # valid IF WHAT?
    bm2.frame = 'NA'

    # results are meant to mimic the outputs in prior functions from blast version:
    # hits = parse_blast_alignments( blast_tmpfile+'.blast', evalue_threshold, identity_threshold )
    # hits_scores = get_all_hits_with_evalues_and_scores( blast_tmpfile+'.blast' ) ## id,bitscore,evalue
    return {"hits": {"tmp": bm2}, "hits_scores": id_score_evalue}
Example #5
0
def test4():
    """Call ``parasail.set_case_sensitive(True)``, align "ACGT" with "AcgT" and print the traceback attributes."""
    parasail.set_case_sensitive(True)
    acgt_matrix = parasail.matrix_create("ACGT", 2, 1)
    alignment = parasail.sw_trace("ACGT", "AcgT", 10, 1, acgt_matrix)
    print_traceback_attributes(alignment.traceback)
Example #6
0
def test22():
    """Align "ACGT" with "AcgT" on an "ACGTacgt" matrix and print the ``get_traceback(case_sensitive=True)`` attributes."""
    mixed_case_matrix = parasail.matrix_create("ACGTacgt", 2, 1, True)
    alignment = parasail.sw_trace("ACGT", "AcgT", 10, 1, mixed_case_matrix)
    print_traceback_attributes(alignment.get_traceback(case_sensitive=True))
Example #7
0
def test21():
    """Align "ACGT" with "AcgT" on an "ACGTacgt" matrix and print the default traceback attributes."""
    mixed_case_matrix = parasail.matrix_create("ACGTacgt", 2, 1, True)
    alignment = parasail.sw_trace("ACGT", "AcgT", 10, 1, mixed_case_matrix)
    print_traceback_attributes(alignment.traceback)
Example #8
0
def test2():
    """Align "asdf" with itself using blosum62 and print the CIGAR attributes."""
    alignment = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
    print_cigar_attributes(alignment.cigar)
Example #9
0
def test3():
    """Align "asdf" with itself using blosum62 and print the traceback attributes."""
    alignment = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
    print_traceback_attributes(alignment.traceback)
Example #10
0
def test2():
    """Print the CIGAR attributes of a blosum62 ``sw_trace`` of "asdf" against "asdf"."""
    print_cigar_attributes(
        parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62).cigar
    )
Example #11
0
def test3():
    """Print the traceback attributes of a blosum62 ``sw_trace`` of "asdf" against "asdf"."""
    print_traceback_attributes(
        parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62).traceback
    )
Example #12
0
def test4():
    """After ``parasail.set_case_sensitive(True)``, trace "ACGT" vs "AcgT" and print the traceback attributes."""
    parasail.set_case_sensitive(True)
    scoring = parasail.matrix_create("ACGT", 2, 1)
    aligned = parasail.sw_trace("ACGT", "AcgT", 10, 1, scoring)
    print_traceback_attributes(aligned.traceback)
Example #13
0
def test22():
    """Trace "ACGT" vs "AcgT" on an "ACGTacgt" matrix; print the traceback built with ``case_sensitive=True``."""
    scoring = parasail.matrix_create("ACGTacgt", 2, 1, True)
    aligned = parasail.sw_trace("ACGT", "AcgT", 10, 1, scoring)
    print_traceback_attributes(aligned.get_traceback(case_sensitive=True))
Example #14
0
def test21():
    """Trace "ACGT" vs "AcgT" on an "ACGTacgt" matrix; print the default traceback attributes."""
    scoring = parasail.matrix_create("ACGTacgt", 2, 1, True)
    aligned = parasail.sw_trace("ACGT", "AcgT", 10, 1, scoring)
    print_traceback_attributes(aligned.traceback)
Example #15
0
 def test3(self):
     """Trace "asdf" against itself with blosum62 and print the traceback attributes."""
     traced = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
     alignment_traceback = traced.traceback
     print_traceback_attributes(alignment_traceback)
Example #16
0
 def test2(self):
     """Trace "asdf" against itself with blosum62 and print the CIGAR attributes."""
     traced = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
     alignment_cigar = traced.cigar
     print_cigar_attributes(alignment_cigar)