def get_cigar(s1, s2, kind="global"):
    """Align two sequences with parasail and return the alignment's CIGAR.

    The alignment is run with the positional arguments ``101, 10`` and the
    ``parasail.pam100`` matrix (gap open 101 / gap extend 10, per parasail's
    documented argument order).

    :param s1: first (query) sequence
    :param s2: second (reference) sequence
    :param kind: alignment type: ``"global"`` (``nw_trace``),
        ``"semi-global"`` (``sg_trace``) or ``"local"`` (``sw_trace``).
        Previously this name was read without being defined in the function;
        it is now an explicit keyword argument.
    :return: list of ``(operation, length)`` tuples in alignment order,
        e.g. ``[("=", 4)]`` -- operation first, length second, similar to how
        SAM tooling such as pysam presents CIGARs.
    :raises ValueError: if ``kind`` is not one of the supported values
    """
    # Name of the parasail trace routine for each supported alignment kind.
    trace_function_names = {
        "local": "sw_trace",
        "semi-global": "sg_trace",
        "global": "nw_trace",
    }
    # Validate before importing/aligning so bad input fails fast.
    if kind not in trace_function_names:
        raise ValueError(
            "The kind of alignment must be global, semi-global, or local.")

    import parasail

    align = getattr(parasail, trace_function_names[kind])
    result = align(s1, s2, 101, 10, parasail.pam100)

    cigar = result.cigar
    # Each element of cigar.seq is an encoded value; decode_op() returns
    # bytes, hence the extra .decode() to get a str operation code.
    return [(cigar.decode_op(encoded).decode(), cigar.decode_len(encoded))
            for encoded in cigar.seq]
def test1():
    """Print selected fields from several parasail alignments of "asdf" vs "asdf".

    Exercises, in order: the SSW profile API, a plain ``sw_trace`` CIGAR,
    a striped 8-bit profile ``sw_trace`` CIGAR, and the ``query``/``ref``
    attributes of a ``sw_trace`` result.
    """
    # SSW profile alignment.
    profile = parasail.ssw_init("asdf", parasail.blosum62, 1)
    result = parasail.ssw_profile(profile, "asdf", 10, 1)
    print(profile.s1)
    print(profile.s1Len)
    print(result.cigarLen)
    print(result.cigar[0])

    # Plain trace alignment: inspect the CIGAR object.
    result = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
    cigar = result.cigar
    print(cigar.len)
    print(cigar.seq[0])
    print(cigar.decode)

    # Striped 8-bit profile trace alignment.
    profile = parasail.profile_create_8("asdf", parasail.blosum62)
    result = parasail.sw_trace_striped_profile_8(profile, "asdf", 10, 1)
    cigar = result.cigar
    print(cigar.len)
    print(cigar.seq[0])

    # Query / reference as reported by the result.
    result = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
    print(result.query)
    print(result.ref)
def test1(self):
    """Print selected fields from several parasail alignments of "asdf" vs "asdf".

    Covers the SSW profile API, a plain ``sw_trace`` CIGAR, a striped 8-bit
    profile ``sw_trace`` CIGAR, and the ``query``/``ref`` result attributes.
    ``self`` is not used.
    """
    # SSW profile alignment.
    ssw_profile = parasail.ssw_init("asdf", parasail.blosum62, 1)
    aln = parasail.ssw_profile(ssw_profile, "asdf", 10, 1)
    print(ssw_profile.s1)
    print(ssw_profile.s1Len)
    print(aln.cigarLen)
    print(aln.cigar[0])
    # The original rebinds one name for both profiles; keep that so the
    # first profile is released at the same point.
    profile = ssw_profile
    del ssw_profile

    # Plain trace alignment: inspect the CIGAR object.
    aln = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
    aln_cigar = aln.cigar
    print(aln_cigar.len)
    print(aln_cigar.seq[0])
    print(aln_cigar.decode)

    # Striped 8-bit profile trace alignment.
    profile = parasail.profile_create_8("asdf", parasail.blosum62)
    aln = parasail.sw_trace_striped_profile_8(profile, "asdf", 10, 1)
    aln_cigar = aln.cigar
    print(aln_cigar.len)
    print(aln_cigar.seq[0])

    # Query / reference as reported by the result.
    aln = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
    print(aln.query)
    print(aln.ref)
def _get_hit_parasail(vj,
                      all_genes,
                      organism,
                      ab,
                      blast_seq,
                      user_matrix=ps.matrix_create("ACGT", 3, -5)):
    """
    The workhorse function that can be inserted into the VJ loop in
    parse_unpaired_dna_sequence_blastn().

    It returns objects that mimic the returns of functions:
        parse_blast_alignments()
        get_all_hits_with_evalues_and_scores()

    :param vj: passed as ``region`` to _get_ids_by_org_chain_region()
    :param all_genes: passed as ``d`` to the id and sequence lookup helpers
    :param organism: passed as ``organism`` to the id and sequence lookup helpers
    :param ab: passed as ``chain`` to _get_ids_by_org_chain_region()
    :param blast_seq: query sequence aligned against every reference sequence
    :param user_matrix: substitution matrix; NOTE that the body currently
        overwrites this argument (see the NOTE(review) comment below)
    :return: 2-part dictionary containing "hits" and "hits_scores"
        {"hits" : {"tmp": ParasailMatch.instance},
         "hits_scores" : [(id, score, evalue), (id, score, evalue)]}

        "hits" returns a ParasailMatch instance that mimics the attributes
        of a BlastMatch instance.
        "hits_scores" returns a list of 3-part tuples with
            0: hit_id,
            1: alignment score,
            2: evalue approximation
        sorted from highest to lowest alignment score.
    :raises IndexError: if no reference sequences are found (there is then
        no top-scoring alignment to report)
    """
    ids = _get_ids_by_org_chain_region(organism=organism,
                                       chain=ab,
                                       region=vj,
                                       d=all_genes)
    # seqs is a list of tuples
    # 0: hit_id,
    # 1: hit_seq (full length reference seq)
    # 2: strand (-1 = rev comp)
    seqs = _get_sequence_tuples_from_ids(ids=ids,
                                         organism=organism,
                                         d=all_genes)

    # NOTE(review): this unconditionally replaces the caller-supplied
    # user_matrix, so the parameter has no effect. Kept as-is to avoid
    # silently changing alignment scores; confirm the intended behavior.
    user_matrix = ps.matrix_create("ACGT", 1, -3)  # -5

    scores = []
    for seq in seqs:
        # smith-waterman alignment implemented using parasail
        s = ps.sw_trace(
            s1=blast_seq,
            s2=seq[1],
            extend=2,  # 3
            open=5,  # 5
            matrix=user_matrix)
        scores.append({
            'score': s.score,
            'parasail_result': s,
            'query_seq': blast_seq,
            'hit_id': seq[0],
            'hit_seq': seq[1],
            'h_strand': seq[2],
            'q_strand': 1
        })

    # sort parasail results from highest to lowest alignment score
    scores = sorted(scores, key=lambda x: x['score'], reverse=True)
    id_score_evalue = [(s['hit_id'], s['score'],
                        _evalue_aproximation(s['score'])) for s in scores]

    # Everything below annotates only the highest scoring alignment.
    top = scores[0]

    # add aligned strings and the comparison line from the traceback
    top["parasail_result"].get_traceback()
    top["q_seq"] = top["parasail_result"]._traceback.query
    top["h_seq"] = top["parasail_result"]._traceback.ref
    top["comp"] = top["parasail_result"]._traceback.comp

    # add alignment start/stop positions
    top["q_start"] = _q_start(q_seq=top["q_seq"], query_seq=blast_seq)
    top["q_stop"] = _q_stop(q_seq=top["q_seq"], query_seq=blast_seq)
    top["h_start"] = _h_start(h_seq=top["h_seq"],
                              hit_seq=top["hit_seq"],
                              h_strand=top["h_strand"])
    top["h_stop"] = _h_stop(h_seq=top["h_seq"],
                            hit_seq=top["hit_seq"],
                            h_strand=top["h_strand"])
    top["identities"] = _identities(top["comp"])

    # add q2hmap for the highest scoring alignment
    q2hmap = _create_q2hmap(q_seq=top["q_seq"],
                            h_seq=top["h_seq"],
                            q_start=top['q_start'],
                            h_start=top['h_start'],
                            q_strand=top['q_strand'],
                            h_strand=top['h_strand'])

    phony_evalue_must_update_function = _evalue_aproximation(top['score'])

    # bm2 is going to replace the BlastMatch instance passed by
    # parse_blast_alignments(). We only produce it for the top scoring hit
    # scores[0], but the code is written so that we could produce
    # ParasailMatches in a loop.
    bm2 = ParasailMatch(query_id="tmp", hit_id=top['hit_id'])
    bm2.evalue = phony_evalue_must_update_function  #! SHOULD UPDATE
    bm2.identities = top["identities"]  # percent identities out of 100
    bm2.h_start = top['h_start']  # 0-indexed
    bm2.h_stop = top['h_stop']
    bm2.h_strand = top['h_strand']
    bm2.h_align = top['h_seq']
    # Fixed: these previously copied h_start / h_stop, so the match reported
    # hit coordinates as the query coordinates.
    bm2.q_start = top['q_start']
    bm2.q_stop = top['q_stop']
    bm2.q_strand = top['q_strand']
    bm2.q_align = top['q_seq']
    bm2.middleseq = top['comp']
    bm2.q2hmap = q2hmap  # q2hmap ## 0-indexed numbering wrt to fullseq
    bm2.valid = "True"  # valid IF WHAT?
    bm2.frame = 'NA'

    # results are meant to mimic the outputs in prior functions from blast version:
    # hits = parse_blast_alignments( blast_tmpfile+'.blast', evalue_threshold, identity_threshold )
    # hits_scores = get_all_hits_with_evalues_and_scores( blast_tmpfile+'.blast' ) ## id,bitscore,evalue
    results = {"hits": {"tmp": bm2}, "hits_scores": id_score_evalue}
    return results
def test4():
    """Print the traceback of "ACGT" vs "AcgT" with case sensitivity switched on.

    Calls ``parasail.set_case_sensitive(True)`` first, then aligns with a
    matrix built from the upper-case alphabet "ACGT".
    """
    parasail.set_case_sensitive(True)
    scoring = parasail.matrix_create("ACGT", 2, 1)
    alignment = parasail.sw_trace("ACGT", "AcgT", 10, 1, scoring)
    print_traceback_attributes(alignment.traceback)
def test22():
    """Print the case-sensitive traceback of "ACGT" vs "AcgT".

    The matrix is built over the mixed-case alphabet "ACGTacgt" and the
    traceback is requested explicitly with ``case_sensitive=True``.
    """
    # Fourth argument is presumably the case-sensitivity flag -- TODO confirm.
    scoring = parasail.matrix_create("ACGTacgt", 2, 1, True)
    alignment = parasail.sw_trace("ACGT", "AcgT", 10, 1, scoring)
    print_traceback_attributes(alignment.get_traceback(case_sensitive=True))
def test21():
    """Print the default traceback of "ACGT" vs "AcgT".

    The matrix is built over the mixed-case alphabet "ACGTacgt"; the
    traceback is read from the result's ``traceback`` attribute.
    """
    # Fourth argument is presumably the case-sensitivity flag -- TODO confirm.
    scoring = parasail.matrix_create("ACGTacgt", 2, 1, True)
    alignment = parasail.sw_trace("ACGT", "AcgT", 10, 1, scoring)
    print_traceback_attributes(alignment.traceback)
def test2():
    """Align "asdf" with itself (blosum62) and print the CIGAR attributes."""
    alignment = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
    print_cigar_attributes(alignment.cigar)
def test3():
    """Align "asdf" with itself (blosum62) and print the traceback attributes."""
    alignment = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
    print_traceback_attributes(alignment.traceback)
def test2():
    """Print the CIGAR attributes of an "asdf" vs "asdf" blosum62 alignment."""
    aln = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
    print_cigar_attributes(aln.cigar)
def test3():
    """Print the traceback attributes of an "asdf" vs "asdf" blosum62 alignment."""
    aln = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
    print_traceback_attributes(aln.traceback)
def test4():
    """Enable case sensitivity, align "ACGT" with "AcgT", print the traceback.

    The scoring matrix covers only the upper-case alphabet "ACGT".
    """
    parasail.set_case_sensitive(True)
    acgt_matrix = parasail.matrix_create("ACGT", 2, 1)
    aln = parasail.sw_trace("ACGT", "AcgT", 10, 1, acgt_matrix)
    print_traceback_attributes(aln.traceback)
def test22():
    """Align "ACGT" with "AcgT" and print a traceback built with case_sensitive=True.

    Uses a matrix created over the mixed-case alphabet "ACGTacgt".
    """
    # Fourth argument is presumably the case-sensitivity flag -- TODO confirm.
    mixed_case_matrix = parasail.matrix_create("ACGTacgt", 2, 1, True)
    aln = parasail.sw_trace("ACGT", "AcgT", 10, 1, mixed_case_matrix)
    print_traceback_attributes(aln.get_traceback(case_sensitive=True))
def test21():
    """Align "ACGT" with "AcgT" and print the result's default traceback.

    Uses a matrix created over the mixed-case alphabet "ACGTacgt".
    """
    # Fourth argument is presumably the case-sensitivity flag -- TODO confirm.
    mixed_case_matrix = parasail.matrix_create("ACGTacgt", 2, 1, True)
    aln = parasail.sw_trace("ACGT", "AcgT", 10, 1, mixed_case_matrix)
    print_traceback_attributes(aln.traceback)
def test3(self):
    """Align "asdf" with itself (blosum62) and print the traceback attributes.

    ``self`` is not used.
    """
    alignment = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
    trace = alignment.traceback
    print_traceback_attributes(trace)
def test2(self):
    """Align "asdf" with itself (blosum62) and print the CIGAR attributes.

    ``self` is not used.
    """
    alignment = parasail.sw_trace("asdf", "asdf", 10, 1, parasail.blosum62)
    cigar = alignment.cigar
    print_cigar_attributes(cigar)