def parasail_alignment(s1, s2, match_score=2, mismatch_penalty=-2, opening_penalty=3, gap_ext=1):
    """Semi-globally align ``s1`` against ``s2`` with parasail.

    A 16-bit ``sg_trace_scan`` run is attempted first; when parasail flags the
    result as saturated, the alignment is recomputed with the 32-bit kernel
    (both events are reported on stdout).

    Args:
        s1: First sequence, passed to parasail as the first (query) argument.
        s2: Second sequence, passed to parasail as the second (reference) argument.
        match_score: Score for identical "ACGT" characters.
        mismatch_penalty: Score for differing "ACGT" characters.
        opening_penalty: Gap-open penalty handed to parasail.
        gap_ext: Gap-extension penalty handed to parasail.

    Returns:
        Tuple ``(s1_alignment, s2_alignment, cigar_string, cigar_tuples, score)``
        where the first, second and fourth items are whatever ``cigar_to_seq``
        produces from the CIGAR string and the two input sequences.
    """
    scoring = parasail.matrix_create("ACGT", match_score, mismatch_penalty)

    aln = parasail.sg_trace_scan_16(s1, s2, opening_penalty, gap_ext, scoring)
    if aln.saturated:
        print("SATURATED!", len(s1), len(s2))
        aln = parasail.sg_trace_scan_32(s1, s2, opening_penalty, gap_ext, scoring)
        print("computed 32 bit instead")

    # The CIGAR comes back as bytes; turning it into text differs between
    # Python 2 and Python 3.
    on_python3 = sys.version_info[0] >= 3
    if on_python3:
        cigar_string = str(aln.cigar.decode, 'utf-8')
    else:
        cigar_string = str(aln.cigar.decode).decode('utf-8')

    expanded = cigar_to_seq(cigar_string, s1, s2)
    s1_alignment, s2_alignment, cigar_tuples = expanded
    return s1_alignment, s2_alignment, cigar_string, cigar_tuples, aln.score
def parasail_block_alignment(s1, s2, k, match_id, x_acc="", y_acc="", match_score=2, mismatch_penalty=-2, opening_penalty=5, gap_ext=1, ends_discrepancy_threshold=0):
    """Align ``s1`` to ``s2`` and measure how much of the alignment is well matched.

    The sequences are semi-globally aligned with parasail (16-bit first, 32-bit
    on saturation). A window of ``k`` alignment columns is then slid across the
    alignment; every window position holding at least ``match_id`` identical
    columns counts as aligned.

    Args:
        s1: First sequence (parasail query).
        s2: Second sequence (parasail reference).
        k: Width of the sliding window, in alignment columns.
        match_id: Minimum number of matching columns for a window to count.
        x_acc: Unused by this function.
        y_acc: Unused by this function.
        match_score: Score for identical "ACGT" characters.
        mismatch_penalty: Score for differing "ACGT" characters.
        opening_penalty: Gap-open penalty handed to parasail.
        gap_ext: Gap-extension penalty handed to parasail.
        ends_discrepancy_threshold: Unused by this function.

    Returns:
        ``(s1, s2, (s1_alignment, s2_alignment, alignment_ratio))`` where
        ``alignment_ratio`` is the number of qualifying windows divided by
        ``len(s1)``.
    """
    scoring = parasail.matrix_create("ACGT", match_score, mismatch_penalty)
    aln = parasail.sg_trace_scan_16(s1, s2, opening_penalty, gap_ext, scoring)
    if aln.saturated:
        print("SATURATED!")
        aln = parasail.sg_trace_scan_32(s1, s2, opening_penalty, gap_ext, scoring)

    # Bytes-to-text conversion of the CIGAR differs between Python 2 and 3.
    if sys.version_info[0] >= 3:
        cigar_string = str(aln.cigar.decode, 'utf-8')
    else:
        cigar_string = str(aln.cigar.decode).decode('utf-8')

    s1_alignment, s2_alignment = cigar_to_seq(cigar_string, s1, s2)

    # One entry per alignment column: 1 where both rows agree, 0 otherwise.
    column_matches = [
        1 if a == b else 0 for a, b in zip(s1_alignment, s2_alignment)
    ]

    # Rolling window over the columns: seed it with the first k columns, then
    # slide one column at a time while keeping the match count up to date.
    window = deque(column_matches[:k])
    matches_in_window = sum(window)
    qualifying_windows = 1 if matches_in_window >= match_id else 0

    for entering in column_matches[k:]:
        leaving = window.popleft()
        matches_in_window = matches_in_window - leaving + entering
        window.append(entering)
        if matches_in_window >= match_id:
            qualifying_windows += 1

    alignment_ratio = qualifying_windows / float(len(s1))
    return (s1, s2, (s1_alignment, s2_alignment, alignment_ratio))
def parasail_block_alignment(s1, s2, k, match_id, match_score=2, mismatch_penalty=-2, opening_penalty=5, gap_ext=1):
    """Semi-globally align two sequences and score the alignment block-wise.

    After aligning with parasail (falling back from the 16-bit to the 32-bit
    kernel on saturation), a ``k``-column window is rolled over the alignment
    and each position is flagged 1 when it contains at least ``match_id``
    matching columns, 0 otherwise.

    Args:
        s1: First sequence (parasail query).
        s2: Second sequence (parasail reference).
        k: Width of the rolling window, in alignment columns.
        match_id: Minimum matches inside a window for it to be flagged.
        match_score: Score for identical "ACGT" characters.
        mismatch_penalty: Score for differing "ACGT" characters.
        opening_penalty: Gap-open penalty handed to parasail.
        gap_ext: Gap-extension penalty handed to parasail.

    Returns:
        ``(s1, s2, (s1_alignment, s2_alignment, alignment_ratio))``; the ratio
        is the count of flagged windows divided by ``len(s1)``.
    """
    substitution = parasail.matrix_create("ACGT", match_score, mismatch_penalty)
    res = parasail.sg_trace_scan_16(s1, s2, opening_penalty, gap_ext, substitution)
    if res.saturated:
        print("SATURATED!", len(s1), len(s2))
        res = parasail.sg_trace_scan_32(s1, s2, opening_penalty, gap_ext, substitution)
        print("computed 32 bit instead")

    # Python 2 and Python 3 need different calls to get text out of the CIGAR bytes.
    py_major = sys.version_info[0]
    if py_major >= 3:
        cigar_string = str(res.cigar.decode, 'utf-8')
    else:
        cigar_string = str(res.cigar.decode).decode('utf-8')

    s1_alignment, s2_alignment = help_functions.cigar_to_seq(cigar_string, s1, s2)

    # 1 for every column where the two aligned rows carry the same character.
    is_match = [
        1 if top == bottom else 0
        for top, bottom in zip(s1_alignment, s2_alignment)
    ]

    # Rolling window of matching columns, initialised on the first k columns.
    block = deque(is_match[:k])
    running_total = sum(block)
    flags = [1 if running_total >= match_id else 0]

    for incoming in is_match[k:]:
        outgoing = block.popleft()
        running_total = running_total - outgoing + incoming
        block.append(incoming)
        flags.append(1 if running_total >= match_id else 0)

    alignment_ratio = sum(flags) / float(len(s1))
    return (s1, s2, (s1_alignment, s2_alignment, alignment_ratio))
def parasail_local(s1, s2, match_score=2, mismatch_penalty=-2, opening_penalty=3, gap_ext=1):
    """Locally align ``s1`` (query) against ``s2`` (reference) with parasail.

    A 16-bit Smith-Waterman trace scan is tried first. If parasail reports the
    result as saturated, the alignment is recomputed with the 32-bit
    Smith-Waterman kernel so the alignment mode stays local.

    The aligned sub-sequences, their coordinates and the CIGAR string are
    printed to stdout as diagnostics.

    Args:
        s1: Query sequence.
        s2: Reference sequence.
        match_score: Score for identical "ACGT" characters.
        mismatch_penalty: Score for differing "ACGT" characters.
        opening_penalty: Gap-open penalty handed to parasail.
        gap_ext: Gap-extension penalty handed to parasail.

    Returns:
        Tuple ``(s1_alignment, s2_alignment, cigar_string, cigar_tuples, score)``
        where the first, second and fourth items come from ``cigar_to_seq``
        applied to the locally aligned portions of ``s1`` and ``s2``.
    """
    user_matrix = parasail.matrix_create("ACGT", match_score, mismatch_penalty)
    result = parasail.sw_trace_scan_16(s1, s2, opening_penalty, gap_ext, user_matrix)
    if result.saturated:
        print("SATURATED!", len(s1), len(s2))
        # Retry with the 32-bit *local* kernel. Falling back to the
        # semi-global (sg) kernel here would silently change the alignment mode.
        result = parasail.sw_trace_scan_32(s1, s2, opening_penalty, gap_ext, user_matrix)
        print("computed 32 bit instead")

    # difference in how to obtain string from parasail between python v2 and v3...
    if sys.version_info[0] < 3:
        cigar_string = str(result.cigar.decode).decode('utf-8')
    else:
        cigar_string = str(result.cigar.decode, 'utf-8')

    # parasail reports end_query/end_ref as the 0-based index of the last
    # aligned character (inclusive); Python slice ends are exclusive, so add 1
    # to keep the final aligned character of each sequence.
    query_span = s1[result.cigar.beg_query:result.end_query + 1]
    ref_span = s2[result.cigar.beg_ref:result.end_ref + 1]
    s1_alignment, s2_alignment, cigar_tuples = cigar_to_seq(
        cigar_string, query_span, ref_span)

    # Diagnostic output (coordinates are printed exactly as parasail reports them).
    print("read", s1_alignment)
    print("Rref", s2_alignment)
    print(result.cigar.beg_query, result.end_query)
    print(result.cigar.beg_ref, result.end_ref)
    print(cigar_string)

    return s1_alignment, s2_alignment, cigar_string, cigar_tuples, result.score
def parasail_alignment(read, reference, x_acc="", y_acc="", match_score=2, mismatch_penalty=-2, opening_penalty=2, gap_ext=1, ends_discrepancy_threshold=0):
    """Semi-globally align ``read`` to ``reference`` and return the gapped rows.

    The alignment is computed with parasail's 16-bit ``sg_trace_scan`` and
    redone with the 32-bit kernel when the first result is saturated.

    Args:
        read: Query sequence.
        reference: Reference sequence.
        x_acc: Unused by this function.
        y_acc: Unused by this function.
        match_score: Score for identical "ACGT" characters.
        mismatch_penalty: Score for differing "ACGT" characters.
        opening_penalty: Gap-open penalty handed to parasail.
        gap_ext: Gap-extension penalty handed to parasail.
        ends_discrepancy_threshold: Unused by this function.

    Returns:
        ``(read_alignment, ref_alignment)`` as produced by ``cigar_to_seq``
        from the CIGAR string and the two input sequences.
    """
    scoring = parasail.matrix_create("ACGT", match_score, mismatch_penalty)

    outcome = parasail.sg_trace_scan_16(read, reference, opening_penalty, gap_ext, scoring)
    if outcome.saturated:
        print("SATURATED!")
        outcome = parasail.sg_trace_scan_32(read, reference, opening_penalty, gap_ext, scoring)

    # Decoding the CIGAR bytes into text is version dependent.
    if sys.version_info[0] >= 3:
        cigar_string = str(outcome.cigar.decode, 'utf-8')
    else:
        cigar_string = str(outcome.cigar.decode).decode('utf-8')

    aligned_pair = cigar_to_seq(cigar_string, read, reference)
    read_alignment, ref_alignment = aligned_pair
    return read_alignment, ref_alignment
def aln_nucleotides(seq1, name1, seq2, name2, opening_penalty=10, gap_ext=1):
    """Semi-globally align two nucleotide sequences and build the PSL output.

    Uses parasail's 32-bit ``sg_trace_scan`` with the ``parasail.nuc44``
    substitution matrix.

    Args:
        seq1: First sequence (parasail query).
        name1: Name associated with ``seq1``; forwarded to ``construct_psl``.
        seq2: Second sequence (parasail reference).
        name2: Name associated with ``seq2``; forwarded to ``construct_psl``.
        opening_penalty: Gap-open penalty (defaults to the previously
            hard-coded value 10).
        gap_ext: Gap-extension penalty (defaults to the previously hard-coded
            value 1).

    Returns:
        Whatever ``construct_psl(name1, name2, result)`` returns for the
        parasail result.
    """
    result = parasail.sg_trace_scan_32(
        seq1, seq2, opening_penalty, gap_ext, parasail.nuc44)
    return construct_psl(name1, name2, result)
def aln_proteins(seq1, name1, seq2, name2, opening_penalty=10, gap_ext=1):
    """Semi-globally align two protein sequences and build the PSL output.

    Uses parasail's 32-bit ``sg_trace_scan`` with the ``parasail.blosum62``
    substitution matrix.

    Args:
        seq1: First sequence (parasail query).
        name1: Name associated with ``seq1``; forwarded to ``construct_psl``.
        seq2: Second sequence (parasail reference).
        name2: Name associated with ``seq2``; forwarded to ``construct_psl``.
        opening_penalty: Gap-open penalty (defaults to the previously
            hard-coded value 10).
        gap_ext: Gap-extension penalty (defaults to the previously hard-coded
            value 1).

    Returns:
        Whatever ``construct_psl(name1, name2, result)`` returns for the
        parasail result.
    """
    result = parasail.sg_trace_scan_32(
        seq1, seq2, opening_penalty, gap_ext, parasail.blosum62)
    return construct_psl(name1, name2, result)