Beispiel #1
0
def parasail_alignment(s1,
                       s2,
                       match_score=2,
                       mismatch_penalty=-2,
                       opening_penalty=3,
                       gap_ext=1):
    """Semi-globally align ``s1`` against ``s2`` using parasail.

    The alignment is computed with 16-bit scores first. If parasail flags
    the result as saturated, the alignment is recomputed with 32-bit scores
    (both events are reported on stdout).

    Args:
        s1: First sequence, passed to parasail as the first argument.
        s2: Second sequence, passed to parasail as the second argument.
        match_score: Score for identical symbols in the "ACGT" matrix.
        mismatch_penalty: Score for differing symbols in the "ACGT" matrix.
        opening_penalty: Gap opening penalty handed to parasail.
        gap_ext: Gap extension penalty handed to parasail.

    Returns:
        A tuple ``(s1_alignment, s2_alignment, cigar_string, cigar_tuples,
        score)``. The two alignments and ``cigar_tuples`` are the values
        produced by ``cigar_to_seq``; ``score`` is ``result.score``.
    """
    scoring = parasail.matrix_create("ACGT", match_score, mismatch_penalty)
    aln_args = (s1, s2, opening_penalty, gap_ext, scoring)

    result = parasail.sg_trace_scan_16(*aln_args)
    if result.saturated:
        print("SATURATED!", len(s1), len(s2))
        result = parasail.sg_trace_scan_32(*aln_args)
        print("computed 32 bit instead")

    # The decoded CIGAR needs a different bytes-to-text conversion on
    # Python 2 than on Python 3.
    raw_cigar = result.cigar.decode
    if sys.version_info[0] >= 3:
        cigar_string = str(raw_cigar, 'utf-8')
    else:
        cigar_string = str(raw_cigar).decode('utf-8')

    s1_alignment, s2_alignment, cigar_tuples = cigar_to_seq(
        cigar_string, s1, s2)
    return s1_alignment, s2_alignment, cigar_string, cigar_tuples, result.score
Beispiel #2
0
def parasail_block_alignment(s1,
                             s2,
                             k,
                             match_id,
                             x_acc="",
                             y_acc="",
                             match_score=2,
                             mismatch_penalty=-2,
                             opening_penalty=5,
                             gap_ext=1,
                             ends_discrepancy_threshold=0):
    """Align ``s1`` to ``s2`` and measure how much of it lies in matching blocks.

    A semi-global parasail alignment is computed (16-bit, recomputed in
    32-bit when saturated). A window of ``k`` alignment columns is then slid
    over the alignment one column at a time; each window position that
    contains at least ``match_id`` identical columns counts as aligned.

    Args:
        s1: First sequence.
        s2: Second sequence.
        k: Number of alignment columns in the sliding window.
        match_id: Minimum number of matching columns for a window to count.
        x_acc: Accepted for interface compatibility; not used here.
        y_acc: Accepted for interface compatibility; not used here.
        match_score: Score for identical symbols in the "ACGT" matrix.
        mismatch_penalty: Score for differing symbols in the "ACGT" matrix.
        opening_penalty: Gap opening penalty handed to parasail.
        gap_ext: Gap extension penalty handed to parasail.
        ends_discrepancy_threshold: Accepted for interface compatibility;
            not used here.

    Returns:
        ``(s1, s2, (s1_alignment, s2_alignment, alignment_ratio))`` where
        ``alignment_ratio`` is the number of qualifying windows divided by
        ``len(s1)``.
    """
    scoring = parasail.matrix_create("ACGT", match_score, mismatch_penalty)
    aln_args = (s1, s2, opening_penalty, gap_ext, scoring)

    result = parasail.sg_trace_scan_16(*aln_args)
    if result.saturated:
        print("SATURATED!")
        result = parasail.sg_trace_scan_32(*aln_args)

    # The decoded CIGAR needs a different bytes-to-text conversion on
    # Python 2 than on Python 3.
    raw_cigar = result.cigar.decode
    if sys.version_info[0] >= 3:
        cigar_string = str(raw_cigar, 'utf-8')
    else:
        cigar_string = str(raw_cigar).decode('utf-8')

    s1_alignment, s2_alignment = cigar_to_seq(cigar_string, s1, s2)

    # One flag per alignment column: 1 when both rows hold the same symbol.
    match_vector = [
        int(left == right) for left, right in zip(s1_alignment, s2_alignment)
    ]

    # Seed the window with the first k columns, then slide it one column at
    # a time while keeping a running count of matches inside it.
    window = deque(match_vector[:k])
    matches_in_window = sum(window)
    aligned_region = [1 if matches_in_window >= match_id else 0]

    for entering in match_vector[k:]:
        leaving = window.popleft()
        window.append(entering)
        matches_in_window += entering - leaving
        aligned_region.append(1 if matches_in_window >= match_id else 0)

    alignment_ratio = sum(aligned_region) / float(len(s1))
    return (s1, s2, (s1_alignment, s2_alignment, alignment_ratio))
Beispiel #3
0
def parasail_block_alignment(s1,
                             s2,
                             k,
                             match_id,
                             match_score=2,
                             mismatch_penalty=-2,
                             opening_penalty=5,
                             gap_ext=1):
    """Align ``s1`` to ``s2`` and report the fraction covered by matching blocks.

    The sequences are aligned semi-globally with parasail, first with 16-bit
    scores and again with 32-bit scores if the first result is saturated
    (reported on stdout). A window of ``k`` alignment columns is slid along
    the alignment; every window position holding at least ``match_id``
    identical columns is counted.

    Args:
        s1: First sequence.
        s2: Second sequence.
        k: Number of alignment columns in the sliding window.
        match_id: Minimum number of matching columns for a window to count.
        match_score: Score for identical symbols in the "ACGT" matrix.
        mismatch_penalty: Score for differing symbols in the "ACGT" matrix.
        opening_penalty: Gap opening penalty handed to parasail.
        gap_ext: Gap extension penalty handed to parasail.

    Returns:
        ``(s1, s2, (s1_alignment, s2_alignment, alignment_ratio))`` where
        ``alignment_ratio`` is the count of qualifying windows divided by
        ``len(s1)``.
    """
    scoring = parasail.matrix_create("ACGT", match_score, mismatch_penalty)
    result = parasail.sg_trace_scan_16(s1, s2, opening_penalty, gap_ext,
                                       scoring)
    if result.saturated:
        print("SATURATED!", len(s1), len(s2))
        result = parasail.sg_trace_scan_32(s1, s2, opening_penalty, gap_ext,
                                           scoring)
        print("computed 32 bit instead")

    # Python 2 and Python 3 need different conversions to turn the decoded
    # CIGAR into a text string.
    encoded_cigar = result.cigar.decode
    if sys.version_info[0] >= 3:
        cigar_string = str(encoded_cigar, 'utf-8')
    else:
        cigar_string = str(encoded_cigar).decode('utf-8')

    s1_alignment, s2_alignment = help_functions.cigar_to_seq(
        cigar_string, s1, s2)

    # Per-column identity flags for the two alignment rows.
    match_vector = [
        int(a == b) for a, b in zip(s1_alignment, s2_alignment)
    ]

    # Rolling window over the flags with an incrementally updated sum.
    window = deque(match_vector[:k])
    running_matches = sum(window)
    aligned_region = [1 if running_matches >= match_id else 0]

    for incoming in match_vector[k:]:
        outgoing = window.popleft()
        window.append(incoming)
        running_matches = running_matches - outgoing + incoming
        aligned_region.append(1 if running_matches >= match_id else 0)

    alignment_ratio = sum(aligned_region) / float(len(s1))
    return (s1, s2, (s1_alignment, s2_alignment, alignment_ratio))
Beispiel #4
0
def parasail_local(s1,
                   s2,
                   match_score=2,
                   mismatch_penalty=-2,
                   opening_penalty=3,
                   gap_ext=1):
    """Locally (Smith-Waterman) align ``s1`` against ``s2`` using parasail.

    The alignment is computed with 16-bit scores first and recomputed with
    32-bit scores when parasail flags the result as saturated. The aligned
    sub-sequences, their coordinates and the CIGAR string are printed to
    stdout.

    Args:
        s1: Query sequence (first argument to parasail).
        s2: Reference sequence (second argument to parasail).
        match_score: Score for identical symbols in the "ACGT" matrix.
        mismatch_penalty: Score for differing symbols in the "ACGT" matrix.
        opening_penalty: Gap opening penalty handed to parasail.
        gap_ext: Gap extension penalty handed to parasail.

    Returns:
        A tuple ``(s1_alignment, s2_alignment, cigar_string, cigar_tuples,
        score)``. The two alignments and ``cigar_tuples`` are the values
        produced by ``cigar_to_seq`` for the locally aligned sub-sequences;
        ``score`` is ``result.score``.
    """
    user_matrix = parasail.matrix_create("ACGT", match_score, mismatch_penalty)
    result = parasail.sw_trace_scan_16(s1, s2, opening_penalty, gap_ext,
                                       user_matrix)
    if result.saturated:
        print("SATURATED!", len(s1), len(s2))
        # Stay in local (sw) mode for the wider retry. Falling back to the
        # semi-global (sg) routine would silently change the alignment type
        # and invalidate the local begin/end slicing below.
        result = parasail.sw_trace_scan_32(s1, s2, opening_penalty, gap_ext,
                                           user_matrix)
        print("computed 32 bit instead")

    # Fetch the CIGAR object once; it carries both the encoded string and
    # the start coordinates of the local alignment.
    cigar = result.cigar
    beg_query = cigar.beg_query
    beg_ref = cigar.beg_ref

    # difference in how to obtain string from parasail between python v2 and v3...
    if sys.version_info[0] < 3:
        cigar_string = str(cigar.decode).decode('utf-8')
    else:
        cigar_string = str(cigar.decode, 'utf-8')

    # parasail's end_query/end_ref are 0-based positions of the last aligned
    # symbol (inclusive), so the slice end must be one past them.
    s1_alignment, s2_alignment, cigar_tuples = cigar_to_seq(
        cigar_string, s1[beg_query:result.end_query + 1],
        s2[beg_ref:result.end_ref + 1])

    print("read", s1_alignment)
    print("Rref", s2_alignment)
    print(beg_query, result.end_query)
    print(beg_ref, result.end_ref)
    print(cigar_string)

    return s1_alignment, s2_alignment, cigar_string, cigar_tuples, result.score
def parasail_alignment(read,
                       reference,
                       x_acc="",
                       y_acc="",
                       match_score=2,
                       mismatch_penalty=-2,
                       opening_penalty=2,
                       gap_ext=1,
                       ends_discrepancy_threshold=0):
    """Semi-globally align ``read`` against ``reference`` using parasail.

    A 16-bit alignment is attempted first; if parasail flags it as
    saturated, "SATURATED!" is printed and a 32-bit alignment is computed
    instead.

    Args:
        read: Query sequence (first argument to parasail).
        reference: Reference sequence (second argument to parasail).
        x_acc: Accepted for interface compatibility; not used here.
        y_acc: Accepted for interface compatibility; not used here.
        match_score: Score for identical symbols in the "ACGT" matrix.
        mismatch_penalty: Score for differing symbols in the "ACGT" matrix.
        opening_penalty: Gap opening penalty handed to parasail.
        gap_ext: Gap extension penalty handed to parasail.
        ends_discrepancy_threshold: Accepted for interface compatibility;
            not used here.

    Returns:
        ``(read_alignment, ref_alignment)`` as produced by ``cigar_to_seq``.
    """
    scoring = parasail.matrix_create("ACGT", match_score, mismatch_penalty)
    aln_args = (read, reference, opening_penalty, gap_ext, scoring)

    result = parasail.sg_trace_scan_16(*aln_args)
    if result.saturated:
        print("SATURATED!")
        result = parasail.sg_trace_scan_32(*aln_args)

    # Python 2 and Python 3 need different conversions to turn the decoded
    # CIGAR into a text string.
    raw_cigar = result.cigar.decode
    if sys.version_info[0] >= 3:
        cigar_string = str(raw_cigar, 'utf-8')
    else:
        cigar_string = str(raw_cigar).decode('utf-8')

    read_alignment, ref_alignment = cigar_to_seq(cigar_string, read, reference)
    return read_alignment, ref_alignment
def aln_nucleotides(seq1, name1, seq2, name2):
    """Align two nucleotide sequences and hand the result to ``construct_psl``.

    Runs parasail's 32-bit semi-global traceback alignment with the
    ``parasail.nuc44`` matrix, gap open 10 and gap extend 1.

    Args:
        seq1: First sequence (first argument to parasail).
        name1: Name passed to ``construct_psl`` as its first argument.
        seq2: Second sequence (second argument to parasail).
        name2: Name passed to ``construct_psl`` as its second argument.

    Returns:
        Whatever ``construct_psl(name1, name2, result)`` returns.
    """
    gap_open, gap_extend = 10, 1
    alignment = parasail.sg_trace_scan_32(seq1, seq2, gap_open, gap_extend,
                                          parasail.nuc44)
    return construct_psl(name1, name2, alignment)
def aln_proteins(seq1, name1, seq2, name2):
    """Align two protein sequences and hand the result to ``construct_psl``.

    Runs parasail's 32-bit semi-global traceback alignment with the
    ``parasail.blosum62`` matrix, gap open 10 and gap extend 1.

    Args:
        seq1: First sequence (first argument to parasail).
        name1: Name passed to ``construct_psl`` as its first argument.
        seq2: Second sequence (second argument to parasail).
        name2: Name passed to ``construct_psl`` as its second argument.

    Returns:
        Whatever ``construct_psl(name1, name2, result)`` returns.
    """
    gap_open, gap_extend = 10, 1
    alignment = parasail.sg_trace_scan_32(seq1, seq2, gap_open, gap_extend,
                                          parasail.blosum62)
    return construct_psl(name1, name2, alignment)