Beispiel #1
0
def nw_hamming_metric(s1, s2, matrix='blosum62', open=3, extend=3):
    """Hamming-style distance between two sequences after a global alignment.

    Both sequences are aligned with Parasail's Needleman-Wunsch routines.
    The distance is the length of the alignment's comparison string minus
    the number of matched positions, so every aligned column that is not a
    match (gaps included) counts as one mismatch. The substitution matrix
    and the gap penalties only steer the alignment; they do not enter the
    distance itself.

    Parameters
    ----------
    s1 : string
        string containing amino acid letters
    s2 : string
        string containing amino acid letters
    matrix : str
        Name of the parasail attribute holding the substitution matrix
        (looked up with ``getattr(parasail, matrix)``)
    open : int
        gap opening penalty forwarded to parasail
    extend : int
        gap extension penalty forwarded to parasail

    Returns
    -------
    D : int
        number of alignment columns that are not matches

    Notes
    -----
    Two alignments are run: ``parasail.nw_stats`` supplies the match count
    and ``parasail.nw_trace`` supplies the comparison string whose length
    is used as the alignment length.
    """
    substitution = getattr(parasail, matrix)
    # Identical settings for both calls so they describe the same problem.
    settings = dict(open=open, extend=extend, matrix=substitution)
    stats = parasail.nw_stats(s1, s2, **settings)
    trace = parasail.nw_trace(s1, s2, **settings)
    aligned_length = len(trace.traceback.comp)
    return aligned_length - stats.matches
    def get_cigar(s1, s2):
        """Align ``s1`` with ``s2`` and return the CIGAR as (type, length) pairs.

        The alignment mode comes from ``kind``, a name resolved from the
        enclosing scope (it is not a parameter of this function):

        * ``"local"``       -> ``parasail.sw_trace``
        * ``"semi-global"`` -> ``parasail.sg_trace``
        * ``"global"``      -> ``parasail.nw_trace``

        Every mode is invoked with the positional arguments
        ``101, 10, parasail.pam100``.

        Returns
        -------
        list of tuple
            One ``(cigar_type, cigar_length)`` tuple per CIGAR element, in
            alignment order. The type comes first, following the layout the
            original author described as the canonical one (e.g. SAM files /
            pysam).

        Raises
        ------
        ValueError
            If ``kind`` is none of the three supported modes.
        """
        import parasail

        if kind == "local":
            aligner = parasail.sw_trace
        elif kind == "semi-global":
            aligner = parasail.sg_trace
        elif kind == "global":
            aligner = parasail.nw_trace
        else:
            raise ValueError(
                "The kind of alignment must be global, semi-global, or local.")
        result = aligner(s1, s2, 101, 10, parasail.pam100)

        # Fetch the CIGAR object once and reuse it instead of re-reading
        # ``result.cigar`` for every element.
        cigar = result.cigar
        return [(cigar.decode_op(code).decode(), cigar.decode_len(code))
                for code in cigar.seq]
def align(sequence1: str, sequence2: str):
    """Run a global pairwise alignment of two strings and return alignment data.

    The alignment is computed with ``parasail.nw_trace`` (Needleman-Wunsch)
    using the module-level ``GAP_OPEN_PENALTY`` and ``GAP_EXTEND_PENALTY``
    together with ``parasail.blosum80``.

    Returns
    -------
    tuple
        ``(traceback.ref, traceback.query, score)`` taken from the parasail
        result, in that order.
    """
    result = parasail.nw_trace(sequence1, sequence2, GAP_OPEN_PENALTY,
                               GAP_EXTEND_PENALTY, parasail.blosum80)
    # Read the traceback once; both aligned strings come from the same object.
    aligned = result.traceback
    return aligned.ref, aligned.query, result.score
def align_affine(query: str, target: str, match_score: int, mismatch_cost: int,
                 gap_open_cost: int, gap_extension_cost: int) -> Cigar:
    """Globally align ``query`` to ``target`` with affine gaps; return its Cigar.

    A scoring matrix over the alphabet ``"ACGT"`` is built with
    ``parasail.matrix_create`` from ``match_score`` and the negated
    ``mismatch_cost``. The alignment is run with ``parasail.nw_trace``, and
    the decoded CIGAR text (UTF-8) is wrapped in a ``Cigar``.

    All four numeric arguments are given as magnitudes: the function asserts
    that none of them is negative.
    """
    lowest = min(match_score, mismatch_cost, gap_open_cost,
                 gap_extension_cost)
    assert lowest >= 0, "Specify positive integers"

    scoring = parasail.matrix_create("ACGT", match_score, -mismatch_cost)
    alignment = parasail.nw_trace(query, target, gap_open_cost,
                                  gap_extension_cost, scoring)
    cigar_text = alignment.cigar.decode.decode('utf-8')
    return Cigar(cigar_text)
Beispiel #5
0
    def align(
        self,
        x,
        y,
        open_penalty,
        extend_penalty,
        matrix=parasail.blosum62,
    ):
        """
        Align two strings and return the parasail result object. Uses:

        parasail.nw_trace() - Needleman-Wunsch Global Alignment
        parasail.blosum62   - default substitution matrix

        The blosum62 matrix may warrant scrutiny:
        Selecting the Right Similarity-Scoring Matrix
        https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3848038/

        Parameters
        ----------
        x : string
            Amino acid string passed to arg s1 in parasail.nw_trace
        y : string
            Amino acid string passed to arg s2 in parasail.nw_trace
        open_penalty : int
            gap opening penalty
        extend_penalty : int
            gap extension penalty
        matrix :
            substitution matrix handed to parasail.nw_trace;
            defaults to parasail.blosum62

        Returns
        -------
        alignment :
            Result object returned by parasail.nw_trace
            (e.g. alignment.traceback.query)
        """
        alignment = parasail.nw_trace(
            s1=x,
            s2=y,
            open=open_penalty,
            extend=extend_penalty,
            matrix=matrix,
        )
        return alignment
Beispiel #6
0
def pairwise_nw_trace_align(ref_seq,
                            query_seq,
                            gap_open=10,
                            gap_extension=1,
                            matrix=parasail.blosum62):
    """
    Align a query sequence to a reference with ``parasail.nw_trace``.

    Note the argument order: ``query_seq`` is handed to parasail as the
    first sequence and ``ref_seq`` as the second, the reverse of this
    function's own parameter order.

    :param ref_seq: reference sequence (second sequence given to parasail)
    :param query_seq: query sequence (first sequence given to parasail)
    :param gap_open: gap opening penalty. Default 10
    :param gap_extension: gap extension penalty. Default 1
    :param matrix: substitution matrix. Default BLOSUM62
    :return: parasail result (includes a cigar which can be decoded)
    """
    result = parasail.nw_trace(query_seq, ref_seq, gap_open, gap_extension,
                               matrix)
    # Pass the value as a logging argument so the message is only formatted
    # when a handler actually emits the record.
    logging.debug("Parasail result %s", result.cigar.decode)
    return result
Beispiel #7
0
def hm_metric(s1, s2, matrix='blosum62', open=3, extend=3):
    """
    Align two sequences with Parasail's Needleman-Wunsch algorithm and return
    a Hamming distance: the number of aligned positions that are not matches.

    Parameters
    ----------
    s1 : string
        string containing amino acid letters

    s2 : string
        string containing amino acid letters

    matrix : str
        name of the parasail attribute holding the substitution matrix

    open : int
        gap opening penalty forwarded to parasail

    extend : int
        gap extension penalty forwarded to parasail

    Returns
    -------
    D : int
        length of the alignment's comparison string minus the match count

    Notes
    -----
    The match count is read from ``parasail.nw_stats`` and the comparison
    string from ``parasail.nw_trace``; both are called with the same
    matrix and gap penalties.
    """
    scoring = getattr(parasail, matrix)
    match_stats = parasail.nw_stats(
        s1, s2, open=open, extend=extend, matrix=scoring)
    traced = parasail.nw_trace(
        s1, s2, open=open, extend=extend, matrix=scoring)
    comparison = traced.traceback.comp
    return len(comparison) - match_stats.matches
Beispiel #8
0
def hm_matches(s1, s2, matrix=parasail.blosum62, open=3, extend=3):
    """Return the number of matching positions in a global alignment.

    The sequences are aligned with ``parasail.nw_stats`` and the ``matches``
    field of its result is returned.

    Parameters
    ----------
    s1 : string
        first sequence
    s2 : string
        second sequence
    matrix :
        parasail substitution matrix object; defaults to parasail.blosum62
    open : int
        gap opening penalty forwarded to parasail
    extend : int
        gap extension penalty forwarded to parasail

    Returns
    -------
    int
        match count reported by ``parasail.nw_stats``
    """
    # Only the statistics are needed; no traceback alignment is computed.
    stats = parasail.nw_stats(s1, s2, open=open, extend=extend, matrix=matrix)
    return stats.matches