def local_pairwise_align_ssw(sequence1, sequence2, **kwargs):
    """Locally align two sequences using Striped Smith-Waterman.

    Parameters
    ----------
    sequence1 : DNA, RNA, or Protein
        Unaligned sequence used as the query.
    sequence2 : DNA, RNA, or Protein
        Unaligned sequence used as the target. Must be exactly the same
        type as `sequence1`.

    Returns
    -------
    tuple or None
        Three items: a ``TabularMSA`` holding the aligned query and target
        (in that order), the optimal alignment score, and the start/end
        positions of each input sequence as a list of two-item tuples
        (query first, then target). Start/end positions are indexes into
        the unaligned sequences. The positions item is ``None`` when the
        aligner reports a ``query_begin`` of -1. ``None`` is returned
        instead of the tuple when the alignment has no cigar, i.e. it did
        not pass a provided filter.

    Raises
    ------
    TypeError
        If either input is not a ``DNA``, ``RNA``, or ``Protein`` instance,
        or if the two inputs are not of the same type.

    Notes
    -----
    This is a wrapper for the SSW package [1]_.

    Any additional keyword arguments are forwarded to
    ``skbio.alignment.StripedSmithWaterman``; see that class for the
    complete list.

    The kwargs `suppress_sequences`, `zero_index`, and `protein` are always
    overwritten by this function, so values supplied for them have no
    effect.

    Metadata present on an input sequence is passed on to the corresponding
    aligned sequence.

    References
    ----------
    .. [1] Zhao, Mengyao, Wan-Ping Lee, Erik P. Garrison, & Gabor T.
       Marth. "SSW Library: An SIMD Smith-Waterman C/C++ Library for
       Applications". PLOS ONE (2013). Web. 11 July 2014.
       http://www.plosone.org/article/info:doi/10.1371/journal.pone.0082138

    See Also
    --------
    skbio.alignment.StripedSmithWaterman

    """
    for candidate in (sequence1, sequence2):
        if isinstance(candidate, (DNA, RNA, Protein)):
            continue
        raise TypeError(
            "`sequence1` and `sequence2` must be DNA, RNA, or Protein, "
            "not type %r" % type(candidate).__name__)

    seq_cls = type(sequence1)
    other_cls = type(sequence2)
    if seq_cls is not other_cls:
        raise TypeError(
            "`sequence1` and `sequence2` must be the same type: %r != %r"
            % (seq_cls.__name__, other_cls.__name__))

    # The aligned sequences are required to build the `TabularMSA`, and the
    # scoring mode has to follow the input type, so these options are always
    # forced regardless of what the caller passed.
    kwargs.update(
        suppress_sequences=False,
        zero_index=True,
        protein=isinstance(sequence1, Protein),
    )

    aligner = StripedSmithWaterman(str(sequence1), **kwargs)
    result = aligner(str(sequence2))

    # A missing cigar means the alignment failed a filter.
    if not result.cigar:
        return None

    positions = (
        [(result.query_begin, result.query_end),
         (result.target_begin, result.target_end_optimal)]
        if result.query_begin != -1 else None
    )

    # Only carry metadata over when the input actually has some.
    query_md = sequence1.metadata if sequence1.has_metadata() else None
    target_md = sequence2.metadata if sequence2.has_metadata() else None

    aligned_query = seq_cls(result.aligned_query_sequence,
                            metadata=query_md, validate=False)
    aligned_target = seq_cls(result.aligned_target_sequence,
                             metadata=target_md, validate=False)
    msa = TabularMSA([aligned_query, aligned_target])

    return msa, result.optimal_alignment_score, positions
# Example #2
def local_pairwise_align_ssw(sequence1,
                             sequence2,
                             constructor=Sequence,
                             **kwargs):
    """Locally align two sequences using Striped Smith-Waterman.

    Parameters
    ----------
    sequence1 : str or Sequence
        Unaligned sequence used as the query.
    sequence2 : str or Sequence
        Unaligned sequence used as the target.
    constructor : Sequence subclass
        Class used to build the aligned sequences when the `protein` kwarg
        is not truthy. When `protein` is truthy, ``Protein`` is used
        instead.

    Returns
    -------
    ``skbio.alignment.Alignment`` or None
        Alignment of the query and target (in that order, with ``'id'``
        metadata of ``'query'`` and ``'target'``), carrying the optimal
        alignment score and the start/end positions of each sequence.
        The start/end positions are ``None`` when the aligner reports a
        ``query_begin`` of -1. ``None`` is returned instead of an
        ``Alignment`` when the alignment has no cigar, i.e. it did not pass
        a provided filter.

    Notes
    -----
    This is a wrapper for the SSW package [1]_.

    Any additional keyword arguments are forwarded to
    ``skbio.alignment.StripedSmithWaterman``; see that class for the
    complete list.

    The kwargs `suppress_sequences` and `zero_index` are always overwritten
    by this function, so values supplied for them have no effect. The
    `protein` kwarg is set to True whenever `sequence1` is a ``Protein``
    instance.

    References
    ----------
    .. [1] Zhao, Mengyao, Wan-Ping Lee, Erik P. Garrison, & Gabor T.
       Marth. "SSW Library: An SIMD Smith-Waterman C/C++ Library for
       Applications". PLOS ONE (2013). Web. 11 July 2014.
       http://www.plosone.org/article/info:doi/10.1371/journal.pone.0082138

    See Also
    --------
    skbio.alignment.StripedSmithWaterman

    """
    # The aligned sequences are required to build the `Alignment`, so the
    # caller is not allowed to suppress them.
    kwargs.update(suppress_sequences=False, zero_index=True)

    if isinstance(sequence1, Protein):
        kwargs['protein'] = True

    aligner = StripedSmithWaterman(str(sequence1), **kwargs)
    result = aligner(str(sequence2))

    # A missing cigar means the alignment failed a filter.
    if not result.cigar:
        return None

    if result.query_begin == -1:
        positions = None
    else:
        positions = [
            (result.query_begin, result.query_end),
            (result.target_begin, result.target_end_optimal),
        ]

    # Both aligned sequences are built with the same class: `Protein` for
    # protein alignments, otherwise whatever the caller asked for.
    seq_cls = Protein if kwargs.get('protein', False) else constructor
    aligned = [
        seq_cls(result.aligned_query_sequence, metadata={'id': 'query'}),
        seq_cls(result.aligned_target_sequence, metadata={'id': 'target'}),
    ]

    return Alignment(aligned,
                     score=result.optimal_alignment_score,
                     start_end_positions=positions)