Example #1
 def __getitem__(self, x: Union[int, slice]) -> 'DSeq':
     '''Return the part of this duplex selected by an index or a slice.

     When ``self.overhang`` is positive the slice is applied directly to
     ``self.rc_rev`` and shifted by ``-overhang`` before it is applied to
     ``self.fwd``; otherwise it is applied directly to ``self.fwd`` and
     shifted by ``+overhang`` before it is applied to ``self.rc_rev``.

     Args:
         x: an integer position or a slice of positions

     Returns:
         a new :class:`DSeq` built from the sliced strands, the overhang
         left after slicing and this object's alphabet
     '''
     fwd: str = self.fwd
     rc_rev: str = self.rc_rev
     overhang: int = self.overhang
     if not isinstance(x, slice):
         # A single position is treated as a one-element slice.
         sl: slice = slice(x, x + 1, 1)
     else:
         sl: slice = x
     start_idx: int = sl.start or 0

     def _shift(idx, delta):
         # Translate a slice bound onto the offset strand.  A non-negative
         # bound that lands before the start of that strand is clamped to 0;
         # left negative, Python would count it from the END of the strand.
         # Bounds the caller already gave as negative keep plain arithmetic.
         if idx is None:
             return None
         shifted = idx + delta
         if idx >= 0 and shifted < 0:
             return 0
         return shifted

     if overhang > 0:    # fwd shifted in 3' direction
         fwd_sl: slice = slice(_shift(start_idx, -overhang),
                               _shift(sl.stop, -overhang),
                               sl.step)
         fwd_out: str = fwd[fwd_sl]
         rev_out: str = reverseComplement(rc_rev[sl])
         overhang_out: int = max(overhang - start_idx, 0)
     else:   # fwd shifted in the 5' direction relative to the reverse (negative overhang)
         rev_sl: slice = slice(_shift(start_idx, overhang),
                               _shift(sl.stop, overhang),
                               sl.step)
         rev_out: str = reverseComplement(rc_rev[rev_sl])
         fwd_out: str = fwd[sl]
         overhang_out: int = min(start_idx + overhang, 0)
     # NOTE(review): the original return statement was cut off after its
     # first argument; the remaining arguments follow the complete copy of
     # this method elsewhere in the listing - confirm.
     return DSeq(fwd_out, rev_out, overhang_out, alphabet=self.alphabet)
Example #2
    def __add__(self, b: 'VirtualHelix') -> 'VirtualHelix':
        '''Join ``b`` onto the 3' end of this :class:`VirtualHelix`.

        The forward strands are concatenated with each other, the reverse
        strands are concatenated with each other, and a new
        :class:`VirtualHelix` is built from the results together with this
        object's overhang.

        Args:
            b: a :class:`VirtualHelix` object

        Returns:
            a :class:`VirtualHelix` object

        Raises:
            ValueError: ``b`` is not a :class:`VirtualHelix`
            TypeError: the 3' end of ``self`` and the 5' end of ``b`` differ
                in type or length, or the two end sequences are not reverse
                complements of each other
        '''
        if not isinstance(b, VirtualHelix):
            raise ValueError("{} object not a VirtualHelix".format(b))

        type3, seq3 = self.three_prime_end()
        type5, seq5 = b.five_prime_end()
        if type3 != type5 or len(seq3) != len(seq5):
            raise TypeError("Ends not compatible")
        if seq3 != reverseComplement(seq5):
            raise TypeError("Ends not complimentary")

        fwd = self.fwd + b.fwd
        rev = self.rev + b.rev
        return VirtualHelix(fwd, rev, self.overhang)
Example #3
    def __add__(self, b: 'DSeq') -> 'DSeq':
        '''Join ``b`` onto the 3' end of this :class:`DSeq`.

        The forward strands are concatenated with each other, the reverse
        strands are concatenated with each other, and a new :class:`DSeq` is
        built from the results together with this object's overhang.

        Args:
            b: a :class:`DSeq` object

        Returns:
            a :class:`DSeq` object

        Raises:
            ValueError: ``b`` is not a :class:`DSeq`
            TypeError: either operand is circular, the 3' end of ``self`` and
                the 5' end of ``b`` differ in type or length, or the two end
                sequences are not reverse complements of each other
        '''
        if not isinstance(b, DSeq):
            raise ValueError("{} object not a DSeq".format(b))
        if self.is_circular or b.is_circular:
            err: str = "Can't concatenate circular DSeq: {} + {}"
            raise TypeError(err.format(self, b))

        type3, seq3 = self.three_prime_end()
        type5, seq5 = b.five_prime_end()
        if type3 != type5 or len(seq3) != len(seq5):
            raise TypeError("Ends not compatible")
        if seq3 != reverseComplement(seq5):
            raise TypeError("Ends not complimentary")

        fwd = self.fwd + b.fwd
        # NOTE(review): the reverse strands are joined in the same order as
        # the forward strands; if ``rev`` is stored 5'->3' the intended order
        # may be ``b.rev + self.rev`` - confirm before relying on the result.
        rev = self.rev + b.rev
        return DSeq(fwd, rev, self.overhang)
Example #4
    def __add__(self, b: 'DSeq') -> 'DSeq':
        '''Join ``b`` onto the 3' end of this :class:`DSeq`.

        The forward strands are concatenated with each other, the reverse
        strands are concatenated with each other, and a new :class:`DSeq` is
        built from the results together with this object's overhang.

        Args:
            b: a :class:`DSeq` object

        Returns:
            a :class:`DSeq` object

        Raises:
            ValueError: ``b`` is not a :class:`DSeq`
            TypeError: either operand is circular, the 3' end of ``self`` and
                the 5' end of ``b`` differ in type or length, or the two end
                sequences are not reverse complements of each other
        '''
        if not isinstance(b, DSeq):
            raise ValueError("{} object not a DSeq".format(b))
        if self.is_circular or b.is_circular:
            err: str = "Can't concatenate circular DSeq: {} + {}"
            raise TypeError(err.format(self, b))

        type3, seq3 = self.three_prime_end()
        type5, seq5 = b.five_prime_end()
        if type3 != type5 or len(seq3) != len(seq5):
            raise TypeError("Ends not compatible")
        if seq3 != reverseComplement(seq5):
            raise TypeError("Ends not complimentary")

        fwd = self.fwd + b.fwd
        # NOTE(review): the reverse strands are joined in the same order as
        # the forward strands; if ``rev`` is stored 5'->3' the intended order
        # may be ``b.rev + self.rev`` - confirm before relying on the result.
        rev = self.rev + b.rev
        return DSeq(fwd, rev, self.overhang)
Example #5
    def __add__(self, b: 'VirtualHelix') -> 'VirtualHelix':
        '''Join ``b`` onto the 3' end of this :class:`VirtualHelix`.

        The forward strands are concatenated with each other, the reverse
        strands are concatenated with each other, and a new
        :class:`VirtualHelix` is built from the results together with this
        object's overhang.

        Args:
            b: a :class:`VirtualHelix` object

        Returns:
            a :class:`VirtualHelix` object

        Raises:
            ValueError: ``b`` is not a :class:`VirtualHelix`
            TypeError: the 3' end of ``self`` and the 5' end of ``b`` differ
                in type or length, or the two end sequences are not reverse
                complements of each other
        '''
        if not isinstance(b, VirtualHelix):
            raise ValueError("{} object not a VirtualHelix".format(b))

        type3, seq3 = self.three_prime_end()
        type5, seq5 = b.five_prime_end()
        if type3 != type5 or len(seq3) != len(seq5):
            raise TypeError("Ends not compatible")
        if seq3 != reverseComplement(seq5):
            raise TypeError("Ends not complimentary")

        fwd = self.fwd + b.fwd
        rev = self.rev + b.rev
        return VirtualHelix(fwd, rev, self.overhang)
Example #6
 def test_revCompProfile(self):
     '''Time ``reverseComplement`` against ``seqstr.reverseComplement``.

     Each implementation is run 10000 times on the same 32-character
     sequence and the two total times are printed; nothing is asserted.
     '''
     seq = "ACGTUMRWSYKVHDBNACGTUMRWSYKVHDBN"
     # The repeat count must match the "10000X" wording in the printed labels.
     py_time = timeit.timeit(lambda: reverseComplement(seq), number=10000)
     capi_time = timeit.timeit(lambda: seqstr.reverseComplement(seq),
                               number=10000)
     print('\nNative python rev_comp (time for 10000X):', py_time)
     print('C API python rev_comp (time for 10000X): ', capi_time)
Example #7
 def test_revCompProfile(self):
     '''Time ``reverseComplement`` against ``seqstr.reverseComplement``.

     Each implementation is run 10000 times on the same 32-character
     sequence and the two total times are printed; nothing is asserted.
     '''
     seq = "ACGTUMRWSYKVHDBNACGTUMRWSYKVHDBN"
     # NOTE(review): both timeit calls were cut off in the original; the
     # second call's argument and the repeat count are reconstructed from the
     # first call and the "10000X" wording in the printed labels - confirm.
     py_time = timeit.timeit(lambda: reverseComplement(seq), number=10000)
     capi_time = timeit.timeit(lambda: seqstr.reverseComplement(seq),
                               number=10000)
     print('\nNative python rev_comp (time for 10000X):', py_time)
     print('C API python rev_comp (time for 10000X): ', capi_time)
Example #8
 def isCircularizable(self) -> bool:
     '''Check whether the two ends of this object match each other.

     Returns:
         False if already circular or if ends don't match; True when the
         3' and 5' ends have the same type and the 3' end sequence equals
         the reverse complement of the 5' end sequence
     '''
     if self.is_circular:
         return False
     type3, seq3 = self.three_prime_end()
     type5, seq5 = self.five_prime_end()
     return type3 == type5 and seq3 == reverseComplement(seq5)
Example #9
def checkConstraintFailedCount(candidate, idxs, seq_set, hd=5):
    """Count Hamming-distance constraint violations for ``candidate``.

    For every index in ``idxs`` the candidate is compared with
    ``seq_set[idx]`` and with the reverse complement of that sequence; each
    comparison whose Hamming distance is below ``hd`` adds one to the count,
    so a single sequence can contribute up to two failures.

    Args:
        candidate: sequence being evaluated
        idxs: iterable of indices into ``seq_set`` to compare against
        seq_set: indexable collection of sequences
        hd: minimum acceptable Hamming distance (default 5, the value that
            was previously hard-coded)

    Returns:
        the number of failed comparisons
    """
    failed = 0
    for idx in idxs:
        seq = seq_set[idx]
        if ss.hammingDistance(candidate, seq) < hd:
            failed += 1
        if ss.hammingDistance(candidate, ss.reverseComplement(seq)) < hd:
            failed += 1
    return failed
Example #10
def checkConstraints(candidate, seq_set, hd=5):
    """Tell whether ``candidate`` is far enough from every sequence.

    The candidate passes only if its Hamming distance to each member of
    ``seq_set``, and to the reverse complement of each member, is at least
    ``hd``.  Evaluation stops at the first comparison that falls short.

    Args:
        candidate: sequence being evaluated
        seq_set: iterable of sequences to compare against
        hd: minimum acceptable Hamming distance

    Returns:
        True when no comparison is below ``hd`` (including when ``seq_set``
        is empty), otherwise False
    """
    return not any(
        ss.hammingDistance(candidate, other) < hd
        or ss.hammingDistance(candidate, ss.reverseComplement(other)) < hd
        for other in seq_set
    )
Example #11
 def isCircularizable(self) -> bool:
     '''Check whether the two ends of this object match each other.

     Returns:
         False if already circular or if ends don't match; True when the
         3' and 5' ends have the same type and the 3' end sequence equals
         the reverse complement of the 5' end sequence
     '''
     if self.is_circular:
         return False
     type3, seq3 = self.three_prime_end()
     type5, seq5 = self.five_prime_end()
     return type3 == type5 and seq3 == reverseComplement(seq5)
Example #12
 def __getitem__(self, x: Union[int, slice]) -> 'DSeq':
     '''Return the part of this duplex selected by an index or a slice.

     When ``self.overhang`` is positive the slice is applied directly to
     ``self.rc_rev`` and shifted by ``-overhang`` before it is applied to
     ``self.fwd``; otherwise it is applied directly to ``self.fwd`` and
     shifted by ``+overhang`` before it is applied to ``self.rc_rev``.

     Args:
         x: an integer position or a slice of positions

     Returns:
         a new :class:`DSeq` built from the sliced strands, the overhang
         left after slicing and this object's alphabet
     '''
     fwd: str = self.fwd
     rc_rev: str = self.rc_rev
     overhang: int = self.overhang
     if not isinstance(x, slice):
         # A single position is treated as a one-element slice.
         sl: slice = slice(x, x + 1, 1)
     else:
         sl: slice = x
     start_idx: int = sl.start or 0

     def _shift(idx, delta):
         # Translate a slice bound onto the offset strand.  A non-negative
         # bound that lands before the start of that strand is clamped to 0;
         # left negative, Python would count it from the END of the strand.
         # Bounds the caller already gave as negative keep plain arithmetic.
         if idx is None:
             return None
         shifted = idx + delta
         if idx >= 0 and shifted < 0:
             return 0
         return shifted

     if overhang > 0:  # fwd shifted in 3' direction
         fwd_sl: slice = slice(_shift(start_idx, -overhang),
                               _shift(sl.stop, -overhang),
                               sl.step)
         fwd_out: str = fwd[fwd_sl]
         rev_out: str = reverseComplement(rc_rev[sl])
         overhang_out: int = max(overhang - start_idx, 0)
     else:  # fwd shifted in the 5' direction relative to the reverse (negative overhang)
         rev_sl: slice = slice(_shift(start_idx, overhang),
                               _shift(sl.stop, overhang),
                               sl.step)
         rev_out: str = reverseComplement(rc_rev[rev_sl])
         fwd_out: str = fwd[sl]
         overhang_out: int = min(start_idx + overhang, 0)
     return DSeq(fwd_out, rev_out, overhang_out, alphabet=self.alphabet)
Example #13
def filterRegionSequence(query_seq: str,
                         query_strand: int,
                         transcript_id: str,
                         transcript: dict = None,
                         reference_seq: str = None) -> Tuple[str, bool]:
    '''Confirm sequence exists in the transcript and return it aligned to the
    strand direction of the transcript sequence.  NOTE: Sometimes there are
    errors in probes so be sure to validate all sequence lookups!!!

    Args:
        query_seq: sequence to look for
        query_strand: strand of the query; compared with
            ``transcript['strand']``
        transcript_id: identifier passed to ``lookUpID`` / ``getSequence``
            and reported in the error message
        transcript: Default is None.  If provided omit lookUp call
        reference_seq: Default is None.  If provided omit getSequence call

    Returns:
        Tuple of the form::

            sequence, was_rc

        ``sequence`` is the query, reverse complemented when the query
        strand differs from the transcript strand, and is guaranteed to
        occur in the reference sequence.  ``was_rc`` tells whether the
        reverse complement was taken and should be checked by the caller.

    Raises:
        ValueError on sequence not found in the target reference sequence
    '''
    was_rc: bool = False
    query_seq_out: str = query_seq
    if transcript is None:
        transcript = lookUpID(transcript_id)
    if reference_seq is None:
        reference_seq = getSequence(transcript_id)
    if transcript['strand'] != query_strand:
        # Query is on the opposite strand: flip it onto the transcript's.
        query_seq_out = reverseComplement(query_seq)
        was_rc = True
    if query_seq_out not in reference_seq:
        err: str = "Region sequence not in transcript_id: %s: %d, rc: %s"
        raise ValueError(err % (transcript_id, query_strand, was_rc))
    return query_seq_out, was_rc
Example #14
def DSeqVH(fwd: str,
           rev: str = None,
           overhang: int = None,
           alphabet: int = AlphaEnum.DNA) -> VirtualHelix:
    '''Helper function for creating :class:`VirtualHelix` in the style of
    the :class:`DSeq` with strings

    A :class:`DSeq` is built first and its ``overhang`` attribute is used to
    derive the index offsets of the two strands.

    Args:
        fwd: forward strand sequence
        rev: reverse strand sequence; when None the reverse complement of
            ``fwd`` is used
        overhang: passed through to :class:`DSeq`
        alphabet: passed through to :class:`DSeq`

    Returns:
        a :class:`VirtualHelix` holding one forward and one reverse strand
    '''
    dseq: DSeq = DSeq(fwd, rev, overhang, alphabet)
    overhang: int = dseq.overhang
    if overhang > 0:
        fwd_idx_offsets = [overhang]
        rev_idx_offsets = [0]
    else:
        fwd_idx_offsets = [0]
        rev_idx_offsets = [overhang]
    oligo_fwd = Oligo(fwd)
    if rev is None:
        rev = reverseComplement(fwd)
    oligo_rev = Oligo(rev)
    return VirtualHelix([oligo_fwd.strand5p], fwd_idx_offsets,
                        [oligo_rev.strand5p], rev_idx_offsets)
Example #15
def DSeqVH(fwd: str,
           rev: str = None,
           overhang: int = None,
           alphabet: int = AlphaEnum.DNA) -> VirtualHelix:
    '''Helper function for creating :class:`VirtualHelix` in the style of
    the :class:`DSeq` with strings

    A :class:`DSeq` is built first and its ``overhang`` attribute is used to
    derive the index offsets of the two strands.

    Args:
        fwd: forward strand sequence
        rev: reverse strand sequence; when None the reverse complement of
            ``fwd`` is used
        overhang: passed through to :class:`DSeq`
        alphabet: passed through to :class:`DSeq`

    Returns:
        a :class:`VirtualHelix` holding one forward and one reverse strand
    '''
    dseq: DSeq = DSeq(fwd, rev, overhang, alphabet)
    overhang: int = dseq.overhang
    if overhang > 0:
        fwd_idx_offsets = [overhang]
        rev_idx_offsets = [0]
    else:
        fwd_idx_offsets = [0]
        rev_idx_offsets = [overhang]
    oligo_fwd = Oligo(fwd)
    if rev is None:
        rev = reverseComplement(fwd)
    oligo_rev = Oligo(rev)
    # NOTE(review): the original return statement was cut off after its
    # first argument; the remaining arguments follow the complete copy of
    # this helper elsewhere in the listing - confirm.
    return VirtualHelix([oligo_fwd.strand5p], fwd_idx_offsets,
                        [oligo_rev.strand5p], rev_idx_offsets)
Example #16
    def __init__(self,
                 fwd: str,
                 rev: str = None,
                 overhang: int = None,
                 is_circular: bool = False,
                 alphabet: int = AlphaEnum.DNA):
        '''Build a double-stranded sequence from one or two strands.

        Args:
            fwd: ``fwd`` maps to reference in ``ssw-py``
            rev: ``rev`` maps to read in ``ssw-py``.  When None the reverse
                complement of ``fwd`` is used.
            overhang: Use to force an alignment.  A positive value places
                the start of ``fwd`` at index ``overhang`` of the reverse
                complemented ``rev``; a negative value places the start of
                the reverse complemented ``rev`` at index ``-overhang`` of
                ``fwd``.  When None the strands are aligned with
                ``align_complement`` and the overhang is derived from the
                result.
            is_circular: whether the duplex is circular
            alphabet: whether this is DNA or RNA

        Raises:
            AssertionError: ``alphabet`` is not in ``ALPHABETS``
            ValueError: ``overhang`` is given without ``rev``, or
                ``is_circular`` is set and neither end is blunt
        '''
        assert(alphabet in ALPHABETS)
        self.alphabet: int = alphabet
        self.fwd: str = fwd
        self.overhang: int = 0

        if rev is None:
            if overhang is not None:
                raise ValueError("overhang can't be defined for without a reverse strand")
            self.rev: str = reverseComplement(fwd)
        else:
            self.rev: str = rev

        max_idx_fwd: int = len(fwd) - 1
        max_idx_rev: int = len(self.rev) - 1
        the_length: int = max(max_idx_fwd, max_idx_rev) + 1 # default

        self.alignment: Alignment
        self.rc_rev: str
        if overhang is None:
            # No overhang forced: align the strands and derive it.
            alignment, self.rc_rev = align_complement(fwd, self.rev)

            if max_idx_fwd > alignment.reference_end: # positive overhang
                self.overhang = max_idx_fwd - alignment.reference_end
                the_length = max_idx_fwd + alignment.read_start + 1

            elif max_idx_rev > alignment.read_end: # negative overhang
                # NOTE(review): this subtracts max_idx_fwd although the branch
                # condition compares against max_idx_rev - confirm intended.
                self.overhang = alignment.read_end - max_idx_fwd
                the_length = max_idx_rev + alignment.reference_start + 1
            self.alignment = alignment
        else:
            # Overhang forced by the caller: synthesize the alignment bounds.
            self.overhang = overhang
            if overhang < 0:
                reference_start: int = -overhang
                read_start: int = 0
                delta = min(max_idx_fwd + overhang, max_idx_rev)
                reference_end: int = reference_start + delta
                read_end: int = delta
                the_length = max_idx_rev + reference_start + 1
            elif overhang > 0:
                reference_start: int = 0
                read_start: int = overhang
                delta = min(max_idx_fwd, max_idx_rev - read_start)
                reference_end: int = delta
                read_end: int = read_start + delta
                the_length = max_idx_fwd + read_start + 1
            else:
                reference_start: int = 0
                read_start: int = 0
                delta: int = min(max_idx_fwd, max_idx_rev)
                reference_end: int = delta
                read_end: int = delta
                the_length = max(max_idx_fwd, max_idx_rev) + 1
            # NOTE(review): the original call was cut off after the three
            # null placeholders.  The remaining fields are reconstructed from
            # the locals computed above and passed by keyword, using the
            # attribute names this method reads from an Alignment - confirm
            # against the ssw-py Alignment definition.
            self.alignment = Alignment(
                '', # null
                0,  # null
                0,  # null
                reference_start=reference_start,
                reference_end=reference_end,
                read_start=read_start,
                read_end=read_end,
            )
            self.rc_rev = reverseComplement(rev)
        self.the_length: int = the_length
        if is_circular:
            type3, seq3 = self.three_prime_end()
            type5, seq5 = self.five_prime_end()
            if (type3 != PrimeEnum.BLUNT) and (type5 != PrimeEnum.BLUNT):
                raise ValueError("DNA is_circular but ends can't mate")
        self.is_circular = is_circular
Example #17
 def test_revComp(self):
     '''Check that ``reverseComplement`` and ``seqstr.reverseComplement``
     give equal results on 1000 sequences from ``self._randSeq()``.
     '''
     trials = 1000
     for _ in range(trials):
         sequence = self._randSeq()
         expected = reverseComplement(sequence)
         actual = seqstr.reverseComplement(sequence)
         self.assertEqual(expected, actual)
Example #18
def align_complement(fwd: str, rev: str) -> Tuple[Alignment, str]:
    '''Align the reverse complement of ``rev`` against ``fwd``.

    Args:
        fwd: forward strand sequence
        rev: reverse strand sequence

    Returns:
        the result of ``force_align`` together with the reverse complement
        of ``rev`` that was aligned
    '''
    complemented: str = reverseComplement(rev)
    return force_align(complemented, fwd), complemented
Example #19
def string_align_complement(
        fwd: str,
        rev: str,
        alignment: Alignment = None,
        rc_rev: str = None,
        do_print: bool = False,
        do_highlight: bool = False) -> Tuple[str, str]:
    '''Build display strings for ``fwd`` and the reversed ``rev`` strand,
    each prefixed with spaces derived from the alignment start indices.

    NOTE(review): several statements of this function appear to be missing
    from this listing (branches without ``else``, an empty loop body,
    unterminated calls).  The comments below describe only what is visible.

    Args:
        fwd: forward strand sequence
        rev: reverse strand sequence
        alignment: alignment to use; computed with ``align_complement`` when
            None
        rc_rev: reverse complement of ``rev``; computed when None
        do_print: presumably prints the two strings - body not visible
        do_highlight: enables the mismatch-highlighting section

    Returns:
        ``(out_fwd, out_rev)``
    '''
    if alignment is None:
        alignment, rc_rev = align_complement(fwd, rev)
    if rc_rev is None:
        rc_rev = reverseComplement(rev)
    reverse_rev: str = reverse(rev)

    # Start/end indices of the aligned region on each strand.
    fwd_idx0: int = alignment.reference_start
    fwd_idx1: int = alignment.reference_end
    rev_idx0: int = alignment.read_start
    rev_idx1: int = alignment.read_end

    # Number of characters from the alignment start to the end of each strand.
    max_delta_fwd: int = len(fwd) - fwd_idx0
    max_delta_rev: int = len(rev) - rev_idx0
    # NOTE(review): an ``else:`` seems to be missing before the second
    # assignment; as shown, lim_hi is always overwritten.
    if max_delta_fwd < max_delta_rev:
        lim_hi: int = max_delta_fwd
        lim_hi: int = max_delta_rev

    lo_delta: int
    buffer_fwd: str = ''
    buffer_rev: str = ''
    # NOTE(review): an ``else:`` seems to be missing before the second pair of
    # assignments; the intent looks like padding whichever strand starts later.
    if fwd_idx0 < rev_idx0:
        lo_delta = fwd_idx0
        buffer_fwd = ' '*(rev_idx0 - fwd_idx0)
        lo_delta = rev_idx0
        buffer_rev = ' '*(fwd_idx0 - rev_idx0)

    if do_highlight:
        highlight_rev_list: List[str] = []
        fwd_lo_idx: int = fwd_idx0 - lo_delta
        rev_lo_idx: int = rev_idx0 - lo_delta
        total_delta: int = lo_delta + lim_hi

        # Walks the overlapping positions comparing rc_rev with fwd.
        # NOTE(review): the body of the mismatch test is not visible.
        for i in range(total_delta):
            reverse_rev_base = reverse_rev[rev_lo_idx + i]
            if rc_rev[rev_lo_idx + i] != fwd[fwd_lo_idx + i]:

            # NOTE(review): the highlight(...) call and the out_rev
            # expression below are cut off in this listing.
            highlight_unformat_rev: str = ''.join(highlight_rev_list)
            highlight_rev: str = highlight(   highlight_unformat_rev,
            out_rev: str = (
                buffer_rev +
                reverse_rev[:rev_lo_idx] +
                highlight_rev.strip().translate(TRANTAB) +
        # NOTE(review): presumably the ``else`` branch of ``if do_highlight``.
        out_rev: str = buffer_rev + reverse_rev
    out_fwd: str = buffer_fwd + fwd
    # NOTE(review): the body of this ``if`` is not visible.
    if do_print:
    return out_fwd, out_rev
Example #20
def checkOffTarget(primer_str, genome_str, primer_idx, params,
                   hamming_percentile=0.05, genome_rc_str=None):
    """Return the tm, idx, and strand of the strongest off-target hybridization

    The 5' index of the primer on the fwd strand must be provided for masking
    purposes. The reverse complement of the genome, ``genome_rc_str``, may be
    provided as a performance optimization.

    Under the hood, :func:``checkOffTarget`` calculates the hamming distance
    between the primer and the respective underlying sequence at each index
    of the genome and its reverse complement. As a means of optimization,
    only the bottom ``hamming_percentile`` of hamming distance indices will
    also be screened with a thermodynamic alignment. For example, a
    ``hamming_percentile`` of 0.05 will result in thermodynamic alignments at
    the indices of the genome with hamming distances from the ``primer_str``
    in the bottom 0.05 percent.

    NOTE(review): the docstring of this function was unterminated in this
    listing and several calls in the body are cut off; the section headings
    below were restored and the closing quotes added.

    Args:
        primer_str (str)            : primer sequence string
        genome_str (str)            : genome sequence string
        primer_idx (int)            : 5'-most index of a primer sequence on the
                                      forward strand, used to mask the binding
                                      footprint of the primer itself
        params (dict)               : parameters dictionary used throughout the
                                      pipeline (reference cut off in this
                                      listing)

        hamming_percentile (float, optional)  : Hamming distance percentile (0-100)
                                      below which regions surrounding the
                                      respective indices will be subject to
                                      interrogation by thermodynamic alignment
        genome_rc_str (str, optional)         : reverse complement of the genome
                                      (optimization to minimize the number of
                                      times this operation must be performed /
                                      number of memory copies)

    Returns:
        The tm (deg. C), index, and strand of the strongest off-target

        NOTE(review): the visible code returns only the higher of the two
        per-strand tm values, not the index or strand - confirm intended.
    """
    genome_rc_str = genome_rc_str or seqstr.reverseComplement(genome_str)
    primer_length = len(primer_str)

    # Each worker puts one (tm, idx, strand_flag) tuple on this queue.
    strand_results = mp.Queue()

    def _fwdStrand():
        # NOTE(review): the rollingHammingDistance(...) call is cut off here.
        fwd_hamming_distances = seqstr.rollingHammingDistance(primer_str,
        fwd_hd_thresh = np.percentile(fwd_hamming_distances, hamming_percentile)
        # Overwrite the primer's own footprint with the maximum distance so it
        # can never fall below the threshold.
        fwd_primer_footprint = (-(primer_idx+primer_length), (-primer_idx))
        fwd_hamming_distances[fwd_primer_footprint[0]: \
                              fwd_primer_footprint[1]] = primer_length
        fwd_hotspots, = np.where((fwd_hamming_distances < fwd_hd_thresh))
        highest_tm_idx = None
        highest_tm = -100
        for idx in fwd_hotspots:
            # NOTE(review): the calcHeterodimerTm(...) call is cut off here.
            tm = primer3.calcHeterodimerTm(
                primer_str, genome_str[-(idx+primer_length):-idx],
            if tm > highest_tm:
                highest_tm_idx = idx
                highest_tm = tm
        strand_results.put((highest_tm, highest_tm_idx, 1))

    def _revStrand():
        # NOTE(review): the rollingHammingDistance(...) call is cut off here.
        rev_hamming_distances = seqstr.rollingHammingDistance(primer_str,
        rev_hd_thresh = np.percentile(rev_hamming_distances, hamming_percentile)
        # Overwrite the primer's own footprint with the maximum distance so it
        # can never fall below the threshold.
        rev_primer_footprint = ((primer_idx), (primer_idx+primer_length))
        rev_hamming_distances[rev_primer_footprint[0]: \
                              rev_primer_footprint[1]] = primer_length
        rev_hotspots, = np.where((rev_hamming_distances < rev_hd_thresh))

        highest_tm_idx = None
        highest_tm = -100
        for idx in rev_hotspots:
            # NOTE(review): the calcHeterodimerTm(...) call is cut off here.
            tm = primer3.calcHeterodimerTm(
                primer_str, genome_rc_str[idx:idx+primer_length],
            if tm > highest_tm:
                highest_tm_idx = idx
                highest_tm = tm
        strand_results.put((highest_tm, highest_tm_idx, 0))

    fwd_proc = mp.Process(target=_fwdStrand)
    rev_proc = mp.Process(target=_revStrand)

    # NOTE(review): no start()/join() calls on the two processes are visible
    # in this listing; without them these get() calls would block - confirm.
    res1 = strand_results.get()
    res2 = strand_results.get()

    return max(res1[0], res2[0])
Example #21
    def __init__(self,
                 fwd: str,
                 rev: str = None,
                 overhang: int = None,
                 is_circular: bool = False,
                 alphabet: int = AlphaEnum.DNA):
        '''Build a double-stranded sequence from one or two strands.

        Args:
            fwd: ``fwd`` maps to reference in ``ssw-py``
            rev: ``rev`` maps to read in ``ssw-py``.  When None the reverse
                complement of ``fwd`` is used.
            overhang: Use to force an alignment.  A positive value places
                the start of ``fwd`` at index ``overhang`` of the reverse
                complemented ``rev``; a negative value places the start of
                the reverse complemented ``rev`` at index ``-overhang`` of
                ``fwd``.  When None the strands are aligned with
                ``align_complement`` and the overhang is derived from the
                result.
            is_circular: whether the duplex is circular
            alphabet: whether this is DNA or RNA

        Raises:
            AssertionError: ``alphabet`` is not in ``ALPHABETS``
            ValueError: ``overhang`` is given without ``rev``, or
                ``is_circular`` is set and neither end is blunt
        '''
        assert (alphabet in ALPHABETS)
        self.alphabet: int = alphabet
        self.fwd: str = fwd
        self.overhang: int = 0

        if rev is None:
            if overhang is not None:
                raise ValueError(
                    "overhang can't be defined for without a reverse strand")
            self.rev: str = reverseComplement(fwd)
        else:
            self.rev: str = rev

        max_idx_fwd: int = len(fwd) - 1
        max_idx_rev: int = len(self.rev) - 1
        the_length: int = max(max_idx_fwd, max_idx_rev) + 1  # default

        self.alignment: Alignment
        self.rc_rev: str
        if overhang is None:
            # No overhang forced: align the strands and derive it.
            alignment, self.rc_rev = align_complement(fwd, self.rev)

            if max_idx_fwd > alignment.reference_end:  # positive overhang
                self.overhang = max_idx_fwd - alignment.reference_end
                the_length = max_idx_fwd + alignment.read_start + 1

            elif max_idx_rev > alignment.read_end:  # negative overhang
                # NOTE(review): this subtracts max_idx_fwd although the branch
                # condition compares against max_idx_rev - confirm intended.
                self.overhang = alignment.read_end - max_idx_fwd
                the_length = max_idx_rev + alignment.reference_start + 1
            self.alignment = alignment
        else:
            # Overhang forced by the caller: synthesize the alignment bounds.
            self.overhang = overhang
            if overhang < 0:
                reference_start: int = -overhang
                read_start: int = 0
                delta = min(max_idx_fwd + overhang, max_idx_rev)
                reference_end: int = reference_start + delta
                read_end: int = delta
                the_length = max_idx_rev + reference_start + 1
            elif overhang > 0:
                reference_start: int = 0
                read_start: int = overhang
                delta = min(max_idx_fwd, max_idx_rev - read_start)
                reference_end: int = delta
                read_end: int = read_start + delta
                the_length = max_idx_fwd + read_start + 1
            else:
                reference_start: int = 0
                read_start: int = 0
                delta: int = min(max_idx_fwd, max_idx_rev)
                reference_end: int = delta
                read_end: int = delta
                the_length = max(max_idx_fwd, max_idx_rev) + 1
            # NOTE(review): the original call was cut off after the three
            # null placeholders.  The remaining fields are reconstructed from
            # the locals computed above and passed by keyword, using the
            # attribute names this method reads from an Alignment - confirm
            # against the ssw-py Alignment definition.
            self.alignment = Alignment(
                '',  # null
                0,  # null
                0,  # null
                reference_start=reference_start,
                reference_end=reference_end,
                read_start=read_start,
                read_end=read_end,
            )
            self.rc_rev = reverseComplement(rev)
        self.the_length: int = the_length
        if is_circular:
            type3, seq3 = self.three_prime_end()
            type5, seq5 = self.five_prime_end()
            if (type3 != PrimeEnum.BLUNT) and (type5 != PrimeEnum.BLUNT):
                raise ValueError("DNA is_circular but ends can't mate")
        self.is_circular = is_circular
Example #22
 def test_revComp(self):
     '''Check that ``reverseComplement`` and ``seqstr.reverseComplement``
     give equal results on 1000 sequences from ``self._randSeq()``.
     '''
     trials = 1000
     for _ in range(trials):
         sequence = self._randSeq()
         expected = reverseComplement(sequence)
         actual = seqstr.reverseComplement(sequence)
         self.assertEqual(expected, actual)