Example #1
0
 def reverseComplement(self, preserveHeader=False):
     """
     Build a new FastaRecord whose sequence is the reverse complement of
     this record's sequence.

     If `preserveHeader` is true, the new record reuses this record's
     header unchanged.  Otherwise the header is stripped of surrounding
     whitespace and suffixed with " [revcomp]".
     """
     flipped = sequence.reverseComplement(self.sequence)
     if not preserveHeader:
         newHeader = '{0} [revcomp]'.format(self.header.strip())
     else:
         newHeader = self.header
     return FastaRecord(newHeader, flipped)
Example #2
0
 def reverseComplement(self, preserveHeader=False):
     """
     Return a fresh FastaRecord carrying the reverse-complemented sequence.

     The header of the new record is this record's header as-is when
     `preserveHeader` is true; by default it is the stripped header with
     " [revcomp]" appended.
     """
     rcSequence = sequence.reverseComplement(self.sequence)
     header = (self.header if preserveHeader
               else '{0} [revcomp]'.format(self.header.strip()))
     return FastaRecord(header, rcSequence)
Example #3
0
 def reference(self, aligned=True, orientation="native"):
     """
     Return the reference bases covered by this alignment.

     The bases are sliced from
     `self.bam.referenceFasta[self.referenceName].sequence` between
     `self.tStart` and `self.tEnd`.

     aligned:     when true, the oriented bases are converted to an int8
                  array, passed through `self._gapifyRef`, and the result
                  of its `tostring()` is returned; when false the oriented
                  bases are returned as sliced.
     orientation: "native" reverse-complements the bases if
                  `self.isReverseStrand` is true; "genomic" leaves them
                  as stored in the reference.

     Raises ValueError if `orientation` is neither "native" nor "genomic".
     """
     if orientation not in ("native", "genomic"):
         # Call syntax is valid on both Python 2 and Python 3; the former
         # `raise ValueError, "..."` statement form only parses on Python 2.
         raise ValueError("Bad `orientation` value")
     tSeq = self.bam.referenceFasta[self.referenceName].sequence[self.tStart : self.tEnd]
     shouldRC = orientation == "native" and self.isReverseStrand
     tSeqOriented = reverseComplement(tSeq) if shouldRC else tSeq
     if aligned:
         # NOTE(review): np.fromstring (binary mode) and ndarray.tostring are
         # deprecated in recent NumPy releases; replacing them requires a
         # bytes-vs-text decision that this excerpt cannot settle.
         x = np.fromstring(tSeqOriented, dtype=np.int8)
         y = self._gapifyRef(x, orientation)
         return y.tostring()
     else:
         return tSeqOriented
Example #4
0
 def reference(self, aligned=True, orientation="native"):
     """
     Fetch the stretch of reference sequence this alignment spans.

     The stretch is
     `self.bam.referenceFasta[self.referenceName].sequence[self.tStart:self.tEnd]`.
     With `orientation="native"` it is reverse-complemented when
     `self.isReverseStrand` is true; with `orientation="genomic"` it is
     used as stored.

     If `aligned` is true the oriented bases are turned into an int8 array,
     handed to `self._gapifyRef` together with `orientation`, and the
     `tostring()` of the result is returned.  If `aligned` is false the
     oriented bases are returned directly.

     Raises ValueError for any other `orientation` value.
     """
     if orientation not in ("native", "genomic"):
         # The exception is constructed with call syntax so the module
         # parses on Python 3 as well as Python 2.
         raise ValueError("Bad `orientation` value")
     tSeq = self.bam.referenceFasta[self.referenceName].sequence[self.tStart:self.tEnd]
     shouldRC = orientation == "native" and self.isReverseStrand
     tSeqOriented = reverseComplement(tSeq) if shouldRC else tSeq
     if not aligned:
         return tSeqOriented
     # NOTE(review): np.fromstring (binary mode) and ndarray.tostring are
     # deprecated in recent NumPy releases -- confirm the supported NumPy
     # range before migrating them.
     x = np.fromstring(tSeqOriented, dtype=np.int8)
     y = self._gapifyRef(x, orientation)
     return y.tostring()
 def test_reverse_complement_error(self):
     # Feed the fixture's invalid sequence to sequence.reverseComplement.
     # NOTE(review): nothing visible here asserts on the outcome; presumably
     # an expected-exception decorator sits outside this excerpt -- confirm.
     invalid = self.bad_sequence
     sequence.reverseComplement(invalid)
 def test_iupac(self):
     # complement() and reverseComplement() of the IUPAC fixture sequence
     # must equal the corresponding expected fixture values.
     complemented = sequence.complement(self.iupac_sequence)
     assert_equal(self.iupac_complement, complemented)
     reverseComplemented = sequence.reverseComplement(self.iupac_sequence)
     assert_equal(self.iupac_reverse_complement, reverseComplemented)
 def test_reverseComplement(self):
     # reverseComplement must map the fixture sequence to its expected
     # reverse complement, and map that value back to the original sequence.
     forwardResult = sequence.reverseComplement(self.sequence)
     assert_equal(self.reverse_complement, forwardResult)
     roundTrip = sequence.reverseComplement(self.reverse_complement)
     assert_equal(self.sequence, roundTrip)
Example #8
0
 def test_reverse_complement_error(self):
     # Call sequence.reverseComplement on the fixture's invalid sequence.
     # NOTE(review): no assertion is visible in this snippet; presumably the
     # expected failure is declared outside it -- confirm.
     revcomp = sequence.reverseComplement
     revcomp(self.bad_sequence)
Example #9
0
 def test_iupac(self):
     # Check complement() first, then reverseComplement(), against the
     # expected values stored on the fixture for the IUPAC sequence.
     expectedComplement = self.iupac_complement
     assert_equal(expectedComplement,
                  sequence.complement(self.iupac_sequence))
     expectedReverse = self.iupac_reverse_complement
     assert_equal(expectedReverse,
                  sequence.reverseComplement(self.iupac_sequence))
Example #10
0
 def test_reverseComplement(self):
     # Forward direction: sequence -> expected reverse complement.
     expectedReverse = self.reverse_complement
     assert_equal(expectedReverse,
                  sequence.reverseComplement(self.sequence))
     # Backward direction: reverse complement -> original sequence.
     expectedOriginal = self.sequence
     assert_equal(expectedOriginal,
                  sequence.reverseComplement(self.reverse_complement))
 def test_reverse_complement_error(self):
     # reverseComplement must reject BAD_SEQUENCE by raising ValueError.
     invalid = self.BAD_SEQUENCE
     with pytest.raises(ValueError):
         sequence.reverseComplement(invalid)
 def test_iupac(self):
     # complement() and reverseComplement() of IUPAC_SEQUENCE must equal
     # the expected class-level constants.
     complemented = sequence.complement(self.IUPAC_SEQUENCE)
     assert self.IUPAC_COMPLEMENT == complemented
     reverseComplemented = sequence.reverseComplement(self.IUPAC_SEQUENCE)
     assert self.IUPAC_REVERSE_COMPLEMENT == reverseComplemented
 def test_reverseComplement(self):
     # reverseComplement must turn SEQUENCE into REVERSE_COMPLEMENT and
     # turn REVERSE_COMPLEMENT back into SEQUENCE.
     forwardResult = sequence.reverseComplement(self.SEQUENCE)
     assert self.REVERSE_COMPLEMENT == forwardResult
     roundTrip = sequence.reverseComplement(self.REVERSE_COMPLEMENT)
     assert self.SEQUENCE == roundTrip
Example #14
0
def _matchedIndexPairs(cigar):
    """
    Translate an alignment CIGAR string into paired indices of exact matches.

    The string is tokenised with the module-level compiled pattern `p`
    (defined outside this function; presumably it matches one
    "<count><op>" token at a time -- confirm at its definition).  Each token
    moves a query cursor and/or a target cursor:

        '='  advances both cursors and records the positions covered
        'X'  advances both cursors, records nothing
        'I'  advances the query cursor only
        'D'  advances the target cursor only

    Returns a tuple (queryIdx, targetIdx) of equal-length integer arrays:
    queryIdx[k] and targetIdx[k] are the positions of the k-th exactly
    matching base in the query and in the target.  Both arrays are empty
    when the CIGAR contains no '=' run.
    """
    queryRuns = []
    targetRuns = []
    queryPos = 0
    targetPos = 0
    for m in p.finditer(cigar):
        token = m.group()
        runLen = int(token[:-1])
        op = token[-1]
        if op == '=':
            queryRuns.append(np.arange(queryPos, queryPos + runLen, dtype=np.intp))
            targetRuns.append(np.arange(targetPos, targetPos + runLen, dtype=np.intp))
            queryPos += runLen
            targetPos += runLen
        elif op == 'X':
            queryPos += runLen
            targetPos += runLen
        elif op == 'I':
            queryPos += runLen
        elif op == 'D':
            targetPos += runLen

    if not queryRuns:
        # Keep an integer dtype so the empty arrays remain valid fancy indices.
        return np.empty(0, dtype=np.intp), np.empty(0, dtype=np.intp)
    return np.concatenate(queryRuns), np.concatenate(targetRuns)


def extractIPDonlyTgenomic(cbamfile, alncbamfile, sbamfile, zmwqueue, outqueue):
    """
    Worker loop: summarise subread IPD values for each ZMW taken from a queue.

    ZMW hole numbers are read from `zmwqueue` until a None sentinel arrives.
    For each one, the first read for that hole in `cbamfile` is taken as the
    CCS read, and every read for that hole in `sbamfile` is aligned to it
    with edlib, both as-is and reverse-complemented; the orientation with
    the smaller edit distance is kept.  The IPD of each exactly matching
    base is written into a (number of subreads x CCS length) matrix, the
    matrix is normalised, and per-orientation column means are computed.
    One result dict per ZMW is put on `outqueue`, followed by a final None
    once the sentinel has been received.

    Relies on module-level names defined outside this function: `pb`,
    `edlib`, `np`, `warnings`, `reverseComplement`, `p`, `usepercentiles`
    and `fitGaussian`.

    Parameters:
        cbamfile:    path given to pb.IndexedBamReader for the CCS reads
        alncbamfile: path given to pb.IndexedBamReader for the CCS alignments
        sbamfile:    path given to pb.IndexedBamReader for the subreads
        zmwqueue:    queue supporting get() and task_done(); yields hole
                     numbers, then None
        outqueue:    queue supporting put(); receives the result dicts

    Keys of each result dict:
        zmw, cclen, nsubr, naln                    -- hole number and counts
        chr, refStart, refEnd, alnStart, alnEnd    -- taken from the alignment
            with the greatest readLength, or "noAlignment" / -1 when the
            hole has no alignment
        basemeanA, basemeanC, basemeanG, basemeanT -- mean of the forward
            means at that CCS base pooled with the reverse means at the
            complementary CCS base
        onlyt -- forward mean at CCS 'T' positions, reverse mean at CCS 'A'
            positions, NaN elsewhere
        bingmm, gmmlowmean, gmmlowweight, gmmhighmean, gmmhighweight,
        aicDiff, convergeInf -- values returned by fitGaussian on `onlyt`
    """
    with pb.IndexedBamReader(alncbamfile) as alncbam, pb.IndexedBamReader(sbamfile) as sbam,\
         pb.IndexedBamReader(cbamfile) as cbam:
        zmw = zmwqueue.get()
        while zmw is not None:

            cc = cbam.readsByHoleNumber(zmw)[0]
            ccread = cc.read(aligned=False, orientation='native')
            subrs = sbam.readsByHoleNumber(zmw)
            ccalns = alncbam.readsByHoleNumber(zmw)

            zmres = {}

            # Report the alignment with the greatest read length (the first
            # one on ties); None marks "no alignment for this hole".
            if len(ccalns) > 0:
                usealn = int(np.argmax([ccal.readLength for ccal in ccalns]))
            else:
                usealn = None

            # One row per subread, one column per CCS base; NaN = no IPD.
            allipds = np.full((len(subrs), len(ccread)), np.nan, dtype='float32')
            subrOrient = np.empty(len(subrs), dtype='bool')

            for index, sr in enumerate(subrs):
                # Test if this subread aligns to the forward or reverse of the CCS
                forwardSread = sr.read(aligned=False, orientation='native')
                reverseSread = reverseComplement(forwardSread)
                faln = edlib.align(forwardSread, ccread, mode='NW', task='path')
                raln = edlib.align(reverseSread, ccread, mode='NW', task='path')
                isForward = faln['editDistance'] < raln['editDistance']
                subrOrient[index] = isForward
                alndir = faln if isForward else raln

                # Positions of exact matches: origb indexes the oriented
                # subread, ccb indexes the CCS read.  Built as plain integer
                # index arrays: an integer buffer cannot hold a NaN
                # placeholder, and int16 cannot index past 32767 bases.
                origb, ccb = _matchedIndexPairs(alndir['cigar'])
                if not isForward:
                    # The IPDs below are in native orientation, so position i
                    # of the reverse-complemented read is native position
                    # -1 - i (counted from the end).
                    origb = -1 - origb

                ipds = sr.baseFeature('Ipd', aligned=False, orientation="native")
                allipds[index, ccb] = ipds[origb]

            # Normalize the IPD values from each subread
            allipds = allipds / np.mean(np.percentile(allipds[~np.isnan(allipds)], usepercentiles))

            # Column indices of each base within the CCS read
            readisb = {refb: np.where([b == refb for b in ccread])[0] for refb in ['A', 'C', 'G', 'T']}

            # Take the mean IPD at each position
            with warnings.catch_warnings():  # ignoring warnings from taking the mean of columns that are all NaN
                warnings.simplefilter("ignore", category=RuntimeWarning)
                forwardMean = np.nanmean(allipds[subrOrient, :], axis=0)
                reverseMean = np.nanmean(allipds[~subrOrient, :], axis=0)
            # get the mean at just Ts
            tonlyMean = np.full(len(ccread), np.nan, dtype='float32')
            tonlyMean[readisb['T']] = forwardMean[readisb['T']]
            tonlyMean[readisb['A']] = reverseMean[readisb['A']]

            # Save useful information about this molecule
            zmres['zmw'] = zmw
            zmres['cclen'] = len(ccread)
            zmres['nsubr'] = len(subrs)
            zmres['naln'] = len(ccalns)
            if usealn is not None:
                bestaln = ccalns[usealn]
                zmres['chr'] = bestaln.referenceName
                zmres['refStart'] = bestaln.referenceStart
                zmres['refEnd'] = bestaln.referenceEnd
                zmres['alnStart'] = bestaln.aStart
                zmres['alnEnd'] = bestaln.aEnd
            else:
                zmres['chr'] = "noAlignment"
                zmres['refStart'] = -1
                zmres['refEnd'] = -1
                zmres['alnStart'] = -1
                zmres['alnEnd'] = -1
            with warnings.catch_warnings():  # ignoring warnings from taking the mean of all NaN
                warnings.simplefilter("ignore", category=RuntimeWarning)
                # Something is wrong with the read if these throw a warning, but still no need to print on command line
                # The stored value will be NaN
                zmres['basemeanA'] = np.nanmean(np.concatenate([forwardMean[readisb['A']], reverseMean[readisb['T']]], axis=None))
                zmres['basemeanC'] = np.nanmean(np.concatenate([forwardMean[readisb['C']], reverseMean[readisb['G']]], axis=None))
                zmres['basemeanG'] = np.nanmean(np.concatenate([forwardMean[readisb['G']], reverseMean[readisb['C']]], axis=None))
                zmres['basemeanT'] = np.nanmean(np.concatenate([forwardMean[readisb['T']], reverseMean[readisb['A']]], axis=None))

            zmres['onlyt'] = tonlyMean

            # binarize the onlyT ipds
            respf, gmmeans, gmweights, aic, convinf = fitGaussian(tonlyMean, initmean=True, zmw=zmw)
            zmres['bingmm'] = respf
            zmres['gmmlowmean'] = gmmeans[0]
            zmres['gmmlowweight'] = gmweights[0]
            zmres['gmmhighmean'] = gmmeans[1]
            zmres['gmmhighweight'] = gmweights[1]
            zmres['aicDiff'] = aic
            zmres['convergeInf'] = convinf

            outqueue.put(zmres)  # put the results in the output queue
            zmwqueue.task_done()
            zmw = zmwqueue.get()
        # Acknowledge the None sentinel, then signal completion downstream.
        zmwqueue.task_done()
        outqueue.put(None)