Example #1
0
def trimPrimers(primer, verbose):
    """
    Read FASTA records from standard input, slice each one to the limits
    returned by C{findPrimerBidiLimits}, and write the sliced records to
    standard output as FASTA.

    @param primer: A BioPython C{Bio.Seq} primer sequence.
    @param verbose: A C{bool}. If C{True}, a summary of how many sequences
        were read and how often the primer was found (forward, reversed) or
        was absent is printed to standard error.
    """
    trimmed = []
    total = nForward = nReverse = nAbsent = 0

    for record in SeqIO.parse(sys.stdin, 'fasta'):
        total += 1
        start, end = findPrimerBidiLimits(primer, record.seq)
        # A non-zero start offset is counted as a forward hit, and an end
        # offset that differs from the record length as a reverse hit.
        forwardFound = start != 0
        reverseFound = end != len(record)
        if forwardFound:
            nForward += 1
        if reverseFound:
            nReverse += 1
        if not (forwardFound or reverseFound):
            nAbsent += 1
        trimmed.append(record[start:end])

    if verbose:
        summary = ('Read %d sequences. Found forward: %d, '
                   'Found reversed: %d, Absent: %d')
        print(summary % (total, nForward, nReverse, nAbsent),
              file=sys.stderr)

    SeqIO.write(trimmed, sys.stdout, 'fasta')
Example #2
0
 def testFoundMultiple(self):
     """
     When the primer occurs more than once in the sequence, the
     expected limits must still be returned.
     """
     sequence = Seq('ACGTACGT', IUPAC.unambiguous_dna)
     limits = findPrimerBidiLimits('ACG', sequence)
     self.assertEqual((7, 8), limits)
Example #3
0
def trimPrimers(primer, verbose):
    """
    Slice the FASTA records arriving on standard input using the offsets
    returned by C{findPrimerBidiLimits} and emit the results as FASTA on
    standard output.

    @param primer: A BioPython C{Bio.Seq} primer sequence.
    @param verbose: A C{bool}, if C{True} a one-line summary (sequences read,
        forward hits, reversed hits, absences) is written to standard error.
    """
    tally = {'forward': 0, 'reverse': 0, 'absent': 0}
    trimmedReads = []

    for record in SeqIO.parse(sys.stdin, 'fasta'):
        start, end = findPrimerBidiLimits(primer, record.seq)
        endUntouched = end == len(record)
        if start == 0 and endUntouched:
            # Neither offset moved: the primer was not seen at all.
            tally['absent'] += 1
        else:
            if start != 0:
                tally['forward'] += 1
            if not endUntouched:
                tally['reverse'] += 1
        trimmedReads.append(record[start:end])

    if verbose:
        template = (
            'Read %d sequences. Found forward: %d, '
            'Found reversed: %d, Absent: %d')
        # One read is appended per parsed record, so the list length is the
        # number of sequences read.
        values = (len(trimmedReads), tally['forward'], tally['reverse'],
                  tally['absent'])
        print(template % values, file=sys.stderr)

    SeqIO.write(trimmedReads, sys.stdout, 'fasta')
Example #4
0
 def testOverlappingBackwards(self):
     """
     When a primer occurs twice backwards and the two occurrences
     overlap, only the first instance should be returned.
     """
     sequence = Seq('GTTT', IUPAC.unambiguous_dna)
     limits = findPrimerBidiLimits('AA', sequence)
     self.assertEqual((0, 1), limits)
Example #5
0
 def testNotFound(self):
     """
     A primer that does not occur in the sequence must produce offsets
     that span the whole sequence.
     """
     sequence = Seq('ACGT', IUPAC.unambiguous_dna)
     limits = findPrimerBidiLimits('BLAH', sequence)
     self.assertEqual((0, 4), limits)
Example #6
0
 def testOverlappingBackwards(self):
     """
     Two overlapping backwards occurrences of a primer: only the first
     instance should be returned.
     """
     primer = 'AA'
     target = Seq('GTTT', IUPAC.unambiguous_dna)
     observed = findPrimerBidiLimits(primer, target)
     self.assertEqual((0, 1), observed)
Example #7
0
 def testFoundMultiple(self):
     """
     A primer found several times in a sequence must still give the
     correct value.
     """
     primer = 'ACG'
     target = Seq('ACGTACGT', IUPAC.unambiguous_dna)
     observed = findPrimerBidiLimits(primer, target)
     self.assertEqual((7, 8), observed)
Example #8
0
 def testNotFound(self):
     """
     If the primer is absent, the offsets returned must cover the
     complete sequence.
     """
     primer = 'BLAH'
     target = Seq('ACGT', IUPAC.unambiguous_dna)
     observed = findPrimerBidiLimits(primer, target)
     self.assertEqual((0, 4), observed)
Example #9
0
 def testOverlappingForwards(self):
     """
     When a primer occurs twice forwards and the two occurrences
     overlap, only the first instance should be returned.
     """
     sequence = Seq('GAAA')
     limits = findPrimerBidiLimits('AA', sequence)
     self.assertEqual((3, 4), limits)
Example #10
0
 def testFoundEndStart(self):
     """
     A primer found in both directions (at the end of the forward
     sequence and at the start of the reverse complement) must give
     the correct value.
     """
     sequence = Seq('ACGT', IUPAC.unambiguous_dna)
     limits = findPrimerBidiLimits('GT', sequence)
     self.assertEqual((4, 4), limits)
Example #11
0
 def testFoundEndStart(self):
     """
     The correct value must be returned when the primer matches in both
     directions: the end of the forward sequence and the start of the
     reverse complement.
     """
     primer = 'GT'
     target = Seq('ACGT', IUPAC.unambiguous_dna)
     observed = findPrimerBidiLimits(primer, target)
     self.assertEqual((4, 4), observed)
Example #12
0
 def testFoundStartEnd(self):
     """
     A primer found in both directions (at the start of the forward
     sequence and at the end of the reverse complement) must give
     the correct value.
     """
     sequence = Seq('ACGT')
     limits = findPrimerBidiLimits('AC', sequence)
     self.assertEqual((2, 2), limits)
Example #13
0
 def testLonger(self):
     """
     Check the limits returned for a longer (40 base) sequence.
     """
     bases = ('AAAAAAAAAA'
              'GGGGGGGGGG'
              'AAAAAAAAAA'
              'AAAAAAAAAA')
     sequence = Seq(bases, IUPAC.unambiguous_dna)
     limits = findPrimerBidiLimits('GGGGGGGGGG', sequence)
     self.assertEqual((20, 40), limits)
Example #14
0
 def testLonger(self):
     """
     Exercise the limits calculation on a longer sequence made of four
     ten-base segments.
     """
     segments = ('AAAAAAAAAA', 'GGGGGGGGGG', 'AAAAAAAAAA', 'AAAAAAAAAA')
     target = Seq(''.join(segments), IUPAC.unambiguous_dna)
     observed = findPrimerBidiLimits('GGGGGGGGGG', target)
     self.assertEqual((20, 40), observed)