Example #1
0
def primerStrip(file, GoodOut, BadOut, fwdprimer, revprimer):
    """Split a FASTQ file by whether each read starts with the forward primer.

    A read is accepted when ``primer.MatchPrefix(seq, fwdprimer)`` is at most
    ``args.primer_mismatch``.  Accepted reads lose their first
    ``len(fwdprimer)`` bases/qualities and, when ``revprimer`` is given and
    located, everything from the reverse-primer position onward; they are
    written to ``GoodOut``.  All other reads are written untrimmed to
    ``BadOut``.

    Args:
        file: path of the input FASTQ file.
        GoodOut: output path for primer-trimmed reads (overwritten).
        BadOut: output path for reads rejected on the forward primer
            (overwritten).
        fwdprimer: forward primer sequence.
        revprimer: reverse primer sequence; a falsy value skips the reverse
            primer search entirely.
    """
    PL = len(fwdprimer)
    # All three handles are managed by the ``with`` so the input file is
    # closed as well (it used to be opened inline and never closed).
    with open(GoodOut, 'w') as good, open(BadOut, 'w') as bad, \
            open(file) as reads:
        # The reverse-complemented primer is identical for every read.
        rev_target = revcomp_lib.RevComp(revprimer) if revprimer else None
        for title, seq, qual in FastqGeneralIterator(reads):
            if primer.MatchPrefix(seq, fwdprimer) > args.primer_mismatch:
                bad.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
                continue
            # Forward primer accepted: strip it from sequence and qualities.
            Seq = seq[PL:]
            Qual = qual[PL:]
            if revprimer:
                BestPosRev, BestDiffsRev = primer.BestMatch2(
                    Seq, rev_target, args.primer_mismatch)
                # Only a strictly positive position is treated as a hit.
                if BestPosRev > 0:
                    Seq = Seq[:BestPosRev]
                    Qual = Qual[:BestPosRev]
            good.write("@%s\n%s\n+\n%s\n" % (title, Seq, Qual))
Example #2
0
         if Barcode == "":  #if not found, move onto next record
             continue
         BarcodeLength = len(Barcode)
         seq = seq[BarcodeLength:]
         qual = qual[BarcodeLength:]
         #look for forward primer
         if args.require_primer != 'off':  #means we only want ones with forward primer and or reverse
             Diffs = primer.MatchPrefix(seq, FwdPrimer)
             if Diffs > args.primer_mismatch:
                 continue
             #if found, trim away primer
             seq = seq[trim:]
             qual = qual[trim:]
             if args.require_primer == 'both':
                 #look for reverse primer, strip if found
                 BestPosRev, BestDiffsRev = primer.BestMatch2(
                     seq, ReverseCompRev, args.primer_mismatch)
                 if BestPosRev > 0:
                     seq = seq[:BestPosRev]
                     qual = qual[:BestPosRev]
                 else:
                     continue
         #check size
         if len(
                 seq
         ) < args.min_len:  #filter out sequences less than minimum length.
             continue
         runningTotal += 1
         fileout = os.path.join(args.out, BarcodeLabel)
         with open(fileout, 'ab') as output:
             output.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
 if args.require_primer == 'off':
Example #3
0
def processRead(input):
    """Demultiplex one FASTQ file, trimming barcodes and primers from reads.

    For every read the forward barcode is located with ``FindBarcode`` and
    removed, the forward primer must match the start of the remainder within
    ``args.primer_mismatch``, and the reverse primer is searched with
    ``primer.BestMatch2``.  Surviving reads are renamed
    ``R_<n>;barcodelabel=<label>;`` and written to ``<base>.demux.fq`` in
    ``tmpdir``.  A single comma-separated line of counters
    (Total, NoBarcode, NoPrimer, RevPrimerFound, NoRevBarcode, TooShort,
    ValidSeqs) is written to ``<base>.stats``.

    Reads skipped because ``args.full_length`` is set and no reverse primer
    was found are not added to any rejection counter.

    Args:
        input: path of the FASTQ file to process; the part of its basename
            before the first '.' names the output files.
    """
    base = os.path.basename(input).split('.')[0]
    PL = len(FwdPrimer)
    RL = len(RevPrimer)
    DemuxOut = os.path.join(tmpdir, base+'.demux.fq')
    StatsOut = os.path.join(tmpdir, base+'.stats')
    Total = 0
    NoBarcode = 0
    NoRevBarcode = 0
    NoPrimer = 0
    TooShort = 0
    RevPrimerFound = 0
    ValidSeqs = 0
    # Convert once instead of on every length check.
    min_len = int(args.min_len)
    # The input handle is part of the ``with`` so it is closed on exit
    # (previously it was opened inline and leaked).
    with open(StatsOut, 'w') as counts, open(DemuxOut, 'w') as out, \
            open(input) as reads:
        for title, seq, qual in FastqGeneralIterator(reads):
            Total += 1
            # look for barcode, trim it off
            Barcode, BarcodeLabel = FindBarcode(seq, Barcodes)
            if Barcode == "":
                NoBarcode += 1
                continue
            BarcodeLength = len(Barcode)
            Seq = seq[BarcodeLength:]
            Qual = qual[BarcodeLength:]
            # forward primer must sit directly after the barcode
            if primer.MatchPrefix(Seq, FwdPrimer) > args.primer_mismatch:
                NoPrimer += 1
                continue
            # Reverse primer is searched on the read that still carries the
            # forward primer, so positions are relative to that string.
            BestPosRev, BestDiffsRev = primer.BestMatch2(
                Seq, RevPrimer, args.primer_mismatch)
            if BestPosRev > 0:  # reverse primer was found
                RevPrimerFound += 1
                if args.reverse_barcode:
                    # reverse barcode is looked up in what follows the primer
                    CutSeq = Seq[BestPosRev + RL:]
                    RevBarcode, RevBarcodeLabel = FindBarcode(
                        CutSeq, RevBarcodes)
                    if RevBarcode == "":
                        NoRevBarcode += 1
                        continue
                    BarcodeLabel = BarcodeLabel+'_'+RevBarcodeLabel
                # keep only the region between the two primers
                Seq = Seq[PL:BestPosRev]
                Qual = Qual[PL:BestPosRev]
                if not args.full_length:
                    # check minimum length first, otherwise primer-dimer
                    # type sequences would get padded with Ns
                    if len(Seq) < min_len:
                        TooShort += 1
                        continue
                    if len(Seq) < args.trim_len and args.pad == 'on':
                        pad = args.trim_len - len(Seq)
                        Seq = Seq + pad*'N'
                        Qual = Qual + pad*'J'
                    else:
                        Seq = Seq[:args.trim_len]
                        Qual = Qual[:args.trim_len]
            else:
                # no reverse primer: unusable when full length is required
                if args.full_length:
                    continue
                Seq = Seq[PL:]
                Qual = Qual[PL:]
                # without a reverse primer the read must reach trim_len
                if len(Seq) < args.trim_len:
                    TooShort += 1
                    continue
                Seq = Seq[:args.trim_len]
                Qual = Qual[:args.trim_len]
            # final minimum-length check (matters for full-length reads)
            if len(Seq) < min_len:
                TooShort += 1
                continue
            ValidSeqs += 1
            Name = 'R_'+str(ValidSeqs)+';barcodelabel='+BarcodeLabel+';'
            out.write("@%s\n%s\n+\n%s\n" % (Name, Seq, Qual))
        counts.write('%i,%i,%i,%i,%i,%i,%i\n' % (
            Total, NoBarcode, NoPrimer, RevPrimerFound, NoRevBarcode,
            TooShort, ValidSeqs))
Example #4
0
def ProcessReads(records):
    """Yield barcode/primer-trimmed records from an iterable of reads.

    Generator over ``records`` (objects exposing ``seq`` and
    ``letter_annotations["phred_quality"]``).  For each read:

    1. The forward barcode is found with ``FindBarcode``; if that fails and
       ``args.barcode_mismatch > 0`` the best ``amptklib.fuzzymatch`` hit
       over ``Barcodes`` is used.  Reads without a barcode are skipped.
    2. The forward primer must be found at a position greater than 0 and no
       further than ``BarcodeLength + 2``.
    3. The global ``OutCount`` is incremented (before the remaining filters,
       so output numbering can contain gaps).
    4. If the reverse primer is found the read is trimmed with ``TrimRead``
       between both primers, optionally relabelled with the reverse barcode,
       then padded/truncated to ``TrimLen`` unless ``args.full_length``.
       If it is not found and ``args.full_length`` is unset, the read is
       trimmed at the forward primer only and yielded truncated to
       ``TrimLen``; reads shorter than ``TrimLen`` are dropped.

    Args:
        records: iterable of sequence records.

    Yields:
        The trimmed (and possibly padded or truncated) records.
    """
    global OutCount
    for rec in records:
        # convert to string for processing
        Seq = str(rec.seq)

        # look for barcodes
        Barcode, BarcodeLabel = FindBarcode(Seq, Barcodes)
        if Barcode == "":
            # exact lookup failed: fall back to fuzzy matching if allowed
            if not (args.barcode_mismatch > 0):
                continue
            hit = [None, None, 0, None, None]
            for k, v in Barcodes.items():
                alignment = amptklib.fuzzymatch(v, Seq, args.barcode_mismatch)
                # keep the alignment with the highest first element
                if alignment and alignment[0] > hit[2]:
                    hit = [k, v, alignment[0], alignment[1], alignment[2]]
            if hit[0] is None:
                continue
            # might be shorter than the actual barcode
            BarcodeLength = hit[4] - hit[3]
            BarcodeLabel = hit[0]
        else:  # barcode was found from dictionary
            BarcodeLength = len(Barcode)

        # now look for primer, if not found, move onto next record
        BestPosFor, BestDiffsFor = primer.BestMatch2(Seq, FwdPrimer,
                                                     MAX_PRIMER_MISMATCHES)
        # must be found (> 0) and sit right after the barcode
        if not (0 < BestPosFor <= BarcodeLength + 2):
            continue
        ForTrim = BestPosFor + PL

        # counter for numbering reads
        OutCount += 1

        # look for reverse primer
        BestPosRev, BestDiffsRev = primer.BestMatch2(Seq, RevPrimer,
                                                     MAX_PRIMER_MISMATCHES)
        if BestPosRev > 0:  # reverse primer was found
            RevTrim = BestPosRev

            # determine reverse barcode
            if args.reverse_barcode:
                CutSeq = Seq[BestPosRev + RL:]
                if CutSeq in RevBarcodes:
                    BCname = RevBarcodes.get(CutSeq)
                else:
                    if not (args.barcode_mismatch > 0):
                        continue
                    hit = [None, None, 0, None, None]
                    for k, v in RevBarcodes.items():
                        alignment = amptklib.fuzzymatch(
                            k, CutSeq, args.barcode_mismatch)
                        if alignment and alignment[0] > hit[2]:
                            hit = [v, k, alignment[0], alignment[1],
                                   alignment[2]]
                    if hit[0] is None:
                        continue
                    BCname = hit[0]
                # update name
                BarcodeLabel = BarcodeLabel + '_' + BCname

            # trim record
            rec = TrimRead(rec, ForTrim, RevTrim, BarcodeLabel, OutCount)

            # check length
            L = len(rec.seq)
            if L < MinLen:
                continue
            if args.full_length:
                yield rec
            elif L < TrimLen:
                # pad sequence with N and qualities with 40 up to TrimLen
                pad = TrimLen - L
                Seq = str(rec.seq) + pad * 'N'
                Qual = rec.letter_annotations["phred_quality"]
                Qual.extend([40] * pad)
                # the annotation is removed before the sequence is replaced
                # and re-attached afterwards; keep this order
                del rec.letter_annotations["phred_quality"]
                rec.seq = Seq
                rec.letter_annotations["phred_quality"] = Qual
                yield rec
            else:
                yield rec[:TrimLen]

        elif not args.full_length:
            # no reverse primer: trim the forward side only
            rec = TrimRead(rec, ForTrim, False, BarcodeLabel, OutCount)
            L = len(rec.seq)
            if L < MinLen:  # remove if shorter than minimum length
                continue
            # truncate down to trim length; shorter reads are dropped
            if L >= TrimLen:
                yield rec[:TrimLen]
Example #5
0
def OnRec(Label, Seq, Qual):
    """Per-record callback: strip primers and write a fixed-length read.

    Updates the module-level counters, reports progress, and writes the
    processed record to ``out_file`` via ``fastq.WriteRec``.  A read is
    written only if it ends up exactly ``TrimLen`` long:

    * reads whose forward primer has more than ``MAX_PRIMER_MISMATCHES``
      differences are counted in ``FwdPrimerMismatchCount`` and dropped;
    * when the reverse primer is found it is stripped; the read is dropped
      (uncounted) if shorter than ``MinLen``, otherwise padded with ``N`` /
      quality ``I`` up to ``TrimLen``;
    * when it is not found, reads shorter than ``TrimLen`` are counted in
      ``TooShortCount`` and dropped;
    * longer reads are truncated to ``TrimLen``.

    ``OutCount`` is incremented before the length filters, so output labels
    may have gaps in their numbering.

    Args:
        Label: incoming record label; ignored and replaced by a generated
            ``<LabelPrefix><n>;barcodelabel=<SampleLabel>;`` label.
        Seq: read sequence.
        Qual: quality string of the same length as ``Seq``.
    """
    global PL, LabelPrefix, SeqCount, OutCount, TooShortCount, PadCount
    global FwdPrimerMismatchCount, RevPrimerStrippedCount
    global FwdPrimer, RevPrimer

    if SeqCount == 0:
        progress.InitFile(fastq.File)

    progress.File("%u reads, %u outupt, %u bad fwd primer, %u rev primer stripped, %u too short. %u padded" % \
      (SeqCount, OutCount, FwdPrimerMismatchCount, RevPrimerStrippedCount, TooShortCount, PadCount))

    SeqCount += 1
    Diffs = MatchesPrimer(Seq, FwdPrimer)
    if Diffs > MAX_PRIMER_MISMATCHES:
        FwdPrimerMismatchCount += 1
        return

    OutCount += 1
    Label = LabelPrefix + str(OutCount) + ";barcodelabel=" + SampleLabel + ";"

    # Strip fwd primer
    Seq = Seq[PL:]
    Qual = Qual[PL:]

    BestPosRev, BestDiffsRev = primer.BestMatch2(Seq, RevPrimer,
                                                 MAX_PRIMER_MISMATCHES)
    if BestPosRev > 0:
        # Strip rev primer
        RevPrimerStrippedCount += 1
        StrippedSeq = Seq[:BestPosRev]
        StrippedQual = Qual[:BestPosRev]

        # Correctness check: re-matching the primer at the reported position
        # must give the reported number of differences.
        Tail = Seq[BestPosRev:]
        Diffs2 = primer.MatchPrefix(Tail, RevPrimer)
        if Diffs2 != BestDiffsRev:
            # sys.stderr.write works on both Python 2 and 3, unlike the
            # ``print >> sys.stderr`` statement form.
            sys.stderr.write("\n")
            sys.stderr.write(" Seq=" + Seq + "\n")
            sys.stderr.write("Tail=" + Tail + "\n")
            sys.stderr.write("RevP=" + RevPrimer + "\n")
            die.Die("BestPosRev %u Diffs2 %u BestDiffsRev %u" %
                    (BestPosRev, Diffs2, BestDiffsRev))
        assert StrippedSeq + Tail == Seq

        Seq = StrippedSeq
        Qual = StrippedQual

        L = len(Seq)
        assert len(Qual) == L

        if L < MinLen:
            return

        if L < TrimLen:
            # Only reads with a stripped reverse primer are padded.
            PadCount += 1
            Seq = Seq + (TrimLen - L) * 'N'
            Qual = Qual + (TrimLen - L) * 'I'
            assert len(Seq) == TrimLen
            assert len(Qual) == TrimLen

    L = len(Seq)
    if L < TrimLen:
        TooShortCount += 1
        return

    if L > TrimLen:
        Seq = Seq[:TrimLen]
        Qual = Qual[:TrimLen]
        L = len(Seq)

    assert L == TrimLen
    assert len(Qual) == TrimLen

    fastq.WriteRec(out_file, Label, Seq, Qual)
Example #6
0
def stripPrimer(records):
    """Yield records with UTAX-style ids and, optionally, primers removed.

    Generator over ``records``.  For each record:

    * when ``args.utax`` is ``'unite2utax'`` or ``'rdp2utax'`` the id is
      rebuilt from the description as ``<accession>;tax=<ranks>`` and
      ``name``/``description`` are blanked; records for which no taxonomy
      can be built get an empty id and are never yielded;
    * when ``args.trimming`` is set, the record is only filtered;
    * otherwise the forward primer is searched in the sequence, then in its
      reverse complement; everything up to and including the primer is
      removed, and the sequence is cut at the reverse-complemented reverse
      primer when that is found.  With ``args.keep_all`` a record without a
      forward primer is kept and only the reverse primer is stripped.

    A record is yielded only if its id and sequence are non-empty, it is
    longer than 50 bases, and (when ``args.drop_ns`` is non-zero) it does
    not contain a run of ``args.drop_ns`` consecutive ``N``.

    Note: when the primer is found on the reverse complement, the yielded
    record carries the trimmed reverse-complemented sequence as returned by
    ``revcomp_lib.RevComp``.  Previously the position found on the reverse
    complement was applied to the original-orientation sequence, which cut
    the read at an unrelated offset.

    Args:
        records: iterable of sequence records with ``id``, ``name``,
            ``description`` and ``seq`` attributes.

    Yields:
        The records that pass the filters, modified in place.
    """
    if not args.trimming:
        # loop-invariant primer-search settings
        max_mismatch = int(args.primer_mismatch)
        revPrimer = revcomp_lib.RevComp(RevPrimer)

    for rec in records:
        if args.utax == 'unite2utax':
            rec.id = _unite_header_to_utax_id(rec.description)
            rec.name = ""
            rec.description = ""
        elif args.utax == 'rdp2utax':
            rec.id = _rdp_header_to_utax_id(rec.description)
            rec.name = ""
            rec.description = ""

        if args.trimming:
            # no primer handling: only check for ambiguous bases
            if args.drop_ns != 0 and 'N' * args.drop_ns in str(rec.seq):
                continue
            if rec.id != "" and rec.seq != "" and len(rec.seq) > 50:
                yield rec
            continue

        Seq = rec.seq
        # StripSeq stays None when the record must be dropped.
        StripSeq = None
        BestPosFor, BestDiffsFor = primer.BestMatch2(
            Seq, FwdPrimer, max_mismatch)
        if BestDiffsFor < max_mismatch:
            if BestPosFor > 0:
                StripSeq = Seq[fwdLen + BestPosFor:]
        else:
            # forward primer not found: try the reverse complement
            RevSeq = revcomp_lib.RevComp(Seq)
            BestPosFor, BestDiffsFor = primer.BestMatch2(
                RevSeq, FwdPrimer, max_mismatch)
            if BestDiffsFor < max_mismatch:
                if BestPosFor > 0:
                    # the position refers to RevSeq, so RevSeq is sliced
                    StripSeq = RevSeq[fwdLen + BestPosFor:]
            elif args.keep_all:
                StripSeq = Seq
        if StripSeq is None:
            continue

        # now look for reverse primer and cut there if found
        BestPosRev, BestDiffsRev = primer.BestMatch2(
            StripSeq, revPrimer, max_mismatch)
        if BestDiffsRev < max_mismatch:
            StrippedSeq = StripSeq[:BestPosRev]
        else:
            StrippedSeq = StripSeq

        # after stripping primers, check for ambiguous bases
        if args.drop_ns != 0 and 'N' * args.drop_ns in StrippedSeq:
            continue
        rec.seq = StrippedSeq
        if rec.id != "" and rec.seq != "" and len(rec.seq) > 50:
            yield rec


def _unite_header_to_utax_id(description):
    """Build a ``<id>;tax=k:..,p:..`` id from a '|'-separated header.

    The field starting with ``k__`` supplies the taxonomy, fields starting
    with ``SH`` or ``re`` are ignored, and the last remaining field supplies
    the id.  Returns "" when either is missing (earlier code silently reused
    the previous record's value) or when no rank survives filtering.
    Raises IndexError if the taxonomy has fewer than seven ','/';'-separated
    ranks.
    """
    # Python 2 text handling; 'latin2ascii' is an encode error handler that
    # is not defined here -- presumably registered elsewhere, confirm.
    header = unicode(description, 'utf-8').encode('ascii', 'latin2ascii')
    tax = None
    gbID = None
    for field in header.split("|"):
        if field.startswith("k__"):
            tax = field
        elif field.startswith("SH") or field.startswith("re"):
            continue
        else:
            gbID = field
    if tax is None or gbID is None:
        return ""

    tf = tax.replace(";", ",").replace("__", ":").split(",")
    # kingdom .. genus
    ranks = [tf[i].replace('_', ' ') for i in range(6)]
    # species: drop anything from the first '(' on, then tidy punctuation
    s = re.sub(r'[(].*$', '', tf[6])
    s = s.replace('_', ' ').replace('.', '')
    if len(s.split(' ')) < 2:
        s = 's:'

    removal = ("unidentified", "Incertae", "uncultured", "Group",
               "incertae")
    sp_removal = (" sp", "_sp", "uncultured", "isolate", "mycorrhizae",
                  "vouchered", "fungal", "basidiomycete", "ascomycete",
                  "fungus", "symbiont")
    reformat_tax = []
    for rank in ranks:
        if not any(x in rank for x in removal):
            reformat_tax.append(rank)
    if not any(x in s for x in sp_removal):
        reformat_tax.append(s)

    new_id = gbID + ";tax=" + ",".join(reformat_tax)
    new_id = re.sub(",s:$", "", new_id)
    new_id = re.sub("=s:$", "=", new_id)
    if new_id.endswith(";tax="):  # no taxonomy left, get rid of it
        return ""
    return new_id


def _rdp_header_to_utax_id(description):
    """Build a ``<id>;tax=k:..,p:..`` id from a tab-separated header.

    The text before the first tab supplies the id (first word) and the
    species candidate (the rest, up to the first ';'); the text after the
    last tab is a ';'-separated list in which each rank name (``domain``,
    ``phylum``, ...) is preceded by its value.  Returns "" when the result
    carries no taxonomy at all.
    """
    # Python 2 text handling; 'latin2ascii' is an encode error handler that
    # is not defined here -- presumably registered elsewhere, confirm.
    header = unicode(description, 'utf-8').encode('ascii', 'latin2ascii')
    temp = header.split("\t")
    taxLevels = temp[-1]
    first = temp[0].split(";")[0]
    ID = first.split(" ")[0]
    s = "s:" + first.split(" ", 1)[-1]
    s = re.sub(r'[(].*$', '', s)
    s = s.replace(',', '_').replace('.', '')
    if len(s.split(' ')) < 2:
        s = 's:'

    removal = ("unidentified", "Incertae", "uncultured", "Group",
               "incertae", "Chloroplast", "unclassified", "Family")
    sp_removal = (" sp", "_sp", "uncultured", "isolate", "mycorrhizae",
                  "vouchered", "fungal", "basidiomycete", "ascomycete",
                  "fungus", "symbiont", "unclassified", "unidentified",
                  "bacterium", "phytoplasma")
    rank_names = (("k:", "domain"), ("p:", "phylum"), ("c:", "class"),
                  ("o:", "order"), ("f:", "family"), ("g:", "genus"))

    split_tax = taxLevels.split(";")
    reformat_tax = []
    for prefix, rank in rank_names:
        if rank not in split_tax:
            continue
        # the value is the element just before the rank name
        value = prefix + split_tax[split_tax.index(rank) - 1]
        value = value.replace('"', '').split(" ")[0]
        if value != "" and not any(x in value for x in removal):
            reformat_tax.append(value)
    if not any(x in s for x in sp_removal):
        reformat_tax.append(s)

    new_id = ID + ";tax=" + ",".join(reformat_tax)
    new_id = re.sub(",s:$", "", new_id)
    if new_id.endswith(";tax="):  # no taxonomy left, get rid of it
        return ""
    return new_id
def processRead(input):
    """Trim primers from one sample's FASTQ file and write stats.

    Expects ``FwdPrimer``, ``RevPrimer``, ``ReadLen`` and ``args`` to be
    defined at module level.  Output goes to ``args.out``:
    ``<Name>.demux.fq`` with reads renamed
    ``R_<n>;barcodelabel=<Sample>;`` and ``<Name>.stats`` holding one
    comma-separated line (Total, NoPrimer, RevPrimerFound, TooShort,
    ValidSeqs).  ``Name`` is the basename up to the first ``.fq`` and
    ``Sample`` is the part of ``Name`` before the first ``_``.

    Forward primer handling: a matching primer is always stripped.  A read
    without it is counted in ``NoPrimer``; it is discarded only when
    ``args.primer == 'on'`` and the read is longer than ``ReadLen``
    (shorter amplicons may have lost the primer to staggered trimming
    during pair merging), otherwise it is kept untrimmed.

    Args:
        input: path of the FASTQ file to process.
    """
    Name = os.path.basename(input).split(".fq",-1)[0]
    DemuxOut = os.path.join(args.out, Name + '.demux.fq')
    Sample = Name.split('_')[0]
    StatsOut = os.path.join(args.out, Name+'.stats')
    Total = 0
    NoPrimer = 0
    TooShort = 0
    RevPrimerFound = 0
    ValidSeqs = 0
    PL = len(FwdPrimer)
    # The input handle is part of the ``with`` so it is closed on exit
    # (previously it was opened inline and leaked).
    with open(StatsOut, 'w') as counts, open(DemuxOut, 'w') as out, \
            open(input) as reads:
        for title, seq, qual in FastqGeneralIterator(reads):
            Total += 1
            # look for forward primer, if found trim it off
            if primer.MatchPrefix(seq, FwdPrimer) <= args.primer_mismatch:
                Seq = seq[PL:]
                Qual = qual[PL:]
            else:
                NoPrimer += 1
                # primer is only enforced on amplicons longer than a read
                if args.primer == 'on' and len(seq) > ReadLen:
                    continue
                Seq = seq
                Qual = qual
            # now look for reverse primer
            BestPosRev, BestDiffsRev = primer.BestMatch2(
                Seq, RevPrimer, args.primer_mismatch)
            if BestPosRev > 0:  # reverse primer was found
                RevPrimerFound += 1
                Seq = Seq[:BestPosRev]
                Qual = Qual[:BestPosRev]
            elif args.full_length and len(Seq) > ReadLen:
                # full length requested but no reverse primer on a long read
                continue
            # if full_length is passed, then only trim primers
            if not args.full_length:
                # need this check here or primer dimers will get through
                if len(Seq) < args.min_len:
                    TooShort += 1
                    continue
                if len(Seq) < args.trim_len and args.pad == 'on':
                    pad = args.trim_len - len(Seq)
                    Seq = Seq + pad*'N'
                    Qual = Qual + pad*'J'
                else:
                    Seq = Seq[:args.trim_len]
                    Qual = Qual[:args.trim_len]
            # final length check (matters for full-length reads)
            if len(Seq) < args.min_len:
                TooShort += 1
                continue
            ValidSeqs += 1
            # now fix header
            Title = 'R_'+str(ValidSeqs)+';barcodelabel='+Sample+';'
            out.write("@%s\n%s\n+\n%s\n" % (Title, Seq, Qual))
        counts.write('%i,%i,%i,%i,%i\n' % (
            Total, NoPrimer, RevPrimerFound, TooShort, ValidSeqs))
Example #8
0
def ProcessReads(records):
    """Yield primer-trimmed, length-normalised records for one sample.

    Generator over ``records`` (objects exposing ``seq`` and
    ``letter_annotations["phred_quality"]``).  Every input record is
    numbered and relabelled
    ``<args.prefix><n>;barcodelabel=<name>;`` before filtering, so yielded
    ids may have gaps.

    * ``args.primer == "on"``: the forward primer is required (at most
      ``args.primer_mismatch`` differences) and stripped.
    * ``args.primer == "off"``: the forward primer is stripped when it
      matches under that same limit, otherwise the read is kept as is.
      (This branch used a strict ``<`` and so left the primer on reads
      with exactly the allowed number of mismatches.)
    * If the reverse-complemented reverse primer is found the read is cut
      there, dropped when shorter than ``args.min_len``, and unless
      ``args.full_length`` padded or truncated to ``args.trim_len``.
    * If it is not found: with ``args.full_length`` the read is yielded
      when at least ``min_len`` long; with primer "off" it is padded or
      truncated like above; with primer "on" it is yielded truncated only
      when it reaches ``trim_len``.

    Args:
        records: iterable of sequence records.

    Yields:
        The processed records.
    """
    OutCount = 0
    MAX_PRIMER_MISMATCHES = int(args.primer_mismatch)
    LabelPrefix = args.prefix
    MinLen = int(args.min_len)
    TrimLen = int(args.trim_len)
    PL = len(FwdPrimer)
    revPrimer = revcomp_lib.RevComp(RevPrimer)
    for rec in records:
        OutCount += 1
        rec.id = LabelPrefix + str(OutCount) + ";barcodelabel=" + name + ";"
        rec.name = ""
        rec.description = ""
        # turn sequence into string for matching
        Diffs = MatchesPrimer(str(rec.seq), FwdPrimer)
        if args.primer == "on":
            if Diffs > MAX_PRIMER_MISMATCHES:
                continue
            # Strip fwd primer from rec
            rec = rec[PL:]
        elif args.primer == "off":
            # same acceptance limit as the "on" branch
            if Diffs <= MAX_PRIMER_MISMATCHES:
                rec = rec[PL:]

        # look for reverse primer in the (possibly trimmed) read
        BestPosRev, BestDiffsRev = primer.BestMatch2(
            str(rec.seq), revPrimer, MAX_PRIMER_MISMATCHES)
        if BestPosRev > 0:
            # Strip rev primer from rec
            rec = rec[:BestPosRev]
            L = len(rec.seq)
            if L < MinLen:
                continue
            if args.full_length:
                yield rec
            elif L < TrimLen:
                yield _pad_record_to_length(rec, TrimLen)
            else:
                yield rec[:TrimLen]
        else:
            L = len(rec.seq)
            if args.full_length:
                if L >= MinLen:
                    yield rec
            elif args.primer == 'off':
                # custom primer: pad from the end even without a reverse
                # primer, but cull really short reads as likely garbage
                if L < MinLen:
                    continue
                if L < TrimLen:
                    yield _pad_record_to_length(rec, TrimLen)
                else:
                    yield rec[:TrimLen]
            elif args.primer == 'on':
                # truncate down to trim length; shorter reads are dropped
                if L >= TrimLen:
                    yield rec[:TrimLen]


def _pad_record_to_length(rec, length):
    """Pad ``rec`` in place with 'N' bases / quality 40 up to ``length``."""
    pad = length - len(rec.seq)
    Seq = str(rec.seq) + pad * 'N'
    Qual = rec.letter_annotations["phred_quality"]
    Qual.extend([40] * pad)
    # the annotation is removed before the sequence is replaced and
    # re-attached afterwards; keep this order
    del rec.letter_annotations["phred_quality"]
    rec.seq = Seq
    rec.letter_annotations["phred_quality"] = Qual
    return rec