def assess_sample_primers(sample, fqpair, outfiles, primerlist, primerinfo, args):
    """Run the fragment and primer report steps for one sample.

    Output names are built from ``fastq_file_label(sample, args.outdir)``:
    ``<label>.read2primers.txt`` is handed to ``create_fragment_report`` and
    ``<label>.primer2reads.txt`` to ``create_primer_report``.  A log file
    ``<label>.primer_trim.log`` is opened for writing, using ``args.logdir``
    for the label when it is set and ``args.outdir`` otherwise.

    Args:
        sample: Sample name; used for labels, log messages and error context.
        fqpair: Passed through to ``arrange_by_fragments`` and
            ``create_fragment_report``.
        outfiles: Passed through to ``arrange_by_fragments``.
        primerlist: Passed through to ``bin_primer_reads`` and
            ``create_primer_report``.
        primerinfo: Passed through to ``arrange_by_fragments``.
        args: Options object; ``outdir``, ``logdir``, ``force`` and ``debug``
            are read here.

    Returns:
        None.

    Raises:
        Exception: Any exception raised by the report steps is re-raised with
            ``sample`` appended to its ``args`` so the failing sample can be
            identified by the caller.
    """
    outlabel = fastq_file_label(sample, args.outdir)
    if args.logdir:
        loglabel = fastq_file_label(sample, args.logdir)
    else:
        loglabel = outlabel
    frag_primer_file = outlabel + ".read2primers.txt"
    primer_read_file = outlabel + ".primer2reads.txt"
    logfile = loglabel + ".primer_trim.log"
    # The context manager guarantees the log is flushed and closed on both
    # the success path and when an exception propagates.
    with open(logfile, 'w') as logfh:
        try:
            sys.stderr.write(" Creating primer reports for {}\n".format(sample))
            logfh.write("Sample {}\n".format(sample))
            if have_files([frag_primer_file, primer_read_file], args.force,
                          stderr=logfh):
                # NOTE(review): this branch only logs; the reports are still
                # regenerated below.  Confirm whether an early return was
                # intended here.
                logfh.write(" Already have {} and {}\n".format(
                    frag_primer_file, primer_read_file))
            frag2primers = arrange_by_fragments(fqpair, outfiles, primerinfo,
                                                logfh)
            # The return values of the two report writers are not used by
            # this function, so they are not bound to names.
            create_fragment_report(fqpair, frag2primers, frag_primer_file,
                                   True, logfh)
            primerreads = bin_primer_reads(frag2primers, primerlist, logfh)
            create_primer_report(primerreads, primerlist, primer_read_file,
                                 logfh, True, args.debug)
        except Exception as e:
            # Tag the exception with the sample name, then re-raise it
            # unchanged otherwise.
            e.args += (sample, )
            raise
def assess_sample_primers(sample, fqpair, outfiles, primerlist, primerinfo, args):
    """Run the fragment and primer report steps for one sample.

    Output names are built from ``fastq_file_label(sample, args.outdir)``:
    ``<label>.read2primers.txt`` is handed to ``create_fragment_report`` and
    ``<label>.primer2reads.txt`` to ``create_primer_report``.  A log file
    ``<label>.primer_trim.log`` is opened for writing, using ``args.logdir``
    for the label when it is set and ``args.outdir`` otherwise.

    Args:
        sample: Sample name; used for labels, log messages and error context.
        fqpair: Passed through to ``arrange_by_fragments`` and
            ``create_fragment_report``.
        outfiles: Passed through to ``arrange_by_fragments``.
        primerlist: Passed through to ``bin_primer_reads`` and
            ``create_primer_report``.
        primerinfo: Passed through to ``arrange_by_fragments``.
        args: Options object; ``outdir``, ``logdir``, ``force`` and ``debug``
            are read here.

    Returns:
        None.

    Raises:
        Exception: Any exception raised by the report steps is re-raised with
            ``sample`` appended to its ``args`` so the failing sample can be
            identified by the caller.
    """
    outlabel = fastq_file_label(sample, args.outdir)
    if args.logdir:
        loglabel = fastq_file_label(sample, args.logdir)
    else:
        loglabel = outlabel
    frag_primer_file = outlabel + ".read2primers.txt"
    primer_read_file = outlabel + ".primer2reads.txt"
    logfile = loglabel + ".primer_trim.log"
    # The context manager guarantees the log is flushed and closed on both
    # the success path and when an exception propagates.
    with open(logfile, 'w') as logfh:
        try:
            sys.stderr.write(" Creating primer reports for {}\n".format(sample))
            logfh.write("Sample {}\n".format(sample))
            if have_files([frag_primer_file, primer_read_file], args.force,
                          stderr=logfh):
                # NOTE(review): this branch only logs; the reports are still
                # regenerated below.  Confirm whether an early return was
                # intended here.
                logfh.write(" Already have {} and {}\n".format(
                    frag_primer_file, primer_read_file))
            frag2primers = arrange_by_fragments(fqpair, outfiles, primerinfo,
                                                logfh)
            # The return values of the two report writers are not used by
            # this function, so they are not bound to names.
            create_fragment_report(fqpair, frag2primers, frag_primer_file,
                                   True, logfh)
            primerreads = bin_primer_reads(frag2primers, primerlist, logfh)
            create_primer_report(primerreads, primerlist, primer_read_file,
                                 logfh, True, args.debug)
        except Exception as e:
            # Tag the exception with the sample name, then re-raise it
            # unchanged otherwise.
            e.args += (sample, )
            raise
def trim_primers(fqfile, alignout, max_trim_len, primerinfo, outlabel, logfh, args):
    """Write a primer-trimmed FASTQ plus a list of the record ids seen.

    Each record yielded by ``FastQParser(fqfile)`` is handled as follows:

    * id present in ``parse_alignout(alignout)`` and the hit's primer has a
      truthy ``primerinfo[primer]['overlap']``: the record is sliced from
      ``hit['end'] + hit['left']`` onwards;
    * id present but ``overlap`` is falsy: the record is written unchanged;
    * id absent: the record is sliced from ``max_trim_len`` onwards.

    Args:
        fqfile: Input handed to ``FastQParser``.
        alignout: Input handed to ``parse_alignout``.
        max_trim_len: Slice start used for records without an alignment hit.
        primerinfo: Mapping indexed by primer name, then by ``'overlap'``.
        outlabel: Prefix for the two output file names.
        logfh: Writable handle for progress and debug messages.
        args: Options object; ``force`` and ``debug`` are read here.

    Returns:
        Tuple ``(trimmedfq, seqfile)`` holding ``outlabel + ".trimmed.fastq"``
        and ``outlabel + ".seqlist.txt"``.  Returned early, without any
        processing, when ``have_files`` reports true for both names.

    Raises:
        SystemExit: via a bare ``sys.exit()`` when
            ``have_file(seqfile, True, ...)`` is truthy just before the id
            list is written.
    """
    trimmedfq = outlabel + ".trimmed.fastq"
    seqfile = outlabel + ".seqlist.txt"
    result = (trimmedfq, seqfile)

    logfh.write(" Trimming fq: {}\n".format(trimmedfq))
    if have_files([trimmedfq, seqfile], args.force, stderr=logfh):
        logfh.write(" Already have {}\n".format(trimmedfq))
        return result

    aligns = parse_alignout(alignout)
    read_ids = []
    with open(trimmedfq, 'w') as outfq:
        for seqrec in FastQParser(fqfile):
            read_ids.append(seqrec.id)
            if seqrec.id not in aligns:
                # No alignment hit: drop a fixed-length prefix (the original
                # note gives the default as max_primer_len+2).
                subrec = seqrec[max_trim_len:]
            else:
                hit = aligns[seqrec.id]
                primer = hit['primer']
                if primerinfo[primer]['overlap']:
                    trim_from = hit['end'] + hit['left']
                    subrec = seqrec[trim_from:]
                    debug_line = "{}\tTrimming\t{}\n"
                else:
                    subrec = seqrec
                    debug_line = "{}\tNot trimming\t{}\n"
                if args.debug:
                    logfh.write(debug_line.format(primer, seqrec.id))
            outfq.write("{}\n".format(subrec.fastq()))

    logfh.write(" Seq list: {}\n".format(seqfile))
    # NOTE(review): presumably have_file(..., True) tries to clear a stale
    # file and reports whether it is still there; verify against the helper.
    if have_file(seqfile, True, stderr=logfh):
        logfh.write(" Still have {}\n".format(seqfile))
        sys.exit()
    with open_file(seqfile, 'w') as ifh:
        ifh.write("\n".join(read_ids) + "\n")
    return result
def trim_primers(fqfile, alignout, max_trim_len, primerinfo, outlabel, logfh, args):
    """Write a primer-trimmed FASTQ plus a list of the record ids seen.

    Each record yielded by ``FastQParser(fqfile)`` is handled as follows:

    * id present in ``parse_alignout(alignout)`` and the hit's primer has a
      truthy ``primerinfo[primer]['overlap']``: the record is sliced from
      ``hit['end'] + hit['left']`` onwards;
    * id present but ``overlap`` is falsy: the record is written unchanged;
    * id absent: the record is sliced from ``max_trim_len`` onwards.

    Args:
        fqfile: Input handed to ``FastQParser``.
        alignout: Input handed to ``parse_alignout``.
        max_trim_len: Slice start used for records without an alignment hit.
        primerinfo: Mapping indexed by primer name, then by ``'overlap'``.
        outlabel: Prefix for the two output file names.
        logfh: Writable handle for progress and debug messages.
        args: Options object; ``force`` and ``debug`` are read here.

    Returns:
        Tuple ``(trimmedfq, seqfile)`` holding ``outlabel + ".trimmed.fastq"``
        and ``outlabel + ".seqlist.txt"``.  Returned early, without any
        processing, when ``have_files`` reports true for both names.

    Raises:
        SystemExit: via a bare ``sys.exit()`` when
            ``have_file(seqfile, True, ...)`` is truthy just before the id
            list is written.
    """
    trimmedfq = outlabel + ".trimmed.fastq"
    seqfile = outlabel + ".seqlist.txt"
    result = (trimmedfq, seqfile)

    logfh.write(" Trimming fq: {}\n".format(trimmedfq))
    if have_files([trimmedfq, seqfile], args.force, stderr=logfh):
        logfh.write(" Already have {}\n".format(trimmedfq))
        return result

    aligns = parse_alignout(alignout)
    read_ids = []
    with open(trimmedfq, 'w') as outfq:
        for seqrec in FastQParser(fqfile):
            read_ids.append(seqrec.id)
            if seqrec.id not in aligns:
                # No alignment hit: drop a fixed-length prefix (the original
                # note gives the default as max_primer_len+2).
                subrec = seqrec[max_trim_len:]
            else:
                hit = aligns[seqrec.id]
                primer = hit['primer']
                if primerinfo[primer]['overlap']:
                    trim_from = hit['end'] + hit['left']
                    subrec = seqrec[trim_from:]
                    debug_line = "{}\tTrimming\t{}\n"
                else:
                    subrec = seqrec
                    debug_line = "{}\tNot trimming\t{}\n"
                if args.debug:
                    logfh.write(debug_line.format(primer, seqrec.id))
            outfq.write("{}\n".format(subrec.fastq()))

    logfh.write(" Seq list: {}\n".format(seqfile))
    # NOTE(review): presumably have_file(..., True) tries to clear a stale
    # file and reports whether it is still there; verify against the helper.
    if have_file(seqfile, True, stderr=logfh):
        logfh.write(" Still have {}\n".format(seqfile))
        sys.exit()
    with open_file(seqfile, 'w') as ifh:
        ifh.write("\n".join(read_ids) + "\n")
    return result