tmpdir = args.out.split('.')[0] + '_' + str(os.getpid()) if not os.path.exists(tmpdir): os.makedirs(tmpdir) #split fastq file amptklib.split_fastq(SeqIn, orig_total, tmpdir, cpus * 2) #now get file list from tmp folder file_list = [] for file in os.listdir(tmpdir): if file.endswith(".fq"): file = os.path.join(tmpdir, file) file_list.append(file) #finally process reads over number of cpus amptklib.runMultiProgress(processRead, file_list, cpus) print "-------------------------------------------------------" #Now concatenate all of the demuxed files together amptklib.log.info("Concatenating Demuxed Files") tmpDemux = args.out + '.tmp.demux.fq' with open(tmpDemux, 'wb') as outfile: for filename in glob.glob(os.path.join(tmpdir, '*.demux.fq')): if filename == tmpDemux: continue with open(filename, 'rU') as readfile: shutil.copyfileobj(readfile, outfile) #parse the stats finalstats = [0, 0, 0, 0, 0, 0, 0] for file in os.listdir(tmpdir):
stripPrimer(args.fasta) else: amptklib.log.info("Using %i cpus to process data" % cpus) #now split it into chunks (as many cpus as are queried) amptklib.split_fasta(args.fasta, folder, cpus * 2) #get list of files file_list = [] for file in os.listdir(folder): if file.endswith(".fasta"): file = os.path.join(folder, file) file_list.append(file) #finally process reads over number of cpus amptklib.runMultiProgress(stripPrimer, file_list, cpus) #now concatenate outputs together OutName = args.out + '.extracted.fa' ErrorName = args.out + '.errors.fa' with open(OutName, 'w') as outfile: with open(ErrorName, 'w') as outfile2: for filename in os.listdir(os.path.join(folder)): if filename.endswith('.extracted.fa'): if filename == OutName: continue with open(os.path.join(folder, filename), 'rU') as readfile: shutil.copyfileobj(readfile, outfile) if filename.endswith('.errors.fa'): if filename == ErrorName: continue
# NOTE(review): this exit is presumably the tail of a validation branch that
# starts before this excerpt -- confirm against the full file.
sys.exit(1)

# Reverse-complement both primers for searching.
RevPrimer = revcomp_lib.RevComp(args.rev_primer)
RevForPrimer = revcomp_lib.RevComp(args.fwd_primer)

# Scratch directory names are built from "<args.out up to its first '.'>_<pid>"
# so that every name is derived from this process's own PID.
tmp_base = args.out.split('.')[0] + '_' + str(os.getpid())

# ---- forward reads ----
tmpdir = tmp_base
if not os.path.exists(tmpdir):
    os.makedirs(tmpdir)
print("Splitting forward reads into buckets")
GoodFor = args.out + '.fwd.stripped.fq'
BadFor = args.out + '.fwd.no_primer.fq'
# splitter() yields the list that is handed to runMultiProgress below.
filelist = splitter(args.input, tmpdir)
print("Stripping primers from forward reads")
amptklib.runMultiProgress(primerStrip, filelist, cpus)
# Presumably gathers the per-bucket ".good"/".bad" results into the named
# output files -- confirm against combiner().
combiner(tmpdir, ".good", GoodFor)
combiner(tmpdir, ".bad", BadFor)
shutil.rmtree(tmpdir)

# ---- reverse reads ----
print("Splitting reverse reads into buckets")
# The scratch name used to be built from os.getpid() + 1, i.e. from a PID this
# process does not own; a concurrent run with the same output prefix and the
# next PID would share (and later delete) the same directory. Use a suffix on
# our own PID instead, and guard the creation like the forward pass does.
tmpdir = tmp_base + '_rev'
if not os.path.exists(tmpdir):
    os.makedirs(tmpdir)
GoodRev = args.out + '.rev.stripped.fq'
BadRev = args.out + '.rev.no_primer.fq'
revfilelist = splitter(args.reverse, tmpdir)
print("Stripping primers from reverse reads")
amptklib.runMultiProgress(revprimerStrip, revfilelist, cpus)
combiner(tmpdir, ".good", GoodRev)
combiner(tmpdir, ".bad", BadRev)