#Map the reads back to the chimera-filtered OTUs and build the OTU table.
#Both intermediate files are written into the temporary folder.
uc_out = os.path.join(tmp, args.out + '.EE' + args.maxee + '.mapping.uc')
otu_table = os.path.join(tmp, args.out + '.EE' + args.maxee + '.otu_table.txt')

#setup reads to map
reads = filter_fasta if args.map_filtered else orig_fasta

ufitslib.log.info("Mapping Reads to OTUs and Building OTU table")
cmd = [
    'vsearch', '--usearch_global', reads,
    '--strand', 'plus',
    '--id', '0.97',
    '--db', uchime_out,
    '--uc', uc_out,
    '--otutabout', otu_table,
]
ufitslib.runSubprocess(cmd, ufitslib.log)

#count reads mapped
#The --uc file written by vsearch --usearch_global has one record per query:
#'H' for a hit and 'N' for no hit. Counting every line would report the
#number of queries, so only the 'H' records are counted as mapped reads.
with open(uc_out) as uc_handle:
    total = sum(1 for record in uc_handle if record.startswith('H\t'))
ufitslib.log.info('{0:,}'.format(total) + ' reads mapped to OTUs ' +
                  '({0:.0f}%)'.format(total / float(orig_total) * 100))

#Move files around, delete tmp if argument passed.
currentdir = os.getcwd()
final_otu = os.path.join(currentdir, args.out + '.cluster.otus.fa')
shutil.copyfile(uchime_out, final_otu)
final_otu_table = os.path.join(currentdir, args.out + '.otu_table.txt')
shutil.copyfile(otu_table, final_otu_table)
if not args.debug:
    shutil.rmtree(tmp)

#Print location of files to STDOUT
print("-------------------------------------------------------")
print("UNOISE2 Script has Finished Successfully")
#Exactly one source of barcodes is required: a list given on the command
#line (args.list) or a file with one barcode per line (args.file).
if not args.list and not args.file:
    print("Error, you must specifiy a list of barcodes or a file containing barcodes")
    #SystemExit keeps the exit status of 1 but, unlike os._exit, lets the
    #interpreter flush stdout so the message above is not lost when piped.
    raise SystemExit(1)
if args.list and args.file:
    print("Error, you must specifiy either list of barcodes or a file containing barcodes, not both")
    raise SystemExit(1)

if args.file:
    count = ufitslib.line_count(args.file)
    #load in list of sample names to keep
    with open(args.file, 'rU') as handle:
        lines = [line.rstrip('\n') for line in handle]
else:
    #the checks above guarantee args.list is set when args.file is not
    count = len(args.list)
    lines = args.list

#make sure it is a set, faster lookup
keep_list = set(lines)

#now run filtering
keep_count = 0
total_count = 0
filter_sample(args.input, args.out)
#setup output files
dadademux = args.out+'.dada2.map.uc'
bioSeqs = args.out+'.cluster.otus.fa'
bioTable = args.out+'.cluster.otu_table.txt'
demuxtmp = args.out+'.original.fa'
uctmp = args.out+'.map.uc'
ClusterComp = args.out+'.iSeqs2clusters.txt'

#map reads to DADA2 OTUs
ufitslib.log.info("Mapping reads to DADA2 iSeqs")
#convert the input FASTQ to FASTA, dropping reads that contain any N
cmd = ['vsearch', '--fastq_filter', os.path.abspath(args.fastq),
       '--fastq_qmax', '55', '--fastq_maxns', '0', '--fastaout', demuxtmp]
ufitslib.runSubprocess(cmd, ufitslib.log)
cmd = ['vsearch', '--usearch_global', demuxtmp, '--db', iSeqs,
       '--id', '0.97', '--uc', dadademux, '--strand', 'plus',
       '--otutabout', chimeraFreeTable]
ufitslib.runSubprocess(cmd, ufitslib.log)

#The --uc file written by vsearch --usearch_global has one record per query:
#'H' for a hit and 'N' for no hit. Counting every line would report the
#number of queries, so only the 'H' records are counted as mapped reads.
with open(dadademux) as uc_handle:
    total = sum(1 for record in uc_handle if record.startswith('H\t'))
ufitslib.log.info('{0:,}'.format(total) + ' reads mapped to iSeqs '+
                  '({0:.0f}%)'.format(total/float(orig_total)* 100))

#cluster
ufitslib.log.info("Clustering iSeqs at %s%% to generate biological OTUs" % args.pct_otu)
#vsearch expects the identity threshold as a fraction, args.pct_otu is a percentage
radius = float(args.pct_otu) / 100.
cmd = ['vsearch', '--cluster_smallmem', iSeqs, '--centroids', bioSeqs,
       '--id', str(radius), '--strand', 'plus', '--relabel', 'OTU',
       '--qmask', 'none', '--usersort']
ufitslib.runSubprocess(cmd, ufitslib.log)
total = ufitslib.countfasta(bioSeqs)
ufitslib.log.info('{0:,}'.format(total) + ' OTUs generated')

#determine where iSeqs clustered
iSeqmap = args.out+'.iseq_map.uc'
cmd = ['vsearch', '--usearch_global', iSeqs, '--db', bioSeqs,
       '--id', str(radius), '--uc', iSeqmap, '--strand', 'plus']
ufitslib.runSubprocess(cmd, ufitslib.log)
iSeqMapped = {}
ufitslib.log.info('{0:,}'.format(total) + ' OTUs passed')

#now map reads back to OTUs and build OTU table
#Both intermediate files are written into the temporary folder.
uc_out = os.path.join(tmp, args.out + '.EE' + args.maxee + '.mapping.uc')
otu_table = os.path.join(tmp, args.out + '.EE' + args.maxee + '.otu_table.txt')

#setup reads to map
reads = filter_fasta if args.map_filtered else orig_fasta

ufitslib.log.info("Mapping Reads to OTUs and Building OTU table")
cmd = ['vsearch', '--usearch_global', reads, '--strand', 'plus',
       '--id', '0.97', '--db', uchime_out, '--uc', uc_out,
       '--otutabout', otu_table]
ufitslib.runSubprocess(cmd, ufitslib.log)

#count reads mapped
#The --uc file written by vsearch --usearch_global has one record per query:
#'H' for a hit and 'N' for no hit. Counting every line would report the
#number of queries, so only the 'H' records are counted as mapped reads.
with open(uc_out) as uc_handle:
    total = sum(1 for record in uc_handle if record.startswith('H\t'))
ufitslib.log.info('{0:,}'.format(total) + ' reads mapped to OTUs '+
                  '({0:.0f}%)'.format(total/float(orig_total)* 100))

#Move files around, delete tmp if argument passed.
currentdir = os.getcwd()
final_otu = os.path.join(currentdir, args.out + '.cluster.otus.fa')
shutil.copyfile(uchime_out, final_otu)
final_otu_table = os.path.join(currentdir, args.out + '.otu_table.txt')
shutil.copyfile(otu_table, final_otu_table)
if not args.debug:
    shutil.rmtree(tmp)

#Print location of files to STDOUT
print("-------------------------------------------------------")
print("UNOISE2 Script has Finished Successfully")
print("-------------------------------------------------------")