# Output paths for the read-mapping step. Both files are written inside the
# temporary folder `tmp` and embed the args.maxee value in their names.
uc_out = os.path.join(tmp, args.out + '.EE' + args.maxee + '.mapping.uc')
otu_table = os.path.join(tmp, args.out + '.EE' + args.maxee + '.otu_table.txt')

# Pick the reads to map: filter_fasta when args.map_filtered is set,
# otherwise orig_fasta. Only the selected name is evaluated.
reads = filter_fasta if args.map_filtered else orig_fasta

ufitslib.log.info("Mapping Reads to OTUs and Building OTU table")
# Global search of the reads against the OTU file (uchime_out) on the plus
# strand at 97% identity; vsearch writes the .uc mapping and the OTU table.
cmd = [
    'vsearch', '--usearch_global', reads,
    '--strand', 'plus',
    '--id', '0.97',
    '--db', uchime_out,
    '--uc', uc_out,
    '--otutabout', otu_table,
]
ufitslib.runSubprocess(cmd, ufitslib.log)

# Report the mapped-read count and its share of orig_total.
# NOTE(review): this counts every line of the .uc file; if vsearch also
# writes no-hit ('N') records there, the figure overstates the number of
# mapped reads -- confirm what ufitslib.line_count sees.
total = ufitslib.line_count(uc_out)
ufitslib.log.info(
    '{0:,}'.format(total) + ' reads mapped to OTUs ' +
    '({0:.0f}%)'.format(total / float(orig_total) * 100)
)

# Copy the OTU sequences and the OTU table into the current working
# directory, then remove the temporary folder unless args.debug is set.
currentdir = os.getcwd()
final_otu = os.path.join(currentdir, args.out + '.cluster.otus.fa')
shutil.copyfile(uchime_out, final_otu)
final_otu_table = os.path.join(currentdir, args.out + '.otu_table.txt')
shutil.copyfile(otu_table, final_otu_table)
if not args.debug:
    shutil.rmtree(tmp)

# Print the completion banner to STDOUT. The parenthesized single-argument
# form prints the same text on Python 2 and is valid syntax on Python 3.
print("-------------------------------------------------------")
print("UNOISE2 Script has Finished Successfully")
# Exactly one barcode source must be given: args.list or args.file.
# The script exits through SystemExit (status 1) rather than os._exit(1):
# os._exit() terminates without flushing stdio buffers, so the error message
# printed just before it could be lost when STDOUT is piped or redirected.
if not args.list and not args.file:
    print("Error, you must specifiy a list of barcodes or a file containing barcodes")
    raise SystemExit(1)

if args.list and args.file:
    print("Error, you must specifiy either list of barcodes or a file containing barcodes, not both")
    raise SystemExit(1)

# Collect the names to keep from whichever source was supplied.
if args.file:
    count = ufitslib.line_count(args.file)
    # load in list of sample names to keep, one per line; the handle is not
    # called `input` so the builtin stays usable for the rest of the script
    with open(args.file, 'rU') as handle:
        lines = [line.rstrip('\n') for line in handle]
else:
    count = len(args.list)
    lines = args.list

# make sure it is a set, faster lookup
keep_list = set(lines)

# now run filtering
# (keep_count / total_count are presumably updated by filter_sample -- confirm)
keep_count = 0
total_count = 0
filter_sample(args.input, args.out)
# Exactly one barcode source must be given: args.list or args.file.
# The script exits through SystemExit (status 1) rather than os._exit(1):
# os._exit() terminates without flushing stdio buffers, so the error message
# printed just before it could be lost when STDOUT is piped or redirected.
if not args.list and not args.file:
    print("Error, you must specifiy a list of barcodes or a file containing barcodes")
    raise SystemExit(1)

if args.list and args.file:
    print("Error, you must specifiy either list of barcodes or a file containing barcodes, not both")
    raise SystemExit(1)

# Collect the names to keep from whichever source was supplied.
if args.file:
    count = ufitslib.line_count(args.file)
    # load in list of sample names to keep, one per line; the handle is not
    # called `input` so the builtin stays usable for the rest of the script
    with open(args.file, 'rU') as handle:
        lines = [line.rstrip('\n') for line in handle]
else:
    count = len(args.list)
    lines = args.list

# make sure it is a set, faster lookup
keep_list = set(lines)

# now run filtering
# (keep_count / total_count are presumably updated by filter_sample -- confirm)
keep_count = 0
total_count = 0
filter_sample(args.input, args.out)
# Output file names for this stage; every one is prefixed with args.out.
dadademux = args.out + '.dada2.map.uc'
bioSeqs = args.out + '.cluster.otus.fa'
bioTable = args.out + '.cluster.otu_table.txt'
demuxtmp = args.out + '.original.fa'
uctmp = args.out + '.map.uc'
ClusterComp = args.out + '.iSeqs2clusters.txt'

# Step 1: map the reads to the DADA2 iSeqs.
ufitslib.log.info("Mapping reads to DADA2 iSeqs")
# Convert the input FASTQ to FASTA (demuxtmp); --fastq_maxns 0 discards
# any read that contains an N.
cmd = [
    'vsearch', '--fastq_filter', os.path.abspath(args.fastq),
    '--fastq_qmax', '55',
    '--fastq_maxns', '0',
    '--fastaout', demuxtmp
]
ufitslib.runSubprocess(cmd, ufitslib.log)
# Search those reads against the iSeqs at 97% identity on the plus strand,
# writing the .uc mapping and the OTU table (chimeraFreeTable).
cmd = [
    'vsearch', '--usearch_global', demuxtmp,
    '--db', iSeqs,
    '--id', '0.97',
    '--uc', dadademux,
    '--strand', 'plus',
    '--otutabout', chimeraFreeTable
]
ufitslib.runSubprocess(cmd, ufitslib.log)
total = ufitslib.line_count(dadademux)
ufitslib.log.info(
    '{0:,}'.format(total)
    + ' reads mapped to iSeqs '
    + '({0:.0f}%)'.format(total / float(orig_total) * 100)
)

# Step 2: cluster the iSeqs at args.pct_otu percent identity; centroids are
# relabelled with the 'OTU' prefix and written to bioSeqs.
ufitslib.log.info("Clustering iSeqs at %s%% to generate biological OTUs" % args.pct_otu)
radius = float(args.pct_otu) / 100.0
cmd = [
    'vsearch', '--cluster_smallmem', iSeqs,
    '--centroids', bioSeqs,
    '--id', str(radius),
    '--strand', 'plus',
    '--relabel', 'OTU',
    '--qmask', 'none',
    '--usersort'
]
ufitslib.runSubprocess(cmd, ufitslib.log)
total = ufitslib.countfasta(bioSeqs)
ufitslib.log.info('{0:,}'.format(total) + ' OTUs generated')

# Step 3: search the iSeqs against the clustered OTUs at the same identity
# to record which OTU each iSeq fell into.
iSeqmap = args.out + '.iseq_map.uc'
cmd = [
    'vsearch', '--usearch_global', iSeqs,
    '--db', bioSeqs,
    '--id', str(radius),
    '--uc', iSeqmap,
    '--strand', 'plus'
]
ufitslib.runSubprocess(cmd, ufitslib.log)
iSeqMapped = dict()
# Example #5
# 0
        ufitslib.log.info('{0:,}'.format(total) + ' OTUs passed')

# Now map reads back to OTUs and build the OTU table. Both outputs are
# written inside the temporary folder `tmp` and embed the args.maxee value
# in their file names.
uc_out = os.path.join(tmp, args.out + '.EE' + args.maxee + '.mapping.uc')
otu_table = os.path.join(tmp, args.out + '.EE' + args.maxee + '.otu_table.txt')

# Set up the reads to map: filter_fasta when args.map_filtered is set,
# otherwise orig_fasta. Only the selected name is evaluated.
reads = filter_fasta if args.map_filtered else orig_fasta

ufitslib.log.info("Mapping Reads to OTUs and Building OTU table")
# Global search of the reads against the OTU file (uchime_out) on the plus
# strand at 97% identity; vsearch writes the .uc mapping and the OTU table.
cmd = [
    'vsearch', '--usearch_global', reads,
    '--strand', 'plus',
    '--id', '0.97',
    '--db', uchime_out,
    '--uc', uc_out,
    '--otutabout', otu_table,
]
ufitslib.runSubprocess(cmd, ufitslib.log)

# Count reads mapped and log the count with its share of orig_total.
# NOTE(review): this counts every line of the .uc file; if vsearch also
# writes no-hit ('N') records there, the figure overstates the number of
# mapped reads -- confirm what ufitslib.line_count sees.
total = ufitslib.line_count(uc_out)
ufitslib.log.info(
    '{0:,}'.format(total) + ' reads mapped to OTUs ' +
    '({0:.0f}%)'.format(total / float(orig_total) * 100)
)

# Move files around: copy the OTU sequences and the OTU table into the
# current working directory, then delete tmp unless args.debug is set.
currentdir = os.getcwd()
final_otu = os.path.join(currentdir, args.out + '.cluster.otus.fa')
shutil.copyfile(uchime_out, final_otu)
final_otu_table = os.path.join(currentdir, args.out + '.otu_table.txt')
shutil.copyfile(otu_table, final_otu_table)
if not args.debug:
    shutil.rmtree(tmp)

# Print the completion banner to STDOUT. The parenthesized single-argument
# form prints the same text on Python 2 and is valid syntax on Python 3.
print("-------------------------------------------------------")
print("UNOISE2 Script has Finished Successfully")
print("-------------------------------------------------------")