예제 #1
0
# Start-up: write system information to the log, then run the library's
# version check against the usearch executable given on the command line.
ufitslib.SystemInfo()
usearch = args.usearch
ufitslib.versionDependencyChecks(usearch)

# Intermediate files live in a working directory named "<out>_tmp".
tmp = args.out + '_tmp'
if not os.path.exists(tmp):
    os.makedirs(tmp)

# Log how many reads the input FASTQ holds, together with its size.
ufitslib.log.info("Loading FASTQ Records")
orig_total = ufitslib.countfastq(args.FASTQ)
size = checkfastqsize(args.FASTQ)
readablesize = ufitslib.convertSize(size)
ufitslib.log.info(''.join(['{0:,}'.format(orig_total), ' reads (', readablesize, ')']))

# Expected-errors filtering, writing the result as both FASTQ and FASTA.
# The two filtered outputs share everything but their extension; orig_fasta
# is the FASTA path handed to the vsearch call that follows this one.
filter_out, filter_fasta = (
    os.path.join(tmp, args.out + '.EE' + args.maxee + suffix)
    for suffix in ('.filter.fq', '.filter.fa')
)
orig_fasta = os.path.join(tmp, args.out + '.orig.fa')
ufitslib.log.info("Quality Filtering, expected errors < %s" % args.maxee)

# Assemble the vsearch command line one option/value pair at a time.
cmd = ['vsearch', '--fastq_filter', args.FASTQ]
cmd += ['--fastq_maxee', str(args.maxee)]
cmd += ['--fastqout', filter_out]
cmd += ['--fastaout', filter_fasta]
cmd += ['--fastq_qmax', '55']
ufitslib.runSubprocess(cmd, ufitslib.log)
cmd = [
    'vsearch', '--fastq_filter', args.FASTQ, '--fastaout', orig_fasta,
        ID = line.split("=")[-1].split(";")[0]
        if ID not in BarcodeCount:
            BarcodeCount[ID] = 1
        else:
            BarcodeCount[ID] += 1

# Build a two-column "Sample: Count" table of how often each barcode was
# found, ordered from the highest count to the lowest, and log it along with
# the number of distinct barcoded samples.
barcode_counts = "%30s:  %s" % ('Sample', 'Count')
# The sort key indexes into each (sample, count) pair. A tuple-unpacking
# lambda parameter is Python 2-only syntax and fails to parse on Python 3.
for k, v in natsorted(BarcodeCount.items(), key=lambda item: item[1], reverse=True):
    barcode_counts += "\n%30s:  %s" % (k, str(v))
ufitslib.log.info("Found %i barcoded samples\n%s" % (len(BarcodeCount), barcode_counts))

# When no mapping file was given on the command line, create a generic one
# (named "<out>.mapping_file.txt") for downstream processes.
if not args.mapping_file:
    genericmapfile = args.out + '.mapping_file.txt'
    ufitslib.CreateGenericMappingFileIllumina(
        sampleDict,
        FwdPrimer,
        revcomp_lib.RevComp(RevPrimer),
        genericmapfile,
    )


# Log the path and human-readable size of the demultiplexed output file.
filesize = os.path.getsize(catDemux)
readablesize = ufitslib.convertSize(filesize)
ufitslib.log.info("Output file:  %s (%s)" % (catDemux, readablesize))
# NOTE(review): in the visible code genericmapfile is only assigned when no
# mapping file was supplied; confirm it is also set on the other path.
ufitslib.log.info("Mapping file: %s" % genericmapfile)
if args.cleanup:
    # NOTE(review): this removes args.out itself, while the working directory
    # created elsewhere in this script is args.out + '_tmp'; confirm the
    # intended cleanup target.
    shutil.rmtree(args.out)

# Each print below takes a single parenthesised argument, so the output is the
# same under the Python 2 print statement and the Python 3 print function.
print("-------------------------------------------------------")
if 'win32' in sys.platform:
    # Plain text on win32; the other branch wraps the label in col.WARN/col.END.
    print("\nExample of next cmd: ufits cluster -i %s -o out\n" % (catDemux))
else:
    print(col.WARN + "\nExample of next cmd: " + col.END + "ufits cluster -i %s -o out\n" % (catDemux))