Ejemplo n.º 1
0
# Scratch directory for this run: the part of the output name before its
# first '.', joined by '_' to the current process ID.
tmpdir = '_'.join([args.out.split('.')[0], str(os.getpid())])
if not os.path.exists(tmpdir):
    os.makedirs(tmpdir)

# Split the fastq input into the scratch directory
# (the last argument passed to the splitter is twice the cpu count).
amptklib.split_fastq(SeqIn, orig_total, tmpdir, cpus * 2)

# Gather the full path of every '.fq' entry now present in the scratch directory.
file_list = [
    os.path.join(tmpdir, entry)
    for entry in os.listdir(tmpdir)
    if entry.endswith(".fq")
]

# Hand each chunk to processRead, spread over the requested number of cpus.
amptklib.runMultiProgress(processRead, file_list, cpus)

# Parenthesised form prints the same line on Python 2 and is valid on Python 3.
print("-------------------------------------------------------")
#Now concatenate all of the demuxed files together
amptklib.log.info("Concatenating Demuxed Files")

# Merged output lives next to the requested output name, not in tmpdir.
tmpDemux = args.out + '.tmp.demux.fq'
# Both handles are binary so each chunk is copied byte-for-byte. The previous
# 'rU' read mode no longer exists on Python 3.11+, and a text-mode reader
# cannot feed a 'wb' writer on Python 3.
# NOTE(review): 'rU' used to normalise line endings to '\n' while copying;
# this assumes the per-chunk demux files are already written with '\n' - confirm.
with open(tmpDemux, 'wb') as outfile:
    for filename in glob.glob(os.path.join(tmpdir, '*.demux.fq')):
        # Never copy the merged file into itself.
        if filename == tmpDemux:
            continue
        with open(filename, 'rb') as readfile:
            shutil.copyfileobj(readfile, outfile)
#parse the stats
finalstats = [0, 0, 0, 0, 0, 0, 0]
for file in os.listdir(tmpdir):
Ejemplo n.º 2
0
    stripPrimer(args.fasta)
else:
    amptklib.log.info("Using %i cpus to process data" % cpus)

    # Break the input fasta into chunk files inside `folder`
    # (the splitter is given twice the cpu count as its last argument).
    amptklib.split_fasta(args.fasta, folder, cpus * 2)

    # Full paths of every '.fasta' entry found in `folder`.
    file_list = [
        os.path.join(folder, entry)
        for entry in os.listdir(folder)
        if entry.endswith(".fasta")
    ]

    # Run stripPrimer on each chunk, spread over the requested number of cpus.
    amptklib.runMultiProgress(stripPrimer, file_list, cpus)

#now concatenate outputs together
# Two merged files are produced, both named from args.out: one collecting the
# '*.extracted.fa' pieces and one intended for the '*.errors.fa' pieces.
OutName = args.out + '.extracted.fa'
ErrorName = args.out + '.errors.fa'
with open(OutName, 'w') as outfile:
    with open(ErrorName, 'w') as outfile2:
        # os.path.join with a single argument returns `folder` unchanged.
        for filename in os.listdir(os.path.join(folder)):
            if filename.endswith('.extracted.fa'):
                # `filename` is a bare directory entry name, so this guard only
                # matches when OutName itself has no directory component.
                if filename == OutName:
                    continue
                # Append this piece to the merged extracted-sequences file.
                with open(os.path.join(folder, filename), 'rU') as readfile:
                    shutil.copyfileobj(readfile, outfile)
            if filename.endswith('.errors.fa'):
                # Same self-copy guard for the merged errors file.
                # NOTE(review): the copy into outfile2 is presumably what
                # follows; it is not visible in this excerpt - confirm.
                if filename == ErrorName:
                    continue
Ejemplo n.º 3
0
        sys.exit(1)
    # Reverse-complement both primers; these are the forms used for searching.
    RevPrimer = revcomp_lib.RevComp(args.rev_primer)
    RevForPrimer = revcomp_lib.RevComp(args.fwd_primer)

    # Scratch directory: output name up to its first '.', then '_' and this
    # process's PID.
    tmpdir = '_'.join([args.out.split('.')[0], str(os.getpid())])
    if not os.path.exists(tmpdir):
        os.makedirs(tmpdir)

    # Forward pass. Output file names are derived from args.out.
    GoodFor = args.out + '.fwd.stripped.fq'
    BadFor = args.out + '.fwd.no_primer.fq'
    print("Splitting forward reads into buckets")
    filelist = splitter(args.input, tmpdir)
    print("Stripping primers from forward reads")
    amptklib.runMultiProgress(primerStrip, filelist, cpus)
    # Merge the per-bucket '.good' and '.bad' results into the two outputs,
    # then discard the scratch directory.
    for suffix, merged in ((".good", GoodFor), (".bad", BadFor)):
        combiner(tmpdir, suffix, merged)
    shutil.rmtree(tmpdir)

    #now run reverse
    print("Splitting reverse reads into buckets")
    # Scratch directory for the reverse pass. It is kept distinct from the
    # forward one but keyed on this process's OWN pid: 'pid + 1' can be the pid
    # of another live process (e.g. a second run started right after this one
    # with the same output basename), whose scratch directory would then
    # collide with ours.
    tmpdir = args.out.split('.')[0] + '_' + str(os.getpid()) + '_rev'
    # Guarded creation, matching how the forward scratch directory is set up.
    if not os.path.exists(tmpdir):
        os.makedirs(tmpdir)
    # Output file names for the reverse reads, derived from args.out.
    GoodRev = args.out + '.rev.stripped.fq'
    BadRev = args.out + '.rev.no_primer.fq'
    revfilelist = splitter(args.reverse, tmpdir)
    print("Stripping primers from reverse reads")
    amptklib.runMultiProgress(revprimerStrip, revfilelist, cpus)
    # Merge the per-bucket '.good' and '.bad' results into the two outputs.
    combiner(tmpdir, ".good", GoodRev)
    combiner(tmpdir, ".bad", BadRev)