def write(queue_in, queue_out, filename, cpus):
    """
    Drain results from `queue_out` into `filename` while showing progress.

    Every truthy item taken from `queue_out` is printed on its own line and
    flushed right away. A `Poison` instance marks one finished worker; the
    loop stops once `cpus` of them have been seen. Progress is estimated from
    how many items have left `queue_in` since this function started.

    The output handle is closed even if reading or writing fails.
    """
    from jcvi.utils.progressbar import ProgressBar, Percentage, Bar, ETA

    fw = must_open(filename, "w")
    try:
        isize = queue_in.qsize()
        logging.debug("A total of {0} items to compute.".format(isize))
        # An empty input queue would otherwise give the bar a maxval of 0.
        isize = isize or 1
        widgets = [
            'Queue: ', Percentage(), ' ',
            Bar(marker='>', left='[', right=']'), ' ', ETA()
        ]
        p = ProgressBar(maxval=isize, term_width=60, widgets=widgets).start()

        poisons = 0
        while True:
            res = queue_out.get()
            qsize = queue_in.qsize()
            # qsize() is only a snapshot, so keep the value inside the
            # range the bar was created with.
            p.update(min(max(isize - qsize, 0), isize))
            if isinstance(res, Poison):
                poisons += 1
                if poisons == cpus:  # wait all workers finish
                    break
            elif res:
                print(res, file=fw)
                # Flush per item so partial output is visible on disk.
                fw.flush()
    finally:
        fw.close()
def weblogo(args):
    """
    %prog weblogo [fastafile|fastqfile]

    Extract base composition for reads
    """
    # NOTE: the docstring above doubles as the usage text handed to
    # OptionParser, so the longer description lives in this comment.
    #
    # Outputs, all written to the current directory:
    #   L.fasta / R.fasta      first / last N bases of each parsed read
    #   L.png / R.png          rendered by the external `weblogo` command
    #   L.ATCG.csv / R.ATCG.csv  per-position base counts (4 x N)
    #   L4.common / R3.common  N most common 4-base prefixes / 3-base suffixes
    import numpy as np
    from jcvi.utils.progressbar import ProgressBar, Percentage, Bar, ETA

    p = OptionParser(weblogo.__doc__)
    p.add_option("-N", default=10, type="int",
                 help="Count the first and last N bases")
    p.add_option("--nreads", default=1000000, type="int",
                 help="Parse first N reads")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastqfile, = args
    N = opts.N
    nreads = opts.nreads

    pat = "ATCG"
    # Rows follow the order of `pat`; columns are positions within the window.
    L = np.zeros((4, N), dtype="int32")
    R = np.zeros((4, N), dtype="int32")
    base_index = {base: row for row, base in enumerate(pat)}
    L4, R3 = Counter(), Counter()
    widgets = ['Parse reads: ', Percentage(), ' ',
               Bar(marker='>', left='[', right=']'), ' ', ETA()]
    pr = ProgressBar(maxval=nreads, term_width=60, widgets=widgets).start()

    fastq = fastqfile.endswith(".fastq")
    if fastq:
        it = iter_fastq(fastqfile)
    else:
        it = SeqIO.parse(must_open(fastqfile), "fasta")

    with open("L.fasta", "w") as fw_L, open("R.fasta", "w") as fw_R:
        for k, rec in enumerate(it, 1):
            # Stop before touching the progress bar so it is never asked to
            # show a value above its maxval (nreads).
            if k > nreads or rec is None:
                break
            if k % 1000 == 0:
                pr.update(k)

            s = str(rec.seq)
            head, tail = s[:N], s[-N:]
            for i, a in enumerate(head):
                if a in base_index:
                    L[base_index[a]][i] += 1
            # Walk the tail backwards so the counts are right-aligned:
            # column N - 1 always holds the final base of the read.
            for j, a in enumerate(tail[::-1]):
                if a in base_index:
                    R[base_index[a]][N - 1 - j] += 1

            L4[s[:4]] += 1
            R3[s[-3:]] += 1
            print(">{0}\n{1}".format(k, head), file=fw_L)
            print(">{0}\n{1}".format(k, tail), file=fw_R)

    cmd = "weblogo -F png -s large -f {0}.fasta -o {0}.png"
    cmd += " --color-scheme classic --composition none -U probability"
    cmd += " --title {1}"
    # Titles follow the -N option instead of assuming the default of 10.
    sh(cmd.format('L', "First_{0}_bases".format(N)))
    sh(cmd.format('R', "Last_{0}_bases".format(N)))

    np.savetxt("L.{0}.csv".format(pat), L, delimiter=',', fmt="%d")
    np.savetxt("R.{0}.csv".format(pat), R, delimiter=',', fmt="%d")

    for outfile, counter in (("L4.common", L4), ("R3.common", R3)):
        with open(outfile, "w") as fw:
            for kmer, count in counter.most_common(N):
                print("\t".join((kmer, str(count))), file=fw)