def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Workflow:

    1. Each of the two input GTF files (``.gtf`` or ``.gtf.gz``) is piped
       through ``gtf2gtf.py --merge-transcripts`` and written to
       ``<prefix>.merged.gtf.gz``.
    2. The two merged files are intersected with ``intersectBed`` and the
       result is written to ``<prefix_a>_vs_<prefix_b>.intersection.gtf.gz``.
    3. Unless ``--no-venn`` is given, the entries of the three files are
       counted and ``venn.diagram2`` (sourced from ``venn_diagram.R`` in the
       scripts directory) draws ``<prefix_a>_vs_<prefix_b>.overlap.png``.

    Raises:
        ValueError: if one of the two GTF files is not supplied, or if a
            supplied file name ends neither in ``.gtf`` nor in ``.gtf.gz``.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option(
        "-a", "--gtf-a", dest="gtf_a", type="string",
        help="supply a gtf file - will compress uncompressed files")

    parser.add_option(
        "-b", "--gtf-b", dest="gtf_b", type="string",
        help="supply a second gtf file - will compress uncompressed files")

    parser.add_option(
        "-s", "--scripts-dir", dest="scripts_dir", type="string",
        help="supply a location for accessory scripts")

    parser.add_option(
        "--no-venn", dest="no_venn", action="store_true",
        help="set if no venn is to be drawn")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # fail early with a readable message instead of an AttributeError on None
    if not options.gtf_a or not options.gtf_b:
        raise ValueError(
            "two gtf files are required (--gtf-a and --gtf-b)")

    prefices = []
    E.info("merging gtf files")
    for gtf in (options.gtf_a, options.gtf_b):
        # ".gtf.gz" has to be tested first: compressed input is read with
        # zcat, plain text input with cat
        if gtf.endswith(".gtf.gz"):
            reader, suffix = "zcat", ".gtf.gz"
        elif gtf.endswith(".gtf"):
            reader, suffix = "cat", ".gtf"
        else:
            raise ValueError(
                "cannot perform merge on %s: is not a gtf file" % gtf)

        prefix = P.snip(gtf, suffix)
        outfile = prefix + ".merged.gtf.gz"
        prefices.append(prefix)

        # NOTE(review): P.run() takes no arguments here; it presumably picks
        # up the local variable named `statement` - keep that name.
        statement = '''%s %s | python %s/gtf2gtf.py --merge-transcripts --log=%s.log | gzip > %s''' % (
            reader, gtf, options.scripts_dir, outfile, outfile)
        P.run()

    # prefices was filled in the order (gtf_a, gtf_b), so pair by position.
    # Matching prefixes against the file names by substring search breaks
    # when one prefix is contained in the other file name.
    prefix_a, prefix_b = prefices
    gtf_a = prefix_a + ".merged.gtf.gz"
    gtf_b = prefix_b + ".merged.gtf.gz"

    E.info("intersecting gtf files")
    # intersect the resulting merged files
    # NOTE(review): the %(name)s placeholders are not filled in here; P.run()
    # presumably interpolates them from the local variables gtf_a, gtf_b,
    # scriptsdir and intersection_out - do not rename these.
    scriptsdir = options.scripts_dir
    intersection_out = "_vs_".join(
        [prefix_a, prefix_b]) + ".intersection.gtf.gz"
    statement = '''intersectBed -a %(gtf_a)s -b %(gtf_b)s -s -wa | python %(scriptsdir)s/gtf2gtf.py --merge-transcripts --log=log | gzip > %(intersection_out)s'''
    P.run()

    if not options.no_venn:
        E.info("producing venn diagram for %s vs %s..." %
               (options.gtf_a, options.gtf_b))

        # count the entries of both merged files and of the intersection
        counts = []
        for label, filename in (("gtf-a", gtf_a),
                                ("gtf-b", gtf_b),
                                ("intersection", intersection_out)):
            E.info("counting entries in %s" % filename)
            infile = IOTools.openFile(filename)
            try:
                n_entries = sum(1 for entry in GTF.iterator(infile))
            finally:
                # release the handle once the iterator is exhausted
                infile.close()
            # single formatted string: same output under python 2 and 3
            print("counts for %s:  %i" % (label, n_entries))
            counts.append(n_entries)
        count_gtf_merged_a, count_gtf_merged_b, count_intersection = counts

        # The venn function is handed two lists of labels whose sizes and
        # overlap reproduce the counts: set B holds the labels
        # "0" .. "count_b - 1"; set A holds the first count_intersection of
        # those labels (the shared part) and is padded with stringified
        # random numbers up to count_a entries (the part unique to A).
        E.info("assembling count lists")
        labels_b = [str(i) for i in range(count_gtf_merged_b)]
        labels_a = [str(i) for i in range(count_intersection)]
        labels_a.extend(
            str(random.random())
            for i in range(count_intersection, count_gtf_merged_a))

        R_source = os.path.join(
            os.path.abspath(options.scripts_dir), "venn_diagram.R")
        R.source(R_source)

        # "." and "-" are replaced by "_" in the names used for the R
        # variables, the category labels and the output file name
        prefix_a = prefix_a.replace(".", "_").replace("-", "_")
        prefix_b = prefix_b.replace(".", "_").replace("-", "_")

        R('''prefix.a <- "%s"''' % prefix_a)
        R('''prefix.b <- "%s"''' % prefix_b)

        venn_out = prefix_a + "_vs_" + prefix_b + ".overlap.png"
        E.info("drawing venn diagram to %s" % venn_out)

        # the keyword names contain dots, hence the ** dictionary
        R["venn.diagram2"](
            R.list(A=labels_a, B=labels_b),
            venn_out,
            **{'cat.cex': 1.5,
               'main.fontfamily': "Arial",
               'cat.pos': FloatVector((0, 0)),
               'cat.fontfamily': "Arial",
               'main.cex': 1.8,
               'height': 1000,
               'width': 1000,
               'cex': 2,
               'fontfamily': "Arial",
               'lwd': R.c(1, 1),
               'fill': R.c(R.rgb(0, 0, 0.5, 0.5), R.rgb(0.5, 0, 0, 0.5)),
               'category.names': R.c(prefix_a, prefix_b),
               'margin': R.c(0.1, 0.1, 0.1, 0.1)})

    # write footer and output benchmark information.
    E.Stop()
def draw_r(regions, points, **kwargs):
    """Draw *regions* and *points* in an R graphics device through rpy2.

    The device is split with ``split_screen``: screen 1 receives the plot,
    screens 3 and 4 receive a colour legend and a count legend. The call
    only returns once ``grDevices.dev_list()`` reports no open device.

    Args:
        regions: object providing ``box()``, ``name()``, ``polys()`` and
            ``color(name, default=...)``. ``box()`` is unpacked into two
            indexable corners (presumably upper-left and lower-right -
            TODO confirm).
        points: iterable of pairs whose first item has ``x`` / ``y``
            attributes and whose second item is an id; it must also provide
            ``Color(id)`` returning values accepted by R's ``rgb``.
        **kwargs: if ``png`` is present and truthy, the device is printed to
            that file with ``grDevices.dev_print``.
    """
    # rpy2 is only imported when the function is actually called
    from rpy2.interactive import process_revents
    from rpy2.robjects import r
    from rpy2.robjects.packages import importr

    def as_r_color(rgb_triple):
        # NOTE(review): maxColorValue is 256 here but 255 for the polygon
        # fill further down - kept as found
        return r.rgb(*rgb_triple, maxColorValue=256)

    def as_r_vector(values):
        return r.c(*values)

    na_value = r("NA")[0]
    guide_inset = 40

    graphics = importr("graphics")
    grDevices = importr("grDevices")
    process_revents.start()

    # lay out the device and select the plotting screen
    graphics.par(bg="white")
    graphics.split_screen(r.c(2, 1))
    graphics.split_screen(r.c(1, 2), screen=2)
    graphics.screen(1)

    # axis limits come from the bounding box of the regions
    corner_a, corner_b = regions.box()
    x_limits = r.c(corner_a[0], corner_b[0])
    y_limits = r.c(corner_b[1], corner_a[1])

    # open an empty plot with the right extent
    graphics.plot(
        r.c(), r.c(),
        main=regions.name(),
        type="p",
        pch="+",
        xlim=x_limits,
        ylim=y_limits,
        xlab="",
        ylab="",
        xaxp=r.c(0, corner_b[0], corner_b[0]/200),
        yaxp=r.c(0, corner_b[1], corner_b[1]/200),
        bg="white")

    # polygons are drawn by decreasing area
    by_area = sorted(regions.polys(), key=lambda shape: shape.area,
                     reverse=True)
    for shape in by_area:
        poly_xs, poly_ys = zip(*shape.boundary[0].coords)
        base_color = regions.color(shape.name(), default=na_value)
        red, green, blue = r.col2rgb(base_color)
        # same colour with alpha 128 of 255
        fill = r.rgb(red, green, blue, alpha=128, maxColorValue=255)
        graphics.polygon(as_r_vector(poly_xs), as_r_vector(poly_ys),
                         col=fill)

    # dashed guide lines, then the light gray grid every 200 units
    graphics.abline(v=r.c(guide_inset, corner_b[0]-guide_inset), lty=2)
    graphics.abline(h=r.seq(0, corner_b[1], 200), col="lightgray", lty=2)
    graphics.abline(v=r.seq(0, corner_b[0], 200), col="lightgray", lty=2)

    # the points, each coloured by its id
    triples = [(item[0].x, item[0].y, item[1]) for item in points]
    point_xs, point_ys, names = zip(*triples)
    point_colors = []
    for tid in names:
        point_colors.append(as_r_color(points.Color(tid)))
    graphics.points(as_r_vector(point_xs), as_r_vector(point_ys),
                    xlab="", ylab="", pch="+",
                    col=as_r_vector(point_colors))

    # optional png copy of the device
    png_file = kwargs.get("png")
    if png_file:
        grDevices.dev_print(grDevices.png, file=png_file,
                            width=corner_b[0], height=corner_b[1])

    # legend contents: ids in first-seen order and occurrences per id
    tid_counts = {}
    uniq_tids = []
    for tid in names:
        if tid not in tid_counts:
            uniq_tids.append(tid)
            tid_counts[tid] = 0
        tid_counts[tid] = tid_counts[tid] + 1

    uniq_colors = [as_r_color(points.Color(tid)) for tid in uniq_tids]
    uniq_names = ["%d\t%s" % (tid, IDs.TileID[tid]) for tid in uniq_tids]
    name_counts = []
    for tid, count in tid_counts.items():
        name_counts.append("%d\t%s: %d" % (tid, IDs.TileID[tid], count))

    shared_legend_args = {"y_intersp": 0.7, "cex": 0.7}

    # colour legend
    graphics.screen(3)
    graphics.legend("center",
                    title="Tile Colors",
                    legend=as_r_vector(uniq_names),
                    col=as_r_vector(uniq_colors),
                    pch="+",
                    pt_cex=1,
                    **shared_legend_args)

    # count legend
    graphics.screen(4)
    graphics.legend("center",
                    title="Tile Counts",
                    legend=as_r_vector(name_counts),
                    **shared_legend_args)

    # poll every 0.1 s until no device is left open
    while grDevices.dev_list() != r("NULL"):
        time.sleep(0.1)
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Workflow:

    1. Each of the two input GTF files (``.gtf`` or ``.gtf.gz``) is piped
       through ``gtf2gtf.py --method=merge-transcripts`` and written to
       ``<prefix>.merged.gtf.gz``.
    2. The two merged files are intersected with ``intersectBed`` and the
       result is written to ``<prefix_a>_vs_<prefix_b>.intersection.gtf.gz``.
    3. Unless ``--no-venn`` is given, the entries of the three files are
       counted and ``venn.diagram2`` (sourced from ``venn_diagram.R`` in the
       scripts directory) draws ``<prefix_a>_vs_<prefix_b>.overlap.png``.

    Raises:
        ValueError: if one of the two GTF files is not supplied, or if a
            supplied file name ends neither in ``.gtf`` nor in ``.gtf.gz``.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option(
        "-a", "--first-gtf-file", dest="gtf_a", type="string",
        help="supply a gtf file - will compress uncompressed files")

    parser.add_option(
        "-b", "--second-gtf-file", dest="gtf_b", type="string",
        help="supply a second gtf file - will compress uncompressed files")

    parser.add_option(
        "-s", "--scripts-dir", dest="scripts_dir", type="string",
        help="supply a location for accessory scripts")

    parser.add_option(
        "--no-venn", dest="no_venn", action="store_true",
        help="set if no venn is to be drawn")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # fail early with a readable message instead of an AttributeError on None
    if not options.gtf_a or not options.gtf_b:
        raise ValueError(
            "two gtf files are required "
            "(--first-gtf-file and --second-gtf-file)")

    prefices = []
    E.info("merging gtf files")
    for gtf in (options.gtf_a, options.gtf_b):
        # ".gtf.gz" has to be tested first: compressed input is read with
        # zcat, plain text input with cat
        if gtf.endswith(".gtf.gz"):
            reader, suffix = "zcat", ".gtf.gz"
        elif gtf.endswith(".gtf"):
            reader, suffix = "cat", ".gtf"
        else:
            raise ValueError(
                "cannot perform merge on %s: is not a gtf file" % gtf)

        prefix = IOTools.snip(gtf, suffix)
        outfile = prefix + ".merged.gtf.gz"
        prefices.append(prefix)

        statement = '''%s %s | python %s/gtf2gtf.py --method=merge-transcripts --log=%s.log | gzip > %s''' % (
            reader, gtf, options.scripts_dir, outfile, outfile)
        # compressed and uncompressed input share one execution path
        P.execute(statement)

    # prefices was filled in the order (gtf_a, gtf_b), so pair by position.
    # Matching prefixes against the file names by substring search breaks
    # when one prefix is contained in the other file name.
    prefix_a, prefix_b = prefices
    gtf_a = prefix_a + ".merged.gtf.gz"
    gtf_b = prefix_b + ".merged.gtf.gz"

    E.info("intersecting gtf files")
    # intersect the resulting merged files
    # NOTE(review): the %(name)s placeholders are not filled in here; P.run()
    # takes no arguments and presumably interpolates them from the local
    # variables statement, gtf_a, gtf_b, scriptsdir and intersection_out -
    # do not rename these.
    scriptsdir = options.scripts_dir
    intersection_out = "_vs_".join(
        [prefix_a, prefix_b]) + ".intersection.gtf.gz"
    statement = '''intersectBed -a %(gtf_a)s -b %(gtf_b)s -s -wa | python %(scriptsdir)s/gtf2gtf.py --method=merge-transcripts --log=log | gzip > %(intersection_out)s'''
    P.run()

    if not options.no_venn:
        E.info("producing venn diagram for %s vs %s..." %
               (options.gtf_a, options.gtf_b))

        # count the entries of both merged files and of the intersection
        counts = []
        for label, filename in (("gtf-a", gtf_a),
                                ("gtf-b", gtf_b),
                                ("intersection", intersection_out)):
            E.info("counting entries in %s" % filename)
            infile = IOTools.openFile(filename)
            try:
                n_entries = sum(1 for entry in GTF.iterator(infile))
            finally:
                # release the handle once the iterator is exhausted
                infile.close()
            print("counts for %s:  %i" % (label, n_entries))
            counts.append(n_entries)
        count_gtf_merged_a, count_gtf_merged_b, count_intersection = counts

        # The venn function is handed two lists of labels whose sizes and
        # overlap reproduce the counts: set B holds the labels
        # "0" .. "count_b - 1"; set A holds the first count_intersection of
        # those labels (the shared part) and is padded with stringified
        # random numbers up to count_a entries (the part unique to A).
        E.info("assembling count lists")
        labels_b = [str(i) for i in range(count_gtf_merged_b)]
        labels_a = [str(i) for i in range(count_intersection)]
        labels_a.extend(
            str(random.random())
            for _ in range(count_intersection, count_gtf_merged_a))

        R_source = os.path.join(
            os.path.abspath(options.scripts_dir), "venn_diagram.R")
        R.source(R_source)

        # "." and "-" are replaced by "_" in the names used for the R
        # variables, the category labels and the output file name
        prefix_a = prefix_a.replace(".", "_").replace("-", "_")
        prefix_b = prefix_b.replace(".", "_").replace("-", "_")

        R('''prefix.a <- "%s"''' % prefix_a)
        R('''prefix.b <- "%s"''' % prefix_b)

        venn_out = prefix_a + "_vs_" + prefix_b + ".overlap.png"
        E.info("drawing venn diagram to %s" % venn_out)

        # the keyword names contain dots, hence the ** dictionary
        R["venn.diagram2"](
            R.list(A=labels_a, B=labels_b),
            venn_out,
            **{
                'cat.cex': 1.5,
                'main.fontfamily': "Arial",
                'cat.pos': FloatVector((0, 0)),
                'cat.fontfamily': "Arial",
                'main.cex': 1.8,
                'height': 1000,
                'width': 1000,
                'cex': 2,
                'fontfamily': "Arial",
                'lwd': R.c(1, 1),
                'fill': R.c(R.rgb(0, 0, 0.5, 0.5), R.rgb(0.5, 0, 0, 0.5)),
                'category.names': R.c(prefix_a, prefix_b),
                'margin': R.c(0.1, 0.1, 0.1, 0.1),
            })

    # write footer and output benchmark information.
    E.Stop()