def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads a table from stdin and writes, for every pair of data columns,
    the two directed Kullback-Leibler divergences to ``options.stdout``
    as tab-separated lines ``<column a>\\t<column b>\\t<value>``.

    Column 0 of the table is neither modified nor compared (presumably
    it holds the bin labels -- confirm against the input format).
    Pseudocounts are added to every data column and, unless
    ``--no-normalize`` is given, each data column is divided by its sum
    before the divergences are computed.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      help="method to use [kl=kullback-leibler]",
                      choices=("kl", ))
    parser.add_option("-n", "--no-normalize", dest="normalize",
                      action="store_false",
                      help="do not normalize data")
    parser.add_option("-p", "--pseudocounts", dest="pseudocounts",
                      type="int",
                      help="pseudocounts to add.")
    parser.add_option("-f", "--number-format", dest="number_format",
                      type="string",
                      help="number format.")

    parser.set_defaults(method="kl",
                        columns="all",
                        headers=True,
                        xrange=None,
                        pseudocounts=1,
                        normalize=True,
                        number_format="%6.4f")

    # hand argv on so that main(argv=[...]) is honoured instead of
    # silently falling back to sys.argv.
    (options, args) = E.start(parser, argv=argv, add_pipe_options=True)

    if options.xrange:
        options.xrange = list(map(float, options.xrange.split(",")))

    data, legend = IOTools.readTable(sys.stdin,
                                     numeric_type=numpy.float32,
                                     take=options.columns,
                                     headers=options.headers,
                                     truncate=options.xrange)

    nrows, ncols = data.shape

    # first: add pseudocounts and normalize each data column. The
    # edits are done element by element so that they happen in place.
    pseudocounts = float(options.pseudocounts)
    for y in range(1, ncols):
        for x in range(nrows):
            data[x, y] = data[x, y] + pseudocounts
        if options.normalize:
            t = numpy.sum(data[:, y])
            for x in range(nrows):
                data[x, y] = data[x, y] / t

    # second: compare every unordered pair of data columns once and
    # report both directions of the (asymmetric) divergence.
    for x in range(1, len(legend) - 1):
        for y in range(x + 1, len(legend)):

            if options.method == "kl":
                d1 = 0.0
                d2 = 0.0
                for row in range(nrows):
                    p = data[row, x]
                    q = data[row, y]
                    d1 += p * math.log(p / q)
                    d2 += q * math.log(q / p)

                options.stdout.write(
                    "%s\t%s\t%s\n" % (legend[x], legend[y],
                                      options.number_format % d1))
                options.stdout.write(
                    "%s\t%s\t%s\n" % (legend[y], legend[x],
                                      options.number_format % d2))

    E.stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Every positional argument is read as a table file. For each pair of
    files a linear model ``y ~ x`` is fitted through R, where ``y`` are
    the values of the first selected column of both tables and ``x`` is
    a two-level factor naming the table a value came from. One line per
    pair with the fields of the resulting ANOVA table is written to
    ``options.stdout``.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: data2multiple_anova.py 2782 "
        "2009-09-10 11:40:29Z andreas $")

    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to take for calculating histograms.")
    parser.add_option("-t", "--tree-nh-file", dest="filename_tree",
                      type="string",
                      help="filename with tree(s).")
    parser.add_option("--skip-header", dest="add_header",
                      action="store_false",
                      help="do not add header to flat format.")
    parser.add_option("--output-with-header", dest="write_header",
                      action="store_true",
                      help="write header and exit.")
    parser.add_option("--debug", dest="debug", action="store_true",
                      help="debug mode")
    parser.add_option("--display-tree", dest="display_tree",
                      action="store_true",
                      help="display the tree")
    parser.add_option("-m", "--method", dest="methods", type="choice",
                      action="append",
                      choices=("contrasts", "spearman", "pearson", "compute"),
                      help="methods to perform on contrasts.")

    parser.set_defaults(
        columns="all",
        filename_tree=None,
        add_header=True,
        write_header=False,
        debug=False,
        methods=[],
        value_format="%6.4f",
        pvalue_format="%e",
        display_tree=False,
    )

    # hand argv on so that main(argv=[...]) is honoured instead of
    # silently falling back to sys.argv.
    (options, args) = E.start(parser, argv=argv, quiet=True)

    # explicit column lists are given 1-based on the command line
    if options.columns not in ("all", "all-but-first"):
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    data = []

    options.filenames = args

    for filename in options.filenames:
        # the context manager closes the file even if parsing fails
        with IOTools.open_file(filename, "r") as infile:
            table, headers = IOTools.readTable(
                infile, take=options.columns, headers=False)
        data.append(table)

    fields = ["Df", "Sum Sq", "F value", "Pr(>F)", "Mean Sq"]

    options.stdout.write("set1\tset2")
    for field in fields:
        options.stdout.write("\t%s" % field)
    options.stdout.write("\n")

    # CODE needs to be refactored for rpy2 usage
    for x in range(len(data)):
        values_x = data[x][:, 0]
        for y in range(x + 1, len(data)):
            values_y = data[y][:, 0]

            # the model is built without conversion; conversion is
            # switched back on before the anova result is read.
            rpy.set_default_mode(rpy.NO_CONVERSION)

            factors = ["x"] * len(values_x) + ["y"] * len(values_y)
            values = list(values_x) + list(values_y)

            linear_model = R.lm(
                R("y ~ x"), data=R.data_frame(x=factors, y=values))
            rpy.set_default_mode(rpy.BASIC_CONVERSION)
            result = R.anova(linear_model)

            options.stdout.write(
                "%s\t%s" % (options.filenames[x], options.filenames[y]))
            for field in fields:
                options.stdout.write("\t%s" % str(result[field]))
            options.stdout.write("\n")

    # finish logging in the same way as the sibling scripts
    E.stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads a histogram from stdin and prints a transformed histogram to
    stdout. Two code paths exist:

    ``--method=append`` (default)
        Expects two tab-separated columns (bin, count) and prints for
        each bin the count, its frequency and the increasing and
        decreasing cumulative counts and frequencies.

    ``cumul``, ``rcumul``, ``normalize``
        Reads the table through ``IOTools.readTable`` and applies the
        method in place to every column but the first one.

    Raises
    ------
    ValueError
        If no data lines are found (append mode) or an unknown method
        is requested.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: histogram2histogram.py 2782 "
        "2009-09-10 11:40:29Z andreas $")

    parser.add_option("-i", "--is-int", dest="is_ints", action="store_true",
                      help="categories are integers.")
    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("append", "cumul", "rcumul", "normalize"),
                      help="method(s) to apply.")
    parser.add_option("--no-headers", dest="headers", action="store_false",
                      help="histogram has no headers.")
    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to use for plotting.")
    parser.add_option("", "--truncate", dest="truncate", type="string",
                      help="truncate at range.")
    parser.add_option("", "--no-out-of-range", dest="cumulate_out_of_range",
                      action="store_false",
                      help="add up bins out of range.")
    parser.add_option("--bin-format", dest="format_bin", type="string",
                      help="format for bins.")
    parser.add_option("--value-format", dest="format_val", type="string",
                      help="format for vals.")

    parser.set_defaults(
        is_ints=False,
        method="append",
        columns="all",
        headers=True,
        truncate=None,
        cumulate_out_of_range=True,
        format_bin="%6.4f",
        format_val="%6.4f",
    )

    # hand argv on so that main(argv=[...]) is honoured instead of
    # silently falling back to sys.argv.
    (options, args) = E.start(parser, argv=argv)

    # old histogram2histogram.py semantics - need to merged with newer
    # code below.
    if options.method == "append":

        # retrieve histogram, dropping comment lines
        lines = [x for x in sys.stdin.readlines() if x[0] != "#"]
        if not lines:
            raise ValueError("no data found")

        # check if first line contains a header: if its first field is
        # not a number, echo an extended header and drop the line.
        # rstrip is used instead of [:-1] so that a final line without
        # a trailing newline does not lose its last character.
        d = lines[0].rstrip("\n").split("\t")[0]
        try:
            if options.is_ints:
                int(d)
            else:
                float(d)
        except ValueError:
            print("\t".join(
                (d, "counts", "frequency", "cumulative counts",
                 "increasing cumulative frequency", "cumulative counts",
                 "decreasing cumulative frequency")))
            del lines[0]

        data = [list(map(float, x.rstrip("\n").split("\t"))) for x in lines]

        if len(data) == 0:
            raise ValueError("no data found")

        total = float(sum(row[1] for row in data))

        cumul_down = int(total)
        cumul_up = 0

        if options.is_ints:
            form = "%i\t%i\t%6.4f\t%i\t%6.4f\t%i\t%6.4f"
        else:
            form = "%6.4f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%6.4f"

        for bin_value, val in data:
            percent = float(val) / total
            cumul_up += val
            percent_cumul_up = float(cumul_up) / total
            percent_cumul_down = float(cumul_down) / total

            print(form % (bin_value, val, percent, cumul_up,
                          percent_cumul_up, cumul_down, percent_cumul_down))

            cumul_down -= val

    else:

        if options.truncate:
            options.truncate = list(map(float, options.truncate.split(",")))

        methods = options.method.split(",")

        data, legend = IOTools.readTable(
            sys.stdin,
            numeric_type=numpy.float32,
            take=options.columns,
            headers=options.headers,
            truncate=options.truncate,
            cumulate_out_of_range=options.cumulate_out_of_range)

        nfields = len(legend)

        # note: because of MA, iteration makes copy of slices
        # Solution: inplace edits.
        nrows, ncols = data.shape

        for method in methods:
            if method == "cumul":
                running = [0] * ncols
                for x in range(nrows):
                    for y in range(1, ncols):
                        data[x, y] += running[y]
                        running[y] = data[x, y]

            elif method == "rcumul":
                running = [0] * ncols
                # run down to and including row 0 so that the first
                # bin receives the reverse cumulative sum as well.
                for x in range(nrows - 1, -1, -1):
                    for y in range(1, ncols):
                        data[x, y] += running[y]
                        running[y] = data[x, y]

            elif method == "normalize":
                maxima = [0] * ncols
                for x in range(nrows):
                    for y in range(1, ncols):
                        # the conversion to float is necessary
                        maxima[y] = max(maxima[y], float(data[x, y]))

                # avoid dividing by zero for all-zero columns
                for y in range(1, ncols):
                    if maxima[y] == 0:
                        maxima[y] = 1.0

                for x in range(nrows):
                    for y in range(1, ncols):
                        data[x, y] = data[x, y] / maxima[y]
            else:
                # string exceptions are not valid in Python 3
                raise ValueError("unknown method %s" % method)

        print("\t".join(legend))

        row_format = options.format_bin + "\t" + \
            "\t".join([options.format_val] * (nfields - 1))

        for d in data:
            print(row_format % tuple(d))

    E.stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads a table of histograms from the file given as first positional
    argument (or from stdin if there is none or it is ``-``), optionally
    cumulates and/or normalizes the data columns and plots them with
    pylab. The plot is written to ``--hardcopy`` if given and shown
    interactively otherwise.

    Raises
    ------
    ValueError
        If error bars are requested together with ``--cumulate``,
        ``--reverse-cumulate`` or ``--normalize``.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: plot_histogram.py 2782 "
        "2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-l", "--plot-legend", dest="legend", type="string",
                      help="legend for plot [default=%default].")
    parser.add_option("-t", "--title", dest="title", type="string",
                      help="title for plot [default=%default].")
    parser.add_option(
        "-p", "--hardcopy", dest="hardcopy", type="string",
        help="filename for hardcopy of plot. The extension defines the "
        "format. Known extensions are: 'emf, eps, jpeg, jpg, pdf, png, "
        "ps, raw, rgba, svg, svgz' [default=%default].",
        metavar="FILE")
    parser.add_option("", "--xrange", dest="xrange", type="string",
                      help="x viewing range of plot [default=%default].")
    parser.add_option("", "--yrange", dest="yrange", type="string",
                      help="y viewing range of plot[default=%default].")
    parser.add_option("-o", "--logscale", dest="logscale", type="string",
                      help="use logscale on x, y or xy [default=%default]")
    parser.add_option("-x", "--xtitle", dest="xtitle", type="string",
                      help="title for x axis [default=%default]")
    parser.add_option("-y", "--ytitle", dest="ytitle", type="string",
                      help="title for y axis [default=%default]")
    parser.add_option("-d", "--dpi", dest="dpi", type="int",
                      help="dpi of images [default=%default]")
    parser.add_option("-n", "--normalize", dest="normalize",
                      action="store_true",
                      help="normalize histograms [default=%default]")
    parser.add_option(
        "--cumulate", dest="cumulate", action="store_true",
        help="calculate cumulative histogram [default=%default].")
    parser.add_option(
        "--reverse-cumulate", dest="reverse_cumulate", action="store_true",
        help="calculate cumulative histogram in reverse order "
        "[default=%default].")
    parser.add_option("--legend-location", dest="legend_location",
                      type="choice",
                      choices=("upper left", "upper right", "lower left",
                               "lower right", "center", "center right",
                               "center left", "none"),
                      help="location of legend [default=%default]")
    parser.add_option("--backend", dest="backend", type="string",
                      help="backend to use [Agg|SVG|PS] [default=%default]")
    parser.add_option(
        "--symbols", dest="symbols", type="string",
        help="symbols to use for each histogram [steps|...] "
        "[default=%default].")
    parser.add_option("--dump", dest="dump", action="store_true",
                      help="dump data for debug purposes [default=%default].")
    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to use for plotting [default=%default].")
    parser.add_option(
        "--truncate", dest="truncate", action="store_true",
        help="truncate date within x range. If not set, xrange is simply "
        "a viewing range [default=%default].")
    parser.add_option("--as-lines", dest="as_lines", action="store_true",
                      help="plot only lines, no symbols [default=%default].")
    parser.add_option(
        "--noheaders", dest="headers", action="store_false",
        help="do not take first input line as header [default=%default].")
    parser.add_option("--stacked", dest="stacked", action="store_true",
                      help="do a stacked plot [default=%default].")
    parser.add_option("--add-function", dest="function", type="string",
                      help="add a function to the plot [default=%default].")
    parser.add_option(
        "--add-error-bars", dest="error_bars", type="choice",
        choices=("interleaved", "blocked"),
        help="add error bars. The input format is 'interleaved' or "
        "'blocked'. In the interleaved format the error follows each "
        "column. I the blocked format first the data, then the errors "
        "in the same order [default=%default].")

    parser.set_defaults(
        legend=None,
        title=None,
        hardcopy=None,
        logscale=None,
        xtitle=None,
        ytitle=None,
        xrange=None,
        yrange=None,
        normalize=None,
        columns="all",
        headers=True,
        legend_location="upper right",
        backend="cairo",
        symbols="g-D,b-h,r-+,c-+,m-+,y-+,k-o,g-^,b-<,r->,c-D,m-h",
        dump=False,
        truncate=False,
        cumulate=False,
        reverse_cumulate=False,
        function=None,
        # the default has to use the option's dest (``error_bars``)
        error_bars=None,
        as_lines=False,
        stacked=False,
        dpi=80,
    )

    # hand argv on so that main(argv=[...]) is honoured instead of
    # silently falling back to sys.argv.
    (options, args) = E.start(parser, argv=argv)

    # import matplotlib/pylab. Has to be done here
    # for batch scripts without GUI.
    import matplotlib
    if options.hardcopy:
        # honour --backend; its default ("cairo") is the backend that
        # used to be hard-coded here.
        matplotlib.use(options.backend)
    import pylab

    # put this method here (because it requires pylab)
    def doStackedPlot(data, legend):
        """plot the data columns stacked on top of each other.

        Returns the list of plotted line handles and the matching
        legend entries. Sets ``options.xrange`` / ``options.yrange``
        from the data if they have not been given.
        """
        colors = [
            "red", "blue", "green", "cyan", "magenta", "yellow", "brown",
            "silver", "purple", "lightyellow", "black", "ivory", "pink",
            "orange", "gray", "teal"
        ]

        ax = data[:, 0]
        # each polygon runs along the new top and back along the old top
        xvals = numpy.concatenate((ax, ax[::-1]))
        y_top = numpy.zeros(len(ax))

        min_y = min(data[:, 1:].flat)
        max_y = min_y
        new_legend, dummy_lines = [], []

        for i in range(1, len(legend)):
            new_y_top = y_top + data[:, i]
            yvals = numpy.concatenate((new_y_top, y_top[::-1]))
            color = colors[i % len(colors)]
            pylab.fill(xvals, yvals, color)
            y_top = new_y_top
            max_y = max(y_top)

            dummy_lines.append(pylab.plot(xvals, yvals, color))
            new_legend.append(legend[i])

        if not options.xrange:
            options.xrange = min(data[:, 0]), max(data[:, 0])
        if not options.yrange:
            options.yrange = 0, max_y

        return dummy_lines, new_legend

    if options.as_lines:
        options.symbols = []
        for y in ("-", ":", "--"):
            for x in "gbrcmyk":
                options.symbols.append(y + x)
    else:
        options.symbols = options.symbols.split(",")

    if options.xrange:
        options.xrange = list(map(float, options.xrange.split(",")))
    if options.yrange:
        options.yrange = list(map(float, options.yrange.split(",")))

    # Added support for (inclusive) range format: "1,3,5,7-100" (Gerton
    # 13/12/06)
    if options.columns != "all":
        cols = []
        for d in options.columns.split(','):
            colopts = d.split('-')
            if len(colopts) == 2:
                # columns are 1-based on the command line: shift the
                # range to 0-based exactly like single columns below.
                cols += list(range(int(colopts[0]) - 1, int(colopts[1])))
            else:
                cols += [int(d) - 1]
        options.columns = cols

    if args and args[0] != "-":
        infile = IOTools.open_file(args[0], "r")
    else:
        infile = sys.stdin

    # without --truncate, xrange is only a viewing range
    if options.truncate:
        xr = options.xrange
    else:
        xr = None

    try:
        # numpy.float was an alias of the builtin float and has been
        # removed from NumPy.
        data, legend = IOTools.readTable(infile,
                                         numeric_type=float,
                                         take=options.columns,
                                         headers=options.headers,
                                         truncate=xr)
    finally:
        if infile != sys.stdin:
            infile.close()

    if len(data) == 0:  # or data is None:
        E.info("empty table: no plot")
        E.stop()
        return

    nrows, ncols = data.shape

    # note: because of MA, iteration makes copy of slices
    # Solution: inplace edits.
    if options.cumulate:
        if options.error_bars:
            raise ValueError("can not add error bars to cumulative histogram")
        if data.mask.any():
            # cumsum does not work with masked arrays, so do it manually
            for y in range(1, ncols):
                c = 0
                for x in range(0, nrows):
                    if not data.mask[x, y]:
                        data[x, y] += c
                        c = data[x, y]
        else:
            for x in range(1, ncols):
                data[:, x] = data[:, x].cumsum()

    elif options.reverse_cumulate:
        if options.error_bars:
            raise ValueError("can not add error bars to cumulative histogram")
        # the mask is only indexed if at least one value is masked
        has_mask = data.mask.any()
        running = [0] * ncols
        for x in range(nrows - 1, -1, -1):
            for y in range(1, ncols):
                if has_mask and data.mask[x, y]:
                    continue
                data[x, y] += running[y]
                running[y] = data[x, y]

    if options.normalize:
        if options.error_bars:
            raise ValueError("can not add error bars to normalized histogram")
        if data.mask.any():
            m = [0] * ncols
            for x in range(nrows):
                for y in range(1, ncols):
                    if not data.mask[x, y]:
                        m[y] = max(m[y], float(data[x, y]))

            for y in range(1, ncols):
                if m[y] == 0:
                    m[y] = 1.0

            for x in range(nrows):
                for y in range(1, ncols):
                    data[x, y] = data[x, y] / m[y]
        else:
            for x in range(1, ncols):
                m = float(data[:, x].max())
                # same guard against all-zero columns as in the masked
                # branch above
                if m == 0:
                    m = 1.0
                data[:, x] /= m

    if options.legend:
        legend = options.legend.split(",")

    if options.dump:
        for d in data:
            print(d)

    if options.title:
        pylab.title(options.title)

    if options.xtitle:
        pylab.xlabel(options.xtitle)
    else:
        pylab.xlabel(legend[0])

    if options.ytitle:
        pylab.ylabel(options.ytitle)

    lines = []
    # use dummy_lines to workaround a bug in errorbars that
    # causes the line styles to be set incorrectly.
    dummy_lines = []
    new_legend = []

    if options.error_bars:
        if options.error_bars == "interleaved":
            step_size = 2
            max_size = len(legend)
        elif options.error_bars == "blocked":
            step_size = 1
            # integer division: range() below does not accept a float
            # NOTE(review): for 'blocked' input no yerr column is read
            # below and range(1, max_size) ends one column before the
            # last data column -- confirm the intended behaviour.
            max_size = (len(legend) - 1) // 2
    else:
        step_size = 1
        max_size = len(legend)

    if options.stacked:
        dummy_lines, new_legend = doStackedPlot(data, legend)
    else:
        nplotted = 0
        nskipped = 0
        for x in range(1, max_size, step_size):

            s = options.symbols[nplotted % len(options.symbols)]

            yvals = data[:, x]

            # drop the x values of masked y values
            xvals = numpy.ma.masked_array(data[:, 0],
                                          numpy.ma.getmask(yvals))

            xvals = xvals.compressed()
            yvals = yvals.compressed()

            if len(xvals) == 0:
                E.warn("skipped empty column %i: %s" % (x, legend[x]))
                nskipped += 1
                continue

            if options.error_bars == "interleaved":
                yerr = data[:, x + 1]
                yerr = yerr.compressed()
            else:
                yerr = None

            lines.append(pylab.errorbar(xvals, yvals, yerr=yerr, fmt=s))
            dummy_lines.append(pylab.plot(xvals, yvals, s))

            new_legend.append(legend[x])
            nplotted += 1

        E.info("nplotted=%i, nskipped=%i" % (nplotted, nskipped))

    # dummy_lines is filled by both the stacked and the line plot, while
    # ``lines`` is only filled by the latter.
    if len(dummy_lines) == 0:
        E.stop()
        return

    if options.legend_location != "none":
        pylab.figlegend(dummy_lines, new_legend, options.legend_location)

    if options.logscale:
        if "x" in options.logscale:
            pylab.gca().set_xscale('log')
        if "y" in options.logscale:
            pylab.gca().set_yscale('log')

    if options.xrange:
        pylab.xlim(options.xrange)

    if options.yrange:
        pylab.ylim(options.yrange)

    if options.function:
        xstart, xend = pylab.gca().get_xlim()
        increment = (xend - xstart) / 100.0
        # SECURITY: this evaluates an arbitrary expression taken from the
        # command line; only use it with trusted input.
        # exec() can not create a function-local name in Python 3, so
        # the lambda is obtained as the value of an eval() instead. The
        # local names stay visible to the expression as before.
        namespace = dict(locals())
        f = eval("lambda x: %s" % options.function, namespace)
        xvals, yvals = [], []
        # 101 points: both ends of the x range are included
        for x in range(0, 100):
            xvals.append(xstart)
            yvals.append(f(xstart))
            xstart += increment
        xvals.append(xstart)
        yvals.append(f(xstart))

        pylab.plot(xvals, yvals)

    if options.hardcopy:
        pylab.savefig(os.path.expanduser(options.hardcopy), dpi=options.dpi)
    else:
        pylab.show()

    E.stop()