def run(self):
    """Run the configured phylip program and collect its output.

    Calls ``self.prepareRun()``, starts ``self.mProgram`` through the shell
    inside ``self.mTempdir``, feeds it ``self.mOptions`` (one per line) on
    stdin and then parses whichever output file the program produced:

    * ``outtree`` -- parsed with ``TreeTools.Newick2Nexus``; taxa of every
      tree are mapped through ``self.mMapPhylip2Input`` and the result is
      stored in ``result.mNexus``.
    * ``outfile`` -- for ``dnadist``/``protdist`` read as a phylip matrix
      into ``result.mMatrix`` (headers mapped through
      ``self.mMapPhylip2Input``); for ``contrast`` handed to
      ``result.parseContrasts``.

    Returns:
        The populated ``PhylipResult`` instance.

    Raises:
        UsageError: if no program is configured or the program exits with
            a non-zero return code.
        NotImplementedError: if neither ``outtree`` nor ``outfile`` exists
            in the temporary directory.
    """
    self.prepareRun()

    if not self.mProgram:
        raise UsageError("no program specified.")

    process = subprocess.Popen(
        "%s" % (self.mProgram),
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=self.mTempdir,
        close_fds=True,
        # text-mode pipes, so that the str built below is accepted by
        # communicate() on Python 3 as well as Python 2.
        universal_newlines=True,
    )

    (out, err) = process.communicate("\n".join(self.mOptions) + "\n")

    if process.returncode != 0:
        raise UsageError(
            "Error in running phylip.\n%s\n%s\nTemporary directory was %s"
            % (out, err, self.mTempdir))

    # Parse output files that might have been created:
    result = PhylipResult()

    tree_path = "%s/outtree" % self.mTempdir
    outfile_path = "%s/outfile" % self.mTempdir

    if os.path.exists(tree_path):
        # parse tree file
        with open(tree_path, "r") as infile:
            nexus = TreeTools.Newick2Nexus(infile)

        for tree in nexus.trees:
            TreeTools.MapTaxa(tree, self.mMapPhylip2Input)

        result.mNexus = nexus

        if self.mLogLevel >= 1:
            print("# received tree with %i taxa" %
                  (len(TreeTools.GetTaxa(nexus.trees[0]))))

    elif os.path.exists(outfile_path):

        if self.mProgram in ("dnadist", "protdist"):
            with open(outfile_path, "r") as infile:
                result.mMatrix, row_headers, col_headers = \
                    MatlabTools.readMatrix(infile, format="phylip")

            result.mRowHeaders = [
                self.mMapPhylip2Input[x] for x in row_headers]
            # row and column headers deliberately share one list
            result.mColHeaders = result.mRowHeaders

        elif self.mProgram == "contrast":
            with open(outfile_path, "r") as infile:
                result.parseContrasts(infile)

    else:
        # The original raised a bare string here, which is itself a
        # TypeError; raise a real exception instead.
        raise NotImplementedError("other return types not implemented")

    if self.mLogLevel >= 2:
        print(out)

    # the temporary directory is only removed when not logging, so that
    # it can be inspected otherwise.
    if self.mLogLevel == 0:
        shutil.rmtree(self.mTempdir)

    return result
def main(argv=None):
    """Script main: plot one or more matrices as colour-coded images.

    Parses command line options in sys.argv, unless *argv* is given.

    Every input file (``-`` means stdin) is read with
    ``MatlabTools.readMatrix`` and drawn with ``plotMatrix`` into its own
    subplot; with ``--subplots`` a single matrix is split column-wise over
    several subplots.  The figure is written to ``--hardcopy`` if given,
    otherwise shown interactively.

    The process exits with status 0 without plotting if a matrix is uniform.
    """
    # NOTE(review): argv is normalised here but not forwarded to E.Start();
    # confirm whether E.Start() should receive it.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: plot_matrix.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to take from table.")

    parser.add_option("-a", "--hardcopy", dest="hardcopy", type="string",
                      help="write hardcopy to file.", metavar="FILE")

    parser.add_option("-f", "--file", dest="input_filename", type="string",
                      help="filename with table data.", metavar="FILE")

    parser.add_option("-p", "--plot", dest="plot", type="string",
                      help="plots to plot.", action="append")

    parser.add_option("-t", "--threshold", dest="threshold", type="float",
                      help="min threshold to use for counting method.")

    parser.add_option("-o", "--colours", dest="colours", type="int",
                      help="column with colour information.")

    parser.add_option("-l", "--plot-labels", dest="labels", type="string",
                      help="column labels for x and y in matched plots.")

    parser.add_option("-e", "--header-names", dest="headers",
                      action="store_true",
                      help="headers are supplied in matrix.")

    parser.add_option("--no-headers", dest="headers", action="store_false",
                      help="headers are not supplied in matrix.")

    parser.add_option("--normalize", dest="normalize", action="store_true",
                      help="normalize matrix.")

    parser.add_option("--palette", dest="palette", type="choice",
                      choices=("rainbow", "gray", "blue-white-red",
                               "autumn", "bone", "cool", "copper", "flag",
                               "gray", "hot", "hsv", "jet", "pink", "prism",
                               "spring", "summer", "winter", "spectral",
                               "RdBu", "RdGy", "BrBG", "BuGn", "Blues",
                               "Greens", "Reds", "Oranges", "Greys"),
                      help="colour palette [default=%default]")

    parser.add_option("--reverse-palette", dest="reverse_palette",
                      action="store_true",
                      help="reverse the palette [default=%default].")

    parser.add_option("", "--xrange", dest="xrange", type="string",
                      help="xrange.")

    parser.add_option("", "--yrange", dest="yrange", type="string",
                      help="yrange.")

    parser.add_option("", "--zrange", dest="zrange", type="string",
                      help="zrange.")

    parser.add_option("", "--xticks", dest="xticks", type="string",
                      help="xticks.")

    parser.add_option("", "--yticks", dest="yticks", type="string",
                      help="yticks.")

    parser.add_option("--bar-format", dest="bar_format", type="string",
                      help="format for ticks on colourbar.")

    parser.add_option("--title", dest="title", type="string",
                      help="title to use.")

    parser.add_option("--missing-value", dest="missing", type="float",
                      help="value to use for missing data.")

    parser.add_option("--subplots", dest="subplots", type="string",
                      help="split matrix into several subplots. Supply number of rows and columns separated by a comma.")

    parser.set_defaults(
        hardcopy=None,
        input_filename="-",
        columns="all",
        statistics=[],
        plot=[],
        threshold=0.0,
        labels="x,y",
        colours=None,
        xrange=None,
        yrange=None,
        zrange=None,
        palette=None,
        reverse_palette=False,
        xticks=None,
        yticks=None,
        normalize=False,
        bar_format="%1.1f",
        headers=True,
        missing=None,
        title=None,
        subplots=None)

    (options, args) = E.Start(parser)

    # import matplotlib/pylab. Has to be done here
    # for batch scripts without GUI.
    import matplotlib
    if options.hardcopy:
        matplotlib.use("cairo")
    import pylab

    if len(args) > 0:
        options.input_filename = ",".join(args)

    if options.xticks:
        options.xticks = options.xticks.split(",")
    if options.yticks:
        options.yticks = options.yticks.split(",")

    if options.xrange:
        options.xrange = [float(v) for v in options.xrange.split(",")]
    if options.yrange:
        options.yrange = [float(v) for v in options.yrange.split(",")]

    if options.columns != "all":
        # columns are given 1-based on the command line
        options.columns = [int(v) - 1 for v in options.columns.split(",")]

    filenames = options.input_filename.split(",")

    if len(filenames) > 1:
        # explicit floor division: subplot() needs integer grid sizes
        nsubrows = (len(filenames) // 3) + 1
        nsubcols = 3
    elif options.subplots:
        nsubrows, nsubcols = [int(v) for v in options.subplots.split(",")]
    else:
        nsubrows, nsubcols = 1, 1

    nsubplots = nsubrows * nsubcols

    # Setting up color maps
    if options.palette:
        if options.palette == "gray":
            # inverted gray scale: 0 maps to white, 1 maps to black
            _gray_data = {'red': ((0., 1, 1), (1., 0, 0)),
                          'green': ((0., 1, 1), (1., 0, 0)),
                          'blue': ((0., 1, 1), (1., 0, 0))}
            LUTSIZE = pylab.rcParams['image.lut']
            colors_gray = matplotlib.colors.LinearSegmentedColormap(
                'gray', _gray_data, LUTSIZE)

    plot_id = 0
    for filename in filenames:

        plot_id += 1
        pylab.subplot(nsubrows, nsubcols, plot_id)

        if filename == "-":
            infile = sys.stdin
        else:
            infile = open(filename, "r")

        try:
            matrix, row_headers, col_headers = MatlabTools.readMatrix(
                infile,
                numeric_type=numpy.float32,
                take=options.columns,
                headers=options.headers,
                missing=options.missing)
        finally:
            # do not leak handles when several files are plotted
            if infile is not sys.stdin:
                infile.close()

        if min(matrix.flat) == max(matrix.flat):
            options.stderr.write("matrix is uniform - no plotting done.\n")
            sys.exit(0)

        if options.normalize:
            v = max(matrix.flat)
            matrix = matrix / v

        if options.zrange:
            options.zrange = GetRange(matrix, options.zrange)

        nrows, ncols = matrix.shape

        if options.palette:
            if options.palette == "gray":
                color_scheme = colors_gray
            else:
                # attribute lookup instead of eval() on an option value
                if options.reverse_palette:
                    color_scheme = getattr(
                        pylab.cm, "%s_r" % options.palette)
                else:
                    color_scheme = getattr(pylab.cm, options.palette)
        else:
            color_scheme = None

        if options.zrange:
            # clip values to the requested range
            vmin, vmax = options.zrange
            matrix[matrix < vmin] = vmin
            matrix[matrix > vmax] = vmax
        else:
            vmin, vmax = None, None

        if options.subplots:

            if nsubcols > 1:
                # NOTE(review): the column increment is derived from nrows
                # and the value range is fixed to 0..100 in this branch -
                # kept as in the original, confirm whether intended.
                increment_x = int(float(nrows + 1) / nsubcols)
                increment_y = nrows

                x = 0
                y = 0
                for n in range(nsubplots):
                    pylab.subplot(nsubrows, nsubcols, plot_id)
                    plot_id += 1

                    print(" ".join(str(v) for v in (
                        n, "rows=", nsubrows, "cols=", nsubcols,
                        y, y + increment_y, x, x + increment_x)))
                    print(matrix[y:y + increment_y, x:x + increment_x].shape)
                    print(matrix.shape)
                    plotMatrix(matrix[y:y + increment_y, x:x + increment_x],
                               color_scheme,
                               row_headers[y:y + increment_y],
                               col_headers[x:x + increment_x],
                               0, 100, options)

                    x += increment_x

            elif nsubrows > 1:
                increment_x = int(float(ncols + 1) / nsubrows)

                x = 0
                for n in range(nsubplots):
                    pylab.subplot(nsubrows, nsubcols, plot_id)
                    plot_id += 1
                    plotMatrix(matrix[0:nrows, x:x + increment_x],
                               color_scheme,
                               row_headers,
                               col_headers[x:x + increment_x],
                               vmin, vmax, options)

                    x += increment_x
        else:
            plotMatrix(matrix, color_scheme, row_headers, col_headers,
                       vmin, vmax, options)

        if options.xrange:
            pylab.xlim(options.xrange)

        if options.yrange:
            pylab.ylim(options.yrange)

        if options.labels:
            xlabel, ylabel = options.labels.split(",")
            pylab.xlabel(xlabel)
            pylab.ylabel(ylabel)

        if not options.subplots:
            pylab.colorbar(format=options.bar_format)

        # No --title: fall back to the filename. Empty --title: no title.
        # Otherwise use the supplied title (the original always drew the
        # filename and ignored the option value).
        if options.title is None:
            pylab.title(filename)
        elif options.title != "":
            pylab.title(options.title)

    if options.hardcopy:
        pylab.savefig(os.path.expanduser(options.hardcopy))
    else:
        pylab.show()

    E.Stop()
def main(argv=None):
    """Script main: apply a chain of transformations to matrices on stdin.

    Parses command line options in sys.argv, unless *argv* is given.

    Input is read from stdin. Lines starting with ``#`` are dropped and
    lines starting with ``>`` separate several matrices.  Each matrix is
    read with ``MatlabTools.readMatrix``, every ``--method`` is applied in
    the order given (the output of one method is the input of the next)
    and the result is written with ``MatlabTools.writeMatrix``.

    Methods that need extra arguments consume them, in order, from the
    comma-separated ``--parameters`` list.

    Raises:
        IOError: if stdin contains no data lines.
        ValueError: if a method is applied to a matrix it does not support
            or a mask method is requested without the list of names.
    """
    # NOTE(review): argv is normalised here but not forwarded to E.Start();
    # confirm whether E.Start() should receive it.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: matrix2matrix.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-m", "--method", dest="methods", type="choice",
                      action="append",
                      choices=("normalize-by-min-diagonal",
                               "normalize-by-column",
                               "log", "ln", "negzero2value",
                               "set-diagonal",
                               "subtract-matrix", "mix-matrix",
                               "normalize-by-matrix",
                               "normalize-by-column-max",
                               "normalize-by-row-max",
                               "normalize-by-column-min",
                               "normalize-by-row-min",
                               "normalize-by-column-median",
                               "normalize-by-row-median",
                               "normalize-by-column-mean",
                               "normalize-by-row-mean",
                               "normalize-by-column-total",
                               "normalize-by-row-total",
                               "correspondence-analysis",
                               "normalize-by-value",
                               "add-value",
                               "sort-rows", "sort-columns",
                               "transpose",
                               "upper-bound", "lower-bound",
                               "subtract-first-col",
                               "multiply-by-value", "divide-by-value",
                               "mask-rows", "mask-columns",
                               "mask-rows-and-columns",
                               "symmetrize-mean", "symmetrize-max",
                               "symmetrize-min",
                               ),
                      help="""method to use [default=%default]""")

    parser.add_option("-s", "--scale", dest="scale", type="float",
                      help="factor to scale matrix by [default=%default].")

    parser.add_option("-f", "--format", dest="format", type="string",
                      help="output number format [default=%default].")

    parser.add_option("--rows-tsv-file", dest="filename_rows", type="string",
                      help="filename with rows to mask [default=%default].")

    parser.add_option("--columns-tsv-file", dest="filename_columns",
                      type="string",
                      help="filename with columns to mask [default=%default].")

    parser.add_option("-p", "--parameters", dest="parameters", type="string",
                      help="Parameters for various functions.")

    parser.add_option("-t", "--header-names", dest="headers",
                      action="store_true",
                      help="matrix has row/column headers.")

    parser.add_option("--no-headers", dest="headers", action="store_false",
                      help="matrix has no row/column headers.")

    parser.add_option("-a", "--value", dest="value", type="float",
                      help="value to use for various algorithms.")

    parser.add_option("-i", "--input-format", dest="input_format",
                      type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""input format for matrix.""")

    parser.add_option("-o", "--output-format", dest="output_format",
                      type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""output format for matrix.""")

    parser.add_option("--missing-value", dest="missing", type="float",
                      help="value to use for missing values. If not set, missing values will cause the script to fail [default=%default].")

    parser.set_defaults(
        methods=[],
        scale=1.0,
        headers=True,
        format="%6.4f",
        output_format="full",
        input_format="full",
        value=0.0,
        parameters="",
        write_separators=True,
        filename_rows=None,
        filename_columns=None,
        missing=None,
    )

    (options, args) = E.Start(parser)

    options.parameters = options.parameters.split(",")

    # real lists (not filter objects): they are indexed and measured below
    lines = [line for line in sys.stdin.readlines() if line[0] != "#"]

    if len(lines) == 0:
        raise IOError("no input")

    # indices of the ">" separator lines
    chunks = [index for index, line in enumerate(lines) if line[0] == ">"]

    if not chunks:
        # a single matrix without separator line
        options.write_separators = False
        chunks = [-1]

    chunks.append(len(lines))

    row_names = None
    column_names = None

    if options.filename_rows:
        with open(options.filename_rows, "r") as inf:
            row_names, n = IOTools.ReadList(inf)

    if options.filename_columns:
        with open(options.filename_columns, "r") as inf:
            column_names, n = IOTools.ReadList(inf)

    # reducers selected by the suffix of normalize-by-{row,column}-<suffix>
    reducers = (("max", max),
                ("min", min),
                ("median", numpy.median),
                ("mean", numpy.mean),
                ("total", sum))

    for chunk in range(len(chunks) - 1):

        try:
            raw_matrix, row_headers, col_headers = MatlabTools.readMatrix(
                StringIO.StringIO(
                    "".join(lines[chunks[chunk] + 1:chunks[chunk + 1]])),
                format=options.input_format,
                headers=options.headers,
                missing=options.missing)
        except ValueError as msg:
            E.warn("matrix could not be read: %s" % msg)
            continue

        nrows, ncols = raw_matrix.shape

        E.debug("read matrix: %i x %i, %i row titles, %i colum titles" %
                (nrows, ncols, len(row_headers), len(col_headers)))

        # index of the next unused entry in options.parameters
        parameter = 0

        for method in options.methods:

            # work on a copy; raw_matrix keeps the input of this step
            matrix = numpy.reshape(numpy.array(raw_matrix), raw_matrix.shape)

            # Methods combining two matrices first read the second one.
            # This must NOT be part of the if/elif chain below, otherwise
            # the branches applying other_matrix are never reached.
            if method in ("normalize-by-matrix", "subtract-matrix",
                          "mix-matrix", "add-matrix"):

                with open(options.parameters[parameter], "r") as inf:
                    other_matrix, other_row_headers, other_col_headers = \
                        MatlabTools.ReadMatrix(inf, headers=options.headers)

                other_nrows, other_ncols = other_matrix.shape

                if options.loglevel >= 2:
                    options.stdlog.write(
                        "# read second matrix from %s: %i x %i, %i row titles, %i colum titles.\n" %
                        (options.parameters[parameter],
                         other_nrows, other_ncols,
                         len(other_row_headers), len(other_col_headers)))

                parameter += 1

            if method == "normalize-by-min-diagonal":
                for x in range(nrows):
                    for y in range(ncols):
                        m = min(raw_matrix[x, x], raw_matrix[y, y])
                        if m > 0:
                            matrix[x, y] = raw_matrix[x, y] / m

            elif method == "normalize-by-column":
                if nrows != ncols:
                    raise ValueError(
                        "only supported for symmetric matrices.")

                for x in range(nrows):
                    for y in range(ncols):
                        if raw_matrix[y, y] > 0:
                            matrix[x, y] = raw_matrix[x, y] / raw_matrix[y, y]

            elif method == "normalize-by-value":
                matrix = raw_matrix / float(options.parameters[parameter])
                parameter += 1

            elif method == "normalize-by-row":
                if nrows != ncols:
                    raise ValueError(
                        "only supported for symmetric matrices.")

                for x in range(nrows):
                    for y in range(ncols):
                        # guard the divisor actually used (row diagonal)
                        if raw_matrix[x, x] > 0:
                            matrix[x, y] = raw_matrix[x, y] / raw_matrix[x, x]

            elif method == "subtract-first-col":
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] -= raw_matrix[x, 0]

            elif method.startswith("normalize-by-column"):
                for suffix, reducer in reducers:
                    if method.endswith(suffix):
                        f = reducer
                        break

                for y in range(ncols):
                    m = f(matrix[:, y])
                    if m != 0:
                        for x in range(nrows):
                            matrix[x, y] = matrix[x, y] / m

            elif method.startswith("normalize-by-row"):
                for suffix, reducer in reducers:
                    if method.endswith(suffix):
                        f = reducer
                        break

                for x in range(nrows):
                    m = f(matrix[x, :])
                    if m != 0:
                        for y in range(ncols):
                            matrix[x, y] = raw_matrix[x, y] / m

            elif method == "negzero2value":
                # set zero/negative values to a value
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] <= 0:
                            matrix[x, y] = options.value

            elif method == "minmax":
                # put the smaller of each mirrored pair at [x, y] and the
                # larger at [y, x]
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y], matrix[y, x] = \
                            min(matrix[x, y], matrix[y, x]), \
                            max(matrix[x, y], matrix[y, x])

            elif method == "log":
                # apply log10 to all positive values.
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] > 0:
                            matrix[x, y] = math.log10(matrix[x, y])

            elif method == "ln":
                # apply natural log to all positive values.
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] > 0:
                            matrix[x, y] = math.log(matrix[x, y])

            elif method == "transpose":
                matrix = numpy.transpose(matrix)
                row_headers, col_headers = col_headers, row_headers
                nrows, ncols = ncols, nrows

            elif method == "mul":
                matrix = numpy.dot(matrix, numpy.transpose(matrix))
                col_headers = row_headers
                # the product is nrows x nrows
                ncols = nrows

            elif method == "multiply-by-value":
                matrix *= options.value

            elif method == "divide-by-value":
                matrix /= options.value

            elif method == "add-value":
                matrix += options.value

            elif method == "angle":
                # write angles between col vectors
                v1 = numpy.sqrt(numpy.sum(numpy.power(matrix, 2), 0))
                matrix = numpy.dot(numpy.transpose(matrix), matrix)
                row_headers = col_headers
                nrows = ncols
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] /= v1[x] * v1[y]

            elif method == "euclid":
                # convert to euclidean distance matrix
                # (builtin float: the numpy.float alias no longer exists)
                matrix = numpy.zeros((ncols, ncols), float)
                for c1 in range(0, ncols - 1):
                    for c2 in range(c1 + 1, ncols):
                        for r in range(0, nrows):
                            d = raw_matrix[r][c1] - raw_matrix[r][c2]
                            matrix[c1, c2] += (d * d)
                        matrix[c2, c1] = matrix[c1, c2]
                matrix = numpy.sqrt(matrix)
                row_headers = col_headers
                nrows = ncols

            elif method.startswith("symmetrize"):
                f = method.split("-")[1]
                if f == "max":
                    f = max
                elif f == "min":
                    f = min
                elif f == "mean":
                    f = lambda x, y: float(x + y) / 2

                if nrows != ncols:
                    raise ValueError(
                        "symmetrize only available for symmetric matrices")
                if row_headers != col_headers:
                    raise ValueError(
                        "symmetrize not available for permuted matrices")
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] = matrix[y, x] = f(
                            matrix[x, y], matrix[y, x])

            elif method == "sub":
                matrix = options.value - matrix

            elif method in ("lower-bound", "upper-bound"):
                boundary = float(options.parameters[parameter])
                new_value = float(options.parameters[parameter + 1])
                parameter += 2
                if method == "upper-bound":
                    for x in range(nrows):
                        for y in range(ncols):
                            if matrix[x, y] > boundary:
                                matrix[x, y] = new_value
                else:
                    for x in range(nrows):
                        for y in range(ncols):
                            if matrix[x, y] < boundary:
                                matrix[x, y] = new_value

            elif method == "subtract-matrix":
                matrix = matrix - other_matrix

            elif method == "add-matrix":
                matrix = matrix + other_matrix

            elif method == "normalize-by-matrix":
                # set 0s to 1 in the other matrix
                for x in range(nrows):
                    for y in range(ncols):
                        if other_matrix[x, y] == 0:
                            other_matrix[x, y] = 1.0

                matrix = matrix / other_matrix

            elif method == "mix-matrix":
                # copy the upper triangle of the other matrix
                for x in range(len(other_row_headers) - 1):
                    for y in range(x + 1, len(other_col_headers)):
                        matrix[x, y] = other_matrix[x, y]

            elif method == "set-diagonal":
                value = float(options.parameters[parameter])
                for x in range(min(nrows, ncols)):
                    matrix[x, x] = value
                parameter += 1

            elif method == "correspondence-analysis":
                row_indices, col_indices = CorrespondenceAnalysis.GetIndices(
                    raw_matrix)
                map_row_new2old = numpy.argsort(row_indices)
                map_col_new2old = numpy.argsort(col_indices)

                matrix, row_headers, col_headers = \
                    CorrespondenceAnalysis.GetPermutatedMatrix(
                        raw_matrix,
                        map_row_new2old,
                        map_col_new2old,
                        row_headers=row_headers,
                        col_headers=col_headers)

            elif method == "mask-rows":
                if row_names is None:
                    raise ValueError(
                        "mask-rows requires --rows-tsv-file")
                r = set(row_names)
                for x in range(len(row_headers)):
                    if row_headers[x] in r:
                        matrix[x, :] = options.value

            elif method == "mask-columns":
                if column_names is None:
                    raise ValueError(
                        "mask-columns requires --columns-tsv-file")
                r = set(column_names)
                for x in range(len(col_headers)):
                    if col_headers[x] in r:
                        matrix[:, x] = options.value

            elif method == "mask-rows-and-columns":
                if row_names is None or column_names is None:
                    raise ValueError(
                        "mask-rows-and-columns requires --rows-tsv-file "
                        "and --columns-tsv-file")
                r = set(row_names)
                c = set(column_names)
                for x in range(len(row_headers)):
                    for y in range(len(col_headers)):
                        if row_headers[x] in r and col_headers[y] in c:
                            matrix[x, y] = options.value

            # the result of this step is the input of the next one
            raw_matrix = numpy.reshape(numpy.array(matrix), matrix.shape)

        # Also covers simple re-formatting jobs without any method.
        matrix = raw_matrix

        if options.write_separators:
            options.stdout.write(lines[chunks[chunk]])

        MatlabTools.writeMatrix(sys.stdout, matrix,
                                value_format=options.format,
                                format=options.output_format,
                                row_headers=row_headers,
                                col_headers=col_headers)
filename_info = None, filename_tissues = None, headers = True, aggregate = "mean", value_format = "%5.2f", method="counts") (options, args) = E.Start( parser ) if not options.filename_map: raise "please supply filename mapping probesets to identifiers." map_probe2locus = IOTools.ReadMap( open(options.filename_map, "r") ) matrix, row_headers, col_headers = MatlabTools.readMatrix( sys.stdin, format="full", headers = options.headers ) if options.filename_tissues: tissues, nerrors = IOTools.ReadList( open(options.filename_tissues, "r") ) tissues = set(tissues) columns = [] for x in range(len(col_headers)): if col_headers[x] in tissues: columns.append( x ) else: columns = range(len(col_headers)) nrows, ncols = len(row_headers), len(col_headers) ninput, noutput, nkept = 0, 0, 0
def main(argv=None):
    """Script main: plot one or more matrices as colour-coded images.

    Parses command line options in sys.argv, unless *argv* is given.

    Every input file (``-`` means stdin) is read with
    ``MatlabTools.readMatrix`` and drawn with ``plotMatrix`` into its own
    subplot; with ``--subplots`` a single matrix is split column-wise over
    several subplots.  The figure is written to ``--hardcopy`` if given,
    otherwise shown interactively.

    The process exits with status 0 without plotting if a matrix is uniform.
    """
    # NOTE(review): argv is normalised here but not forwarded to E.Start();
    # confirm whether E.Start() should receive it.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: plot_matrix.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to take from table.")

    parser.add_option("-a", "--hardcopy", dest="hardcopy", type="string",
                      help="write hardcopy to file.", metavar="FILE")

    parser.add_option("-f", "--file", dest="input_filename", type="string",
                      help="filename with table data.", metavar="FILE")

    parser.add_option("-p", "--plot", dest="plot", type="string",
                      help="plots to plot.", action="append")

    parser.add_option("-t", "--threshold", dest="threshold", type="float",
                      help="min threshold to use for counting method.")

    parser.add_option("-o", "--colours", dest="colours", type="int",
                      help="column with colour information.")

    parser.add_option("-l", "--plot-labels", dest="labels", type="string",
                      help="column labels for x and y in matched plots.")

    parser.add_option("-e", "--header-names", dest="headers",
                      action="store_true",
                      help="headers are supplied in matrix.")

    parser.add_option("--no-headers", dest="headers", action="store_false",
                      help="headers are not supplied in matrix.")

    parser.add_option("--normalize", dest="normalize", action="store_true",
                      help="normalize matrix.")

    parser.add_option("--palette", dest="palette", type="choice",
                      choices=("rainbow", "gray", "blue-white-red",
                               "autumn", "bone", "cool", "copper", "flag",
                               "gray", "hot", "hsv", "jet", "pink", "prism",
                               "spring", "summer", "winter", "spectral",
                               "RdBu", "RdGy", "BrBG", "BuGn", "Blues",
                               "Greens", "Reds", "Oranges", "Greys"),
                      help="colour palette [default=%default]")

    parser.add_option("--reverse-palette", dest="reverse_palette",
                      action="store_true",
                      help="reverse the palette [default=%default].")

    parser.add_option("", "--xrange", dest="xrange", type="string",
                      help="xrange.")

    parser.add_option("", "--yrange", dest="yrange", type="string",
                      help="yrange.")

    parser.add_option("", "--zrange", dest="zrange", type="string",
                      help="zrange.")

    parser.add_option("", "--xticks", dest="xticks", type="string",
                      help="xticks.")

    parser.add_option("", "--yticks", dest="yticks", type="string",
                      help="yticks.")

    parser.add_option("--bar-format", dest="bar_format", type="string",
                      help="format for ticks on colourbar.")

    parser.add_option("--title", dest="title", type="string",
                      help="title to use.")

    parser.add_option("--missing-value", dest="missing", type="float",
                      help="value to use for missing data.")

    parser.add_option("--subplots", dest="subplots", type="string",
                      help="split matrix into several subplots. Supply number of rows and columns separated by a comma.")

    parser.set_defaults(
        hardcopy=None,
        input_filename="-",
        columns="all",
        statistics=[],
        plot=[],
        threshold=0.0,
        labels="x,y",
        colours=None,
        xrange=None,
        yrange=None,
        zrange=None,
        palette=None,
        reverse_palette=False,
        xticks=None,
        yticks=None,
        normalize=False,
        bar_format="%1.1f",
        headers=True,
        missing=None,
        title=None,
        subplots=None)

    (options, args) = E.Start(parser)

    # import matplotlib/pylab. Has to be done here
    # for batch scripts without GUI.
    import matplotlib
    if options.hardcopy:
        matplotlib.use("cairo")
    import pylab

    if len(args) > 0:
        options.input_filename = ",".join(args)

    if options.xticks:
        options.xticks = options.xticks.split(",")
    if options.yticks:
        options.yticks = options.yticks.split(",")

    if options.xrange:
        options.xrange = [float(v) for v in options.xrange.split(",")]
    if options.yrange:
        options.yrange = [float(v) for v in options.yrange.split(",")]

    if options.columns != "all":
        # columns are given 1-based on the command line
        options.columns = [int(v) - 1 for v in options.columns.split(",")]

    filenames = options.input_filename.split(",")

    if len(filenames) > 1:
        # Floor division: "/" yields a float under Python 3 and subplot()
        # needs integer grid sizes.
        nsubrows = (len(filenames) // 3) + 1
        nsubcols = 3
    elif options.subplots:
        nsubrows, nsubcols = [int(v) for v in options.subplots.split(",")]
    else:
        nsubrows, nsubcols = 1, 1

    nsubplots = nsubrows * nsubcols

    # Setting up color maps
    if options.palette:
        if options.palette == "gray":
            # inverted gray scale: 0 maps to white, 1 maps to black
            _gray_data = {
                'red': ((0., 1, 1), (1., 0, 0)),
                'green': ((0., 1, 1), (1., 0, 0)),
                'blue': ((0., 1, 1), (1., 0, 0))
            }
            LUTSIZE = pylab.rcParams['image.lut']
            colors_gray = matplotlib.colors.LinearSegmentedColormap(
                'gray', _gray_data, LUTSIZE)

    plot_id = 0
    for filename in filenames:

        plot_id += 1
        pylab.subplot(nsubrows, nsubcols, plot_id)

        if filename == "-":
            infile = sys.stdin
        else:
            infile = IOTools.openFile(filename, "r")

        try:
            matrix, row_headers, col_headers = MatlabTools.readMatrix(
                infile,
                numeric_type=numpy.float32,
                take=options.columns,
                headers=options.headers,
                missing=options.missing)
        finally:
            # do not leak handles when several files are plotted
            if infile is not sys.stdin:
                infile.close()

        if min(matrix.flat) == max(matrix.flat):
            options.stderr.write("matrix is uniform - no plotting done.\n")
            sys.exit(0)

        if options.normalize:
            v = max(matrix.flat)
            matrix = matrix / v

        if options.zrange:
            options.zrange = GetRange(matrix, options.zrange)

        nrows, ncols = matrix.shape

        if options.palette:
            if options.palette == "gray":
                color_scheme = colors_gray
            else:
                # attribute lookup instead of eval() on an option value
                if options.reverse_palette:
                    color_scheme = getattr(
                        pylab.cm, "%s_r" % options.palette)
                else:
                    color_scheme = getattr(pylab.cm, options.palette)
        else:
            color_scheme = None

        if options.zrange:
            # clip values to the requested range
            vmin, vmax = options.zrange
            matrix[matrix < vmin] = vmin
            matrix[matrix > vmax] = vmax
        else:
            vmin, vmax = None, None

        if options.subplots:

            if nsubcols > 1:
                # NOTE(review): the column increment is derived from nrows
                # and the value range is fixed to 0..100 in this branch -
                # kept as in the original, confirm whether intended.
                increment_x = int(float(nrows + 1) / nsubcols)
                increment_y = nrows

                x = 0
                y = 0
                for n in range(nsubplots):
                    pylab.subplot(nsubrows, nsubcols, plot_id)
                    plot_id += 1

                    print(n, "rows=", nsubrows, "cols=", nsubcols,
                          y, y + increment_y, x, x + increment_x)
                    print(matrix[y:y + increment_y, x:x + increment_x].shape)
                    print(matrix.shape)
                    plotMatrix(matrix[y:y + increment_y, x:x + increment_x],
                               color_scheme,
                               row_headers[y:y + increment_y],
                               col_headers[x:x + increment_x],
                               0, 100, options)

                    x += increment_x

            elif nsubrows > 1:
                increment_x = int(float(ncols + 1) / nsubrows)

                x = 0
                for n in range(nsubplots):
                    pylab.subplot(nsubrows, nsubcols, plot_id)
                    plot_id += 1
                    plotMatrix(matrix[0:nrows, x:x + increment_x],
                               color_scheme,
                               row_headers,
                               col_headers[x:x + increment_x],
                               vmin, vmax, options)

                    x += increment_x
        else:
            plotMatrix(matrix, color_scheme, row_headers, col_headers,
                       vmin, vmax, options)

        if options.xrange:
            pylab.xlim(options.xrange)

        if options.yrange:
            pylab.ylim(options.yrange)

        if options.labels:
            xlabel, ylabel = options.labels.split(",")
            pylab.xlabel(xlabel)
            pylab.ylabel(ylabel)

        if not options.subplots:
            pylab.colorbar(format=options.bar_format)

        # No --title: fall back to the filename. Empty --title: no title.
        # Otherwise use the supplied title (the original always drew the
        # filename and ignored the option value).
        if options.title is None:
            pylab.title(filename)
        elif options.title != "":
            pylab.title(options.title)

    if options.hardcopy:
        pylab.savefig(os.path.expanduser(options.hardcopy))
    else:
        pylab.show()

    E.Stop()
def main(argv=None):
    """Script main: compute chi-squared statistics on matrices from stdin.

    Parses command line options in sys.argv, unless *argv* is given.

    Input is read from stdin. Lines starting with ``#`` are dropped and
    lines starting with ``>`` separate several matrices.  Depending on
    ``--method``:

    * ``chi-squared`` -- ``Stats.doChiSquaredTest`` is run on pairs of rows
      selected by ``--iteration`` (``pairwise`` or ``all-vs-all``); pairs
      for which the test raises ``ValueError`` are counted as skipped.
    * ``pearson-chi-squared`` -- ``Stats.doPearsonChiSquaredTest`` is run on
      every column of a 2 x n table.  The first ``--parameters`` entry is
      the expected probability, or, when the input has ``>`` separators,
      a file mapping matrix identifiers to probabilities.

    Raises:
        ValueError: if ``pearson-chi-squared`` is requested without a
            parameter, or for a matrix that does not have exactly two rows.
    """
    # NOTE(review): argv is normalised here but not forwarded to E.Start();
    # confirm whether E.Start() should receive it.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: matrix2stats.py 2795 2009-09-16 15:29:23Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("chi-squared", "pearson-chi-squared"),
                      help="statistical methods to apply.")

    parser.add_option("-t", "--header-names", dest="headers",
                      action="store_true",
                      help="matrix has row/column headers.")

    parser.add_option("--no-headers", dest="headers", action="store_false",
                      help="matrix has no row/column headers.")

    parser.add_option("-i", "--input-format", dest="input_format",
                      type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""input format for matrix.""")

    parser.add_option("-o", "--output-format", dest="output_format",
                      type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""output format for matrix.""")

    parser.add_option("-p", "--parameters", dest="parameters",
                      action="append", type="string",
                      help="parameters for various functions.")

    parser.add_option("-a", "--iteration", dest="iteration", type="choice",
                      choices=("pairwise", "all-vs-all"),
                      help="""how to compute stats [%default].""")

    parser.set_defaults(
        method="chi-squared",
        headers=True,
        value_format="%6.4f",
        pvalue_format="%6.4e",
        input_format="full",
        write_separators=True,
        parameters=[],
        iteration=None,
    )

    (options, args) = E.Start(parser)

    lines = [line for line in sys.stdin.readlines() if line[0] != "#"]

    # indices of the ">" separator lines
    chunks = [index for index, line in enumerate(lines) if line[0] == ">"]

    if not chunks:
        # a single matrix without separator line
        options.write_separators = False
        chunks = [-1]

    chunks.append(len(lines))

    ninput, noutput, nskipped = 0, 0, 0

    if options.write_separators:
        options.stdout.write("test\t")

    header_prefix = ""

    if options.method == "chi-squared":
        header_prefix = "observed\texpected"
        options.stdout.write("\t".join(
            (header_prefix, "n", "min", "max", "chi", "df", "P",
             "passed", "phi")) + "\n")

    elif options.method in ("pearson-chi-squared",):
        options.stdout.write("column\t")
        options.stdout.write("\t".join(
            (header_prefix, "n", "prob", "obs", "exp", "chi", "df", "P",
             "passed", "phi")) + "\n")

        if len(options.parameters) == 0:
            # The original raised a bare string, which is itself a
            # TypeError; raise a real exception with the same message.
            raise ValueError(
                "out of parameters - please supply probability or filename with probabilities.")

        param = options.parameters[0]
        del options.parameters[0]

        if options.write_separators:
            # one probability per matrix identifier
            probabilities = IOTools.ReadMap(
                IOTools.openFile(param, "r"), map_functions=(str, float))
        else:
            probability = float(param)

    for x in range(len(chunks) - 1):
        ninput += 1
        matrix, row_headers, col_headers = MatlabTools.readMatrix(
            StringIO("".join(lines[chunks[x] + 1:chunks[x + 1]])),
            format=options.input_format,
            headers=options.headers)
        nrows, ncols = matrix.shape

        if options.loglevel >= 2:
            options.stdlog.write(
                "# read matrix: %i x %i, %i row titles, %i colum titles.\n" %
                (nrows, ncols, len(row_headers), len(col_headers)))

        if options.write_separators:
            # separator line without the leading ">" and trailing newline
            options.stdout.write(lines[chunks[x]][1:-1] + "\t")

        nheaders = len(row_headers)
        if options.iteration == "pairwise":
            pairs = [(row1, row2)
                     for row1 in range(nheaders)
                     for row2 in range(row1 + 1, nheaders)]
        elif options.iteration == "all-vs-all":
            pairs = [(row1, row2)
                     for row1 in range(nheaders)
                     for row2 in range(nheaders)
                     if row1 != row2]
        else:
            pairs = []

        if options.method == "chi-squared":

            for row1, row2 in pairs:
                row_header1 = row_headers[row1]
                row_header2 = row_headers[row2]
                try:
                    result = Stats.doChiSquaredTest(
                        numpy.vstack((matrix[row1], matrix[row2])))
                except ValueError:
                    nskipped += 1
                    continue

                noutput += 1
                options.stdout.write("\t".join((
                    "%s" % row_header1,
                    "%s" % row_header2,
                    "%i" % result.mSampleSize,
                    "%i" % min(matrix.flat),
                    "%i" % max(matrix.flat),
                    options.value_format % result.mChiSquaredValue,
                    "%i" % result.mDegreesFreedom,
                    options.pvalue_format % result.mProbability,
                    "%s" % result.mSignificance,
                    options.value_format % result.mPhi)) + "\n")

        elif options.method == "pearson-chi-squared":

            if nrows != 2:
                raise ValueError("only implemented for 2xn table")

            if options.write_separators:
                # first word of the separator line identifies the matrix
                matrix_id = re.match(
                    r"(\S+)", lines[chunks[x]][1:-1]).groups()[0]
                probability = probabilities[matrix_id]

            for col in range(ncols):
                options.stdout.write("%s\t" % col_headers[col])
                result = Stats.doPearsonChiSquaredTest(
                    probability, sum(matrix[:, col]), matrix[0, col])
                options.stdout.write("\t".join((
                    "%i" % result.mSampleSize,
                    "%f" % probability,
                    "%i" % result.mObserved,
                    "%f" % result.mExpected,
                    options.value_format % result.mChiSquaredValue,
                    "%i" % result.mDegreesFreedom,
                    options.pvalue_format % result.mProbability,
                    "%s" % result.mSignificance,
                    options.value_format % result.mPhi)))
                if col < ncols - 1:
                    options.stdout.write("\n")
                    if options.write_separators:
                        # repeat the matrix label on each output row
                        options.stdout.write(lines[chunks[x]][1:-1] + "\t")

            options.stdout.write("\n")

    E.info("# ninput=%i, noutput=%i, nskipped=%i\n" %
           (ninput, noutput, nskipped))

    E.Stop()
def main(argv=None):
    """Script main: transform matrices read from stdin and write them out.

    Parses command line options in sys.argv, unless *argv* is given.
    NOTE(review): *argv* is defaulted here but is not forwarded to
    ``E.Start`` by this function - confirm whether ``E.Start`` should
    receive it.

    Lines of stdin starting with ``#`` are dropped. Lines starting with
    ``>`` separate several matrices ("chunks"); without any such line the
    whole input is treated as a single matrix and no separators are
    written. Every method given via ``--method`` is applied in order to
    each matrix, the result of one method being the input of the next.
    Methods that need arguments consume them, in order, from the
    comma-separated ``--parameters`` list.

    Raises:
        IOError: if no input is left after removing comment lines.
        ValueError: if a method is applied to a matrix it does not
            support (e.g. a non-square matrix), or if a masking method
            is requested without the corresponding file of names.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: matrix2matrix.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-m", "--method", dest="methods", type="choice", action="append",
                      choices=(
                          "normalize-by-min-diagonal",
                          "normalize-by-column",
                          "log",
                          "ln",
                          "negzero2value",
                          "set-diagonal",
                          "subtract-matrix",
                          "mix-matrix",
                          "normalize-by-matrix",
                          "normalize-by-column-max",
                          "normalize-by-row-max",
                          "normalize-by-column-min",
                          "normalize-by-row-min",
                          "normalize-by-column-median",
                          "normalize-by-row-median",
                          "normalize-by-column-mean",
                          "normalize-by-row-mean",
                          "normalize-by-column-total",
                          "normalize-by-row-total",
                          "correspondence-analysis",
                          "normalize-by-value",
                          "add-value",
                          "sort-rows",
                          "sort-columns",
                          "transpose",
                          "upper-bound",
                          "lower-bound",
                          "subtract-first-col",
                          "multiply-by-value",
                          "divide-by-value",
                          "mask-rows",
                          "mask-columns",
                          "mask-rows-and-columns",
                          "symmetrize-mean",
                          "symmetrize-max",
                          "symmetrize-min",
                      ),
                      help="""method to use [default=%default]""")

    parser.add_option("-s", "--scale", dest="scale", type="float",
                      help="factor to scale matrix by [default=%default].")

    parser.add_option("-f", "--format", dest="format", type="string",
                      help="output number format [default=%default].")

    parser.add_option("--filename-rows", dest="filename_rows", type="string",
                      help="filename with rows to mask [default=%default].")

    parser.add_option("--filename-columns", dest="filename_columns", type="string",
                      help="filename with columns to mask [default=%default].")

    parser.add_option("-p", "--parameters", dest="parameters", type="string",
                      help="Parameters for various functions.")

    parser.add_option("-t", "--headers", dest="headers", action="store_true",
                      help="matrix has row/column headers.")

    parser.add_option("--no-headers", dest="headers", action="store_false",
                      help="matrix has no row/column headers.")

    parser.add_option("-a", "--value", dest="value", type="float",
                      help="value to use for various algorithms.")

    parser.add_option("-i", "--input-format", dest="input_format", type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""input format for matrix.""")

    parser.add_option("-o", "--output-format", dest="output_format", type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""output format for matrix.""")

    parser.add_option(
        "--missing", dest="missing", type="float",
        help="value to use for missing values. If not set, missing values will cause the script to fail [default=%default].")

    parser.set_defaults(
        methods=[],
        scale=1.0,
        headers=True,
        format="%6.4f",
        output_format="full",
        input_format="full",
        value=0.0,
        parameters="",
        write_separators=True,
        filename_rows=None,
        filename_columns=None,
        missing=None,
    )

    (options, args) = E.Start(parser)

    options.parameters = options.parameters.split(",")

    # Real lists are required here (chunks is appended to below), so use
    # list comprehensions rather than filter().
    lines = [line for line in sys.stdin.readlines() if line[0] != "#"]

    if len(lines) == 0:
        raise IOError("no input")

    # indices of the ">" separator lines; chunk i spans the lines between
    # chunks[i] and chunks[i + 1] (both exclusive).
    chunks = [i for i, line in enumerate(lines) if line[0] == ">"]

    if not chunks:
        # no separators: a single matrix starting at the first line
        options.write_separators = False
        chunks = [-1]

    chunks.append(len(lines))

    # names used by the mask-* methods; None means "file not supplied"
    row_names = None
    column_names = None

    if options.filename_rows:
        with open(options.filename_rows, "r") as infile:
            row_names, n = IOTools.ReadList(infile)
    if options.filename_columns:
        with open(options.filename_columns, "r") as infile:
            column_names, n = IOTools.ReadList(infile)

    # aggregate functions selected by the suffix of the
    # normalize-by-column-* / normalize-by-row-* methods
    aggregators = {
        "max": max,
        "min": min,
        "median": numpy.median,
        "mean": numpy.mean,
        "total": sum,
    }

    for chunk in range(len(chunks) - 1):

        try:
            raw_matrix, row_headers, col_headers = MatlabTools.readMatrix(
                StringIO.StringIO(
                    "".join(lines[chunks[chunk] + 1:chunks[chunk + 1]])),
                format=options.input_format,
                headers=options.headers,
                missing=options.missing)
        except ValueError as msg:
            E.warn("matrix could not be read: %s" % msg)
            continue

        nrows, ncols = raw_matrix.shape

        E.debug("read matrix: %i x %i, %i row titles, %i colum titles" %
                (nrows, ncols, len(row_headers), len(col_headers)))

        # index of the next unused entry in options.parameters
        parameter = 0

        for method in options.methods:

            # work on a copy so that raw_matrix keeps the input of this step
            matrix = numpy.reshape(numpy.array(raw_matrix), raw_matrix.shape)

            # Methods combining two matrices first read the second matrix.
            # This must be a separate ``if``: as part of the dispatch chain
            # below it would shadow the branches that use other_matrix.
            if method in ("normalize-by-matrix", "subtract-matrix",
                          "mix-matrix", "add-matrix"):

                with open(options.parameters[parameter], "r") as infile:
                    other_matrix, other_row_headers, other_col_headers = \
                        MatlabTools.ReadMatrix(infile, headers=options.headers)

                other_nrows, other_ncols = other_matrix.shape

                if options.loglevel >= 2:
                    options.stdlog.write(
                        "# read second matrix from %s: %i x %i, %i row titles, %i colum titles.\n" %
                        (options.parameters[parameter],
                         other_nrows, other_ncols,
                         len(other_row_headers), len(other_col_headers)))

                parameter += 1

            if method == "normalize-by-min-diagonal":
                for x in range(nrows):
                    for y in range(ncols):
                        m = min(raw_matrix[x, x], raw_matrix[y, y])
                        if m > 0:
                            matrix[x, y] = raw_matrix[x, y] / m

            elif method == "normalize-by-column":
                if nrows != ncols:
                    raise ValueError("only supported for symmeric matrices.")

                for x in range(nrows):
                    for y in range(ncols):
                        if raw_matrix[y, y] > 0:
                            matrix[x, y] = raw_matrix[x, y] / raw_matrix[y, y]

            elif method == "normalize-by-value":
                matrix = raw_matrix / float(options.parameters[parameter])
                parameter += 1

            elif method == "normalize-by-row":
                if nrows != ncols:
                    raise ValueError("only supported for symmeric matrices.")

                for x in range(nrows):
                    for y in range(ncols):
                        # guard the element that is actually divided by
                        if raw_matrix[x, x] > 0:
                            matrix[x, y] = raw_matrix[x, y] / raw_matrix[x, x]

            elif method == "subtract-first-col":
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] -= raw_matrix[x, 0]

            elif method.startswith("normalize-by-column"):
                suffix = method.split("-")[-1]
                if suffix not in aggregators:
                    raise ValueError("unknown normalization: %s" % method)
                f = aggregators[suffix]

                for y in range(ncols):
                    m = f(matrix[:, y])
                    if m != 0:
                        for x in range(nrows):
                            matrix[x, y] = matrix[x, y] / m

            elif method.startswith("normalize-by-row"):
                suffix = method.split("-")[-1]
                if suffix not in aggregators:
                    raise ValueError("unknown normalization: %s" % method)
                f = aggregators[suffix]

                for x in range(nrows):
                    m = f(matrix[x, :])
                    if m != 0:
                        for y in range(ncols):
                            matrix[x, y] = raw_matrix[x, y] / m

            elif method == "negzero2value":
                # set zero/negative values to a value
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] <= 0:
                            matrix[x, y] = options.value

            elif method == "minmax":
                # put the smaller of each mirrored pair at [x, y] and the
                # larger at [y, x]
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y], matrix[y, x] = \
                            min(matrix[x, y], matrix[y, x]), \
                            max(matrix[x, y], matrix[y, x])

            elif method == "log":
                # apply log10 to all positive values
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] > 0:
                            matrix[x, y] = math.log10(matrix[x, y])

            elif method == "ln":
                # apply the natural logarithm to all positive values
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] > 0:
                            matrix[x, y] = math.log(matrix[x, y])

            elif method == "transpose":
                matrix = numpy.transpose(matrix)
                row_headers, col_headers = col_headers, row_headers
                nrows, ncols = ncols, nrows

            elif method == "mul":
                matrix = numpy.dot(matrix, numpy.transpose(matrix))
                col_headers = row_headers
                # the product is nrows x nrows
                ncols = nrows

            elif method == "multiply-by-value":
                matrix *= options.value

            elif method == "divide-by-value":
                matrix /= options.value

            elif method == "add-value":
                matrix += options.value

            elif method == "angle":
                # write angles between col vectors
                v1 = numpy.sqrt(numpy.sum(numpy.power(matrix, 2), 0))
                matrix = numpy.dot(numpy.transpose(matrix), matrix)
                row_headers = col_headers
                nrows = ncols
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] /= v1[x] * v1[y]

            elif method == "euclid":
                # convert to euclidean distance matrix between columns
                matrix = numpy.zeros((ncols, ncols), float)
                for c1 in range(0, ncols - 1):
                    for c2 in range(c1 + 1, ncols):
                        for r in range(0, nrows):
                            d = raw_matrix[r][c1] - raw_matrix[r][c2]
                            matrix[c1, c2] += (d * d)
                        matrix[c2, c1] = matrix[c1, c2]
                matrix = numpy.sqrt(matrix)
                row_headers = col_headers
                nrows = ncols

            elif method.startswith("symmetrize"):
                f = method.split("-")[1]
                if f == "max":
                    f = max
                elif f == "min":
                    f = min
                elif f == "mean":
                    f = lambda x, y: float(x + y) / 2

                if nrows != ncols:
                    raise ValueError(
                        "symmetrize only available for symmetric matrices")
                if row_headers != col_headers:
                    raise ValueError(
                        "symmetrize not available for permuted matrices")
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] = matrix[y, x] = \
                            f(matrix[x, y], matrix[y, x])

            elif method == "sub":
                matrix = options.value - matrix

            elif method in ("lower-bound", "upper-bound"):
                # two parameters: the boundary and the replacement value
                boundary = float(options.parameters[parameter])
                new_value = float(options.parameters[parameter + 1])
                parameter += 2
                if method == "upper-bound":
                    for x in range(nrows):
                        for y in range(ncols):
                            if matrix[x, y] > boundary:
                                matrix[x, y] = new_value
                else:
                    for x in range(nrows):
                        for y in range(ncols):
                            if matrix[x, y] < boundary:
                                matrix[x, y] = new_value

            elif method == "subtract-matrix":
                matrix = matrix - other_matrix

            elif method == "add-matrix":
                matrix = matrix + other_matrix

            elif method == "normalize-by-matrix":
                # set 0s to 1 in the other matrix to avoid division by zero
                for x in range(nrows):
                    for y in range(ncols):
                        if other_matrix[x, y] == 0:
                            other_matrix[x, y] = 1.0

                matrix = matrix / other_matrix

            elif method == "mix-matrix":
                # copy the cells above the diagonal from the other matrix
                for x in range(len(other_row_headers) - 1):
                    for y in range(x + 1, len(other_col_headers)):
                        matrix[x, y] = other_matrix[x, y]

            elif method == "set-diagonal":
                value = float(options.parameters[parameter])
                for x in range(min(nrows, ncols)):
                    matrix[x, x] = value
                parameter += 1

            elif method == "correspondence-analysis":
                row_indices, col_indices = CorrespondenceAnalysis.GetIndices(
                    raw_matrix)
                map_row_new2old = numpy.argsort(row_indices)
                map_col_new2old = numpy.argsort(col_indices)

                matrix, row_headers, col_headers = \
                    CorrespondenceAnalysis.GetPermutatedMatrix(
                        raw_matrix,
                        map_row_new2old,
                        map_col_new2old,
                        row_headers=row_headers,
                        col_headers=col_headers)

            elif method == "mask-rows":
                if row_names is None:
                    raise ValueError("mask-rows requires --filename-rows")
                r = set(row_names)
                for x, header in enumerate(row_headers):
                    if header in r:
                        matrix[x, :] = options.value

            elif method == "mask-columns":
                if column_names is None:
                    raise ValueError(
                        "mask-columns requires --filename-columns")
                r = set(column_names)
                for x, header in enumerate(col_headers):
                    if header in r:
                        matrix[:, x] = options.value

            elif method == "mask-rows-and-columns":
                if row_names is None or column_names is None:
                    raise ValueError(
                        "mask-rows-and-columns requires --filename-rows "
                        "and --filename-columns")
                r = set(row_names)
                c = set(column_names)
                for x in range(len(row_headers)):
                    for y in range(len(col_headers)):
                        if row_headers[x] in r and col_headers[y] in c:
                            matrix[x, y] = options.value

            # the output of this method is the input of the next one
            raw_matrix = numpy.reshape(numpy.array(matrix), matrix.shape)

        # Also covers simple re-formatting jobs where no method was given
        # and the loop body never ran.
        matrix = raw_matrix

        if options.write_separators:
            options.stdout.write(lines[chunks[chunk]])

        # write to the same stream as the separators
        MatlabTools.writeMatrix(options.stdout, matrix,
                                value_format=options.format,
                                format=options.output_format,
                                row_headers=row_headers,
                                col_headers=col_headers)
if not chunks: options.write_separators = False chunks = [-1] chunks.append( len(lines) ) if options.filename_rows: row_names, n = IOTools.ReadList( open( options.filename_rows, "r") ) if options.filename_columns: column_names, n = IOTools.ReadList( open( options.filename_columns, "r") ) for chunk in range(len(chunks) -1 ): try: raw_matrix, row_headers, col_headers = MatlabTools.readMatrix( StringIO.StringIO("".join(lines[chunks[chunk]+1:chunks[chunk+1]])), format=options.input_format, headers = options.headers, missing = options.missing ) except ValueError, msg: E.warn( "matrix could not be read: %s" % msg) continue nrows, ncols = raw_matrix.shape E.debug("read matrix: %i x %i, %i row titles, %i colum titles" %\ (nrows, ncols, len(row_headers), len(col_headers))) parameter = 0 for method in options.methods: matrix = numpy.reshape( numpy.array(raw_matrix), raw_matrix.shape )
colors_gray = matplotlib.colors.LinearSegmentedColormap('gray', _gray_data, LUTSIZE) plot_id = 0 for filename in filenames: plot_id += 1 pylab.subplot( nsubrows, nsubcols, plot_id) if filename == "-": infile = sys.stdin else: infile = open(filename, "r") matrix,row_headers,col_headers = MatlabTools.readMatrix( infile, numeric_type=numpy.float32, take=options.columns, headers = options.headers, missing = options.missing ) if min(matrix.flat) == max(matrix.flat): options.stderr.write( "matrix is uniform - no plotting done.\n") sys.exit(0) if options.normalize: v = max(matrix.flat) matrix = matrix / v if options.zrange: options.zrange = GetRange( matrix, options.zrange ) nrows, ncols = matrix.shape
parser.set_defaults(filename_map=None, filename_info=None, filename_tissues=None, headers=True, aggregate="mean", value_format="%5.2f", method="counts") (options, args) = E.Start(parser) if not options.filename_map: raise "please supply filename mapping probesets to identifiers." map_probe2locus = IOTools.ReadMap(open(options.filename_map, "r")) matrix, row_headers, col_headers = MatlabTools.readMatrix( sys.stdin, format="full", headers=options.headers) if options.filename_tissues: tissues, nerrors = IOTools.ReadList(open(options.filename_tissues, "r")) tissues = set(tissues) columns = [] for x in range(len(col_headers)): if col_headers[x] in tissues: columns.append(x) else: columns = range(len(col_headers)) nrows, ncols = len(row_headers), len(col_headers) ninput, noutput, nkept = 0, 0, 0
def run(self):
    """Run the configured PHYLIP program and collect its output.

    Calls ``self.prepareRun()``, starts ``self.mProgram`` inside
    ``self.mTempdir`` and sends each entry of ``self.mOptions`` to the
    program's stdin on its own line. Afterwards the output files found
    in the temporary directory are parsed:

    * ``outtree``: parsed into ``result.mNexus``, with taxa mapped back
      via ``self.mMapPhylip2Input``.
    * ``outfile`` (only if there is no ``outtree``): for ``dnadist`` and
      ``protdist`` the matrix is read into ``result.mMatrix`` together
      with ``result.mRowHeaders`` / ``result.mColHeaders``; for
      ``contrast`` the file is handed to ``result.parseContrasts``.

    The temporary directory is removed only when ``self.mLogLevel`` is 0.

    Returns:
        the populated ``PhylipResult``.

    Raises:
        UsageError: if no program is configured or the program exits
            with a non-zero return code.
        NotImplementedError: if neither ``outtree`` nor ``outfile`` was
            created.
    """
    self.prepareRun()

    if not self.mProgram:
        raise UsageError("no program specified.")

    s = subprocess.Popen("%s" % (self.mProgram),
                         shell=True,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         cwd=self.mTempdir,
                         close_fds=True)

    (out, err) = s.communicate("\n".join(self.mOptions) + "\n")

    if s.returncode != 0:
        raise UsageError(
            "Error in running phylip.\n%s\n%s\nTemporary directory was %s" %
            (out, err, self.mTempdir))

    # Parse output files that might have been created:
    result = PhylipResult()

    outtree = "%s/outtree" % self.mTempdir
    outfile = "%s/outfile" % self.mTempdir

    if os.path.exists(outtree):
        # parse tree file
        with open(outtree, "r") as infile:
            nexus = TreeTools.Newick2Nexus(infile)
        for tree in nexus.trees:
            TreeTools.MapTaxa(tree, self.mMapPhylip2Input)
        result.mNexus = nexus
        if self.mLogLevel >= 1:
            print("# received tree with %i taxa" %
                  (len(TreeTools.GetTaxa(nexus.trees[0]))))

    elif os.path.exists(outfile):

        if self.mProgram in ("dnadist", "protdist"):
            with open(outfile, "r") as infile:
                result.mMatrix, row_headers, col_headers = \
                    MatlabTools.readMatrix(infile, format="phylip")
            # translate the phylip identifiers back to the input names
            result.mRowHeaders = [
                self.mMapPhylip2Input[x] for x in row_headers]
            result.mColHeaders = result.mRowHeaders

        elif self.mProgram == "contrast":
            with open(outfile, "r") as infile:
                result.parseContrasts(infile)

    else:
        # NOTE(review): this ``else`` is attached to the file-existence
        # checks (no outtree and no outfile) - confirm against the
        # original layout.
        raise NotImplementedError("other return types not implemented")

    if self.mLogLevel >= 2:
        print(out)

    # the temporary directory is kept at higher log levels
    if self.mLogLevel == 0:
        shutil.rmtree(self.mTempdir)

    return result