Python MatlabTools Exemples, CGAT.MatlabTools Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : WrapperPhylip.py Projet : jmadzo/cgat

    def run(self):
        """Run the configured phylip program and collect its results.

        Calls ``self.prepareRun()``, starts ``self.mProgram`` through the
        shell with ``self.mTempdir`` as working directory and feeds it
        ``self.mOptions`` (one option per line) on stdin.  Afterwards the
        file ``outtree`` or, if that does not exist, ``outfile`` in the
        temporary directory is parsed into a ``PhylipResult``.

        The temporary directory is only removed when ``self.mLogLevel``
        is 0; it is kept on errors so that it can be inspected.

        Returns:
            The populated ``PhylipResult`` instance.

        Raises:
            UsageError: if no program is set or the program exits with a
                non-zero return code.
            NotImplementedError: if the run produced neither an
                ``outtree`` nor an ``outfile``.
        """

        self.prepareRun()

        if not self.mProgram:
            raise UsageError("no program specified.")

        # NOTE(review): the command is run through the shell; this is only
        # safe as long as mProgram is not built from untrusted input.
        s = subprocess.Popen(
            "%s" % (self.mProgram),
            shell=True,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=self.mTempdir,
            close_fds=True,
        )

        (out, err) = s.communicate("\n".join(self.mOptions) + "\n")

        if s.returncode != 0:
            raise UsageError(
                "Error in running phylip.\n%s\n%s\nTemporary directory was %s" %
                (out, err, self.mTempdir))

        # Parse output files that might have been created:
        result = PhylipResult()

        tree_path = os.path.join(self.mTempdir, "outtree")
        outfile_path = os.path.join(self.mTempdir, "outfile")

        if os.path.exists(tree_path):
            # parse tree file; the handle is closed as soon as it is parsed
            with open(tree_path, "r") as infile:
                nexus = TreeTools.Newick2Nexus(infile)
            for tree in nexus.trees:
                TreeTools.MapTaxa(tree, self.mMapPhylip2Input)
            result.mNexus = nexus
            if self.mLogLevel >= 1:
                print("# received tree with %i taxa" %
                      (len(TreeTools.GetTaxa(nexus.trees[0]))))

        elif os.path.exists(outfile_path):

            if self.mProgram in ("dnadist", "protdist"):
                with open(outfile_path, "r") as infile:
                    result.mMatrix, row_headers, col_headers = \
                        MatlabTools.readMatrix(infile, format="phylip")
                # translate phylip identifiers back to the input names
                result.mRowHeaders = [self.mMapPhylip2Input[x]
                                      for x in row_headers]
                result.mColHeaders = result.mRowHeaders

            elif self.mProgram == "contrast":
                with open(outfile_path, "r") as infile:
                    result.parseContrasts(infile)

        else:
            # raising a plain string is itself a TypeError on python >= 2.6
            raise NotImplementedError("other return types not implemented")

        if self.mLogLevel >= 2:
            print(out)

        if self.mLogLevel == 0:
            shutil.rmtree(self.mTempdir)

        return result

Exemple #2

0

Afficher le fichier

Fichier : plot_matrix.py Projet : SCV/cgat

def main(argv=None):
    """script main.

    Builds the option parser, reads one matrix per input file with
    ``MatlabTools.readMatrix`` and draws each of them with ``plotMatrix``
    into a pylab subplot.  The figure is written to the ``--hardcopy``
    file if given and shown interactively otherwise.

    The docstring of the original script states that options are parsed
    from sys.argv unless *argv* is given.
    NOTE(review): *argv* is not forwarded to ``E.Start`` - confirm
    whether ``E.Start`` should receive it.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: plot_matrix.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to take from table.")

    parser.add_option("-a", "--hardcopy", dest="hardcopy", type="string",
                      help="write hardcopy to file.", metavar="FILE")

    parser.add_option("-f", "--file", dest="input_filename", type="string",
                      help="filename with table data.",
                      metavar="FILE")

    parser.add_option("-p", "--plot", dest="plot", type="string",
                      help="plots to plot.",
                      action="append")

    parser.add_option("-t", "--threshold", dest="threshold", type="float",
                      help="min threshold to use for counting method.")

    parser.add_option("-o", "--colours", dest="colours", type="int",
                      help="column with colour information.")

    parser.add_option("-l", "--plot-labels", dest="labels", type="string",
                      help="column labels for x and y in matched plots.")

    parser.add_option("-e", "--header-names", dest="headers", action="store_true",
                      help="headers are supplied in matrix.")

    parser.add_option("--no-headers", dest="headers", action="store_false",
                      help="headers are not supplied in matrix.")

    parser.add_option("--normalize", dest="normalize", action="store_true",
                      help="normalize matrix.")

    # the help text uses %default (lower case); optparse does not
    # expand any other spelling.
    parser.add_option("--palette", dest="palette", type="choice",
                      choices=("rainbow", "gray", "blue-white-red",
                               "autumn", "bone", "cool", "copper", "flag", "gray", "hot", "hsv", "jet", "pink", "prism",
                               "spring", "summer", "winter", "spectral",
                               "RdBu", "RdGy", "BrBG", "BuGn", "Blues", "Greens", "Reds", "Oranges", "Greys"),
                      help="colour palette [default=%default]")

    parser.add_option("--reverse-palette", dest="reverse_palette", action="store_true",
                      help="reverse the palette [default=%default].")

    parser.add_option("", "--xrange", dest="xrange", type="string",
                      help="xrange.")

    parser.add_option("", "--yrange", dest="yrange", type="string",
                      help="yrange.")

    parser.add_option("", "--zrange", dest="zrange", type="string",
                      help="zrange.")

    parser.add_option("", "--xticks", dest="xticks", type="string",
                      help="xticks.")

    parser.add_option("", "--yticks", dest="yticks", type="string",
                      help="yticks.")

    parser.add_option("--bar-format", dest="bar_format", type="string",
                      help="format for ticks on colourbar.")

    parser.add_option("--title", dest="title", type="string",
                      help="title to use.")

    parser.add_option("--missing-value", dest="missing", type="float",
                      help="value to use for missing data.")

    parser.add_option("--subplots", dest="subplots", type="string",
                      help="split matrix into several subplots. Supply number of rows and columns separated by a comma.")

    parser.set_defaults(
        hardcopy=None,
        input_filename="-",
        columns="all",
        statistics=[],
        plot=[],
        threshold=0.0,
        labels="x,y",
        colours=None,
        xrange=None,
        yrange=None,
        zrange=None,
        palette=None,
        reverse_palette=False,
        xticks=None,
        yticks=None,
        normalize=False,
        bar_format="%1.1f",
        headers=True,
        missing=None,
        title=None,
        subplots=None)

    (options, args) = E.Start(parser)

    # import matplotlib/pylab. Has to be done here
    # for batch scripts without GUI.
    import matplotlib
    if options.hardcopy:
        matplotlib.use("cairo")
    import pylab

    # positional arguments override the --file option
    if len(args) > 0:
        options.input_filename = ",".join(args)

    if options.xticks:
        options.xticks = options.xticks.split(",")
    if options.yticks:
        options.yticks = options.yticks.split(",")

    # list comprehensions instead of map(): map() is lazy on python 3
    if options.xrange:
        options.xrange = [float(x) for x in options.xrange.split(",")]
    if options.yrange:
        options.yrange = [float(x) for x in options.yrange.split(",")]

    if options.columns != "all":
        # columns are given 1-based on the command line
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    filenames = options.input_filename.split(",")

    if len(filenames) > 1:
        # three plots per row; ceiling division so that no empty row is
        # allocated when the number of files is a multiple of three.
        nsubcols = 3
        nsubrows = (len(filenames) + nsubcols - 1) // nsubcols
    elif options.subplots:
        nsubrows, nsubcols = [int(x) for x in options.subplots.split(",")]
    else:
        nsubrows, nsubcols = 1, 1

    nsubplots = nsubrows * nsubcols

    # Setting up color maps
    colors_gray = None
    if options.palette == "gray":
        # custom gray map running from white (low) to black (high)
        _gray_data = {'red':   ((0., 1, 1), (1., 0, 0)),
                      'green': ((0., 1, 1), (1., 0, 0)),
                      'blue':  ((0., 1, 1), (1., 0, 0))}

        LUTSIZE = pylab.rcParams['image.lut']
        colors_gray = matplotlib.colors.LinearSegmentedColormap(
            'gray', _gray_data, LUTSIZE)

    plot_id = 0
    for filename in filenames:

        plot_id += 1
        pylab.subplot(nsubrows, nsubcols, plot_id)

        if filename == "-":
            infile = sys.stdin
        else:
            infile = open(filename, "r")

        try:
            matrix, row_headers, col_headers = MatlabTools.readMatrix(
                infile,
                numeric_type=numpy.float32,
                take=options.columns,
                headers=options.headers,
                missing=options.missing)
        finally:
            # close files opened here, but leave stdin alone
            if infile is not sys.stdin:
                infile.close()

        if min(matrix.flat) == max(matrix.flat):
            options.stderr.write("matrix is uniform - no plotting done.\n")
            sys.exit(0)

        if options.normalize:
            v = max(matrix.flat)
            matrix = matrix / v

        # NOTE(review): options.zrange is overwritten with the result of
        # GetRange, so with several input files the second call receives
        # the already converted value - confirm GetRange copes with that.
        if options.zrange:
            options.zrange = GetRange(matrix, options.zrange)

        nrows, ncols = matrix.shape

        if options.palette:
            if options.palette == "gray":
                color_scheme = colors_gray
            else:
                # plain attribute lookup instead of eval()
                palette_name = options.palette
                if options.reverse_palette:
                    palette_name += "_r"
                color_scheme = getattr(pylab.cm, palette_name)
        else:
            color_scheme = None

        if options.zrange:
            # clip values to the requested range
            vmin, vmax = options.zrange
            matrix[matrix < vmin] = vmin
            matrix[matrix > vmax] = vmax
        else:
            vmin, vmax = None, None

        if options.subplots:

            if nsubcols > 1:
                # x indexes columns, so the chunk width derives from ncols
                increment_x = int(float(ncols + 1) / nsubcols)
                increment_y = nrows

                x = 0
                y = 0
                for n in range(nsubplots):
                    pylab.subplot(nsubrows, nsubcols, plot_id)
                    plot_id += 1

                    E.debug("subplot %i: rows=%i cols=%i y=%i:%i x=%i:%i shape=%s of %s" %
                            (n, nsubrows, nsubcols,
                             y, y + increment_y, x, x + increment_x,
                             str(matrix[y:y + increment_y,
                                        x:x + increment_x].shape),
                             str(matrix.shape)))

                    plotMatrix(matrix[y:y + increment_y, x:x + increment_x],
                               color_scheme,
                               row_headers[y:y + increment_y],
                               col_headers[x:x + increment_x],
                               vmin, vmax, options)

                    # advance to the next chunk of columns; this has to
                    # happen inside the loop or every panel is identical.
                    x += increment_x

            elif nsubrows > 1:
                increment_x = int(float(ncols + 1) / nsubrows)

                x = 0
                for n in range(nsubplots):
                    pylab.subplot(nsubrows, nsubcols, plot_id)
                    plot_id += 1
                    plotMatrix(matrix[0:nrows, x:x + increment_x],
                               color_scheme,
                               row_headers,
                               col_headers[x:x + increment_x],
                               vmin, vmax, options)

                    x += increment_x
        else:
            plotMatrix(
                matrix, color_scheme, row_headers, col_headers, vmin, vmax, options)

        if options.xrange:
            pylab.xlim(options.xrange)

        if options.yrange:
            pylab.ylim(options.yrange)

        if options.labels:
            xlabel, ylabel = options.labels.split(",")
            pylab.xlabel(xlabel)
            pylab.ylabel(ylabel)

        if not options.subplots:
            pylab.colorbar(format=options.bar_format)

        # no --title: fall back to the filename; an empty --title
        # suppresses the title; otherwise use the supplied text.
        if options.title is None:
            pylab.title(filename)
        elif options.title != "":
            pylab.title(options.title)

    if options.hardcopy:
        pylab.savefig(os.path.expanduser(options.hardcopy))
    else:
        pylab.show()

    E.Stop()

Exemple #3

0

Afficher le fichier

Fichier : matrix2matrix.py Projet : SCV/cgat

def main(argv=None):
    """script main.

    Reads one or more matrices from stdin (several matrices are
    separated by lines starting with ``>``; lines starting with ``#``
    are dropped), applies the transformations selected with ``--method``
    in the order given and writes each resulting matrix to
    ``options.stdout``.

    Methods that need extra arguments consume them in order from the
    comma-separated ``--parameters`` list.

    The docstring of the original script states that options are parsed
    from sys.argv unless *argv* is given.
    NOTE(review): *argv* is not forwarded to ``E.Start`` - confirm
    whether ``E.Start`` should receive it.

    Raises:
        IOError: if stdin contains no data lines.
        ValueError: if a method is applied to a matrix of unsuitable
            shape or a required list of rows/columns was not supplied.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: matrix2matrix.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-m", "--method", dest="methods", type="choice", action="append",
                      choices=("normalize-by-min-diagonal", "normalize-by-column",
                               "log", "ln", "negzero2value",
                               "set-diagonal",
                               "subtract-matrix", "mix-matrix", "normalize-by-matrix",
                               "normalize-by-column-max", "normalize-by-row-max",
                               "normalize-by-column-min", "normalize-by-row-min",
                               "normalize-by-column-median", "normalize-by-row-median",
                               "normalize-by-column-mean", "normalize-by-row-mean",
                               "normalize-by-column-total", "normalize-by-row-total",
                               "correspondence-analysis",
                               "normalize-by-value",
                               "add-value",
                               "sort-rows", "sort-columns",
                               "transpose",
                               "upper-bound", "lower-bound",
                               "subtract-first-col", "multiply-by-value", "divide-by-value",
                               "mask-rows", "mask-columns", "mask-rows-and-columns",
                               "symmetrize-mean", "symmetrize-max", "symmetrize-min",
                               ),
                      help="""method to use [default=%default]""")

    parser.add_option("-s", "--scale", dest="scale", type="float",
                      help="factor to scale matrix by [default=%default].")

    parser.add_option("-f", "--format", dest="format", type="string",
                      help="output number format [default=%default].")

    parser.add_option("--rows-tsv-file", dest="filename_rows", type="string",
                      help="filename with rows to mask [default=%default].")

    parser.add_option("--columns-tsv-file", dest="filename_columns", type="string",
                      help="filename with columns to mask [default=%default].")

    parser.add_option("-p", "--parameters", dest="parameters", type="string",
                      help="Parameters for various functions.")

    parser.add_option("-t", "--header-names", dest="headers", action="store_true",
                      help="matrix has row/column headers.")

    parser.add_option("--no-headers", dest="headers", action="store_false",
                      help="matrix has no row/column headers.")

    parser.add_option("-a", "--value", dest="value", type="float",
                      help="value to use for various algorithms.")

    parser.add_option("-i", "--input-format", dest="input_format", type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""input format for matrix.""")

    parser.add_option("-o", "--output-format", dest="output_format", type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""output format for matrix.""")

    parser.add_option("--missing-value", dest="missing", type="float",
                      help="value to use for missing values. If not set, missing values will cause the script to fail [default=%default].")

    parser.set_defaults(
        methods=[],
        scale=1.0,
        headers=True,
        format="%6.4f",
        output_format="full",
        input_format="full",
        value=0.0,
        parameters="",
        write_separators=True,
        filename_rows=None,
        filename_columns=None,
        missing=None,
    )

    (options, args) = E.Start(parser)

    options.parameters = options.parameters.split(",")

    # drop comment lines; list comprehensions keep this working where
    # filter() returns an iterator.
    lines = [x for x in sys.stdin.readlines() if x[0] != "#"]

    if len(lines) == 0:
        raise IOError("no input")

    # indices of the separator lines starting a new matrix
    chunks = [x for x in range(len(lines)) if lines[x][0] == ">"]

    if not chunks:
        # a single matrix without separator line
        options.write_separators = False
        chunks = [-1]

    chunks.append(len(lines))

    # names used by the mask-* methods; None if no file was supplied
    row_names, column_names = None, None
    if options.filename_rows:
        with open(options.filename_rows, "r") as infile:
            row_names, n = IOTools.ReadList(infile)
    if options.filename_columns:
        with open(options.filename_columns, "r") as infile:
            column_names, n = IOTools.ReadList(infile)

    # aggregation functions for the normalize-by-{column,row}-<suffix>
    # methods, keyed by the suffix of the method name.
    aggregators = {"max": max,
                   "min": min,
                   "median": numpy.median,
                   "mean": numpy.mean,
                   "total": sum}

    for chunk in range(len(chunks) - 1):

        try:
            raw_matrix, row_headers, col_headers = MatlabTools.readMatrix(
                StringIO.StringIO(
                    "".join(lines[chunks[chunk] + 1:chunks[chunk + 1]])),
                format=options.input_format,
                headers=options.headers,
                missing=options.missing)
        except ValueError as msg:
            E.warn("matrix could not be read: %s" % msg)
            continue

        nrows, ncols = raw_matrix.shape

        E.debug("read matrix: %i x %i, %i row titles, %i colum titles" %
                (nrows, ncols, len(row_headers), len(col_headers)))

        # index of the next unused entry in options.parameters
        parameter = 0

        for method in options.methods:

            # work on a copy; raw_matrix keeps the state before this method
            matrix = numpy.reshape(numpy.array(raw_matrix), raw_matrix.shape)

            # Methods combining two matrices first read the second matrix.
            # This is a separate ``if`` so that the operation itself is
            # still reached in the chain below.
            if method in ("normalize-by-matrix", "subtract-matrix", "mix-matrix", "add-matrix"):

                # NOTE(review): this calls ReadMatrix while the rest of
                # the script uses readMatrix - confirm both exist.
                with open(options.parameters[parameter], "r") as infile:
                    other_matrix, other_row_headers, other_col_headers = \
                        MatlabTools.ReadMatrix(infile,
                                               headers=options.headers)

                other_nrows, other_ncols = other_matrix.shape

                if options.loglevel >= 2:
                    options.stdlog.write("# read second matrix from %s: %i x %i, %i row titles, %i colum titles.\n" %
                                         (options.parameters[parameter],
                                          other_nrows, other_ncols, len(other_row_headers), len(other_col_headers)))

                parameter += 1

            if method == "normalize-by-min-diagonal":
                for x in range(nrows):
                    for y in range(ncols):
                        m = min(raw_matrix[x, x], raw_matrix[y, y])
                        if m > 0:
                            matrix[x, y] = raw_matrix[x, y] / m

            elif method == "normalize-by-column":
                if nrows != ncols:
                    raise ValueError("only supported for symmeric matrices.")

                for x in range(nrows):
                    for y in range(ncols):
                        if raw_matrix[y, y] > 0:
                            matrix[x, y] = raw_matrix[x, y] / raw_matrix[y, y]

            elif method == "normalize-by-value":
                matrix = raw_matrix / float(options.parameters[parameter])
                parameter += 1

            elif method == "normalize-by-row":
                if nrows != ncols:
                    raise ValueError("only supported for symmeric matrices.")

                for x in range(nrows):
                    for y in range(ncols):
                        # guard the element that is actually divided by
                        if raw_matrix[x, x] > 0:
                            matrix[x, y] = raw_matrix[x, y] / raw_matrix[x, x]

            elif method == "subtract-first-col":
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] -= raw_matrix[x, 0]

            elif method.startswith("normalize-by-column"):
                f = aggregators[method.split("-")[-1]]

                for y in range(ncols):
                    m = f(matrix[:, y])
                    if m != 0:
                        for x in range(nrows):
                            matrix[x, y] = matrix[x, y] / m

            elif method.startswith("normalize-by-row"):
                f = aggregators[method.split("-")[-1]]

                for x in range(nrows):
                    m = f(matrix[x, :])
                    if m != 0:
                        for y in range(ncols):
                            matrix[x, y] = raw_matrix[x, y] / m

            elif method == "negzero2value":
                # set zero/negative values to a value
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] <= 0:
                            matrix[x, y] = options.value

            elif method == "minmax":
                # store the smaller and the larger value of each pair of
                # mirrored cells (x, y) and (y, x)
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y], matrix[y, x] = \
                            min(matrix[x, y], matrix[y, x]), \
                            max(matrix[x, y], matrix[y, x])

            elif method == "log":
                # apply log10 to all positive values.
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] > 0:
                            matrix[x, y] = math.log10(matrix[x, y])

            elif method == "ln":
                # apply the natural logarithm to all positive values.
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] > 0:
                            matrix[x, y] = math.log(matrix[x, y])

            elif method == "transpose":
                matrix = numpy.transpose(matrix)
                row_headers, col_headers = col_headers, row_headers
                nrows, ncols = ncols, nrows

            elif method == "mul":
                # product of the matrix with its transpose
                matrix = numpy.dot(matrix, numpy.transpose(matrix))
                col_headers = row_headers

            elif method == "multiply-by-value":
                matrix *= options.value

            elif method == "divide-by-value":
                matrix /= options.value

            elif method == "add-value":
                matrix += options.value

            elif method == "angle":
                # write angles between col vectors
                v1 = numpy.sqrt(numpy.sum(numpy.power(matrix, 2), 0))
                matrix = numpy.dot(numpy.transpose(matrix), matrix)
                row_headers = col_headers
                nrows = ncols
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] /= v1[x] * v1[y]

            elif method == "euclid":
                # convert to euclidean distance matrix
                matrix = numpy.zeros((ncols, ncols), float)
                for c1 in range(0, ncols - 1):
                    for c2 in range(c1 + 1, ncols):
                        for r in range(0, nrows):
                            d = raw_matrix[r][c1] - raw_matrix[r][c2]
                            matrix[c1, c2] += (d * d)
                        matrix[c2, c1] = matrix[c1, c2]
                matrix = numpy.sqrt(matrix)
                row_headers = col_headers
                nrows = ncols

            elif method.startswith("symmetrize"):
                f = method.split("-")[1]
                if f == "max":
                    f = max
                elif f == "min":
                    f = min
                elif f == "mean":
                    f = lambda x, y: float(x + y) / 2

                if nrows != ncols:
                    raise ValueError(
                        "symmetrize only available for symmetric matrices")
                if row_headers != col_headers:
                    raise ValueError(
                        "symmetrize not available for permuted matrices")
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] = matrix[y, x] = f(
                            matrix[x, y], matrix[y, x])

            elif method == "sub":
                matrix = options.value - matrix

            elif method in ("lower-bound", "upper-bound"):

                # consumes two parameters: the boundary and the value
                # assigned to entries beyond it
                boundary = float(options.parameters[parameter])
                new_value = float(options.parameters[parameter + 1])
                parameter += 2
                if method == "upper-bound":
                    for x in range(nrows):
                        for y in range(ncols):
                            if matrix[x, y] > boundary:
                                matrix[x, y] = new_value
                else:
                    for x in range(nrows):
                        for y in range(ncols):
                            if matrix[x, y] < boundary:
                                matrix[x, y] = new_value

            elif method == "subtract-matrix":
                matrix = matrix - other_matrix

            elif method == "add-matrix":
                matrix = matrix + other_matrix

            elif method == "normalize-by-matrix":

                # set 0s to 1 in the other matrix
                for x in range(nrows):
                    for y in range(ncols):
                        if other_matrix[x, y] == 0:
                            other_matrix[x, y] = 1.0

                matrix = matrix / other_matrix

            elif method == "mix-matrix":
                # take the cells above the diagonal from the other matrix
                for x in range(len(other_row_headers) - 1):
                    for y in range(x + 1, len(other_col_headers)):
                        matrix[x, y] = other_matrix[x, y]

            elif method == "set-diagonal":
                value = float(options.parameters[parameter])
                for x in range(min(nrows, ncols)):
                    matrix[x, x] = value
                parameter += 1

            elif method == "correspondence-analysis":
                row_indices, col_indices = CorrespondenceAnalysis.GetIndices(
                    raw_matrix)
                map_row_new2old = numpy.argsort(row_indices)
                map_col_new2old = numpy.argsort(col_indices)

                matrix, row_headers, col_headers = \
                    CorrespondenceAnalysis.GetPermutatedMatrix(
                        raw_matrix,
                        map_row_new2old,
                        map_col_new2old,
                        row_headers=row_headers,
                        col_headers=col_headers)

            elif method == "mask-rows":
                if row_names is None:
                    raise ValueError(
                        "mask-rows requires --rows-tsv-file")
                r = set(row_names)
                for x in range(len(row_headers)):
                    if row_headers[x] in r:
                        matrix[x, :] = options.value

            elif method == "mask-columns":
                if column_names is None:
                    raise ValueError(
                        "mask-columns requires --columns-tsv-file")
                r = set(column_names)
                for x in range(len(col_headers)):
                    if col_headers[x] in r:
                        matrix[:, x] = options.value

            elif method == "mask-rows-and-columns":
                if row_names is None or column_names is None:
                    raise ValueError(
                        "mask-rows-and-columns requires --rows-tsv-file "
                        "and --columns-tsv-file")
                r = set(row_names)
                c = set(column_names)
                for x in range(len(row_headers)):
                    for y in range(len(col_headers)):
                        if row_headers[x] in r and col_headers[y] in c:
                            matrix[x, y] = options.value

            # the result becomes the input of the next method
            raw_matrix = numpy.reshape(numpy.array(matrix), matrix.shape)

        # raw_matrix holds the result of the last method, or the
        # unmodified input for simple re-formatting jobs
        matrix = raw_matrix

        if options.write_separators:
            options.stdout.write(lines[chunks[chunk]])

        # write to the same stream as the separator lines
        MatlabTools.writeMatrix(options.stdout, matrix,
                                value_format=options.format,
                                format=options.output_format,
                                row_headers=row_headers,
                                col_headers=col_headers)

Exemple #4

0

Afficher le fichier

Fichier : convert_geneatlas.py Projet : BioinformaticsArchive/cgat

                         filename_info = None,
                         filename_tissues = None,
                         headers = True,
                         aggregate = "mean",
                         value_format = "%5.2f",
                         method="counts")
    
    (options, args) = E.Start( parser )

    if not options.filename_map:
        raise "please supply filename mapping probesets to identifiers."
    
    map_probe2locus = IOTools.ReadMap( open(options.filename_map, "r") )

    matrix, row_headers, col_headers = MatlabTools.readMatrix( sys.stdin,
                                                               format="full", 
                                                               headers = options.headers )

    if options.filename_tissues:
        tissues, nerrors = IOTools.ReadList( open(options.filename_tissues, "r") )
        tissues = set(tissues)
        columns = []
        for x in range(len(col_headers)):
            if col_headers[x] in tissues:
                columns.append( x )
    else:
        columns = range(len(col_headers))
        
    nrows, ncols = len(row_headers), len(col_headers)
    
    ninput, noutput, nkept = 0, 0, 0

Exemple #5

0

Afficher le fichier

Fichier : matrix2stats.py Projet : CGATOxford/cgat

def main(argv=None):
    """Compute chi-squared statistics for matrices read from stdin.

    Lines starting with ``#`` are dropped.  Lines starting with ``>``
    separate individual matrices ("chunks"); if there are none, the whole
    input is read as a single matrix and no separator column is written.

    Methods (``--method``):

    * ``chi-squared``: ``Stats.doChiSquaredTest`` is applied to each pair
      of rows selected via ``--iteration``.  Pairs for which the test
      raises ``ValueError`` are counted as skipped.
    * ``pearson-chi-squared``: ``Stats.doPearsonChiSquaredTest`` is applied
      to every column of a matrix with exactly two rows.  The first
      ``--parameters`` value is read as a filename mapping chunk
      identifiers to probabilities when the input has ``>`` separators,
      and as a single probability otherwise.

    Results are written tab-separated to ``options.stdout``.

    Raises:
        ValueError: if ``pearson-chi-squared`` is selected without any
            ``--parameters`` value, or a matrix does not have two rows.
    """

    # NOTE(review): argv is defaulted here but never forwarded to E.Start;
    # confirm whether E.Start should receive it.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id: matrix2stats.py 2795 2009-09-16 15:29:23Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("chi-squared", "pearson-chi-squared"),
                      help="statistical methods to apply.")

    parser.add_option("-t", "--header-names", dest="headers", action="store_true",
                      help="matrix has row/column headers.")

    parser.add_option("--no-headers", dest="headers", action="store_false",
                      help="matrix has no row/column headers.")

    parser.add_option("-i", "--input-format", dest="input_format", type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""input format for matrix.""")

    parser.add_option("-o", "--output-format", dest="output_format", type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""output format for matrix.""")

    parser.add_option("-p", "--parameters", dest="parameters", action="append", type="string",
                      help="parameters for various functions.")

    parser.add_option("-a", "--iteration", dest="iteration", type="choice",
                      choices=("pairwise", "all-vs-all"),
                      help="""how to compute stats [%default].""")

    parser.set_defaults(
        method="chi-squared",
        headers=True,
        value_format="%6.4f",
        pvalue_format="%6.4e",
        input_format="full",
        write_separators=True,
        parameters=[],
        iteration=None,
    )

    (options, args) = E.Start(parser)

    # drop comment lines, then locate the ">" lines that start each chunk
    lines = [x for x in sys.stdin.readlines() if x[0] != "#"]

    chunks = [x for x in range(len(lines)) if lines[x][0] == ">"]

    if not chunks:
        # no separators: treat the whole input as one chunk starting at
        # line 0 (chunk start + 1 == 0)
        options.write_separators = False
        chunks = [-1]

    # sentinel so that chunks[x + 1] is always a valid end index
    chunks.append(len(lines))

    ninput, noutput, nskipped = 0, 0, 0

    if options.write_separators:
        options.stdout.write("test\t")

    header_prefix = ""

    if options.method == "chi-squared":
        header_prefix = "observed\texpected"
        options.stdout.write("\t".join(
            (header_prefix, "n", "min", "max", "chi", "df", "P", "passed", "phi")) + "\n")

    elif options.method in ("pearson-chi-squared",):
        options.stdout.write("column\t")
        options.stdout.write("\t".join(
            (header_prefix, "n", "prob", "obs", "exp", "chi", "df", "P", "passed", "phi")) + "\n")

        if len(options.parameters) == 0:
            # string exceptions are not valid; raise a real exception so
            # the message actually reaches the user
            raise ValueError(
                "out of parameters - please supply probability or filename with probabilities.")

        param = options.parameters[0]
        del options.parameters[0]

        if options.write_separators:
            # one probability per chunk, looked up by chunk identifier
            probabilities = IOTools.ReadMap(
                IOTools.openFile(param, "r"), map_functions=(str, float))
        else:
            probability = float(param)

    for x in range(len(chunks) - 1):
        ninput += 1
        matrix, row_headers, col_headers = MatlabTools.readMatrix(
            StringIO("".join(lines[chunks[x] + 1:chunks[x + 1]])),
            format=options.input_format,
            headers=options.headers)
        nrows, ncols = matrix.shape

        if options.loglevel >= 2:
            options.stdlog.write("# read matrix: %i x %i, %i row titles, %i colum titles.\n" %
                                 (nrows, ncols, len(row_headers), len(col_headers)))

        if options.write_separators:
            # separator line without the leading ">" and trailing newline
            options.stdout.write(lines[chunks[x]][1:-1] + "\t")

        # row pairs to compare; stays empty when no iteration mode is set
        nheaders = len(row_headers)
        pairs = []
        if options.iteration == "pairwise":
            pairs = [(row1, row2)
                     for row1 in range(nheaders)
                     for row2 in range(row1 + 1, nheaders)]
        elif options.iteration == "all-vs-all":
            pairs = [(row1, row2)
                     for row1 in range(nheaders)
                     for row2 in range(nheaders)
                     if row1 != row2]

        if options.method == "chi-squared":

            for row1, row2 in pairs:
                row_header1 = row_headers[row1]
                row_header2 = row_headers[row2]
                try:
                    result = Stats.doChiSquaredTest(
                        numpy.vstack((matrix[row1], matrix[row2])))
                except ValueError:
                    nskipped += 1
                    continue

                noutput += 1
                options.stdout.write("\t".join((
                    "%s" % row_header1,
                    "%s" % row_header2,
                    "%i" % result.mSampleSize,
                    "%i" % min(matrix.flat),
                    "%i" % max(matrix.flat),
                    options.value_format % result.mChiSquaredValue,
                    "%i" % result.mDegreesFreedom,
                    options.pvalue_format % result.mProbability,
                    "%s" % result.mSignificance,
                    options.value_format % result.mPhi)) + "\n")

        elif options.method == "pearson-chi-squared":

            if nrows != 2:
                raise ValueError("only implemented for 2xn table")

            if options.write_separators:
                # first whitespace-delimited token of the separator line
                chunk_id = re.match(
                    r"(\S+)", lines[chunks[x]][1:-1]).groups()[0]
                probability = probabilities[chunk_id]

            for col in range(ncols):
                options.stdout.write("%s\t" % col_headers[col])
                result = Stats.doPearsonChiSquaredTest(
                    probability, sum(matrix[:, col]), matrix[0, col])
                options.stdout.write("\t".join((
                    "%i" % result.mSampleSize,
                    "%f" % probability,
                    "%i" % result.mObserved,
                    "%f" % result.mExpected,
                    options.value_format % result.mChiSquaredValue,
                    "%i" % result.mDegreesFreedom,
                    options.pvalue_format % result.mProbability,
                    "%s" % result.mSignificance,
                    options.value_format % result.mPhi)))
                if col < ncols - 1:
                    # start the next row and repeat the chunk label on it
                    options.stdout.write("\n")
                    if options.write_separators:
                        options.stdout.write(lines[chunks[x]][1:-1] + "\t")

            options.stdout.write("\n")

    E.info("# ninput=%i, noutput=%i, nskipped=%i\n" %
           (ninput, noutput, nskipped))

    E.Stop()

Exemple #6

0

Afficher le fichier

Fichier : matrix2matrix.py Projet : siping/cgat

    if not chunks:
        options.write_separators = False
        chunks = [-1]
        
    chunks.append( len(lines) )

    if options.filename_rows:
        row_names, n = IOTools.ReadList( open( options.filename_rows, "r") )
    if options.filename_columns:
        column_names, n = IOTools.ReadList( open( options.filename_columns, "r") )

    for chunk in range(len(chunks) -1 ):

        try:
            raw_matrix, row_headers, col_headers = MatlabTools.readMatrix( StringIO.StringIO("".join(lines[chunks[chunk]+1:chunks[chunk+1]])),
                                                                           format=options.input_format, 
                                                                           headers = options.headers,
                                                                           missing = options.missing )
        except ValueError, msg:
            E.warn( "matrix could not be read: %s" % msg)
            continue

        nrows, ncols = raw_matrix.shape

        E.debug("read matrix: %i x %i, %i row titles, %i colum titles" %\
                    (nrows, ncols, len(row_headers), len(col_headers)))

        parameter = 0

        for method in options.methods:

            matrix = numpy.reshape( numpy.array(raw_matrix), raw_matrix.shape )

Exemple #7

0

Afficher le fichier

Fichier : plot_matrix.py Projet : siping/cgat

            colors_gray   = matplotlib.colors.LinearSegmentedColormap('gray',   _gray_data, LUTSIZE)

    plot_id = 0
    for filename in filenames:

        plot_id += 1
        pylab.subplot( nsubrows, nsubcols, plot_id)
        
        if filename == "-":
            infile = sys.stdin
        else:
            infile = open(filename, "r")
            
        matrix,row_headers,col_headers = MatlabTools.readMatrix( infile,
                                                                 numeric_type=numpy.float32,
                                                                 take=options.columns,
                                                                 headers = options.headers,
                                                                 missing = options.missing )

        if min(matrix.flat) == max(matrix.flat):
            options.stderr.write( "matrix is uniform - no plotting done.\n")
            sys.exit(0)

        if options.normalize:
            v = max(matrix.flat)
            matrix = matrix / v
        
        if options.zrange: options.zrange = GetRange( matrix, options.zrange )

        nrows, ncols = matrix.shape

Exemple #8

0

Afficher le fichier

Fichier : plot_matrix.py Projet : AndreasHegerGenomics/cgat-apps

def main(argv=None):
    """Plot one or more matrices as colour-coded images.

    Matrices are read with ``MatlabTools.readMatrix`` from the files given
    via ``--file`` or as positional arguments (``-`` denotes stdin).  Each
    input file is drawn into its own subplot; alternatively a single
    matrix can be split across several subplots with ``--subplots``.  The
    figure is saved to ``--hardcopy`` if given and shown interactively
    otherwise.

    The script exits with status 0 without plotting if a matrix is
    uniform (its minimum equals its maximum).
    """

    # NOTE(review): argv is defaulted here but never forwarded to E.start;
    # confirm whether E.start should receive it.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: plot_matrix.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    parser.add_option("-c",
                      "--columns",
                      dest="columns",
                      type="string",
                      help="columns to take from table.")

    parser.add_option("-a",
                      "--hardcopy",
                      dest="hardcopy",
                      type="string",
                      help="write hardcopy to file.",
                      metavar="FILE")

    parser.add_option("-f",
                      "--file",
                      dest="input_filename",
                      type="string",
                      help="filename with table data.",
                      metavar="FILE")

    parser.add_option("-p",
                      "--plot",
                      dest="plot",
                      type="string",
                      help="plots to plot.",
                      action="append")

    parser.add_option("-t",
                      "--threshold",
                      dest="threshold",
                      type="float",
                      help="min threshold to use for counting method.")

    parser.add_option("-o",
                      "--colours",
                      dest="colours",
                      type="int",
                      help="column with colour information.")

    parser.add_option("-l",
                      "--plot-labels",
                      dest="labels",
                      type="string",
                      help="column labels for x and y in matched plots.")

    parser.add_option("-e",
                      "--header-names",
                      dest="headers",
                      action="store_true",
                      help="headers are supplied in matrix.")

    parser.add_option("--no-headers",
                      dest="headers",
                      action="store_false",
                      help="headers are not supplied in matrix.")

    parser.add_option("--normalize",
                      dest="normalize",
                      action="store_true",
                      help="normalize matrix.")

    # "%default" (lower case) is the placeholder optparse expands in help
    # strings; the previous "%Default" was printed literally.
    parser.add_option("--palette",
                      dest="palette",
                      type="choice",
                      choices=("rainbow", "gray", "blue-white-red", "autumn",
                               "bone", "cool", "copper", "flag", "gray", "hot",
                               "hsv", "jet", "pink", "prism", "spring",
                               "summer", "winter", "spectral", "RdBu", "RdGy",
                               "BrBG", "BuGn", "Blues", "Greens", "Reds",
                               "Oranges", "Greys"),
                      help="colour palette [default=%default]")

    parser.add_option("--reverse-palette",
                      dest="reverse_palette",
                      action="store_true",
                      help="reverse the palette [default=%default].")

    parser.add_option("",
                      "--xrange",
                      dest="xrange",
                      type="string",
                      help="xrange.")

    parser.add_option("",
                      "--yrange",
                      dest="yrange",
                      type="string",
                      help="yrange.")

    parser.add_option("",
                      "--zrange",
                      dest="zrange",
                      type="string",
                      help="zrange.")

    parser.add_option("",
                      "--xticks",
                      dest="xticks",
                      type="string",
                      help="xticks.")

    parser.add_option("",
                      "--yticks",
                      dest="yticks",
                      type="string",
                      help="yticks.")

    parser.add_option("--bar-format",
                      dest="bar_format",
                      type="string",
                      help="format for ticks on colourbar.")

    parser.add_option("--title",
                      dest="title",
                      type="string",
                      help="title to use.")

    parser.add_option("--missing-value",
                      dest="missing",
                      type="float",
                      help="value to use for missing data.")

    parser.add_option(
        "--subplots",
        dest="subplots",
        type="string",
        help=
        "split matrix into several subplots. Supply number of rows and columns separated by a comma."
    )

    parser.set_defaults(hardcopy=None,
                        input_filename="-",
                        columns="all",
                        statistics=[],
                        plot=[],
                        threshold=0.0,
                        labels="x,y",
                        colours=None,
                        xrange=None,
                        yrange=None,
                        zrange=None,
                        palette=None,
                        reverse_palette=False,
                        xticks=None,
                        yticks=None,
                        normalize=False,
                        bar_format="%1.1f",
                        headers=True,
                        missing=None,
                        title=None,
                        subplots=None)

    (options, args) = E.start(parser)

    # import matplotlib/pylab. Has to be done here
    # for batch scripts without GUI.
    import matplotlib
    if options.hardcopy:
        matplotlib.use("cairo")
    import pylab

    # positional arguments override --file
    if len(args) > 0:
        options.input_filename = ",".join(args)

    if options.xticks:
        options.xticks = options.xticks.split(",")
    if options.yticks:
        options.yticks = options.yticks.split(",")

    if options.xrange:
        options.xrange = list(map(float, options.xrange.split(",")))
    if options.yrange:
        options.yrange = list(map(float, options.yrange.split(",")))

    if options.columns != "all":
        # command line columns are 1-based
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    filenames = options.input_filename.split(",")

    if len(filenames) > 1:
        # three plots per row; floor division keeps the row count an
        # integer, which pylab.subplot requires
        nsubrows = (len(filenames) // 3) + 1
        nsubcols = 3
    elif options.subplots:
        nsubrows, nsubcols = [int(x) for x in options.subplots.split(",")]
    else:
        nsubrows, nsubcols = 1, 1

    nsubplots = nsubrows * nsubcols

    # Setting up color maps
    if options.palette:
        if options.palette == "gray":
            _gray_data = {
                'red': ((0., 1, 1), (1., 0, 0)),
                'green': ((0., 1, 1), (1., 0, 0)),
                'blue': ((0., 1, 1), (1., 0, 0))
            }

            LUTSIZE = pylab.rcParams['image.lut']
            colors_gray = matplotlib.colors.LinearSegmentedColormap(
                'gray', _gray_data, LUTSIZE)

    plot_id = 0
    for filename in filenames:

        plot_id += 1
        pylab.subplot(nsubrows, nsubcols, plot_id)

        if filename == "-":
            infile = sys.stdin
        else:
            infile = IOTools.open_file(filename, "r")

        try:
            matrix, row_headers, col_headers = MatlabTools.readMatrix(
                infile,
                numeric_type=numpy.float32,
                take=options.columns,
                headers=options.headers,
                missing=options.missing)
        finally:
            # release the handle once the matrix has been read; stdin is
            # left open
            if infile is not sys.stdin:
                infile.close()

        if min(matrix.flat) == max(matrix.flat):
            options.stderr.write("matrix is uniform - no plotting done.\n")
            sys.exit(0)

        if options.normalize:
            v = max(matrix.flat)
            matrix = matrix / v

        if options.zrange:
            options.zrange = GetRange(matrix, options.zrange)

        nrows, ncols = matrix.shape

        if options.palette:
            if options.palette == "gray":
                color_scheme = colors_gray
            else:
                # look the colour map up by name instead of eval()-ing a
                # constructed expression
                palette_name = options.palette
                if options.reverse_palette:
                    palette_name += "_r"
                color_scheme = getattr(pylab.cm, palette_name)
        else:
            color_scheme = None

        if options.zrange:
            # clip the matrix to the requested value range
            vmin, vmax = options.zrange
            matrix[matrix < vmin] = vmin
            matrix[matrix > vmax] = vmax
        else:
            vmin, vmax = None, None

        if options.subplots:

            if nsubcols > 1:
                # NOTE(review): the column step is derived from nrows and
                # the value range is fixed at 0..100 in this branch, and the
                # print() calls look like leftover debugging - confirm.
                increment_x = int(float(nrows + 1) / nsubcols)
                increment_y = nrows

                x = 0
                y = 0
                for n in range(nsubplots):
                    pylab.subplot(nsubrows, nsubcols, plot_id)
                    plot_id += 1

                    print(n, "rows=", nsubrows, "cols=", nsubcols, y,
                          y + increment_y, x, x + increment_x)
                    print(matrix[y:y + increment_y, x:x + increment_x].shape)
                    print(matrix.shape)
                    plotMatrix(matrix[y:y + increment_y, x:x + increment_x],
                               color_scheme, row_headers[y:y + increment_y],
                               col_headers[x:x + increment_x], 0, 100, options)

                    # advance to the next column window; previously this
                    # sat outside the loop so every subplot showed the
                    # same slice
                    x += increment_x

            elif nsubrows > 1:
                increment_x = int(float(ncols + 1) / nsubrows)

                x = 0
                for n in range(nsubplots):
                    pylab.subplot(nsubrows, nsubcols, plot_id)
                    plot_id += 1
                    plotMatrix(matrix[0:nrows,
                                      x:x + increment_x], color_scheme,
                               row_headers, col_headers[x:x + increment_x],
                               vmin, vmax, options)

                    x += increment_x
        else:
            plotMatrix(matrix, color_scheme, row_headers, col_headers, vmin,
                       vmax, options)

        if options.xrange:
            pylab.xlim(options.xrange)

        if options.yrange:
            pylab.ylim(options.yrange)

        if options.labels:
            xlabel, ylabel = options.labels.split(",")
            pylab.xlabel(xlabel)
            pylab.ylabel(ylabel)

        if not options.subplots:
            pylab.colorbar(format=options.bar_format)

        # NOTE(review): the filename is used as title whenever --title is
        # unset or non-empty; the --title value itself is never shown -
        # confirm this is intended.
        if options.title is None or options.title != "":
            pylab.title(filename)

    if options.hardcopy:
        pylab.savefig(os.path.expanduser(options.hardcopy))
    else:
        pylab.show()

    E.stop()

Exemple #9

0

Afficher le fichier

    parser.set_defaults(filename_map=None,
                        filename_info=None,
                        filename_tissues=None,
                        headers=True,
                        aggregate="mean",
                        value_format="%5.2f",
                        method="counts")

    (options, args) = E.Start(parser)

    if not options.filename_map:
        raise "please supply filename mapping probesets to identifiers."

    map_probe2locus = IOTools.ReadMap(open(options.filename_map, "r"))

    matrix, row_headers, col_headers = MatlabTools.readMatrix(
        sys.stdin, format="full", headers=options.headers)

    if options.filename_tissues:
        tissues, nerrors = IOTools.ReadList(open(options.filename_tissues,
                                                 "r"))
        tissues = set(tissues)
        columns = []
        for x in range(len(col_headers)):
            if col_headers[x] in tissues:
                columns.append(x)
    else:
        columns = range(len(col_headers))

    nrows, ncols = len(row_headers), len(col_headers)

    ninput, noutput, nkept = 0, 0, 0

Exemple #10

0

Afficher le fichier

def main(argv=None):
    """Compute chi-squared statistics for matrices read from stdin.

    Lines starting with ``#`` are dropped.  Lines starting with ``>``
    separate individual matrices ("chunks"); if there are none, the whole
    input is read as a single matrix and no separator column is written.

    Methods (``--method``):

    * ``chi-squared``: ``Stats.doChiSquaredTest`` is applied to each pair
      of rows selected via ``--iteration``.  Pairs for which the test
      raises ``ValueError`` are counted as skipped.
    * ``pearson-chi-squared``: ``Stats.doPearsonChiSquaredTest`` is applied
      to every column of a matrix with exactly two rows.  The first
      ``--parameters`` value is read as a filename mapping chunk
      identifiers to probabilities when the input has ``>`` separators,
      and as a single probability otherwise.

    Results are written tab-separated to ``options.stdout``.

    Raises:
        ValueError: if ``pearson-chi-squared`` is selected without any
            ``--parameters`` value, or a matrix does not have two rows.
    """

    # NOTE(review): argv is defaulted here but never forwarded to E.start;
    # confirm whether E.start should receive it.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: matrix2stats.py 2795 2009-09-16 15:29:23Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-m",
                      "--method",
                      dest="method",
                      type="choice",
                      choices=("chi-squared", "pearson-chi-squared"),
                      help="statistical methods to apply.")

    parser.add_option("-t",
                      "--header-names",
                      dest="headers",
                      action="store_true",
                      help="matrix has row/column headers.")

    parser.add_option("--no-headers",
                      dest="headers",
                      action="store_false",
                      help="matrix has no row/column headers.")

    parser.add_option("-i",
                      "--input-format",
                      dest="input_format",
                      type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""input format for matrix.""")

    parser.add_option("-o",
                      "--output-format",
                      dest="output_format",
                      type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""output format for matrix.""")

    parser.add_option("-p",
                      "--parameters",
                      dest="parameters",
                      action="append",
                      type="string",
                      help="parameters for various functions.")

    parser.add_option("-a",
                      "--iteration",
                      dest="iteration",
                      type="choice",
                      choices=("pairwise", "all-vs-all"),
                      help="""how to compute stats [%default].""")

    parser.set_defaults(
        method="chi-squared",
        headers=True,
        value_format="%6.4f",
        pvalue_format="%6.4e",
        input_format="full",
        write_separators=True,
        parameters=[],
        iteration=None,
    )

    (options, args) = E.start(parser)

    # drop comment lines, then locate the ">" lines that start each chunk
    lines = [x for x in sys.stdin.readlines() if x[0] != "#"]

    chunks = [x for x in range(len(lines)) if lines[x][0] == ">"]

    if not chunks:
        # no separators: treat the whole input as one chunk starting at
        # line 0 (chunk start + 1 == 0)
        options.write_separators = False
        chunks = [-1]

    # sentinel so that chunks[x + 1] is always a valid end index
    chunks.append(len(lines))

    ninput, noutput, nskipped = 0, 0, 0

    if options.write_separators:
        options.stdout.write("test\t")

    header_prefix = ""

    if options.method == "chi-squared":
        header_prefix = "observed\texpected"
        options.stdout.write("\t".join((header_prefix, "n", "min", "max",
                                        "chi", "df", "P", "passed", "phi")) +
                             "\n")

    elif options.method in ("pearson-chi-squared", ):
        options.stdout.write("column\t")
        options.stdout.write("\t".join((header_prefix, "n", "prob", "obs",
                                        "exp", "chi", "df", "P", "passed",
                                        "phi")) + "\n")

        if len(options.parameters) == 0:
            # string exceptions are not valid; raise a real exception so
            # the message actually reaches the user
            raise ValueError(
                "out of parameters - please supply probability or filename with probabilities."
            )

        param = options.parameters[0]
        del options.parameters[0]

        if options.write_separators:
            # one probability per chunk, looked up by chunk identifier
            probabilities = IOTools.ReadMap(IOTools.open_file(param, "r"),
                                            map_functions=(str, float))
        else:
            probability = float(param)

    for x in range(len(chunks) - 1):
        ninput += 1
        matrix, row_headers, col_headers = MatlabTools.readMatrix(
            StringIO("".join(lines[chunks[x] + 1:chunks[x + 1]])),
            format=options.input_format,
            headers=options.headers)
        nrows, ncols = matrix.shape

        if options.loglevel >= 2:
            options.stdlog.write(
                "# read matrix: %i x %i, %i row titles, %i colum titles.\n" %
                (nrows, ncols, len(row_headers), len(col_headers)))

        if options.write_separators:
            # separator line without the leading ">" and trailing newline
            options.stdout.write(lines[chunks[x]][1:-1] + "\t")

        # row pairs to compare; stays empty when no iteration mode is set
        nheaders = len(row_headers)
        pairs = []
        if options.iteration == "pairwise":
            pairs = [(row1, row2)
                     for row1 in range(nheaders)
                     for row2 in range(row1 + 1, nheaders)]
        elif options.iteration == "all-vs-all":
            pairs = [(row1, row2)
                     for row1 in range(nheaders)
                     for row2 in range(nheaders)
                     if row1 != row2]

        if options.method == "chi-squared":

            for row1, row2 in pairs:
                row_header1 = row_headers[row1]
                row_header2 = row_headers[row2]
                try:
                    result = Stats.doChiSquaredTest(
                        numpy.vstack((matrix[row1], matrix[row2])))
                except ValueError:
                    nskipped += 1
                    continue

                noutput += 1
                options.stdout.write("\t".join(
                    ("%s" % row_header1, "%s" % row_header2,
                     "%i" % result.mSampleSize, "%i" % min(matrix.flat),
                     "%i" % max(matrix.flat), options.value_format %
                     result.mChiSquaredValue, "%i" % result.mDegreesFreedom,
                     options.pvalue_format % result.mProbability,
                     "%s" % result.mSignificance,
                     options.value_format % result.mPhi)) + "\n")

        elif options.method == "pearson-chi-squared":

            if nrows != 2:
                raise ValueError("only implemented for 2xn table")

            if options.write_separators:
                # first whitespace-delimited token of the separator line
                chunk_id = re.match(r"(\S+)",
                                    lines[chunks[x]][1:-1]).groups()[0]
                probability = probabilities[chunk_id]

            for col in range(ncols):
                options.stdout.write("%s\t" % col_headers[col])
                result = Stats.doPearsonChiSquaredTest(probability,
                                                       sum(matrix[:, col]),
                                                       matrix[0, col])
                options.stdout.write("\t".join(
                    ("%i" % result.mSampleSize, "%f" % probability,
                     "%i" % result.mObserved, "%f" % result.mExpected,
                     options.value_format % result.mChiSquaredValue,
                     "%i" % result.mDegreesFreedom, options.pvalue_format %
                     result.mProbability, "%s" % result.mSignificance,
                     options.value_format % result.mPhi)))
                if col < ncols - 1:
                    # start the next row and repeat the chunk label on it
                    options.stdout.write("\n")
                    if options.write_separators:
                        options.stdout.write(lines[chunks[x]][1:-1] + "\t")

            options.stdout.write("\n")

    E.info("# ninput=%i, noutput=%i, nskipped=%i\n" %
           (ninput, noutput, nskipped))

    E.stop()

Exemple #11

0

Afficher le fichier

Fichier : matrix2stats.py Projet : siping/cgat

        if len(options.parameters) == 0:
            raise "out of parameters - please supply probability or filename with probabilities."

        param = options.parameters[0]
        del options.parameters[0]

        if options.write_separators:
            probabilities = IOTools.ReadMap( open( param, "r" ), map_functions = (str,float) )
        else:
            probability = float( param )


    for x in range(len(chunks) -1 ):
        ninput += 1
        matrix, row_headers, col_headers = MatlabTools.readMatrix( StringIO.StringIO("".join(lines[chunks[x]+1:chunks[x+1]])),
                                                                       format=options.input_format, 
                                                                       headers = options.headers )
        nrows, ncols = matrix.shape

        if options.loglevel >= 2:
            options.stdlog.write( "# read matrix: %i x %i, %i row titles, %i colum titles.\n" %\
                                      (nrows, ncols, len(row_headers), len(col_headers)))

        if options.write_separators:
            options.stdout.write( lines[chunks[x]][1:-1] + "\t" )

        pairs = []
        if options.iteration == "pairwise":
            pairs = []
            for row1 in range( 0, len(row_headers) ):
                for row2 in range( row1+1, len(row_headers) ):

Exemple #12

0

Afficher le fichier

    def run(self):
        """Run the configured phylip program and collect its output.

        The program named in ``self.mProgram`` is started inside
        ``self.mTempdir`` and is fed ``self.mOptions`` (one entry per line)
        on stdin.  Afterwards the output files found in the temporary
        directory are parsed:

        * ``outtree``: parsed into ``result.mNexus`` with taxa mapped back
          via ``self.mMapPhylip2Input``.
        * ``outfile`` (only if there is no ``outtree``): read as a phylip
          matrix for ``dnadist``/``protdist``, or passed to
          ``result.parseContrasts`` for ``contrast``.

        The temporary directory is removed only when ``self.mLogLevel``
        is 0.

        Returns:
            The populated ``PhylipResult``.

        Raises:
            UsageError: if no program is set or it exits non-zero.
            NotImplementedError: if neither ``outtree`` nor ``outfile``
                was produced.
        """

        self.prepareRun()

        if not self.mProgram:
            raise UsageError("no program specified.")

        # NOTE(review): the command is run through the shell; confirm that
        # self.mProgram never carries untrusted input.
        # universal_newlines=True opens the pipes in text mode so that the
        # str passed to communicate() is accepted on Python 3 as well.
        s = subprocess.Popen("%s" % (self.mProgram),
                             shell=True,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             cwd=self.mTempdir,
                             close_fds=True,
                             universal_newlines=True)

        (out, err) = s.communicate("\n".join(self.mOptions) + "\n")

        if s.returncode != 0:
            raise UsageError(
                "Error in running phylip.\n%s\n%s\nTemporary directory was %s" % (
                    out, err, self.mTempdir))

        # Parse output files that might have been created:
        result = PhylipResult()

        tree_filename = "%s/outtree" % self.mTempdir
        out_filename = "%s/outfile" % self.mTempdir

        # parse tree file
        if os.path.exists(tree_filename):

            with open(tree_filename, "r") as infile:
                nexus = TreeTools.Newick2Nexus(infile)
            for tree in nexus.trees:
                TreeTools.MapTaxa(tree, self.mMapPhylip2Input)
            result.mNexus = nexus
            if self.mLogLevel >= 1:
                print("# received tree with %i taxa" % (len(
                    TreeTools.GetTaxa(nexus.trees[0]))))

        elif os.path.exists(out_filename):

            if self.mProgram in ("dnadist", "protdist"):
                with open(out_filename, "r") as infile:
                    result.mMatrix, row_headers, col_headers = MatlabTools.readMatrix(
                        infile, format="phylip")
                # translate phylip identifiers back to the input names;
                # the same header list is used for rows and columns
                result.mRowHeaders = [
                    self.mMapPhylip2Input[x] for x in row_headers
                ]
                result.mColHeaders = result.mRowHeaders
            elif self.mProgram == "contrast":

                with open(out_filename, "r") as infile:
                    result.parseContrasts(infile)

        else:
            # string exceptions are not valid; raise a real exception
            raise NotImplementedError("other return types not implemented")

        if self.mLogLevel >= 2:
            print(out)

        # keep the temporary directory for inspection unless logging is off
        if self.mLogLevel == 0:
            shutil.rmtree(self.mTempdir)

        return result

Exemple #13

0

Afficher le fichier

def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Input is read from ``sys.stdin``. Lines starting with ``#`` are
    dropped. Lines starting with ``>`` split the input into chunks; each
    chunk is parsed as one matrix with ``MatlabTools.readMatrix``. If no
    ``>`` line is present the whole input is treated as a single matrix
    and no separator lines are written.

    Every method given with ``--method`` is applied in the order given;
    the output of one method is the input of the next. Methods that need
    extra arguments consume them, in order, from the comma-separated
    ``--parameters`` list. The resulting matrix is written to
    ``options.stdout`` with ``MatlabTools.writeMatrix``.

    Raises IOError if the input is empty and ValueError if a method is
    applied to a matrix it does not support or if a mask method is
    requested without the corresponding list file. A chunk that cannot
    be parsed (ValueError from the reader) is reported with ``E.warn``
    and skipped.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: matrix2matrix.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    # NOTE(review): the branches for "normalize-by-row", "minmax", "mul",
    # "angle", "euclid", "sub" and "add-matrix" below are implemented but
    # not listed here, so they cannot be selected from the command line.
    # Confirm whether that is intentional before adding them.
    parser.add_option("-m",
                      "--method",
                      dest="methods",
                      type="choice",
                      action="append",
                      choices=(
                          "normalize-by-min-diagonal",
                          "normalize-by-column",
                          "log",
                          "ln",
                          "negzero2value",
                          "set-diagonal",
                          "subtract-matrix",
                          "mix-matrix",
                          "normalize-by-matrix",
                          "normalize-by-column-max",
                          "normalize-by-row-max",
                          "normalize-by-column-min",
                          "normalize-by-row-min",
                          "normalize-by-column-median",
                          "normalize-by-row-median",
                          "normalize-by-column-mean",
                          "normalize-by-row-mean",
                          "normalize-by-column-total",
                          "normalize-by-row-total",
                          "correspondence-analysis",
                          "normalize-by-value",
                          "add-value",
                          "sort-rows",
                          "sort-columns",
                          "transpose",
                          "upper-bound",
                          "lower-bound",
                          "subtract-first-col",
                          "multiply-by-value",
                          "divide-by-value",
                          "mask-rows",
                          "mask-columns",
                          "mask-rows-and-columns",
                          "symmetrize-mean",
                          "symmetrize-max",
                          "symmetrize-min",
                      ),
                      help="""method to use [default=%default]""")

    parser.add_option("-s",
                      "--scale",
                      dest="scale",
                      type="float",
                      help="factor to scale matrix by [default=%default].")

    parser.add_option("-f",
                      "--format",
                      dest="format",
                      type="string",
                      help="output number format [default=%default].")

    parser.add_option("--rows-tsv-file",
                      dest="filename_rows",
                      type="string",
                      help="filename with rows to mask [default=%default].")

    parser.add_option("--columns-tsv-file",
                      dest="filename_columns",
                      type="string",
                      help="filename with columns to mask [default=%default].")

    parser.add_option("-p",
                      "--parameters",
                      dest="parameters",
                      type="string",
                      help="Parameters for various functions.")

    parser.add_option("-t",
                      "--header-names",
                      dest="headers",
                      action="store_true",
                      help="matrix has row/column headers.")

    parser.add_option("--no-headers",
                      dest="headers",
                      action="store_false",
                      help="matrix has no row/column headers.")

    parser.add_option("-a",
                      "--value",
                      dest="value",
                      type="float",
                      help="value to use for various algorithms.")

    parser.add_option("-i",
                      "--input-format",
                      dest="input_format",
                      type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""input format for matrix.""")

    parser.add_option("-o",
                      "--output-format",
                      dest="output_format",
                      type="choice",
                      choices=("full", "sparse", "phylip"),
                      help="""output format for matrix.""")

    parser.add_option(
        "--missing-value",
        dest="missing",
        type="float",
        help=
        "value to use for missing values. If not set, missing values will cause the script to fail [default=%default]."
    )

    parser.set_defaults(
        methods=[],
        scale=1.0,
        headers=True,
        format="%6.4f",
        output_format="full",
        input_format="full",
        value=0.0,
        parameters="",
        write_separators=True,
        filename_rows=None,
        filename_columns=None,
        missing=None,
    )

    # Pass argv through so that the documented *argv* override is honoured.
    (options, args) = E.Start(parser, argv=argv)

    options.parameters = options.parameters.split(",")

    # Comment lines are never part of a matrix.
    lines = [x for x in sys.stdin.readlines() if not x.startswith("#")]

    if not lines:
        raise IOError("no input")

    # Indices of the ">" separator lines; each one starts a new matrix.
    chunks = [i for i, line in enumerate(lines) if line.startswith(">")]

    if not chunks:
        # Single matrix without separator: pretend a separator sits just
        # before the first line so that the slicing below covers all lines.
        options.write_separators = False
        chunks = [-1]

    # Sentinel marking the end of the last chunk.
    chunks.append(len(lines))

    # Names to mask; stay None unless the corresponding file was given.
    row_names = None
    column_names = None
    if options.filename_rows:
        with open(options.filename_rows, "r") as infile:
            row_names, n = IOTools.ReadList(infile)
    if options.filename_columns:
        with open(options.filename_columns, "r") as infile:
            column_names, n = IOTools.ReadList(infile)

    # Methods operating on a second matrix read from a file named in
    # --parameters.
    matrix_methods = ("normalize-by-matrix", "subtract-matrix",
                      "mix-matrix", "add-matrix")

    # Per-row / per-column summary functions, keyed by method suffix.
    reducers = {
        "max": max,
        "min": min,
        "median": numpy.median,
        "mean": numpy.mean,
        "total": sum,
    }

    # Pairwise combiners used by the symmetrize-* methods.
    combiners = {
        "max": max,
        "min": min,
        "mean": lambda a, b: float(a + b) / 2,
    }

    for start, end in zip(chunks[:-1], chunks[1:]):

        try:
            raw_matrix, row_headers, col_headers = MatlabTools.readMatrix(
                StringIO.StringIO("".join(lines[start + 1:end])),
                format=options.input_format,
                headers=options.headers,
                missing=options.missing)
        except ValueError as msg:
            E.warn("matrix could not be read: %s" % msg)
            continue

        nrows, ncols = raw_matrix.shape

        E.debug("read matrix: %i x %i, %i row titles, %i colum titles" %
                (nrows, ncols, len(row_headers), len(col_headers)))

        # Index of the next unused entry in options.parameters.
        parameter = 0

        for method in options.methods:

            # Work on a copy so that branches can still read the unmodified
            # input of this step from raw_matrix.
            matrix = numpy.reshape(numpy.array(raw_matrix), raw_matrix.shape)

            # Load the second matrix first. This must not be part of the
            # if/elif chain below, otherwise the branches that actually
            # apply the second matrix would never be reached.
            if method in matrix_methods:

                with open(options.parameters[parameter], "r") as infile:
                    other_matrix, other_row_headers, other_col_headers = \
                        MatlabTools.ReadMatrix(infile,
                                               headers=options.headers)

                other_nrows, other_ncols = other_matrix.shape

                if options.loglevel >= 2:
                    options.stdlog.write(
                        "# read second matrix from %s: %i x %i, %i row titles, %i colum titles.\n"
                        % (options.parameters[parameter], other_nrows,
                           other_ncols, len(other_row_headers),
                           len(other_col_headers)))

                parameter += 1

            if method == "normalize-by-min-diagonal":
                # divide each cell by the smaller of its two diagonal values
                for x in range(nrows):
                    for y in range(ncols):
                        m = min(raw_matrix[x, x], raw_matrix[y, y])
                        if m > 0:
                            matrix[x, y] = raw_matrix[x, y] / m

            elif method == "normalize-by-column":
                if nrows != ncols:
                    raise ValueError("only supported for symmeric matrices.")

                # divide each cell by the diagonal value of its column
                for x in range(nrows):
                    for y in range(ncols):
                        if raw_matrix[y, y] > 0:
                            matrix[x, y] = raw_matrix[x, y] / raw_matrix[y, y]

            elif method == "normalize-by-value":
                matrix = raw_matrix / float(options.parameters[parameter])
                parameter += 1

            elif method == "normalize-by-row":
                if nrows != ncols:
                    raise ValueError("only supported for symmeric matrices.")

                # divide each cell by the diagonal value of its row; guard
                # on the value that is actually used as the divisor.
                for x in range(nrows):
                    for y in range(ncols):
                        if raw_matrix[x, x] > 0:
                            matrix[x, y] = raw_matrix[x, y] / raw_matrix[x, x]

            elif method == "subtract-first-col":
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] -= raw_matrix[x, 0]

            elif method.startswith("normalize-by-column"):
                # normalize-by-column-{max,min,median,mean,total}
                f = reducers[method.rsplit("-", 1)[-1]]

                for y in range(ncols):
                    m = f(matrix[:, y])
                    if m != 0:
                        for x in range(nrows):
                            matrix[x, y] = matrix[x, y] / m

            elif method.startswith("normalize-by-row"):
                # normalize-by-row-{max,min,median,mean,total}
                f = reducers[method.rsplit("-", 1)[-1]]

                for x in range(nrows):
                    m = f(matrix[x, :])
                    if m != 0:
                        for y in range(ncols):
                            matrix[x, y] = raw_matrix[x, y] / m

            elif method == "negzero2value":
                # set zero/negative values to a value
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] <= 0:
                            matrix[x, y] = options.value

            elif method == "minmax":
                # sort each pair of mirrored cells. Every off-diagonal pair
                # is visited twice, so after the loop the larger value sits
                # above the diagonal and the smaller one below it.
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y], matrix[y, x] = \
                            min(matrix[x, y], matrix[y, x]), \
                            max(matrix[x, y], matrix[y, x])

            elif method == "log":
                # apply log10 to all positive values.
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] > 0:
                            matrix[x, y] = math.log10(matrix[x, y])

            elif method == "ln":
                # apply natural log to all positive values.
                for x in range(nrows):
                    for y in range(ncols):
                        if matrix[x, y] > 0:
                            matrix[x, y] = math.log(matrix[x, y])

            elif method == "transpose":
                matrix = numpy.transpose(matrix)
                row_headers, col_headers = col_headers, row_headers
                nrows, ncols = ncols, nrows

            elif method == "mul":
                # matrix times its transpose: result is nrows x nrows
                matrix = numpy.dot(matrix, numpy.transpose(matrix))
                col_headers = row_headers
                ncols = nrows

            elif method == "multiply-by-value":
                matrix *= options.value

            elif method == "divide-by-value":
                matrix /= options.value

            elif method == "add-value":
                matrix += options.value

            elif method == "angle":
                # write angles between col vectors: dot products of all
                # column pairs divided by the product of the column norms
                v1 = numpy.sqrt(numpy.sum(numpy.power(matrix, 2), 0))
                matrix = numpy.dot(numpy.transpose(matrix), matrix)
                row_headers = col_headers
                nrows = ncols
                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] /= v1[x] * v1[y]

            elif method == "euclid":
                # convert to euclidean distance matrix between columns
                matrix = numpy.zeros((ncols, ncols), float)
                for c1 in range(0, ncols - 1):
                    for c2 in range(c1 + 1, ncols):
                        for r in range(0, nrows):
                            d = raw_matrix[r][c1] - raw_matrix[r][c2]
                            matrix[c1, c2] += (d * d)
                        matrix[c2, c1] = matrix[c1, c2]
                matrix = numpy.sqrt(matrix)
                row_headers = col_headers
                nrows = ncols

            elif method.startswith("symmetrize"):
                if nrows != ncols:
                    raise ValueError(
                        "symmetrize only available for symmetric matrices")
                if row_headers != col_headers:
                    raise ValueError(
                        "symmetrize not available for permuted matrices")

                # symmetrize-{max,min,mean}
                f = combiners[method.split("-")[1]]

                for x in range(nrows):
                    for y in range(ncols):
                        matrix[x, y] = matrix[y, x] = f(matrix[x, y],
                                                        matrix[y, x])

            elif method == "sub":
                matrix = options.value - matrix

            elif method in ("lower-bound", "upper-bound"):
                # two parameters: the boundary and the replacement value
                boundary = float(options.parameters[parameter])
                new_value = float(options.parameters[parameter + 1])
                parameter += 2
                if method == "upper-bound":
                    for x in range(nrows):
                        for y in range(ncols):
                            if matrix[x, y] > boundary:
                                matrix[x, y] = new_value
                else:
                    for x in range(nrows):
                        for y in range(ncols):
                            if matrix[x, y] < boundary:
                                matrix[x, y] = new_value

            elif method == "subtract-matrix":
                matrix = matrix - other_matrix

            elif method == "add-matrix":
                matrix = matrix + other_matrix

            elif method == "normalize-by-matrix":

                # set 0s to 1 in the other matrix to avoid division by zero
                for x in range(nrows):
                    for y in range(ncols):
                        if other_matrix[x, y] == 0:
                            other_matrix[x, y] = 1.0

                matrix = matrix / other_matrix

            elif method == "mix-matrix":
                # take the cells above the diagonal from the other matrix
                for x in range(len(other_row_headers) - 1):
                    for y in range(x + 1, len(other_col_headers)):
                        matrix[x, y] = other_matrix[x, y]

            elif method == "set-diagonal":
                value = float(options.parameters[parameter])
                for x in range(min(nrows, ncols)):
                    matrix[x, x] = value
                parameter += 1

            elif method == "correspondence-analysis":
                row_indices, col_indices = CorrespondenceAnalysis.GetIndices(
                    raw_matrix)
                map_row_new2old = numpy.argsort(row_indices)
                map_col_new2old = numpy.argsort(col_indices)

                matrix, row_headers, col_headers = CorrespondenceAnalysis.GetPermutatedMatrix(
                    raw_matrix,
                    map_row_new2old,
                    map_col_new2old,
                    row_headers=row_headers,
                    col_headers=col_headers)

            elif method == "mask-rows":
                if row_names is None:
                    raise ValueError("mask-rows requires --rows-tsv-file")
                masked_rows = set(row_names)
                for x, header in enumerate(row_headers):
                    if header in masked_rows:
                        matrix[x, :] = options.value

            elif method == "mask-columns":
                if column_names is None:
                    raise ValueError(
                        "mask-columns requires --columns-tsv-file")
                masked_cols = set(column_names)
                for y, header in enumerate(col_headers):
                    if header in masked_cols:
                        matrix[:, y] = options.value

            elif method == "mask-rows-and-columns":
                if row_names is None or column_names is None:
                    raise ValueError(
                        "mask-rows-and-columns requires --rows-tsv-file "
                        "and --columns-tsv-file")
                masked_rows = set(row_names)
                masked_cols = set(column_names)
                for x, row_header in enumerate(row_headers):
                    if row_header not in masked_rows:
                        continue
                    for y, col_header in enumerate(col_headers):
                        if col_header in masked_cols:
                            matrix[x, y] = options.value

            # The result of this method is the input of the next one.
            raw_matrix = numpy.reshape(numpy.array(matrix), matrix.shape)

        # raw_matrix holds the final result; with no methods given this is
        # the matrix as read (simple re-formatting jobs).
        matrix = raw_matrix

        if options.write_separators:
            options.stdout.write(lines[start])

        # Write to the same stream as the separator line.
        MatlabTools.writeMatrix(options.stdout,
                                matrix,
                                value_format=options.format,
                                format=options.output_format,
                                row_headers=row_headers,
                                col_headers=col_headers)