Python IOTools.readTable Examples

Programming Language: Python

Namespace/Package Name: CGAT

Class/Type: IOTools

Method/Function: readTable

Examples at hotexamples.com: 10

Python IOTools.readTable - 10 examples found. These are the top rated real world Python examples of CGAT.IOTools.readTable extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

openFile(30)

ReadMap(23)

ReadList(21)

isEmpty(14)

writeLines(9)

readMap(9)

which(8)

getInvertedDictionary(7)

readList(7)

prettyPercent(7)

zapFile(6)

convertDictionary(6)

snip(5)

FilePool(5)

iterate(5)

getNumLines(4)

readTable(4)

flatten(4)

readMultiMap(3)

str2val(3)

touchFile(3)

writeMatrix(3)

isComplete(2)

getLastLine(2)

readMatrix(2)

val2str(2)

human2bytes(1)

force_str(1)

cloneFile(1)

prettyFloat(1)

Example #1

Show file

File: histograms2kl.py Project: Q-KIM/cgat

def main(argv=None):
    """script main.

    Reads a numeric table from stdin via ``IOTools.readTable``, adds the
    pseudocount to every column except column 0, optionally scales each of
    those columns so that it sums to one and writes, for every pair of
    columns, the Kullback-Leibler divergence in both directions to
    ``options.stdout``.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id",
        usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      help="method to use [kl=kullback-leibler]",
                      choices=("kl",))
    parser.add_option("-n", "--no-normalize", dest="normalize", action="store_false",
                      help="do not normalize data")
    parser.add_option("-p", "--pseudocounts", dest="pseudocounts", type="int",
                      help="pseudocounts to add.")
    parser.add_option("-f", "--number-format", dest="number_format", type="string",
                      help="number format.")

    parser.set_defaults(
        method="kl",
        columns="all",
        headers=True,
        xrange=None,
        pseudocounts=1,
        normalize=True,
        number_format="%6.4f"
    )

    # Forward argv: without it an explicitly supplied argument list was
    # silently ignored in favour of sys.argv.
    (options, args) = E.Start(parser,
                              argv=argv,
                              add_pipe_options=True)

    if options.xrange:
        # Build a real list; a lazy map object (Python 3) can only be
        # consumed once and cannot be indexed.
        options.xrange = [float(v) for v in options.xrange.split(",")]

    data, legend = IOTools.readTable(sys.stdin,
                                     numeric_type=numpy.float32,
                                     take=options.columns,
                                     headers=options.headers,
                                     truncate=options.xrange)

    nrows, ncols = data.shape

    # first: add pseudocounts and normalize each column (column 0 is
    # left untouched)
    pseudocount = float(options.pseudocounts)
    for col in range(1, ncols):
        for row in range(nrows):
            data[row, col] = data[row, col] + pseudocount
        if options.normalize:
            column_total = numpy.sum(data[:, col])
            for row in range(nrows):
                data[row, col] = data[row, col] / column_total

    # second: compare every unordered pair of columns once and report
    # both directions of the divergence
    for x in range(1, len(legend) - 1):
        for y in range(x + 1, len(legend)):

            if options.method == "kl":
                d1 = 0.0
                d2 = 0.0
                for row in range(nrows):
                    p = data[row, x]
                    q = data[row, y]
                    d1 += p * math.log(p / q)
                    d2 += q * math.log(q / p)

                options.stdout.write("%s\t%s\t%s\n" %
                                     (legend[x], legend[y],
                                      options.number_format % d1))
                options.stdout.write("%s\t%s\t%s\n" %
                                     (legend[y], legend[x],
                                      options.number_format % d2))

    E.Stop()

Example #2

Show file

File: histogram2histogram.py Project: zpeng1989/cgat

def main(argv=None):
    """script main.

    With ``--method=append`` a two-column histogram read from stdin is
    printed with additional frequency and cumulative columns.  For the
    other methods the table is read with ``IOTools.readTable`` and every
    column except column 0 is transformed in place (``cumul``,
    ``rcumul`` or ``normalize``) before the table is printed.

    parses command line options in sys.argv, unless *argv* is given.

    Raises ValueError if no data lines are found or a method is unknown.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: histogram2histogram.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    parser.add_option("-i",
                      "--is-int",
                      dest="is_ints",
                      action="store_true",
                      help="categories are integers.")
    parser.add_option("-m",
                      "--method",
                      dest="method",
                      type="choice",
                      choices=("append", "cumul", "rcumul", "normalize"),
                      help="method(s) to apply.")
    parser.add_option("--no-headers",
                      dest="headers",
                      action="store_false",
                      help="histogram has no headers.")
    parser.add_option("-c",
                      "--columns",
                      dest="columns",
                      type="string",
                      help="columns to use for plotting.")
    parser.add_option("",
                      "--truncate",
                      dest="truncate",
                      type="string",
                      help="truncate at range.")
    parser.add_option("",
                      "--no-out-of-range",
                      dest="cumulate_out_of_range",
                      action="store_false",
                      help="add up bins out of range.")
    parser.add_option("--bin-format",
                      dest="format_bin",
                      type="string",
                      help="format for bins.")
    parser.add_option("--value-format",
                      dest="format_val",
                      type="string",
                      help="format for vals.")

    parser.set_defaults(
        is_ints=False,
        method="append",
        columns="all",
        headers=True,
        truncate=None,
        cumulate_out_of_range=True,
        format_bin="%6.4f",
        format_val="%6.4f",
    )

    # Forward argv so that an explicitly supplied argument list is used.
    (options, args) = E.Start(parser, argv=argv)

    # old histogram2histogram.py semantics - need to merged with newer
    # code below.
    if options.method == "append":

        # retrieve histogram, dropping comment lines
        lines = [x for x in sys.stdin.readlines() if not x.startswith("#")]

        # Check before touching lines[0]: empty input used to die with an
        # IndexError instead of a meaningful message.
        if not lines:
            raise ValueError("No data found.")

        # check if first line contains a header.  rstrip("\n") instead of
        # [:-1] so that a final line without newline keeps its last digit.
        d = lines[0].rstrip("\n").split("\t")[0]
        try:
            if options.is_ints:
                int(d)
            else:
                float(d)
        except ValueError:
            print("\t".join(
                (d, "counts", "frequency", "cumulative counts",
                 "increasing cumulative frequency", "cumulative counts",
                 "decreasing cumulative frequency")))
            del lines[0]

        data = [[float(v) for v in x.rstrip("\n").split("\t")]
                for x in lines]

        if len(data) == 0:
            # string exceptions are not valid; raise a real exception
            raise ValueError("No data found.")

        total = float(sum(row[1] for row in data))

        cumul_down = int(total)
        cumul_up = 0

        if options.is_ints:
            form = "%i\t%i\t%6.4f\t%i\t%6.4f\t%i\t%6.4f"
        else:
            form = "%6.4f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%6.4f"

        # each row must consist of exactly two fields: bin and count
        for category, val in data:
            percent = float(val) / total
            cumul_up += val
            percent_cumul_up = float(cumul_up) / total
            percent_cumul_down = float(cumul_down) / total

            print(form %
                  (category, val, percent, cumul_up, percent_cumul_up,
                   cumul_down, percent_cumul_down))

            cumul_down -= val

    else:

        if options.truncate:
            options.truncate = [float(v)
                                for v in options.truncate.split(",")]

        options.method = options.method.split(",")
        data, legend = IOTools.readTable(
            sys.stdin,
            numeric_type=numpy.float32,
            take=options.columns,
            headers=options.headers,
            truncate=options.truncate,
            cumulate_out_of_range=options.cumulate_out_of_range)

        nfields = len(legend)

        # note: because of MA, iteration makes copy of slices
        # Solution: inplace edits.
        nrows, ncols = data.shape

        for method in options.method:
            if method == "cumul":
                running = [0] * ncols
                for x in range(nrows):
                    for y in range(1, ncols):
                        data[x, y] += running[y]
                        running[y] = data[x, y]

            elif method == "rcumul":
                running = [0] * ncols
                # stop value is -1 so that row 0 is included; the previous
                # stop of 0 left the first row without its cumulative sum
                for x in range(nrows - 1, -1, -1):
                    for y in range(1, ncols):
                        data[x, y] += running[y]
                        running[y] = data[x, y]

            elif method == "normalize":
                maxima = [0] * ncols
                for x in range(nrows):
                    for y in range(1, ncols):
                        # the conversion to float is necessary
                        maxima[y] = max(maxima[y], float(data[x, y]))

                # avoid division by zero for all-zero columns
                for y in range(1, ncols):
                    if maxima[y] == 0:
                        maxima[y] = 1.0

                for x in range(nrows):
                    for y in range(1, ncols):
                        data[x, y] = data[x, y] / maxima[y]
            else:
                raise ValueError("unknown method %s" % method)

        print("\t".join(legend))

        row_format = options.format_bin + "\t" + \
            "\t".join([options.format_val] * (nfields - 1))

        for d in data:
            print(row_format % tuple(d))

    E.Stop()

Example #3

Show file

File: histograms2kl.py Project: gsc0107/cgat

def main(argv=None):
    """script main.

    Reads a numeric table from stdin via ``IOTools.readTable``, adds the
    pseudocount to all columns but column 0, optionally rescales each of
    these columns to a sum of one and writes the Kullback-Leibler
    divergence of every column pair, in both directions, to
    ``options.stdout``.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id",
                            usage=globals()["__doc__"])

    parser.add_option("-m",
                      "--method",
                      dest="method",
                      type="choice",
                      help="method to use [kl=kullback-leibler]",
                      choices=("kl", ))
    parser.add_option("-n",
                      "--no-normalize",
                      dest="normalize",
                      action="store_false",
                      help="do not normalize data")
    parser.add_option("-p",
                      "--pseudocounts",
                      dest="pseudocounts",
                      type="int",
                      help="pseudocounts to add.")
    parser.add_option("-f",
                      "--number-format",
                      dest="number_format",
                      type="string",
                      help="number format.")

    parser.set_defaults(method="kl",
                        columns="all",
                        headers=True,
                        xrange=None,
                        pseudocounts=1,
                        normalize=True,
                        number_format="%6.4f")

    # Pass argv on; previously a caller-supplied argument list was
    # computed above but never used.
    (options, args) = E.Start(parser, argv=argv, add_pipe_options=True)

    if options.xrange:
        options.xrange = list(map(float, options.xrange.split(",")))

    data, legend = IOTools.readTable(sys.stdin,
                                     numeric_type=numpy.float32,
                                     take=options.columns,
                                     headers=options.headers,
                                     truncate=options.xrange)

    nrows, ncols = data.shape
    row_indices = range(nrows)

    # first: add pseudocounts to and normalize each column (column 0 is
    # skipped)
    increment = float(options.pseudocounts)
    for y in range(1, ncols):
        for x in row_indices:
            data[x, y] = data[x, y] + increment
        if options.normalize:
            t = numpy.sum(data[:, y])
            for x in row_indices:
                data[x, y] = data[x, y] / t

    # second: pairwise comparison; each unordered pair is visited once
    # and both directions are written
    for x in range(1, len(legend) - 1):
        for y in range(x + 1, len(legend)):

            if options.method == "kl":
                d1 = 0.0
                d2 = 0.0
                for row in row_indices:
                    p = data[row, x]
                    q = data[row, y]
                    d1 += p * math.log(p / q)
                    d2 += q * math.log(q / p)

                options.stdout.write(
                    "%s\t%s\t%s\n" %
                    (legend[x], legend[y], options.number_format % d1))
                options.stdout.write(
                    "%s\t%s\t%s\n" %
                    (legend[y], legend[x], options.number_format % d2))

    E.Stop()

Example #4

Show file

File: data2multiple_anova.py Project: Charlie-George/cgat

def main(argv=None):
    """script main.

    Reads one table per file named on the command line and, for every
    pair of files, fits the linear model ``y ~ x`` in R on the first
    column of both tables (labelled ``x`` and ``y`` by file of origin).
    One line with the ANOVA fields is written per pair to
    ``options.stdout``.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: data2multiple_anova.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to take for calculating histograms.")
    parser.add_option("-t", "--filename-tree", dest="filename_tree", type="string",
                      help="filename with tree(s).")
    parser.add_option("--skip-header", dest="add_header", action="store_false",
                      help="do not add header to flat format.")
    parser.add_option("--write-header", dest="write_header", action="store_true",
                      help="write header and exit.")
    parser.add_option("--debug", dest="debug", action="store_true",
                      help="debug mode")
    parser.add_option("--display-tree", dest="display_tree", action="store_true",
                      help="display the tree")

    parser.add_option("-m", "--method", dest="methods", type="choice", action="append",
                      choices=("contrasts", "spearman", "pearson", "compute"),
                      help="methods to perform on contrasts.")

    parser.set_defaults(
        columns="all",
        filename_tree=None,
        add_header=True,
        write_header=False,
        debug=False,
        methods=[],
        value_format="%6.4f",
        pvalue_format="%e",
        display_tree=False,
    )

    # Forward argv so that an explicitly supplied argument list is used.
    (options, args) = E.Start(parser, argv=argv, quiet=True)

    if options.columns not in ("all", "all-but-first"):
        # 1-based column numbers on the command line -> 0-based indices.
        # A list (not a lazy map) so it can be reused for every file.
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    data = []

    options.filenames = args

    for filename in options.filenames:

        # the context manager closes the file even if readTable raises
        with open(filename, "r") as infile:
            table, headers = IOTools.readTable(
                infile, take=options.columns, headers=False)

        data.append(table)

    fields = ["Df", "Sum Sq", "F value", "Pr(>F)", "Mean Sq"]

    options.stdout.write("set1\tset2")
    for field in fields:
        options.stdout.write("\t%s" % field)
    options.stdout.write("\n")

    # CODE needs to be refactored for rpy2 usage

    for x in range(len(data)):

        for y in range(x + 1, len(data)):

            rpy.set_default_mode(rpy.NO_CONVERSION)

            # one factor label per observation, values in the same order
            factors = ["x"] * len(data[x][:, 0]) + ["y"] * len(data[y][:, 0])
            values = list(data[x][:, 0]) + list(data[y][:, 0])

            linear_model = R.lm(
                R("y ~ x"), data=R.data_frame(x=factors, y=values))
            rpy.set_default_mode(rpy.BASIC_CONVERSION)
            result = R.anova(linear_model)

            options.stdout.write(
                "%s\t%s" % (options.filenames[x], options.filenames[y]))
            for field in fields:
                options.stdout.write("\t%s" % str(result[field]))
            options.stdout.write("\n")

    # pair E.Start with E.Stop, as the other scripts do
    E.Stop()

Example #5

Show file

File: histogram2histogram.py Project: CGATOxford/cgat

def main(argv=None):
    """script main.

    With ``--method=append`` a two-column histogram read from stdin is
    printed with additional frequency and cumulative columns.  For the
    other methods the table is read with ``IOTools.readTable`` and every
    column except column 0 is transformed in place (``cumul``,
    ``rcumul`` or ``normalize``) before the table is printed.

    parses command line options in sys.argv, unless *argv* is given.

    Raises ValueError if no data lines are found or a method is unknown.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: histogram2histogram.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-i", "--is-int", dest="is_ints", action="store_true",
                      help="categories are integers.")
    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("append", "cumul", "rcumul", "normalize"),
                      help="method(s) to apply.")
    parser.add_option("--no-headers", dest="headers", action="store_false",
                      help="histogram has no headers.")
    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to use for plotting.")
    parser.add_option("", "--truncate", dest="truncate", type="string",
                      help="truncate at range.")
    parser.add_option("", "--no-out-of-range", dest="cumulate_out_of_range", action="store_false",
                      help="add up bins out of range.")
    parser.add_option("--bin-format", dest="format_bin", type="string",
                      help="format for bins.")
    parser.add_option("--value-format", dest="format_val", type="string",
                      help="format for vals.")

    parser.set_defaults(
        is_ints=False,
        method="append",
        columns="all",
        headers=True,
        truncate=None,
        cumulate_out_of_range=True,
        format_bin="%6.4f",
        format_val="%6.4f",
    )

    # Forward argv so that an explicitly supplied argument list is used.
    (options, args) = E.Start(parser, argv=argv)

    # old histogram2histogram.py semantics - need to merged with newer
    # code below.
    if options.method == "append":

        # retrieve histogram, dropping comment lines
        lines = [x for x in sys.stdin.readlines() if x[0] != "#"]

        # Check before touching lines[0]: empty input used to die with an
        # IndexError instead of a meaningful message.
        if not lines:
            raise ValueError("no data found")

        # check if first line contains a header.  rstrip("\n") instead of
        # [:-1] so that a final line without newline keeps its last digit.
        d = lines[0].rstrip("\n").split("\t")[0]
        try:
            if options.is_ints:
                int(d)
            else:
                float(d)
        except ValueError:
            print("\t".join(
                (d, "counts", "frequency",
                 "cumulative counts", "increasing cumulative frequency",
                 "cumulative counts", "decreasing cumulative frequency")))
            del lines[0]

        data = [[float(v) for v in x.rstrip("\n").split("\t")]
                for x in lines]

        if len(data) == 0:
            raise ValueError("no data found")

        # sum() instead of reduce(): reduce is not a builtin in Python 3
        total = float(sum(x[1] for x in data))

        cumul_down = int(total)
        cumul_up = 0

        if options.is_ints:
            form = "%i\t%i\t%6.4f\t%i\t%6.4f\t%i\t%6.4f"
        else:
            form = "%6.4f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%6.4f"

        # each row must consist of exactly two fields: bin and count
        for category, val in data:
            percent = float(val) / total
            cumul_up += val
            percent_cumul_up = float(cumul_up) / total
            percent_cumul_down = float(cumul_down) / total

            print(form %
                  (category, val, percent, cumul_up, percent_cumul_up,
                   cumul_down, percent_cumul_down))

            cumul_down -= val

    else:

        if options.truncate:
            options.truncate = list(map(float, options.truncate.split(",")))

        options.method = options.method.split(",")
        data, legend = IOTools.readTable(sys.stdin,
                                         numeric_type=numpy.float32,
                                         take=options.columns,
                                         headers=options.headers,
                                         truncate=options.truncate,
                                         cumulate_out_of_range=options.cumulate_out_of_range)

        nfields = len(legend)

        # note: because of MA, iteration makes copy of slices
        # Solution: inplace edits.
        nrows, ncols = data.shape

        for method in options.method:
            if method == "cumul":
                running = [0] * ncols
                for x in range(nrows):
                    for y in range(1, ncols):
                        data[x, y] += running[y]
                        running[y] = data[x, y]

            elif method == "rcumul":
                running = [0] * ncols
                # stop value is -1 so that row 0 is included; the previous
                # stop of 0 left the first row without its cumulative sum
                for x in range(nrows - 1, -1, -1):
                    for y in range(1, ncols):
                        data[x, y] += running[y]
                        running[y] = data[x, y]

            elif method == "normalize":
                maxima = [0] * ncols
                for x in range(nrows):
                    for y in range(1, ncols):
                        # the conversion to float is necessary
                        maxima[y] = max(maxima[y], float(data[x, y]))

                # avoid division by zero for all-zero columns
                for y in range(1, ncols):
                    if maxima[y] == 0:
                        maxima[y] = 1.0

                for x in range(nrows):
                    for y in range(1, ncols):
                        data[x, y] = data[x, y] / maxima[y]
            else:
                # raising a plain string is a TypeError in Python 3
                raise ValueError("unknown method %s" % method)

        print("\t".join(legend))

        row_format = options.format_bin + "\t" + \
            "\t".join([options.format_val] * (nfields - 1))

        for d in data:
            print(row_format % tuple(d))

    E.Stop()

Example #6

Show file

File: plot_histogram.py Project: lesheng/cgat

def main():
    """Plot one or more histograms read from a table.

    The table is read with ``IOTools.readTable`` from the file named by
    the first positional argument, or from stdin if there is none or it
    is ``-``.  Column 0 supplies the x values, the remaining columns are
    plotted against it.  The plot is written to ``--hardcopy`` if given
    and shown interactively otherwise.

    Raises ValueError if error bars are combined with ``--cumulate``,
    ``--reverse-cumulate`` or ``--normalize``.
    """

    parser = E.OptionParser(
        version="%prog version: $Id: plot_histogram.py 2782 2009-09-10 11:40:29Z andreas $", usage=globals()["__doc__"])

    parser.add_option("-l", "--legend", dest="legend", type="string",
                      help="legend for plot [default=%default].")
    parser.add_option("-t", "--title", dest="title", type="string",
                      help="title for plot [default=%default].")
    parser.add_option("-p", "--hardcopy", dest="hardcopy", type="string",
                      help="filename for hardcopy of plot. The extension defines the format. Known extensions are: 'emf, eps, jpeg, jpg, pdf, png, ps, raw, rgba, svg, svgz' [default=%default].", metavar="FILE")
    parser.add_option("", "--xrange", dest="xrange", type="string",
                      help="x viewing range of plot [default=%default].")
    parser.add_option("", "--yrange", dest="yrange", type="string",
                      help="y viewing range of plot[default=%default].")
    parser.add_option("-o", "--logscale", dest="logscale", type="string",
                      help="use logscale on x, y or xy [default=%default]")
    parser.add_option("-x", "--xtitle", dest="xtitle", type="string",
                      help="title for x axis [default=%default]")
    parser.add_option("-y", "--ytitle", dest="ytitle", type="string",
                      help="title for y axis [default=%default]")
    parser.add_option("-d", "--dpi", dest="dpi", type="int",
                      help="dpi of images [default=%default]")
    parser.add_option("-n", "--normalize", dest="normalize", action="store_true",
                      help="normalize histograms [default=%default]")
    parser.add_option("--cumulate", dest="cumulate", action="store_true",
                      help="calculate cumulative histogram [default=%default].")
    parser.add_option("--reverse-cumulate", dest="reverse_cumulate", action="store_true",
                      help="calculate cumulative histogram in reverse order [default=%default].")
    parser.add_option("--legend-location", dest="legend_location", type="choice",
                      choices=("upper left", "upper right", "lower left",
                               "lower right", "center", "center right", "center left", "none"),
                      help="location of legend [default=%default]")
    parser.add_option("--backend", dest="backend", type="string",
                      help="backend to use [Agg|SVG|PS] [default=%default]")
    parser.add_option("--symbols", dest="symbols", type="string",
                      help="symbols to use for each histogram [steps|...] [default=%default].")
    parser.add_option("--dump", dest="dump", action="store_true",
                      help="dump data for debug purposes [default=%default].")
    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to use for plotting [default=%default].")
    parser.add_option("--truncate", dest="truncate", action="store_true",
                      help="truncate date within x range. If not set, xrange is simply a viewing range [default=%default].")
    parser.add_option("--as-lines", dest="as_lines", action="store_true",
                      help="plot only lines, no symbols [default=%default].")
    parser.add_option("--noheaders", dest="headers", action="store_false",
                      help="do not take first input line as header [default=%default].")
    parser.add_option("--stacked", dest="stacked", action="store_true",
                      help="do a stacked plot [default=%default].")
    parser.add_option("--add-function", dest="function", type="string",
                      help="add a function to the plot [default=%default].")
    parser.add_option("--add-error-bars", dest="error_bars", type="choice",
                      choices=("interleaved", "blocked"),
                      help="add error bars. The input format is 'interleaved' or 'blocked'. In the interleaved format the error follows each column. I the blocked format first the data, then the errors in the same order [default=%default].")

    parser.set_defaults(
        legend=None,
        title=None,
        hardcopy=None,
        logscale=None,
        xtitle=None,
        ytitle=None,
        xrange=None,
        yrange=None,
        normalize=None,
        columns="all",
        headers=True,
        legend_location="upper right",
        backend="cairo",
        symbols="g-D,b-h,r-+,c-+,m-+,y-+,k-o,g-^,b-<,r->,c-D,m-h",
        dump=False,
        truncate=False,
        cumulate=False,
        reverse_cumulate=False,
        function=None,
        # the option's dest is "error_bars"; the default used to be set
        # on the never-written attribute "add_error_bars"
        error_bars=None,
        as_lines=False,
        stacked=False,
        dpi=80,
    )

    (options, args) = E.Start(parser)

    # import matplotlib/pylab. Has to be done here
    # for batch scripts without GUI.
    import matplotlib
    if options.hardcopy:
        # honour --backend (default "cairo") instead of a fixed backend
        matplotlib.use(options.backend)
    import pylab

    # put this method here (because it requires pylab)
    def doStackedPlot(data, legend):
        """Draw the columns of *data* stacked on top of each other.

        Returns the plotted line handles and the matching legend
        entries.  Sets ``options.xrange``/``options.yrange`` if unset.
        """

        colors = ["red",
                  "blue",
                  "green",
                  "cyan",
                  "magenta",
                  "yellow",
                  "brown",
                  "silver",
                  "purple",
                  "lightyellow",
                  "black",
                  "ivory",
                  "pink",
                  "orange",
                  "gray",
                  "teal"]

        ax = data[:, 0]
        # polygon outline: left to right along the top, back along the
        # bottom
        xvals = numpy.concatenate((ax, ax[::-1]))
        y_top = numpy.zeros(len(ax))

        min_y = min(data[:, 1:].flat)
        max_y = min_y
        new_legend, dummy_lines = [], []

        for i in range(1, len(legend)):
            new_y_top = y_top + data[:, i]
            yvals = numpy.concatenate((new_y_top, y_top[::-1]))
            color = colors[i % len(colors)]
            pylab.fill(xvals, yvals, color)

            y_top = new_y_top
            max_y = max(y_top)

            dummy_lines.append(pylab.plot(xvals, yvals, color))

            new_legend.append(legend[i])

        if not options.xrange:
            options.xrange = min(data[:, 0]), max(data[:, 0])

        if not options.yrange:
            options.yrange = 0, max_y

        return dummy_lines, new_legend

    if options.as_lines:
        options.symbols = []
        for y in ("-", ":", "--"):
            for x in "gbrcmyk":
                options.symbols.append(y + x)
    else:
        options.symbols = options.symbols.split(",")

    # real lists: a lazy map object (Python 3) cannot be passed on as a
    # range
    if options.xrange:
        options.xrange = [float(v) for v in options.xrange.split(",")]
    if options.yrange:
        options.yrange = [float(v) for v in options.yrange.split(",")]

    # Added support for (inclusive) range format: "1,3,5,7-100"  (Gerton
    # 13/12/06)
    if options.columns != "all":
        cols = []
        for d in options.columns.split(','):
            colopts = d.split('-')
            if len(colopts) == 2:
                # convert the inclusive 1-based range to 0-based indices,
                # consistent with the single-column case below
                first, last = int(colopts[0]), int(colopts[1])
                cols.extend(range(first - 1, last))
            else:
                cols.append(int(d) - 1)
        options.columns = cols

    if args and args[0] != "-":
        infile = open(args[0], "r")
    else:
        infile = sys.stdin

    if options.truncate:
        xr = options.xrange
    else:
        xr = None

    # builtin float is what the removed alias numpy.float referred to
    data, legend = IOTools.readTable(infile,
                                     numeric_type=float,
                                     take=options.columns,
                                     headers=options.headers,
                                     truncate=xr)

    if infile is not sys.stdin:
        infile.close()
    if len(data) == 0:  # or data is None:
        E.info("empty table: no plot")
        E.Stop()
        return

    nrows, ncols = data.shape

    # note: because of MA, iteration makes copy of slices
    # Solution: inplace edits.
    if options.cumulate:
        if options.error_bars:
            raise ValueError(
                "can not add error bars to cumulative histogram.")
        if data.mask.any():
            # cumsum does not work with masked arrays, so do it manually
            for y in range(1, ncols):
                c = 0
                for x in range(0, nrows):
                    if not data.mask[x, y]:
                        data[x, y] += c
                        c = data[x, y]
        else:
            for x in range(1, ncols):
                data[:, x] = data[:, x].cumsum()

    elif options.reverse_cumulate:
        if options.error_bars:
            raise ValueError(
                "can not add error bars to cumulative histogram.")
        if data.mask.any():
            l = [0] * ncols
            for x in range(nrows - 1, -1, -1):
                for y in range(1, ncols):
                    if not data.mask[x, y]:
                        data[x, y] += l[y]
                        l[y] = data[x, y]
        else:
            l = [0] * ncols
            for x in range(nrows - 1, -1, -1):
                for y in range(1, ncols):
                    data[x, y] += l[y]
                    l[y] = data[x, y]

    if options.normalize:
        if options.error_bars:
            raise ValueError(
                "can not add error bars to normalized histogram.")
        if data.mask.any():
            m = [0] * ncols
            for x in range(nrows):
                for y in range(1, ncols):
                    if not data.mask[x, y]:
                        m[y] = max(m[y], float(data[x, y]))

            for y in range(1, ncols):
                if m[y] == 0:
                    m[y] = 1.0

            for x in range(nrows):
                for y in range(1, ncols):
                    data[x, y] = data[x, y] / m[y]
        else:
            for x in range(1, ncols):
                column_max = float(data[:, x].max())
                # same zero guard as in the masked branch
                if column_max == 0:
                    column_max = 1.0
                data[:, x] /= column_max

    if options.legend:
        legend = options.legend.split(",")

    if options.dump:
        for d in data:
            print(d)

    if options.title:
        pylab.title(options.title)

    if options.xtitle:
        pylab.xlabel(options.xtitle)
    else:
        pylab.xlabel(legend[0])

    if options.ytitle:
        pylab.ylabel(options.ytitle)

    lines = []
    # use dummy_lines to workaround a bug in errorbars that
    # causes the line styles to be set incorrectly.
    dummy_lines = []
    new_legend = []

    if options.error_bars:
        if options.error_bars == "interleaved":
            step_size = 2
            max_size = len(legend)
        elif options.error_bars == "blocked":
            step_size = 1
            # integer division: the result is used as a range() bound
            max_size = (len(legend) - 1) // 2
    else:
        step_size = 1
        max_size = len(legend)

    if options.stacked:
        dummy_lines, new_legend = doStackedPlot(data, legend)
    else:
        nplotted = 0
        nskipped = 0
        for x in range(1, max_size, step_size):

            s = options.symbols[nplotted % len(options.symbols)]

            yvals = data[:, x]

            # mask the x values wherever the y value is masked, then
            # drop the masked entries from both
            xvals = numpy.ma.masked_array(data[:, 0], numpy.ma.getmask(yvals))

            xvals = xvals.compressed()
            yvals = yvals.compressed()

            if len(xvals) == 0:
                E.warn("skipped empty column %i: %s" % (x, legend[x]))
                # actually skip the column, as the message says
                nskipped += 1
                continue

            if options.error_bars == "interleaved":
                yerr = data[:, x + 1]
                yerr = yerr.compressed()
            else:
                yerr = None

            lines.append(pylab.errorbar(xvals,
                                        yvals,
                                        yerr=yerr,
                                        fmt=s))

            dummy_lines.append(pylab.plot(xvals,
                                          yvals,
                                          s))

            new_legend.append(legend[x])

            nplotted += 1

        E.info("nplotted=%i, nskipped=%i" % (nplotted, nskipped))

    # dummy_lines is filled by both the stacked and the regular branch;
    # testing "lines" made every stacked plot return here without output
    if len(dummy_lines) == 0:
        E.Stop()
        return

    if options.legend_location != "none":
        pylab.figlegend(dummy_lines,
                        new_legend,
                        options.legend_location)

    if options.logscale:
        if "x" in options.logscale:
            pylab.gca().set_xscale('log')
        if "y" in options.logscale:
            pylab.gca().set_yscale('log')

    if options.xrange:
        pylab.xlim(options.xrange)

    if options.yrange:
        pylab.ylim(options.yrange)

    if options.function:
        xstart, xend = pylab.gca().get_xlim()
        increment = (xend - xstart) / 100.0
        # NOTE(review): this evaluates an expression taken from the
        # command line - only use with trusted input.
        # Module level and local names are both made available to the
        # expression.
        namespace = dict(globals())
        namespace.update(locals())
        f = eval("lambda x: %s" % options.function, namespace)
        xvals, yvals = [], []
        for x in range(0, 100):
            xvals.append(xstart)
            yvals.append(f(xstart))
            xstart += increment
        xvals.append(xstart)
        yvals.append(f(xstart))

        pylab.plot(xvals, yvals)

    if options.hardcopy:
        pylab.savefig(os.path.expanduser(options.hardcopy), dpi=options.dpi)
    else:
        pylab.show()

    E.Stop()

Example #7

Show file

File: histogram2histogram.py Project: siping/cgat

            percent_cumul_up = float(cumul_up) / total
            percent_cumul_down = float(cumul_down) / total        

            print form % \
                  (bin, val, percent, cumul_up, percent_cumul_up, cumul_down, percent_cumul_down)

            cumul_down -= val
        
    else:

        if options.truncate: options.truncate = map(float, options.truncate.split(","))

        options.method = options.method.split(",")
        data, legend = IOTools.readTable( sys.stdin,
                                          numeric_type=numpy.float32,
                                          take=options.columns,
                                          headers = options.headers,
                                          truncate= options.truncate,
                                          cumulate_out_of_range = options.cumulate_out_of_range )

        nfields = len(legend)

        ## note: because of MA, iteration makes copy of slices
        ## Solution: inplace edits.
        nrows, ncols = data.shape

        for method in options.method:
            if method == "cumul":
                l = [0] * ncols
                for x in range(nrows):
                    for y in range(1, ncols):
                        data[x,y] += l[y]

Example #8

Show file

File: data2multiple_anova.py Project: kathrinjansen/cgat

def main(argv=None):
    """Run a one-way ANOVA for every pair of input tables.

    Every positional command line argument is a table file that is read
    with ``IOTools.readTable`` (``headers=False``).  For each unordered
    pair of files the first column of both tables is pooled into one
    response vector, labelled with a two-level factor ("x" for the first
    file, "y" for the second), fitted with R's ``lm`` and summarised with
    R's ``anova``.  One tab-separated line per pair is written to
    ``options.stdout``, preceded by a header line.

    NOTE(review): *argv* defaults to ``sys.argv`` but is not forwarded to
    ``E.Start`` here, so presumably ``E.Start`` reads ``sys.argv`` on its
    own -- confirm before relying on a custom *argv*.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: data2multiple_anova.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    parser.add_option("-c",
                      "--columns",
                      dest="columns",
                      type="string",
                      help="columns to take for calculating histograms.")
    parser.add_option("-t",
                      "--tree-nh-file",
                      dest="filename_tree",
                      type="string",
                      help="filename with tree(s).")
    parser.add_option("--skip-header",
                      dest="add_header",
                      action="store_false",
                      help="do not add header to flat format.")
    parser.add_option("--output-with-header",
                      dest="write_header",
                      action="store_true",
                      help="write header and exit.")
    parser.add_option("--debug",
                      dest="debug",
                      action="store_true",
                      help="debug mode")
    parser.add_option("--display-tree",
                      dest="display_tree",
                      action="store_true",
                      help="display the tree")

    parser.add_option("-m",
                      "--method",
                      dest="methods",
                      type="choice",
                      action="append",
                      choices=("contrasts", "spearman", "pearson", "compute"),
                      help="methods to perform on contrasts.")

    parser.set_defaults(
        columns="all",
        filename_tree=None,
        add_header=True,
        write_header=False,
        debug=False,
        methods=[],
        value_format="%6.4f",
        pvalue_format="%e",
        display_tree=False,
    )

    (options, args) = E.Start(parser, quiet=True)

    # Column numbers on the command line are 1-based; readTable gets
    # 0-based indices.  The keywords "all" / "all-but-first" pass through.
    if options.columns not in ("all", "all-but-first"):
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    options.filenames = args

    data = []
    for filename in options.filenames:
        infile = IOTools.openFile(filename, "r")
        try:
            table, headers = IOTools.readTable(infile,
                                               take=options.columns,
                                               headers=False)
        finally:
            # release the handle even if the table can not be parsed
            infile.close()

        data.append(table)

    fields = ["Df", "Sum Sq", "F value", "Pr(>F)", "Mean Sq"]

    options.stdout.write("\t".join(["set1", "set2"] + fields) + "\n")

    # CODE needs to be refactored for rpy2 usage

    for x in range(len(data)):

        for y in range(x + 1, len(data)):

            # pool the first column of both tables and tag each value
            # with the table it came from
            factors = ["x"] * len(data[x][:, 0]) + ["y"] * len(data[y][:, 0])
            values = list(data[x][:, 0]) + list(data[y][:, 0])

            # The model is built without conversion; the conversion mode
            # is restored even if the fit fails so that a failure does
            # not leak the NO_CONVERSION setting.
            rpy.set_default_mode(rpy.NO_CONVERSION)
            try:
                linear_model = R.lm(R("y ~ x"),
                                    data=R.data_frame(x=factors, y=values))
            finally:
                rpy.set_default_mode(rpy.BASIC_CONVERSION)
            result = R.anova(linear_model)

            row = [options.filenames[x], options.filenames[y]]
            row.extend(str(result[field]) for field in fields)
            options.stdout.write("\t".join(row) + "\n")

Example #9

Show file

File: data2multiple_anova.py Project: siping/cgat

        display_tree = False,
        )

    (options, args) = E.Start( parser, quiet = True )

    if options.columns not in ( "all", "all-but-first"):
        options.columns = map(lambda x: int(x) -1 , options.columns.split(","))

    data = []

    options.filenames = args

    for filename in options.filenames:
        
        infile = open(filename,"r")
        table, headers = IOTools.readTable( infile, take = options.columns, headers=False)
        infile.close()

        data.append( table )
        

    fields = [ "Df", "Sum Sq", "F value", "Pr(>F)", "Mean Sq"]
    
    options.stdout.write("set1\tset2" )
    for field in fields:
        options.stdout.write("\t%s" % field )
    options.stdout.write("\n" )

    # CODE needs to be refactored for rpy2 usage

    for x in range( len(data )):

Example #10

Show file

def main(argv=None):
    """Plot one or more histograms read from a table.

    The table is read from the first positional argument ("-" or no
    argument means stdin) with ``IOTools.readTable``.  The first column
    supplies the x values, every further column is one histogram.
    Depending on the options the data are cumulated, reverse-cumulated
    and/or normalized before being plotted with pylab, either as
    individual lines (optionally with interleaved error bars) or as a
    stacked plot.  The figure is written to ``--hardcopy`` if given and
    shown interactively otherwise.

    Raises ``ValueError`` when error bars are combined with
    ``--cumulate``, ``--reverse-cumulate`` or ``--normalize``.

    NOTE(review): *argv* is accepted but not forwarded to ``E.Start``,
    so presumably ``E.Start`` reads ``sys.argv`` on its own -- confirm.
    """

    parser = E.OptionParser(
        version=
        "%prog version: $Id: plot_histogram.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-l",
                      "--legend",
                      dest="legend",
                      type="string",
                      help="legend for plot [default=%default].")
    parser.add_option("-t",
                      "--title",
                      dest="title",
                      type="string",
                      help="title for plot [default=%default].")
    parser.add_option(
        "-p",
        "--hardcopy",
        dest="hardcopy",
        type="string",
        help=
        "filename for hardcopy of plot. The extension defines the format. Known extensions are: 'emf, eps, jpeg, jpg, pdf, png, ps, raw, rgba, svg, svgz' [default=%default].",
        metavar="FILE")
    parser.add_option("",
                      "--xrange",
                      dest="xrange",
                      type="string",
                      help="x viewing range of plot [default=%default].")
    parser.add_option("",
                      "--yrange",
                      dest="yrange",
                      type="string",
                      help="y viewing range of plot[default=%default].")
    parser.add_option("-o",
                      "--logscale",
                      dest="logscale",
                      type="string",
                      help="use logscale on x, y or xy [default=%default]")
    parser.add_option("-x",
                      "--xtitle",
                      dest="xtitle",
                      type="string",
                      help="title for x axis [default=%default]")
    parser.add_option("-y",
                      "--ytitle",
                      dest="ytitle",
                      type="string",
                      help="title for y axis [default=%default]")
    parser.add_option("-d",
                      "--dpi",
                      dest="dpi",
                      type="int",
                      help="dpi of images [default=%default]")
    parser.add_option("-n",
                      "--normalize",
                      dest="normalize",
                      action="store_true",
                      help="normalize histograms [default=%default]")
    parser.add_option(
        "--cumulate",
        dest="cumulate",
        action="store_true",
        help="calculate cumulative histogram [default=%default].")
    parser.add_option(
        "--reverse-cumulate",
        dest="reverse_cumulate",
        action="store_true",
        help=
        "calculate cumulative histogram in reverse order [default=%default].")
    parser.add_option("--legend-location",
                      dest="legend_location",
                      type="choice",
                      choices=("upper left", "upper right", "lower left",
                               "lower right", "center", "center right",
                               "center left", "none"),
                      help="location of legend [default=%default]")
    parser.add_option("--backend",
                      dest="backend",
                      type="string",
                      help="backend to use [Agg|SVG|PS] [default=%default]")
    parser.add_option(
        "--symbols",
        dest="symbols",
        type="string",
        help="symbols to use for each histogram [steps|...] [default=%default]."
    )
    parser.add_option("--dump",
                      dest="dump",
                      action="store_true",
                      help="dump data for debug purposes [default=%default].")
    parser.add_option("-c",
                      "--columns",
                      dest="columns",
                      type="string",
                      help="columns to use for plotting [default=%default].")
    parser.add_option(
        "--truncate",
        dest="truncate",
        action="store_true",
        help=
        "truncate date within x range. If not set, xrange is simply a viewing range [default=%default]."
    )
    parser.add_option("--as-lines",
                      dest="as_lines",
                      action="store_true",
                      help="plot only lines, no symbols [default=%default].")
    parser.add_option(
        "--noheaders",
        dest="headers",
        action="store_false",
        help="do not take first input line as header [default=%default].")
    parser.add_option("--stacked",
                      dest="stacked",
                      action="store_true",
                      help="do a stacked plot [default=%default].")
    parser.add_option("--add-function",
                      dest="function",
                      type="string",
                      help="add a function to the plot [default=%default].")
    parser.add_option(
        "--add-error-bars",
        dest="error_bars",
        type="choice",
        choices=("interleaved", "blocked"),
        help=
        "add error bars. The input format is 'interleaved' or 'blocked'. In the interleaved format the error follows each column. I the blocked format first the data, then the errors in the same order [default=%default]."
    )

    parser.set_defaults(
        legend=None,
        title=None,
        hardcopy=None,
        logscale=None,
        xtitle=None,
        ytitle=None,
        xrange=None,
        yrange=None,
        normalize=None,
        columns="all",
        headers=True,
        legend_location="upper right",
        backend="cairo",
        symbols="g-D,b-h,r-+,c-+,m-+,y-+,k-o,g-^,b-<,r->,c-D,m-h",
        dump=False,
        truncate=False,
        cumulate=False,
        reverse_cumulate=False,
        function=None,
        # the default is keyed on the option's dest ("error_bars") so
        # that the guards below look at the value the user actually set
        error_bars=None,
        as_lines=False,
        stacked=False,
        dpi=80,
    )

    (options, args) = E.Start(parser)

    # import matplotlib/pylab. Has to be done here
    # for batch scripts without GUI.
    import matplotlib
    if options.hardcopy:
        # honour --backend (defaults to "cairo")
        matplotlib.use(options.backend)
    import pylab

    # put this method here (because it requires pylab)
    def doStackedPlot(data, legend):
        """Draw the data columns as filled bands stacked on each other.

        Returns the plotted line handles and the matching legend
        entries.  Fills in ``options.xrange`` / ``options.yrange`` when
        they have not been set by the user.
        """

        colors = [
            "red", "blue", "green", "cyan", "magenta", "yellow", "brown",
            "silver", "purple", "lightyellow", "black", "ivory", "pink",
            "orange", "gray", "teal"
        ]

        ax = data[:, 0]
        # each band is a closed polygon: along the new top edge from
        # left to right and back along the previous top edge
        xvals = numpy.concatenate((ax, ax[::-1]))
        y_top = numpy.zeros(len(ax))

        min_y = min(data[:, 1:].flat)
        max_y = min_y
        new_legend, dummy_lines = [], []

        for i in range(1, len(legend)):
            new_y_top = y_top + data[:, i]
            yvals = numpy.concatenate((new_y_top, y_top[::-1]))
            pylab.fill(xvals, yvals, colors[i % len(colors)])

            y_top = new_y_top
            max_y = max(y_top)

            dummy_lines.append(
                pylab.plot(xvals, yvals, colors[i % len(colors)]))

            new_legend.append(legend[i])

        if not options.xrange:
            options.xrange = min(data[:, 0]), max(data[:, 0])

        if not options.yrange:
            options.yrange = 0, max_y

        return dummy_lines, new_legend

    if options.as_lines:
        options.symbols = []
        for y in ("-", ":", "--"):
            for x in "gbrcmyk":
                options.symbols.append(y + x)
    else:
        options.symbols = options.symbols.split(",")

    # build real lists: under Python 3 map() returns a one-shot iterator
    if options.xrange:
        options.xrange = [float(v) for v in options.xrange.split(",")]
    if options.yrange:
        options.yrange = [float(v) for v in options.yrange.split(",")]

    # Added support for (inclusive) range format: "1,3,5,7-100"  (Gerton
    # 13/12/06)
    # Columns are 1-based on the command line and 0-based internally.
    # The range form is shifted by one as well so that "3-5" selects the
    # same columns as "3,4,5".
    if options.columns != "all":
        cols = []
        for d in options.columns.split(','):
            colopts = d.split('-')
            if len(colopts) == 2:
                cols.extend(range(int(colopts[0]) - 1, int(colopts[1])))
            else:
                cols.append(int(d) - 1)
        options.columns = cols

    if args and args[0] != "-":
        infile = open(args[0], "r")
    else:
        infile = sys.stdin

    if options.truncate:
        xr = options.xrange
    else:
        xr = None

    try:
        # builtin float replaces the removed numpy.float alias
        data, legend = IOTools.readTable(infile,
                                         numeric_type=float,
                                         take=options.columns,
                                         headers=options.headers,
                                         truncate=xr)
    finally:
        # close the input even if parsing fails; never close stdin
        if infile is not sys.stdin:
            infile.close()

    if len(data) == 0:  # or data is None:
        E.info("empty table: no plot")
        E.Stop()
        return

    nrows, ncols = data.shape

    # note: because of MA, iteration makes copy of slices
    # Solution: inplace edits.
    if options.cumulate:
        if options.error_bars:
            raise ValueError(
                "can not add error bars to cumulative histogram.")
        if data.mask.any():
            # cumsum does not work with masked arrays, so do it manually
            for y in range(1, ncols):
                c = 0
                for x in range(0, nrows):
                    if not data.mask[x, y]:
                        data[x, y] += c
                        c = data[x, y]
        else:
            for x in range(1, ncols):
                data[:, x] = data[:, x].cumsum()

    elif options.reverse_cumulate:
        if options.error_bars:
            raise ValueError(
                "can not add error bars to cumulative histogram.")
        if data.mask.any():
            l = [0] * ncols
            for x in range(nrows - 1, -1, -1):
                for y in range(1, ncols):
                    if not data.mask[x, y]:
                        data[x, y] += l[y]
                        l[y] = data[x, y]
        else:
            l = [0] * ncols
            for x in range(nrows - 1, -1, -1):
                for y in range(1, ncols):
                    data[x, y] += l[y]
                    l[y] = data[x, y]

    if options.normalize:
        if options.error_bars:
            raise ValueError(
                "can not add error bars to normalized histogram.")
        if data.mask.any():
            m = [0] * ncols
            for x in range(nrows):
                for y in range(1, ncols):
                    if not data.mask[x, y]:
                        m[y] = max(m[y], float(data[x, y]))

            for y in range(1, ncols):
                if m[y] == 0:
                    m[y] = 1.0

            for x in range(nrows):
                for y in range(1, ncols):
                    data[x, y] = data[x, y] / m[y]
        else:
            for x in range(1, ncols):
                m = float(data[:, x].max())
                # same guard as the masked branch: an all-zero column
                # is left untouched instead of dividing by zero
                if m == 0:
                    m = 1.0
                data[:, x] /= m

    if options.legend:
        legend = options.legend.split(",")

    if options.dump:
        for d in data:
            print(d)

    if options.title:
        pylab.title(options.title)

    if options.xtitle:
        pylab.xlabel(options.xtitle)
    else:
        pylab.xlabel(legend[0])

    if options.ytitle:
        pylab.ylabel(options.ytitle)

    lines = []
    # use dummy_lines to workaround a bug in errorbars that
    # causes the line styles to be set incorrectly.
    dummy_lines = []
    new_legend = []

    if options.error_bars:
        if options.error_bars == "interleaved":
            step_size = 2
            max_size = len(legend)
        elif options.error_bars == "blocked":
            step_size = 1
            # integer division: the value is used as a range() bound
            # NOTE(review): no error values are read for the "blocked"
            # layout further down -- looks unfinished, confirm intent.
            max_size = (len(legend) - 1) // 2
    else:
        step_size = 1
        max_size = len(legend)

    if options.stacked:
        dummy_lines, new_legend = doStackedPlot(data, legend)
    else:
        nplotted = 0
        nskipped = 0
        for x in range(1, max_size, step_size):

            s = options.symbols[nplotted % len(options.symbols)]

            yvals = data[:, x]

            # drop the x values wherever the y value is masked
            xvals = numpy.ma.masked_array(data[:, 0], numpy.ma.getmask(yvals))

            xvals = xvals.compressed()
            yvals = yvals.compressed()

            if len(xvals) == 0:
                E.warn("skipped empty column %i: %s" % (x, legend[x]))
                nskipped += 1
                continue

            if options.error_bars == "interleaved":
                yerr = data[:, x + 1]
                yerr = yerr.compressed()
            else:
                yerr = None

            lines.append(pylab.errorbar(xvals, yvals, yerr=yerr, fmt=s))

            dummy_lines.append(pylab.plot(xvals, yvals, s))

            new_legend.append(legend[x])

            nplotted += 1

        E.info("nplotted=%i, nskipped=%i" % (nplotted, nskipped))

    # dummy_lines is filled by both the stacked and the per-column path;
    # "lines" stays empty for stacked plots and must not be used here.
    if len(dummy_lines) == 0:
        E.Stop()
        return

    if options.legend_location != "none":
        pylab.figlegend(dummy_lines, new_legend, options.legend_location)

    if options.logscale:
        if "x" in options.logscale:
            pylab.gca().set_xscale('log')
        if "y" in options.logscale:
            pylab.gca().set_yscale('log')

    if options.xrange:
        pylab.xlim(options.xrange)

    if options.yrange:
        pylab.ylim(options.yrange)

    if options.function:
        xstart, xend = pylab.gca().get_xlim()
        increment = (xend - xstart) / 100.0
        # SECURITY: --add-function is evaluated as Python code. Only use
        # it with trusted command lines.
        # The expression sees the local names of main() (e.g. pylab),
        # passed as a copy so that it can not modify them.
        f = eval("lambda x: %s" % options.function, dict(locals()))
        xvals, yvals = [], []
        for x in range(0, 100):
            xvals.append(xstart)
            yvals.append(f(xstart))
            xstart += increment
        xvals.append(xstart)
        yvals.append(f(xstart))

        pylab.plot(xvals, yvals)

    if options.hardcopy:
        pylab.savefig(os.path.expanduser(options.hardcopy), dpi=options.dpi)
    else:
        pylab.show()

    E.Stop()