Code example #1
File: Stats_test.py  Project: logust79/cgat-apps
 def check(self, method):
     '''check for length equality and elementwise equality.'''
     a = R['p.adjust'](self.pvalues, method=method)
     b = Stats.adjustPValues(self.pvalues, method=method)
     self.assertEqual(len(a), len(b))
     for x, y in zip(a, b):
         self.assertAlmostEqual(x, y)
Code example #2
File: Stats_test.py  Project: lesheng/cgat
 def check(self, method):
     '''check for length equality and elementwise equality.'''
     a = R['p.adjust'](self.pvalues, method=method)
     b = Stats.adjustPValues(self.pvalues, method=method)
     self.assertEqual(len(a), len(b))
     for x, y in zip(a, b):
         self.assertAlmostEqual(x, y)
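
Both snippets above verify that cgat's Stats.adjustPValues reproduces R's p.adjust (called through rpy2) for each correction method. For reference, here is a minimal pure-Python sketch of the Benjamini-Hochberg ("BH") adjustment that p.adjust applies for FDR control; bh_adjust is an illustrative name, not part of the cgat or rpy2 APIs.

def bh_adjust(pvalues):
    """Return Benjamini-Hochberg adjusted p-values in the original order."""
    m = len(pvalues)
    # indices of the p-values sorted in ascending order
    order = sorted(range(m), key=lambda i: pvalues[i])
    adjusted = [0.0] * m
    running_min = 1.0
    # walk from the largest p-value down, keeping a running minimum of p * m / rank
    for rank in range(m, 0, -1):
        idx = order[rank - 1]
        running_min = min(running_min, pvalues[idx] * m / rank)
        adjusted[idx] = running_min
    return adjusted

# Agrees with R: p.adjust(c(0.01, 0.04, 0.03, 0.20), method="BH")
print(bh_adjust([0.01, 0.04, 0.03, 0.20]))  # [0.04, 0.0533..., 0.0533..., 0.2]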
Code example #3
File: table2table.py  Project: Charlie-George/cgat
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: table2table.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-m", "--method", dest="methods", type="choice", action="append",
                      choices=("transpose", "normalize-by-max", "normalize-by-value", "multiply-by-value",
                               "percentile", "remove-header", "normalize-by-table",
                               "upper-bound", "lower-bound", "kullback-leibler",
                               "expand", "compress", "fdr", "grep"),
                      help="""actions to perform on table.""")

    parser.add_option("-s", "--scale", dest="scale", type="float",
                      help="factor to scale matrix by.")

    parser.add_option("-f", "--format", dest="format", type="string",
                      help="output number format.")

    parser.add_option("-p", "--parameters", dest="parameters", type="string",
                      help="Parameters for various functions.")

    parser.add_option("-t", "--headers", dest="has_headers", action="store_true",
                      help="matrix has row/column headers.")

    parser.add_option("--transpose", dest="transpose", action="store_true",
                      help="transpose table.")

    parser.add_option("--set-transpose-field", dest="set_transpose_field", type="string",
                      help="set first field (row 1 and col 1) to this value [%default].")

    parser.add_option("--transpose-format", dest="transpose_format", type="choice",
                      choices=("default", "separated", ),
                      help="input format of un-transposed table")

    parser.add_option("--expand", dest="expand_table", action="store_true",
                      help="expand table - multi-value cells with be expanded over several rows.")

    parser.add_option("--no-headers", dest="has_headers", action="store_false",
                      help="matrix has no row/column headers.")

    parser.add_option("--columns", dest="columns", type="string",
                      help="columns to use.")

    parser.add_option("--file", dest="file", type="string",
                      help="columns to test from table.",
                      metavar="FILE")

    parser.add_option("-d", "--delimiter", dest="delimiter", type="string",
                      help="delimiter of columns.",
                      metavar="DELIM")

    parser.add_option("-V", "--invert-match", dest="invert_match", action="store_true",
                      help="invert match.")

    parser.add_option("--sort-by-rows", dest="sort_rows", type="string",
                      help="output order for rows.")

    parser.add_option("-a", "--value", dest="value", type="float",
                      help="value to use for various algorithms.")

    parser.add_option("--group", dest="group_column", type="int",
                      help="group values by column. Supply an integer column [default=%default]")

    parser.add_option("--group-function", dest="group_function", type="choice",
                      choices=(
                          "min", "max", "sum", "mean", "stats", "cat", "uniq"),
                      help="function to group values by.")

    parser.add_option("--join-table", dest="join_column", type="int",
                      help="join rows in a table by columns.")

    parser.add_option("--collapse-table", dest="collapse_table", type="string",
                      help="collapse a table. Value determines the missing variable [%default].")

    parser.add_option("--join-column-name", dest="join_column_name", type="int",
                      help="use this column as a prefix.")

    parser.add_option("--flatten-table", dest="flatten_table", action="store_true",
                      help="flatten a table [%default].")

    parser.add_option("--as-column", dest="as_column", action="store_true",
                      help="output table as a single column.")

    parser.add_option("--split-fields", dest="split_fields", action="store_true",
                      help="split fields.")

    parser.add_option("--separator", dest="separator", type="string",
                      help="separator for multi-valued fields [default=%default].")

    parser.add_option("--fdr-method", dest="fdr_method", type="choice",
                      choices=(
                          "BH", "bonferroni", "holm", "hommel", "hochberg", "BY"),
                      help="method to perform multiple testing correction by controlling the fdr [default=%default].")

    parser.add_option("--fdr-add-column", dest="fdr_add_column", type="string",
                      help="add new column instead of replacing existing columns. "
                      "The value of the option will be used as prefix if there are multiple columns [%default]")

    # IMS: add option to use a column as the row id in flatten
    parser.add_option("--id-column", dest="id_column", type="string",
                      help="list of column(s) to use as the row id when flattening the table. "
                      "If None, then row number is used. [default=%default].")

    parser.add_option("--variable-name", dest="variable_name", type="string",
                      help="the column header for the 'variable' column when flattening [default=%default].")

    parser.add_option("--value-name", dest="value_name", type="string",
                      help="the column header for the 'value' column when flattening [default=%default].")

    parser.set_defaults(
        methods=[],
        scale=1.0,
        has_headers=True,
        format="%5.2f",
        value=0.0,
        parameters="",
        columns="all",
        transpose=False,
        set_transpose_field=None,
        transpose_format="default",
        group=False,
        group_column=0,
        group_function="mean",
        missing_value="na",
        sort_rows=None,
        flatten_table=False,
        collapse_table=None,
        separator=";",
        expand=False,
        join_column=None,
        join_column_name=None,
        compute_fdr=None,
        as_column=False,
        fdr_method="BH",
        fdr_add_column=None,
        id_column=None,
        variable_name="column",
        value_name="value",
        file=None,
        delimiter="\t",
        invert_match=False,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    options.parameters = options.parameters.split(",")

    if options.group_column:
        options.group = True
        options.group_column -= 1

    ######################################################################
    ######################################################################
    ######################################################################
    # if only to remove header, do this quickly
    if options.methods == ["remove-header"]:

        first = True
        for line in options.stdin:
            if line[0] == "#":
                continue
            if first:
                first = False
                continue
            options.stdout.write(line)

    elif options.transpose or "transpose" in options.methods:

        readAndTransposeTable(options.stdin, options)

    elif options.flatten_table:
        # IMS: bug fixed to make work. Also added options for keying on a
        # particular column and adding custom column headings

        fields, table = CSV.ReadTable(
            options.stdin, with_header=options.has_headers, as_rows=True)

        options.columns = getColumns(fields, options.columns)

        if options.id_column:
            id_columns = map(
                lambda x: int(x) - 1, options.id_column.split(","))
            id_header = "\t".join([fields[id_column]
                                   for id_column in id_columns])
            options.columns = [
                x for x in options.columns if x not in id_columns]
        else:
            id_header = "row"

        options.stdout.write(
            "%s\t%s\t%s\n" % (id_header, options.variable_name, options.value_name))

        for x, row in enumerate(table):

            if options.id_column:
                row_id = "\t".join([row[int(x) - 1]
                                    for x in options.id_column.split(",")])
            else:
                row_id = str(x)

            for y in options.columns:
                options.stdout.write(
                    "%s\t%s\t%s\n" % (row_id, fields[y], row[y]))

    elif options.as_column:

        fields, table = CSV.ReadTable(
            options.stdin, with_header=options.has_headers, as_rows=True)
        options.columns = getColumns(fields, options.columns)
        table = zip(*table)

        options.stdout.write("value\n")

        for column in options.columns:
            options.stdout.write("\n".join(table[column]) + "\n")

    elif options.split_fields:

        # split comma separated fields
        fields, table = CSV.ReadTable(options.stdin,
                                      with_header=options.has_headers,
                                      as_rows=True)

        options.stdout.write("%s\n" % ("\t".join(fields)))

        for row in table:
            row = [x.split(options.separator) for x in row]
            for d in itertools.product(*row):
                options.stdout.write("%s\n" % "\t".join(d))

    elif options.group:
        readAndGroupTable(options.stdin, options)

    elif options.join_column:
        readAndJoinTable(options.stdin, options)

    elif options.expand_table:
        readAndExpandTable(options.stdin, options)

    elif options.collapse_table is not None:
        readAndCollapseTable(options.stdin, options, options.collapse_table)

    elif "grep" in options.methods:

        options.columns = map(lambda x: int(x) - 1, options.columns.split(","))

        patterns = []

        if options.file:
            infile = open(options.file, "r")
            for line in infile:
                if line[0] == "#":
                    continue
                patterns.append(line[:-1].split(options.delimiter)[0])
        else:
            patterns = args

        for line in options.stdin:

            data = line[:-1].split(options.delimiter)
            found = False

            for c in options.columns:

                if data[c] in patterns:
                    found = True
                    break

            if (not found and options.invert_match) or (found and not options.invert_match):
                print line[:-1]
    else:

        ######################################################################
        ######################################################################
        ######################################################################
        # Apply remainder of transformations
        fields, table = CSV.ReadTable(
            options.stdin, with_header=options.has_headers, as_rows=False)
        # convert columns to list
        table = [list(x) for x in table]

        ncols = len(fields)
        if len(table) == 0:
            raise ValueError("table is empty")

        nrows = len(table[0])

        E.info("processing table with %i rows and %i columns" % (nrows, ncols))

        options.columns = getColumns(fields, options.columns)

        # convert all values to float
        for c in options.columns:
            for r in range(nrows):
                try:
                    table[c][r] = float(table[c][r])
                except ValueError:
                    continue

        for method in options.methods:

            if method == "normalize-by-value":

                value = float(options.parameters[0])
                del options.parameters[0]

                for c in options.columns:
                    table[c] = map(lambda x: x / value, table[c])

            elif method == "multiply-by-value":

                value = float(options.parameters[0])
                del options.parameters[0]

                for c in options.columns:
                    table[c] = map(lambda x: x * value, table[c])

            elif method == "normalize-by-max":

                for c in options.columns:
                    m = max(table[c])
                    table[c] = map(lambda x: x / m, table[c])

            elif method == "kullback-leibler":
                options.stdout.write("category1\tcategory2\tkl1\tkl2\tmean\n")
                for x in range(0, len(options.columns) - 1):
                    for y in range(x + 1, len(options.columns)):
                        c1 = options.columns[x]
                        c2 = options.columns[y]
                        e1 = 0
                        e2 = 0
                        for z in range(nrows):
                            p = table[c1][z]
                            q = table[c2][z]
                            e1 += p * math.log(p / q)
                            e2 += q * math.log(q / p)

                        options.stdout.write("%s\t%s\t%s\t%s\t%s\n" % (fields[c1], fields[c2],
                                                                       options.format % e1,
                                                                       options.format % e2,
                                                                       options.format % ((e1 + e2) / 2)))
                E.Stop()
                sys.exit(0)

            elif method == "rank":

                for c in options.columns:
                    tt = table[c]
                    t = zip(tt, range(nrows))
                    t.sort()
                    for i, n in zip(map(lambda x: x[1], t), range(nrows)):
                        tt[i] = n

            elif method in ("lower-bound", "upper-bound"):

                boundary = float(options.parameters[0])
                del options.parameters[0]
                new_value = float(options.parameters[0])
                del options.parameters[0]

                if method == "upper-bound":
                    for c in options.columns:
                        for r in range(nrows):
                            if isinstance(table[c][r], float) and \
                                    table[c][r] > boundary:
                                table[c][r] = new_value
                else:
                    for c in options.columns:
                        for r in range(nrows):
                            if isinstance(table[c][r], float) and \
                                    table[c][r] < boundary:
                                table[c][r] = new_value

            elif method == "fdr":
                pvalues = []
                for c in options.columns:
                    pvalues.extend(table[c])

                assert max(pvalues) <= 1.0, "pvalues > 1 in table: max=%s" % str(
                    max(pvalues))
                assert min(pvalues) >= 0, "pvalue < 0 in table: min=%s" % str(
                    min(pvalues))

                # convert to str to avoid test for float downstream
                qvalues = map(
                    str, Stats.adjustPValues(pvalues, method=options.fdr_method))

                if options.fdr_add_column is None:
                    x = 0
                    for c in options.columns:
                        table[c] = qvalues[x:x + nrows]
                        x += nrows
                else:
                    # add new column headers

                    if len(options.columns) == 1:
                        fields.append(options.fdr_add_column)
                    else:
                        for co in options.columns:
                            fields.append(options.fdr_add_column + fields[co])

                    x = 0
                    for c in options.columns:
                        # add a new column
                        table.append(qvalues[x:x + nrows])
                        x += nrows
                    ncols += len(options.columns)

            elif method == "normalize-by-table":

                other_table_name = options.parameters[0]
                del options.parameters[0]
                other_fields, other_table = CSV.ReadTable(
                    open(other_table_name, "r"),
                    with_header=options.has_headers,
                    as_rows=False)

                # convert all values to float
                for c in options.columns:
                    for r in range(nrows):
                        try:
                            other_table[c][r] = float(other_table[c][r])
                        except ValueError:
                            continue

                # set 0s to 1 in the other matrix
                for c in options.columns:
                    for r in range(nrows):
                        if isinstance(table[c][r], float) and \
                                isinstance(other_table[c][r], float) and \
                                other_table[c][r] != 0:
                            table[c][r] /= other_table[c][r]
                        else:
                            table[c][r] = options.missing_value

        # convert back
        for c in options.columns:
            for r in range(nrows):
                if isinstance(table[c][r], float):
                    table[c][r] = options.format % table[c][r]

        options.stdout.write("\t".join(fields) + "\n")
        if options.sort_rows:
            old2new = {}
            for r in range(nrows):
                old2new[table[0][r]] = r
            for x in options.sort_rows.split(","):
                if x not in old2new:
                    continue
                r = old2new[x]
                options.stdout.write(
                    "\t".join([table[c][r] for c in range(ncols)]) + "\n")
        else:
            for r in range(nrows):
                options.stdout.write(
                    "\t".join([table[c][r] for c in range(ncols)]) + "\n")

    E.Stop()
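
The --flatten-table branch above melts a wide table into (row id, variable, value) triples, optionally keyed on one or more id columns. The same transform as a self-contained sketch, independent of the CSV and getColumns helpers; flatten_rows is a hypothetical name used only for illustration.

def flatten_rows(fields, rows, id_column=None):
    """Yield (row_id, column_name, value) for every non-id cell."""
    for n, row in enumerate(rows):
        # use the id column if given, otherwise the row number
        row_id = row[id_column] if id_column is not None else str(n)
        for j, (col, value) in enumerate(zip(fields, row)):
            if j == id_column:
                continue
            yield row_id, col, value

# Flatten a small 2x2 table keyed on its first column.
fields = ["gene", "sample1", "sample2"]
rows = [["g1", "0.1", "0.2"], ["g2", "0.3", "0.4"]]
for triple in flatten_rows(fields, rows, id_column=0):
    print("\t".join(triple))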
Code example #4
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-m",
        "--method",
        dest="methods",
        type="choice",
        action="append",
        choices=("transpose", "normalize-by-max", "normalize-by-value",
                 "multiply-by-value", "percentile", "remove-header",
                 "normalize-by-table", "upper-bound", "lower-bound",
                 "kullback-leibler", "expand", "compress", "fdr", "grep"),
        help="""actions to perform on table.""")

    parser.add_option("-s",
                      "--scale",
                      dest="scale",
                      type="float",
                      help="factor to scale matrix by.")

    parser.add_option("-f",
                      "--format",
                      dest="format",
                      type="string",
                      help="output number format [default]")

    parser.add_option("-p",
                      "--parameters",
                      dest="parameters",
                      type="string",
                      help="Parameters for various functions.")

    parser.add_option("-t",
                      "--header-names",
                      dest="has_headers",
                      action="store_true",
                      help="matrix has row/column headers.")

    parser.add_option("--transpose",
                      dest="transpose",
                      action="store_true",
                      help="transpose table.")

    parser.add_option(
        "--set-transpose-field",
        dest="set_transpose_field",
        type="string",
        help="set first field (row 1 and col 1) to this value [%default].")

    parser.add_option("--transpose-format",
                      dest="transpose_format",
                      type="choice",
                      choices=(
                          "default",
                          "separated",
                      ),
                      help="input format of un-transposed table")

    parser.add_option(
        "--expand",
        dest="expand_table",
        action="store_true",
        help="expand table - multi-value cells with be expanded over "
        "several rows.")

    parser.add_option("--no-headers",
                      dest="has_headers",
                      action="store_false",
                      help="matrix has no row/column headers.")

    parser.add_option("--columns",
                      dest="columns",
                      type="string",
                      help="columns to use.")

    parser.add_option("--file",
                      dest="file",
                      type="string",
                      help="columns to test from table.",
                      metavar="FILE")

    parser.add_option("-d",
                      "--delimiter",
                      dest="delimiter",
                      type="string",
                      help="delimiter of columns.",
                      metavar="DELIM")

    parser.add_option("-V",
                      "--invert-match",
                      dest="invert_match",
                      action="store_true",
                      help="invert match.")

    parser.add_option("--sort-by-rows",
                      dest="sort_rows",
                      type="string",
                      help="output order for rows.")

    parser.add_option("-a",
                      "--value",
                      dest="value",
                      type="float",
                      help="value to use for various algorithms.")

    parser.add_option("--group",
                      dest="group_column",
                      type="int",
                      help="group values by column. Supply an integer column "
                      "[default=%default]")

    parser.add_option("--group-function",
                      dest="group_function",
                      type="choice",
                      choices=("min", "max", "sum", "mean", "stats", "cat",
                               "uniq"),
                      help="function to group values by.")

    parser.add_option("--join-table",
                      dest="join_column",
                      type="int",
                      help="join rows in a table by columns.")

    parser.add_option(
        "--collapse-table",
        dest="collapse_table",
        type="string",
        help="collapse a table. Value determines the missing variable "
        "[%default].")

    parser.add_option("--join-column-name",
                      dest="join_column_name",
                      type="int",
                      help="use this column as a prefix.")

    parser.add_option("--flatten-table",
                      dest="flatten_table",
                      action="store_true",
                      help="flatten a table [%default].")

    parser.add_option("--as-column",
                      dest="as_column",
                      action="store_true",
                      help="output table as a single column.")

    parser.add_option("--split-fields",
                      dest="split_fields",
                      action="store_true",
                      help="split fields.")

    parser.add_option(
        "--separator",
        dest="separator",
        type="string",
        help="separator for multi-valued fields [default=%default].")

    parser.add_option(
        "--fdr-method",
        dest="fdr_method",
        type="choice",
        choices=("BH", "bonferroni", "holm", "hommel", "hochberg", "BY"),
        help="method to perform multiple testing correction by controlling "
        "the fdr [default=%default].")

    parser.add_option(
        "--fdr-add-column",
        dest="fdr_add_column",
        type="string",
        help="add new column instead of replacing existing columns. "
        "The value of the option will be used as prefix if there are "
        "multiple columns [%default]")

    # IMS: add option to use a column as the row id in flatten
    parser.add_option(
        "--id-column",
        dest="id_column",
        type="string",
        help="list of column(s) to use as the row id when flattening "
        "the table. If None, then row number is used. [default=%default].")

    parser.add_option(
        "--variable-name",
        dest="variable_name",
        type="string",
        help="the column header for the 'variable' column when flattening "
        "[default=%default].")

    parser.add_option(
        "--value-name",
        dest="value_name",
        type="string",
        help="the column header for the 'value' column when flattening "
        "[default=%default].")

    parser.set_defaults(
        methods=[],
        scale=1.0,
        has_headers=True,
        format=None,
        value=0.0,
        parameters="",
        columns="all",
        transpose=False,
        set_transpose_field=None,
        transpose_format="default",
        group=False,
        group_column=0,
        group_function="mean",
        missing_value="na",
        sort_rows=None,
        flatten_table=False,
        collapse_table=None,
        separator=";",
        expand=False,
        join_column=None,
        join_column_name=None,
        compute_fdr=None,
        as_column=False,
        fdr_method="BH",
        fdr_add_column=None,
        id_column=None,
        variable_name="column",
        value_name="value",
        file=None,
        delimiter="\t",
        invert_match=False,
    )

    (options, args) = E.start(parser, add_pipe_options=True)

    options.parameters = options.parameters.split(",")

    if options.group_column:
        options.group = True
        options.group_column -= 1

    ######################################################################
    ######################################################################
    ######################################################################
    # if only to remove header, do this quickly
    if options.methods == ["remove-header"]:

        first = True
        for line in options.stdin:
            if line[0] == "#":
                continue
            if first:
                first = False
                continue
            options.stdout.write(line)

    elif options.transpose or "transpose" in options.methods:

        readAndTransposeTable(options.stdin, options)

    elif options.flatten_table:
        # IMS: bug fixed to make work. Also added options for keying
        # on a particular column and adding custom column headings

        fields, table = CSV.readTable(options.stdin,
                                      with_header=options.has_headers,
                                      as_rows=True)

        options.columns = getColumns(fields, options.columns)

        if options.id_column:
            id_columns = [int(x) - 1 for x in options.id_column.split(",")]
            id_header = "\t".join(
                [fields[id_column] for id_column in id_columns])
            options.columns = [
                x for x in options.columns if x not in id_columns
            ]
        else:
            id_header = "row"

        options.stdout.write(
            "%s\t%s\t%s\n" %
            (id_header, options.variable_name, options.value_name))

        for x, row in enumerate(table):

            if options.id_column:
                row_id = "\t".join(
                    [row[int(x) - 1] for x in options.id_column.split(",")])
            else:
                row_id = str(x)

            for y in options.columns:
                options.stdout.write("%s\t%s\t%s\n" %
                                     (row_id, fields[y], row[y]))

    elif options.as_column:

        fields, table = CSV.readTable(options.stdin,
                                      with_header=options.has_headers,
                                      as_rows=True)
        options.columns = getColumns(fields, options.columns)
        table = list(zip(*table))

        options.stdout.write("value\n")

        for column in options.columns:
            options.stdout.write("\n".join(table[column]) + "\n")

    elif options.split_fields:

        # split comma separated fields
        fields, table = CSV.readTable(options.stdin,
                                      with_header=options.has_headers,
                                      as_rows=True)

        options.stdout.write("%s\n" % ("\t".join(fields)))

        for row in table:
            row = [x.split(options.separator) for x in row]
            for d in itertools.product(*row):
                options.stdout.write("%s\n" % "\t".join(d))

    elif options.group:
        readAndGroupTable(options.stdin, options)

    elif options.join_column:
        readAndJoinTable(options.stdin, options)

    elif options.expand_table:
        readAndExpandTable(options.stdin, options)

    elif options.collapse_table is not None:
        readAndCollapseTable(options.stdin, options, options.collapse_table)

    elif "grep" in options.methods:

        options.columns = [int(x) - 1 for x in options.columns.split(",")]

        patterns = []

        if options.file:
            infile = IOTools.open_file(options.file, "r")
            for line in infile:
                if line[0] == "#":
                    continue
                patterns.append(line[:-1].split(options.delimiter)[0])
        else:
            patterns = args

        for line in options.stdin:

            data = line[:-1].split(options.delimiter)
            found = False

            for c in options.columns:

                if data[c] in patterns:
                    found = True
                    break

            if (not found and options.invert_match) or (
                    found and not options.invert_match):
                print(line[:-1])
    else:

        ######################################################################
        ######################################################################
        ######################################################################
        # Apply remainder of transformations
        fields, table = CSV.readTable(options.stdin,
                                      with_header=options.has_headers,
                                      as_rows=False)
        # convert columns to list
        table = [list(x) for x in table]

        ncols = len(fields)
        if len(table) == 0:
            raise ValueError("table is empty")

        nrows = len(table[0])

        E.info("processing table with %i rows and %i columns" % (nrows, ncols))

        options.columns = getColumns(fields, options.columns)

        # convert all values to float
        for c in options.columns:
            for r in range(nrows):
                try:
                    table[c][r] = float(table[c][r])
                except ValueError:
                    continue

        for method in options.methods:

            if method == "normalize-by-value":

                value = float(options.parameters[0])
                del options.parameters[0]

                for c in options.columns:
                    table[c] = [x / value for x in table[c]]

            elif method == "multiply-by-value":

                value = float(options.parameters[0])
                del options.parameters[0]

                for c in options.columns:
                    table[c] = [x * value for x in table[c]]

            elif method == "normalize-by-max":

                for c in options.columns:
                    m = max(table[c])
                    table[c] = [x / m for x in table[c]]

            elif method == "kullback-leibler":
                options.stdout.write("category1\tcategory2\tkl1\tkl2\tmean\n")
                format = options.format
                if format is None:
                    format = "%f"

                for x in range(0, len(options.columns) - 1):
                    for y in range(x + 1, len(options.columns)):
                        c1 = options.columns[x]
                        c2 = options.columns[y]
                        e1 = 0
                        e2 = 0
                        for z in range(nrows):
                            p = table[c1][z]
                            q = table[c2][z]
                            e1 += p * math.log(p / q)
                            e2 += q * math.log(q / p)

                        options.stdout.write(
                            "%s\t%s\t%s\t%s\t%s\n" %
                            (fields[c1], fields[c2], format % e1, format % e2,
                             format % ((e1 + e2) / 2)))
                E.stop()
                sys.exit(0)

            elif method == "rank":

                for c in options.columns:
                    tt = table[c]
                    t = list(zip(tt, list(range(nrows))))
                    t.sort()
                    for i, n in zip([x[1] for x in t], list(range(nrows))):
                        tt[i] = n

            elif method in ("lower-bound", "upper-bound"):

                boundary = float(options.parameters[0])
                del options.parameters[0]
                new_value = float(options.parameters[0])
                del options.parameters[0]

                if method == "upper-bound":
                    for c in options.columns:
                        for r in range(nrows):
                            if isinstance(table[c][r], float) and \
                                    table[c][r] > boundary:
                                table[c][r] = new_value
                else:
                    for c in options.columns:
                        for r in range(nrows):
                            if isinstance(table[c][r], float) and \
                                    table[c][r] < boundary:
                                table[c][r] = new_value

            elif method == "fdr":
                pvalues = []
                for c in options.columns:
                    pvalues.extend(table[c])

                assert max(pvalues) <= 1.0, "pvalues > 1 in table: max=%s" % \
                    str(max(pvalues))
                assert min(pvalues) >= 0, "pvalue < 0 in table: min=%s" % \
                    str(min(pvalues))

                # convert to str to avoid test for float downstream
                qvalues = list(
                    map(
                        str,
                        Stats.adjustPValues(pvalues,
                                            method=options.fdr_method)))

                if options.fdr_add_column is None:
                    x = 0
                    for c in options.columns:
                        table[c] = qvalues[x:x + nrows]
                        x += nrows
                else:
                    # add new column headers
                    if len(options.columns) == 1:
                        fields.append(options.fdr_add_column)
                    else:
                        for co in options.columns:
                            fields.append(options.fdr_add_column + fields[co])

                    x = 0
                    for c in options.columns:
                        # add a new column
                        table.append(qvalues[x:x + nrows])
                        x += nrows
                    ncols += len(options.columns)

            elif method == "normalize-by-table":

                other_table_name = options.parameters[0]
                del options.parameters[0]
                other_fields, other_table = CSV.readTable(
                    IOTools.open_file(other_table_name, "r"),
                    with_header=options.has_headers,
                    as_rows=False)

                # convert all values to float
                for c in options.columns:
                    for r in range(nrows):
                        try:
                            other_table[c][r] = float(other_table[c][r])
                        except ValueError:
                            continue

                # set 0s to 1 in the other matrix
                for c in options.columns:
                    for r in range(nrows):
                        if isinstance(table[c][r], float) and \
                                isinstance(other_table[c][r], float) and \
                                other_table[c][r] != 0:
                            table[c][r] /= other_table[c][r]
                        else:
                            table[c][r] = options.missing_value

        # convert back
        if options.format is not None:
            for c in options.columns:
                for r in range(nrows):
                    if isinstance(table[c][r], float):
                        table[c][r] = options.format % table[c][r]

        options.stdout.write("\t".join(fields) + "\n")
        if options.sort_rows:
            old2new = {}
            for r in range(nrows):
                old2new[table[0][r]] = r
            for x in options.sort_rows.split(","):
                if x not in old2new:
                    continue
                r = old2new[x]
                options.stdout.write(
                    "\t".join(map(str, [table[c][r]
                                        for c in range(ncols)])) + "\n")
        else:
            for r in range(nrows):
                options.stdout.write(
                    "\t".join(map(str, [table[c][r]
                                        for c in range(ncols)])) + "\n")

    E.stop()
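
The "kullback-leibler" method in both versions of the script accumulates e1 = sum(p * log(p/q)) and e2 = sum(q * log(q/p)) over each pair of selected columns and reports both values together with their mean. The same computation as a standalone sketch; symmetric_kl is an illustrative name, and the inputs are assumed to be strictly positive.

import math

def symmetric_kl(p_values, q_values):
    """Return (KL(p||q), KL(q||p), mean of the two) for paired value lists."""
    e1 = sum(p * math.log(p / q) for p, q in zip(p_values, q_values))
    e2 = sum(q * math.log(q / p) for p, q in zip(p_values, q_values))
    return e1, e2, (e1 + e2) / 2.0

# Example with two small probability-like columns.
print(symmetric_kl([0.2, 0.3, 0.5], [0.25, 0.25, 0.5]))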
Code example #5
File: table2table.py  Project: siping/cgat
                else:
                    for c in options.columns:                
                        for r in range(nrows):
                            if type(table[c][r]) == types.FloatType and \
                                   table[c][r] < boundary:
                                table[c][r] = new_value

            elif method == "fdr":
                pvalues = []
                for c in options.columns: pvalues.extend( table[c] )

                assert max(pvalues) <= 1.0, "pvalues > 1 in table"
                assert min(pvalues) >= 0, "pvalue < 0 in table"

                # convert to str to avoid test for float downstream
                qvalues = map(str, Stats.adjustPValues( pvalues, method = options.fdr_method ))

                x = 0
                for c in options.columns: 
                    table[c] = qvalues[x:x+nrows]
                    x += nrows

            elif method == "normalize-by-table":

                other_table_name = options.parameters[0]
                del options.parameters[0]
                other_fields, other_table  = CSV.ReadTable( open(other_table_name, "r"), with_header = options.has_headers, as_rows = False )

                # convert all values to float
                for c in options.columns:
                    for r in range(nrows):