Example #1
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: codonbias_weights2tsv.py 2781 2009-09-10 11:33:14Z andreas $")

    parser.add_option("--methods",
                      dest="methods",
                      type="string",
                      help="methods to apply.")

    parser.add_option("--is-frequencies",
                      dest="is_frequencies",
                      action="store_true",
                      help="data is frequencies (default: weights).")

    parser.add_option("-s",
                      "--sort",
                      dest="sort",
                      type="choice",
                      choices=("percent-difference", "aa"),
                      help="sort order of output table.")

    parser.add_option(
        "-g",
        "--global-sort",
        dest="global_sort",
        action="store_true",
        help="globally sort results (otherwise: by species pair).")

    parser.set_defaults(
        methods="",
        is_frequencies=False,
        sort="percent-difference",
        global_sort=False,
    )

    (options, args) = E.Start(parser)
    if options.methods:
        options.methods = options.methods.split(",")

    fields, table = CSV.ReadTable(sys.stdin)

    ## convert weights to floats
    table = CSV.getConvertedTable(table, range(1, len(fields)))

    for method in options.methods:

        if method == "overview":
            if options.is_frequencies:
                WriteOverviewFrequencies(fields, table, options)
            else:
                WriteOverviewWeights(fields, table, options)
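
All of these examples assume the same contract for CSV.ReadTable: it reads a tab-separated stream with a header line and returns a tuple (fields, table), row-wise with as_rows=True or column-wise with as_rows=False. For experimenting with the snippets outside the CGAT code base, a minimal pure-Python stand-in under that assumption (read_table is hypothetical, not the CGAT implementation):

def read_table(infile, with_header=True, as_rows=True):
    # split non-comment lines on tabs
    lines = [l.rstrip("\n").split("\t")
             for l in infile if not l.startswith("#")]
    fields = lines.pop(0) if with_header else []
    if as_rows:
        return fields, lines
    # column-wise: one list of values per field
    return fields, [list(col) for col in zip(*lines)]
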
Example #2
def readAndExpandTable( infile, options ):
    '''split fields in a table at a separator.

    If a field in a row contains multiple values,
    the row is expanded into multiple rows so that
    every value gets its own row.
    '''

    fields, table  = CSV.ReadTable( infile, with_header = options.has_headers, as_rows = True )

    options.stdout.write("\t".join(fields) + "\n")
    
    for row in table:

        data = []
        for x in range(len(fields)):
            data.append( row[x].split( options.separator ) )

        nrows = max( [ len(d) for d in data ] )

        for d in data:
            d += [""] * (nrows - len(d))

        for n in range(nrows):
            options.stdout.write( "\t".join( [ d[n] for d in data ] ) + "\n" )
Example #3
def readAndGroupTable( infile, options ):
    """read table from infile and group.
    """
    fields, table  = CSV.ReadTable( infile, with_header = options.has_headers, as_rows = True )
    options.columns = getColumns( fields, options.columns )
    assert options.group_column not in options.columns

    converter = float
    new_fields = [ fields[options.group_column] ] + [ fields[x] for x in options.columns ]

    if options.group_function == "min":
        f = min
    elif options.group_function == "max":
        f = max
    elif options.group_function == "sum":
        f = lambda z: reduce( lambda x,y: x+y, z)
    elif options.group_function == "mean":
        f = scipy.mean
    elif options.group_function == "cat":
        f = lambda x: ";".join( [ y for y in x if y != "" ] )
        converter = str
    elif options.group_function == "uniq":
        f = lambda x: ";".join( [ y for y in set(x) if y != "" ] )
        converter = str
    elif options.group_function == "stats":
        f = lambda x: str(Stats.DistributionalParameters(x))
        # update headers
        new_fields = [ fields[options.group_column] ]
        for c in options.columns:
            new_fields += list( map(lambda x: "%s_%s" % (fields[c], x), Stats.DistributionalParameters().getHeaders() ) )

    ## convert values to floats (except for the group column);
    ## rows with unconvertible values in the selected columns are dropped
    new_table = []
    for row in table:
        skip = False
        new_row = [ row[options.group_column] ]

        for c in options.columns:
            if row[c] == options.missing_value:
                new_row.append(row[c])
            else:
                try:
                    new_row.append( converter(row[c]) )
                except ValueError:
                    skip = True
                    break
        if not skip: new_table.append(new_row)
    table = new_table

    new_rows = CSV.GroupTable( table,
                               group_column = 0,
                               group_function = f )

    options.stdout.write("\t".join(new_fields) + "\n")        
    for row in new_rows:
        options.stdout.write( "\t".join( map(str,row) ) + "\n")
Example #4
def getGODescriptions(infile):
    '''return dictionary mapping GO category to description
    and namespace.
    '''

    with IOTools.openFile(infile) as inf:
        fields, table = CSV.ReadTable(inf, as_rows=False)

    return dict([(go_id, (go_type, description))
                 for go_type, go_id, description in zip(
                     table[fields.index("go_type")],
                     table[fields.index("go_id")],
                     table[fields.index("description")])])
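
Given a table with go_type, go_id and description columns, the result maps each GO identifier to a (namespace, description) pair, e.g. (hypothetical file name and identifier):

descriptions = getGODescriptions("go_info.tsv.gz")
namespace, description = descriptions["GO:0008150"]
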
Example #5
def readAndJoinTable(infile, options):
    '''join rows in a table over a key column.

    Reshapes a long table into a wide one: one output row per value of
    the join column and one block of columns per value of the name
    column, with "na" filling missing combinations.
    '''

    fields, table = CSV.ReadTable(infile,
                                  with_header=options.has_headers,
                                  as_rows=True)

    join_column = options.join_column - 1
    join_name = options.join_column_name - 1

    join_rows = list(set(map(lambda x: x[join_column], table)))
    join_rows.sort()

    join_names = list(set(map(lambda x: x[join_name], table)))
    join_names.sort()

    join_columns = list(
        set(range(len(fields))).difference(set((join_column, join_name))))
    join_columns.sort()

    new_table = []
    map_old2new = {}

    map_name2start = {}
    x = 1
    for name in join_names:
        map_name2start[name] = x
        x += len(join_columns)

    row_width = len(join_columns) * len(join_names)
    for x in join_rows:
        map_old2new[x] = len(map_old2new)
        new_row = [
            x,
        ] + ["na"] * row_width
        new_table.append(new_row)

    for row in table:
        row_index = map_old2new[row[join_column]]
        start = map_name2start[row[join_name]]
        for x in join_columns:
            new_table[row_index][start] = row[x]
            start += 1

    # print new table
    options.stdout.write(fields[join_column])
    for name in join_names:
        for column in join_columns:
            options.stdout.write("\t%s%s%s" %
                                 (name, options.separator, fields[column]))
    options.stdout.write("\n")

    for row in new_table:
        options.stdout.write("\t".join(row) + "\n")
Example #6
def readAndCollapseTable(infile, options, missing_value=""):
    '''collapse a table.

    Collapse a table of two columns with row names in the first
    column. Outputs a table with multiple columns for each row name.
    '''

    fields, table = CSV.ReadTable(infile,
                                  with_header=options.has_headers,
                                  as_rows=True)

    if len(fields) != 2:
        raise NotImplementedError("can only work on tables with two columns")

    values = collections.defaultdict(list)

    # the first row name marks the start of each block of rows
    separator = table[0][0]
    row_names = set([x[0] for x in table])

    row_name, value = table[0]

    values[row_name].append(value)
    added = set([row_name])
    for row_name, value in table[1:]:
        if row_name == separator:
            for r in row_names:
                if r not in added:
                    values[r].append(missing_value)
            added = set()

        values[row_name].append(value)
        added.add(row_name)

    for r in row_names:
        if r not in added:
            values[r].append(missing_value)

    sizes = set([len(x) for x in values.values()])
    assert len(sizes) == 1, "unequal number of row_names"
    size = list(sizes)[0]

    options.stdout.write("row\t%s\n" %
                         ("\t".join(["column_%i" % x for x in range(size)])))

    for key, row in values.items():
        options.stdout.write("%s\t%s\n" % (key, "\t".join(row)))
Example #7
def computeFDR( infile, options ):
    '''compute FDR on a table.

    Note: the body below is identical to readAndExpandTable (Example #2)
    and performs no FDR computation; the actual correction lives in the
    "fdr" method of table2table.py (Example #13).
    '''

    fields, table  = CSV.ReadTable( infile, with_header = options.has_headers, as_rows = True )

    options.stdout.write("\t".join(fields) + "\n")
    
    for row in table:

        data = []
        for x in range(len(fields)):
            data.append( row[x].split( options.separator ) )

        nrows = max( [ len(d) for d in data ] )

        for d in data:
            d += [""] * (nrows - len(d))

        for n in range(nrows):
            options.stdout.write( "\t".join( [ d[n] for d in data ] ) + "\n" )
Example #8
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: optic/analyze_sites_slr.py 2781 2009-09-10 11:33:14Z andreas $")

    parser.add_option("--method",
                      dest="method",
                      type="choice",
                      choices=("summary-slr", "summary-filtered",
                               "over-representation", "positive-site-table",
                               "negative-site-table", "neutral-site-table",
                               "positive-site-list", "negative-site-list",
                               "neutral-site-list"),
                      help="method to apply.")

    parser.add_option("--prefix",
                      dest="prefix",
                      type="string",
                      help="prefix for rows.")

    parser.add_option("-s",
                      "--filename-sites",
                      dest="filename_sites",
                      type="string",
                      help="filename with sites information.")

    parser.add_option("-l",
                      "--filename-log",
                      dest="filename_log",
                      type="string",
                      help="filename with logging information.")

    parser.add_option(
        "-m",
        "--filename-mali",
        dest="filename_mali",
        type="string",
        help="filename of the multiple alignment that was input to SLR. "
        "If given, it is used to filter indels.")

    parser.add_option(
        "--filter-probability",
        dest="filter_probability",
        type="float",
        help="threshold for probability above which to include positive sites."
    )

    parser.add_option("--no-header",
                      dest="write_header",
                      action="store_false",
                      help="only output header.")

    parser.add_option("--only-header",
                      dest="only_header",
                      action="store_true",
                      help="only output header.")

    parser.add_option("--significance-threshold",
                      dest="significance_threshold",
                      type="float",
                      help="threshold for significance tests [%default].")

    parser.add_option("--use-adjusted",
                      dest="use_adjusted",
                      action="store_true",
                      help="use SLR adjusted probability values.")

    parser.add_option("--truncate-sites-list",
                      dest="truncate_sites_list",
                      type="int",
                      help="truncate sites list after ## entries (0 for all).")

    parser.add_option(
        "--context-size",
        dest="context_size",
        type="int",
        help="size of left/right context around a selected residue.")

    parser.set_defaults(
        prefix=None,
        filter_probability=0,
        filter_omega=0,
        filename_sites="-",
        filename_log=None,
        filename_mali=None,
        significance_threshold=0.05,
        write_header=True,
        only_header=False,
        use_adjusted=False,
        context_size=0,
        truncate_sites_list=0,
    )

    (options, args) = E.Start(parser)

    slr = WrapperSlr.Slr()

    # a "%s" in the sites filename means one input file per prefix;
    # enable the prefix column in that case
    if "%s" in options.filename_sites:
        options.prefix = True

    if options.method == "summary-slr":

        # write header
        if options.write_header or options.only_header:

            if options.loglevel >= 1:
                options.stdlog.write(
                    """# Numbers of positive/neutral/negative sites according to SLR
#
# This uses the thresholds as set in SLR. Use "counts" for filtering
# residues based on your own thresholds
""")
            thresholds = "95%", "99%", "95% corrected", "99% corrected"

            if options.prefix:
                options.stdout.write("prefix\t")
            options.stdout.write(
                "ltree\tomega\tkappa\tlnL\tnsites\tnsyn\tngap\t")
            options.stdout.write("\t".join(
                map(lambda x: "npos_" + x.replace(" ", "_"), thresholds)))
            options.stdout.write("\t")
            options.stdout.write("\t".join(
                map(lambda x: "nneg_" + x.replace(" ", "_"), thresholds)))
            options.stdout.write("\n")

    elif options.method == "summary-filtered":

        # write header
        if options.write_header or options.only_header:
            if options.loglevel >= 1:
                options.stdlog.write(
                    """# Numbers of positive/neutral/negative sites according to SLR
#
# This method uses the supplied threshold and the multiple alignment to filter.
# All positions that are above the threshold (P-Value) and which are located in
# indels: >= 1 sequence missing from column, are removed.
""")

            if options.prefix:
                options.stdout.write("prefix\t")
            options.stdout.write(
                "ltree\tomega\tkappa\tlnL\tnsites\tnfiltered\tntotal\tnsyn\tnneg\tnpos\n"
            )

    elif options.method in ("positive-site-table", "negative-site-table",
                            "neutral-site-table"):

        # write header
        if options.write_header or options.only_header:
            if options.loglevel >= 1:
                options.stdlog.write(
                    """# Numbers of positive/neutral/negative sites according to SLR
#
# Note: sequence positions are 1-based, but mali positions are 0-based.
# Residues in indel positions have been removed and significance was
# determined using a threshold of %5.2e
""" % options.significance_threshold)

            if options.prefix:
                options.stdout.write("prefix\t")
            options.stdout.write("cluster\tnsites\tp-value\tsites\n")

    elif options.method in ("positive-site-list", "negative-site-list",
                            "neutral-site-list"):

        # write header
        if options.write_header or options.only_header:
            if options.loglevel >= 1:
                options.stdlog.write(
                    """# Sites under positive/neutral/negative selection according to SLR
#
# Note: sequence positions are 1-based, but mali positions are 0-based.
# Residues in indel positions have been removed and significance was
# determined using a threshold of %5.2e
""" % options.significance_threshold)

            if options.prefix:
                options.stdout.write("prefix\t")

            options.stdout.write(
                "sequence\tn\taa\tseq_pos\tmali_pos\tcontext\n")

    elif options.method == "over-representation":

        # write header
        if options.write_header or options.only_header:
            if options.loglevel >= 1:
                options.stdlog.write("""# Genes with over-represented sites.
#
# This method uses as input the output of summary-filtered.
""")

    if options.only_header:
        sys.exit(0)

    if options.method in ("summary-slr", "summary-filtered",
                          "positive-site-table", "negative-site-table",
                          "neutral-site-table", "positive-site-list",
                          "negative-site-list", "neutral-site-list"):

        ninput, noutput, nskipped = 0, 0, 0

        if "%s" in options.filename_sites:

            headers, table = CSV.ReadTable(sys.stdin)

            fprefix = headers.index("prefix")

            try:
                fsignificance = headers.index("p")
            except ValueError:
                fsignificance = None

            for row in table:

                id = row[fprefix]
                if fsignificance is not None:
                    p_value = row[fsignificance]
                else:
                    p_value = None

                ninput += 1

                fn = re.sub("%s", id, options.filename_sites)
                if not os.path.exists(fn):
                    nskipped += 1
                    continue

                lines_sites = open(fn, "r").readlines()
                if options.filename_log:
                    lines_log = open(re.sub("%s", id, options.filename_log),
                                     "r").readlines()

                result = slr.parseOutput(lines_sites, lines_log)

                if options.method in ("summary-filtered",
                                      "positive-site-table",
                                      "negative-site-table",
                                      "neutral-site-table"):
                    mali = Mali.Mali()
                    mali.readFromFile(
                        open(re.sub("%s", id, options.filename_mali), "r"))
                else:
                    mali = None

                ProcessResult(result,
                              options,
                              mali,
                              prefix=id,
                              p_value=p_value)
                noutput += 1
        else:
            if options.filename_sites == "-":
                lines_sites = sys.stdin.readlines()
            else:
                lines_sites = open(options.filename_sites, "r").readlines()

            ninput += 1
            # guard against options.filename_log being unset
            lines_log = []
            if options.filename_log:
                lines_log = open(options.filename_log, "r").readlines()

            result = slr.parseOutput(lines_sites, lines_log)

            if options.filename_mali:
                mali = Mali.Mali()
                mali.readFromFile(open(options.filename_mali, "r"))
            else:
                if options.method == "summary-filtered":
                    raise "please supply a multiple alignment for filtering."

                mali = None

            ProcessResult(result, options, mali, prefix=options.prefix)
            noutput += 1

        if options.loglevel >= 1:
            options.stdlog.write("# ninput=%i, noutput=%i, nskipped=%i.\n" %
                                 (ninput, noutput, nskipped))

    else:
        if options.method == "over-representation":

            results = []
            for line in sys.stdin:
                if line[0] == "#":
                    continue
                data = line[:-1].split("\t")
                if data[0] == "prefix":
                    continue

                results.append(
                    Result(data[0], int(data[6]), int(data[7]), int(data[8]),
                           int(data[9]), int(data[10])))

            # probability of a single site being positive
            ntotal = sum(map(lambda x: x.mNTotal, results))
            npositives = sum(map(lambda x: x.mNPositive, results))
            p = float(npositives) / float(ntotal)

            if options.loglevel >= 1:
                options.stdlog.write("# sites: total=%i, positive=%i, p=%f\n" %
                                     (ntotal, npositives, p))

            new_results = []
            for result in results:
                if result.mNTotal == 0:
                    continue

                # use -1, because I need P( x >= X)
                # sf = 1 - cdf and cdf = P( x <= X ), thus sf = 1 - P( x <= X )
                # = P (x > X ).
                r = scipy.stats.binom.sf(result.mNPositive - 1, result.mNTotal,
                                         p)

                result.mSignificance = r

                if r < options.significance_threshold:
                    new_results.append(result)

            new_results.sort(key=lambda x: x.mSignificance)

            # header precedes the result rows on stdout
            options.stdout.write(Result().getHeader() + "\n")

            for result in new_results:
                options.stdout.write(str(result) + "\n")

            if options.loglevel >= 1:
                options.stdlog.write("# ntotal=%i, npos=%i\n" %
                                     (len(results), len(new_results)))

    E.Stop()
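
The over-representation branch applies a one-sided binomial test: with per-site positive probability p, the chance of observing at least k positive sites among n is P(X >= k) = scipy.stats.binom.sf(k - 1, n, p), which is why the code subtracts 1. A standalone check against the complement of the CDF (hypothetical counts):

import scipy.stats

n, k, p = 100, 12, 0.05
print(scipy.stats.binom.sf(k - 1, n, p))       # P(X >= 12)
print(1 - scipy.stats.binom.cdf(k - 1, n, p))  # same value by definition
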
Example #9
def main():

    parser = E.OptionParser(
        version="%prog version: $Id: analyze_readpositions.py 2781 2009-09-10 11:33:14Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("--output-filename-pattern",
                      dest="output_filename_pattern",
                      type="string",
                      help="pattern for additional output files [%default].")

    parser.set_defaults(
        length=1000,
        minimum_coverage=0.90,
        maximum_reads=[1, 10, 20, 50, 100],
        output_filename_pattern="%s",
        normalize=True,
    )

    (options, args) = E.Start(parser, add_csv_options=True)

    fields, table = CSV.ReadTable(sys.stdin, dictreader=CSV.DictReaderLarge)

    map_fields2column = {}
    for x in fields:
        map_fields2column[x] = len(map_fields2column)

    coverage_5prime = numpy.zeros(options.length, numpy.float)
    coverage_3prime = numpy.zeros(options.length, numpy.float)

    coverage_maxreads5prime = numpy.zeros(options.length, numpy.float)
    coverage_maxreads3prime = numpy.zeros(options.length, numpy.float)

    coverage_full5prime = numpy.zeros(options.length, numpy.float)
    coverage_full3prime = numpy.zeros(options.length, numpy.float)

    coverage_min5prime = numpy.zeros(options.length, numpy.float)
    coverage_min3prime = numpy.zeros(options.length, numpy.float)

    histograms = []
    for x in range(len(options.maximum_reads)):
        histograms.append([
            numpy.zeros(options.length, numpy.float),
            numpy.zeros(options.length, numpy.float), 0
        ])

    ninput, noutput, nfull, nmincov, nskipped, nlength, nmaxreads = 0, 0, 0, 0, 0, 0, 0
    for row in table:
        length, covered, meancov, data, nreads = (int(row["cov_nval"]),
                                                  float(row["cov_covered"]),
                                                  float(row["cov_mean"]),
                                                  row["cov_values"],
                                                  int(row["nover2"]))
        ninput += 1
        if length < options.length:
            nlength += 1
            continue

        if data == "na":
            nskipped += 1
            continue

        noutput += 1
        mincov = covered / length
        values = map(float, data.split(";"))
        m = max(values)
        values = [x / m for x in values]
        coverage_5prime += values[0:1000]
        coverage_3prime += values[-1000:]

        if mincov >= 1.0:
            coverage_full5prime += values[0:1000]
            coverage_full3prime += values[-1000:]
            nfull += 1

        if meancov >= options.minimum_coverage:
            coverage_min5prime += values[0:1000]
            coverage_min3prime += values[-1000:]
            nmincov += 1

        for maxreads in range(len(options.maximum_reads)):
            if nreads <= options.maximum_reads[maxreads]:
                histograms[maxreads][0] += values[0:1000]
                histograms[maxreads][1] += values[-1000:]
                histograms[maxreads][2] += 1

    if options.normalize:
        for x5, x3 in ((coverage_5prime, coverage_3prime),
                       (coverage_min5prime, coverage_min3prime),
                       (coverage_full5prime, coverage_full3prime)):
            m = max((max(x5), max(x3)))
            x3 /= m
            x5 /= m

        for x5, x3, c in histograms:
            m = max((max(x5), max(x3)))
            x5 /= m
            x3 /= m

    outfile = options.stdout
    outfile.write("\t".join(("distance", "minlen-5'", "minlen-3'", "mincov-5'",
                             "mincov-3'", "full-5'", "full-3'")) + "\n")

    for x in range(0, options.length):
        outfile.write( "\t".join( [ "%6.4f" % x for x in \
                                        (x,
                                         coverage_5prime[x],
                                         coverage_3prime[x],
                                         coverage_min5prime[x],
                                         coverage_min3prime[x],
                                         coverage_full5prime[x],
                                         coverage_full3prime[x] ) ] ) + "\n" )

    outfile5 = open(options.output_filename_pattern % "reads5", "w")
    outfile3 = open(options.output_filename_pattern % "reads3", "w")

    outfile5.write("\t".join([
        "distance",
    ] + [
        "reads%i" % options.maximum_reads[y]
        for y in range(len(options.maximum_reads))
    ]) + "\n")
    outfile3.write("\t".join([
        "distance",
    ] + [
        "reads%i" % options.maximum_reads[y]
        for y in range(len(options.maximum_reads))
    ]) + "\n")
    for x in range(0, options.length):
        outfile5.write("%i\t%s\n" % (x, "\t".join([
            "%6.4f" % histograms[y][0][x]
            for y in range(len(options.maximum_reads))
        ])))
        outfile3.write("%i\t%s\n" % (x, "\t".join([
            "%6.4f" % histograms[y][1][x]
            for y in range(len(options.maximum_reads))
        ])))

    E.info( "ninput=%i, noutput=%i, nmaxreads=%i, nfull=%i, nmincov=%i, nskipped=%i, nlength=%i" %\
                (ninput, noutput, nmaxreads, nfull, nmincov, nskipped, nlength) )

    E.Stop()
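
A note on the normalization loop: x5 /= m divides the numpy arrays in place, so rebinding them to loop variables still rescales coverage_5prime and friends. A small demonstration of that aliasing:

import numpy

a = numpy.array([1.0, 2.0, 4.0])
b = a        # b aliases a's buffer
b /= 4.0     # in-place division
print(a)     # [ 0.25  0.5   1.  ] - a sees the change
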
Example #10
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: csv_intersection.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-u",
                      "--unique",
                      dest="unique",
                      action="store_true",
                      help="output rows are uniq.")

    parser.set_defaults(
        remove=False,
        unique=False,
    )

    (options, args) = E.Start(parser, add_csv_options=True)

    if len(args) != 2:
        raise ValueError("please specify two files to join.")

    options.filename1, options.filename2 = args

    fields1, table1 = CSV.ReadTable(open(options.filename1, "r"))
    fields2, table2 = CSV.ReadTable(open(options.filename2, "r"))

    if options.unique:
        outfile = UniqueBuffer(sys.stdout)
    else:
        outfile = options.stdout

    # NB: the remainder of this function looks incomplete in the original
    # script: options.join_fields1/join_fields2, ``lines`` and
    # ``input_fields`` are never defined, and the writer was created
    # inside the loop over fields1. See csv_set.py (Example #11) for a
    # working variant of the same join logic.

    # build new field list
    new_fields = []

    for x in options.join_fields1:
        new_fields.append(x)

    for x in fields1:
        if x not in options.join_fields1:
            new_fields.append(x)
        if x not in options.join_fields2:
            new_fields.append(x)

    writer = csv.DictWriter(outfile,
                            new_fields,
                            dialect=options.csv_dialect,
                            lineterminator=options.csv_lineterminator,
                            extrasaction='ignore')

    if len(lines) > 0:

        old_fields = lines[0][:-1].split("\t")

        if options.remove:
            fields = []
            for x in old_fields:
                if x not in input_fields:
                    fields.append(x)
        else:
            fields = input_fields

        reader = csv.DictReader(lines, dialect=options.csv_dialect)

        print "\t".join(fields)

        first_row = True
        for row in reader:
            row = CSV.ConvertDictionary(row)
            writer.writerow(row)

    E.Stop()
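
Since the function above is fragmentary, a minimal working intersection of two row lists keyed on one column, for comparison (a sketch of the apparent intent; Example #11 shows the full variant with configurable join fields):

def intersect(table1, table2, key=0):
    # keep rows of table2 whose key value also occurs in table1
    keys = set(row[key] for row in table1)
    return [row for row in table2 if row[key] in keys]
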
Example #11
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: csv_set.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-u",
                      "--unique",
                      dest="unique",
                      action="store_true",
                      help="output rows are uniq.")

    parser.add_option("-1",
                      "--join-fields1",
                      dest="join_fields1",
                      type="string",
                      help="join fields in first table.")
    parser.add_option("-2",
                      "--join-fields2",
                      dest="join_fields2",
                      type="string",
                      help="join fields in second table.")
    parser.add_option("-m",
                      "--method",
                      dest="method",
                      type="choice",
                      help="set operation to perform.",
                      choices=("intersection", "rest", "union"))

    parser.set_defaults(
        remove=False,
        unique=False,
        join_fields1=None,
        join_fields2=None,
        method="intersection",
    )

    (options, args) = E.Start(parser, add_csv_options=True)

    if len(args) != 2:
        raise ValueError("please specify two files to join.")

    if not options.join_fields1 or not options.join_fields2:
        raise ValueError("please specify at least one join field per table.")

    options.join_fields1 = options.join_fields1.split(",")
    options.join_fields2 = options.join_fields2.split(",")

    options.filename1, options.filename2 = args

    fields1, table1 = CSV.ReadTable(open(options.filename1, "r"))
    fields2, table2 = CSV.ReadTable(open(options.filename2, "r"))

    if options.unique:
        outfile = UniqueBuffer(sys.stdout)
    else:
        outfile = options.stdout

    nfields1 = []
    for x in range(len(fields1)):
        if fields1[x] in options.join_fields1: nfields1.append(x)
    nfields2 = []
    for x in range(len(fields2)):
        if fields2[x] in options.join_fields2: nfields2.append(x)

    ## calculate row indices: duplicate keys are not taken care of here
    keys = {}
    for row1 in table1:
        v = map(lambda x: row1[x], nfields1)
        key = hashlib.md5("".join(v)).digest()
        keys[key] = row1

    if options.method == "intersection":
        ## build new field list
        take = range(len(fields1))
        c = len(take)
        for x in fields2:
            if x not in options.join_fields2:
                take.append(c)
            c += 1

        t = fields1 + fields2

        new_fields = map(lambda x: t[x], take)

        print "\t".join(new_fields)

        for row2 in table2:
            v = map(lambda x: row2[x], nfields2)
            key = hashlib.md5("".join(v)).digest()
            if key in keys:
                new_row = keys[key] + row2
                outfile.write("\t".join(map(lambda x: new_row[x], take)) +
                              "\n")

    elif options.method == "rest":

        new_fields = fields2
        print "\t".join(new_fields)

        for row2 in table2:
            v = map(lambda x: row2[x], nfields2)
            key = hashlib.md5("".join(v)).digest()
            if key not in keys:
                outfile.write("\t".join(row2) + "\n")

    E.Stop()
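
The md5 digests keep the key dictionary compact for wide join keys, but "".join(v) cannot distinguish ("ab", "c") from ("a", "bc"), and duplicate keys silently keep only the last row. Using the value tuple itself as the dictionary key avoids the ambiguity (a sketch of the alternative):

def build_keys(table, key_columns):
    keys = {}
    for row in table:
        # tuples of the key values are hashable and unambiguous
        keys[tuple(row[c] for c in key_columns)] = row
    return keys
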
Example #12
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: data2bins.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("--column",
                      dest="column",
                      type="int",
                      help="column to split on.")

    parser.add_option("--num-bins",
                      dest="num_bins",
                      type="int",
                      help="number of bins to create.")

    parser.add_option("--method",
                      dest="method",
                      type="choice",
                      choices=("equal-sized-bins", ),
                      help="method to use to bin data.")

    parser.add_option("--no-headers",
                      dest="has_headers",
                      action="store_false",
                      help="matrix has no row/column headers.")

    parser.add_option(
        "-p",
        "--output-filename-pattern",
        dest="output_filename_pattern",
        type="string",
        help="OUTPUT filename with histogram information on aggregate "
        "coverages [%default].")

    parser.set_defaults(
        has_headers=True,
        method="equal-sized-bins",
        column=1,
        num_bins=4,
        output_filename_pattern="bin%i",
    )

    (options, args) = E.Start(parser)
    options.column -= 1

    if args:
        if args[0] == "-":
            infile = sys.stdin
        else:
            infile = open(args[0], "r")
    else:
        infile = sys.stdin

    fields, data = CSV.ReadTable(infile)

    c = options.column
    values = [float(x[c]) for x in data]

    bins = []

    if options.method == "equal-sized-bins":
        increment = int(math.floor(float(len(values)) / options.num_bins))
        indices = range(0, len(values))
        indices.sort(key=lambda x: values[x])
        for x in xrange(len(values)):
            values[indices[x]] = x
        bins = range(0, len(values) - increment, increment)

    elif options.method == "pass":
        pass

    E.debug("bins=%s" % str(bins))

    outputters = []
    for x in xrange(0, len(bins)):
        outputters.append(
            Outputter(options.output_filename_pattern % x, fields))

    # output tables
    for x in xrange(0, len(data)):
        bin = bisect.bisect(bins, values[x]) - 1
        outputters[bin].write(data[x])

    # stats
    if options.loglevel >= 1:
        options.stdlog.write("# bin\tstart\tcounts\tfilename\n")
        for x in xrange(0, len(bins)):
            options.stdlog.write(
                "# %i\t%f\t%i\t%s\n" %
                (x, bins[x], outputters[x].mCounts, outputters[x].mFilename))

    E.info("ninput=%i, noutput=%i" %
           (len(data), sum((x.mCounts for x in outputters))))

    E.Stop()
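
After the rank transform, values[i] holds the rank of row i and bins holds the rank at which each bin starts, so bisect assigns each row to its quantile. On literal ranks (hypothetical):

import bisect

ranks = [3, 0, 2, 1]   # ranks of 4 rows; num_bins = 2 -> increment = 2
bins = [0, 2]          # bins start at ranks 0 and 2
print([bisect.bisect(bins, r) - 1 for r in ranks])
# [1, 0, 1, 0]
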
Example #13
def main( argv = None ):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser( version = "%prog version: $Id: table2table.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-m", "--method", dest="methods", type="choice", action="append",
                      choices=( "transpose", "normalize-by-max","normalize-by-value","multiply-by-value",
                               "percentile","remove-header","normalize-by-table",
                               "upper-bound","lower-bound","kullback-leibler",
                                "expand","compress", "fdr", "grep" ),
                      help="""actions to perform on table.""")
    
    parser.add_option("-s", "--scale", dest="scale", type="float",
                      help="factor to scale matrix by."  )
    
    parser.add_option("-f", "--format", dest="format", type="string",
                      help="output number format."  )

    parser.add_option("-p", "--parameters", dest="parameters", type="string",
                      help="Parameters for various functions."  )

    parser.add_option("-t", "--headers", dest="has_headers", action="store_true",
                      help="matrix has row/column headers."  )

    parser.add_option("--transpose", dest="transpose", action="store_true",
                      help="transpose table."  )

    parser.add_option("--set-transpose-field", dest="set_transpose_field", type="string",
                      help="set first field (row 1 and col 1) to this value [%default]."  )

    parser.add_option("--transpose-format", dest="transpose_format", type="choice",
                      choices=("default", "separated", ),
                      help="input format of un-transposed table"  )

    parser.add_option("--expand", dest="expand_table", action="store_true",
                      help="expand table - multi-value cells with be expanded over several rows."  )

    parser.add_option("--no-headers", dest="has_headers", action="store_false",
                      help="matrix has no row/column headers."  )

    parser.add_option("--columns", dest="columns", type="string",
                      help="columns to use."  )

    parser.add_option( "--file", dest="file", type="string",
                      help="columns to test from table.",
                      metavar="FILE" )

    parser.add_option("-d", "--delimiter", dest="delimiter", type="string",
                      help="delimiter of columns." ,
                      metavar="DELIM" )

    parser.add_option("-V", "--invert-match", dest="invert_match", action="store_true",
                      help="invert match." )

    parser.add_option("--sort-by-rows", dest="sort_rows", type="string",
                      help="output order for rows."  )

    parser.add_option("-a", "--value", dest="value", type="float",
                      help="value to use for various algorithms."  )

    parser.add_option("--group", dest="group_column", type="int",
                      help="group values by column. Supply an integer column [default=%default]"  )

    parser.add_option("--group-function", dest="group_function", type="choice",
                      choices=("min", "max", "sum", "mean", "stats", "cat", "uniq"),
                      help="function to group values by."  )

    parser.add_option("--join-table", dest="join_column", type="int",
                      help="join rows in a table by columns."  )

    parser.add_option("--collapse-table", dest="collapse_table", type="string",
                      help="collapse a table. Value determines the missing variable [%default]."  )

    parser.add_option("--join-column-name", dest="join_column_name", type="int",
                      help="use this column as a prefix."  )

    parser.add_option("--flatten-table", dest="flatten_table", action="store_true",
                      help="flatten a table [%default]."  )

    parser.add_option("--as-column", dest="as_column", action="store_true",
                      help="output table as a single column."  )

    parser.add_option("--split-fields", dest="split_fields", action="store_true",
                      help="split fields."  )

    parser.add_option("--separator", dest="separator", type="string",
                      help="separator for multi-valued fields [default=%default]."  )

    parser.add_option( "--fdr-method", dest="fdr_method", type="choice",
                      choices = ( "BH", "bonferroni", "holm", "hommel", "hochberg", "BY" ),
                      help="method to perform multiple testing correction by controlling the fdr [default=%default]."  )

    parser.add_option( "--fdr-add-column", dest="fdr_add_column", type="string",
                       help = "add new column instead of replacing existing columns. "
                       "The value of the option will be used as prefix if there are multiple columns [%default]" )

    #IMS: add option to use a column as the row id in flatten
    parser.add_option("--id-column", dest="id_column", type ="string",
                      help="list of column(s) to use as the row id when flattening the table. "
                      "If None, then row number is used. [default=%default].")

    parser.add_option("--variable-name", dest="variable_name", type = "string",
                      help="the column header for the 'variable' column when flattening [default=%default].")

    parser.add_option("--value-name", dest="value_name", type = "string",
                      help="the column header for the 'value' column when flattening [default=%default].")


    parser.set_defaults(
        methods=[],
        scale=1.0,
        has_headers=True,
        format="%5.2f",
        value=0.0,
        parameters="",
        columns="all",
        transpose=False,
        set_transpose_field=None,
        transpose_format="default",
        group=False,
        group_column=0,
        group_function="mean",
        missing_value="na",
        sort_rows=None,
        flatten_table=False,
        collapse_table=None,
        separator=";",
        expand=False,
        join_column=None,
        join_column_name=None,
        compute_fdr=None,
        as_column=False,
        fdr_method="BH",
        fdr_add_column=None,
        id_column=None,
        variable_name="column",
        value_name="value",
        file=None,
        delimiter="\t",
        invert_match=False,
    )
    
    (options, args) = E.Start( parser, add_pipe_options = True )

    options.parameters = options.parameters.split(",")
    
    if options.group_column:
        options.group = True
        options.group_column -= 1

    ######################################################################
    ######################################################################
    ######################################################################
    ## if only to remove header, do this quickly
    if options.methods == ["remove-header"]:
        
        first = True
        for line in options.stdin:
            if line[0] == "#": continue
            if first:
                first = False
                continue
            options.stdout.write( line )

    elif options.transpose or "transpose" in options.methods:

        readAndTransposeTable( options.stdin, options )


    elif options.flatten_table:
        #IMS: bug fixed to make work. Also added options for keying on a
        #     particular column and adding custom column headings

        fields, table  = CSV.ReadTable( options.stdin, with_header = options.has_headers, as_rows = True )
        
        options.columns = getColumns( fields, options.columns )
        
        if options.id_column:
            id_columns = map(lambda x: int(x) -1,options.id_column.split(","))
            id_header = "\t".join([fields[id_column] for id_column in id_columns])
            options.columns = [x for x in options.columns if x not in id_columns]
        else:
            id_header = "row"

        options.stdout.write( "%s\t%s\t%s\n" %(id_header, options.variable_name, options.value_name) )
        
        for x, row in enumerate(table):

            if options.id_column:
                row_id = "\t".join([row[i] for i in id_columns])
            else:
                row_id = str(x)

            for y in options.columns:
                options.stdout.write( "%s\t%s\t%s\n" % (row_id,fields[y], row[y] ))

    elif options.as_column:
        
        fields, table  = CSV.ReadTable( options.stdin, with_header = options.has_headers, as_rows = True )
        options.columns = getColumns( fields, options.columns )
        table = zip( *table )
        
        options.stdout.write( "value\n" )
        
        for column in options.columns:
            options.stdout.write("\n".join( table[column] ) + "\n" )

    elif options.split_fields:

        # split comma separated fields
        fields, table  = CSV.ReadTable( options.stdin, 
                                        with_header = options.has_headers, 
                                        as_rows = True )
        

        options.stdout.write( "%s\n" % ("\t".join(fields)))

        for row in table:
            row = [ x.split(options.separator) for x in row ]
            for d in itertools.product( *row ):
                options.stdout.write( "%s\n" % "\t".join( d ) )
            
    elif options.group:
        readAndGroupTable( options.stdin, options )

    elif options.join_column:
        readAndJoinTable( options.stdin, options )

    elif options.expand_table:
        readAndExpandTable( options.stdin, options )

    elif options.collapse_table is not None:
        readAndCollapseTable( options.stdin, options, options.collapse_table )

    elif "grep" in options.methods:

        options.columns = map(lambda x: int(x)-1, options.columns.split(","))

        patterns = []

        if options.file:
            infile = open( options.file, "r")
            for line in infile:
                if line[0] == "#": continue
                patterns.append( line[:-1].split(options.delimiter)[0] )
        else:
            patterns=args

        for line in options.stdin:

            data = line[:-1].split(options.delimiter)
            found = False

            for c in options.columns:

                if data[c] in patterns:
                    found = True
                    break

            if (not found and options.invert_match) or (found and not options.invert_match):
                print line[:-1]
    else:

        ######################################################################
        ######################################################################
        ######################################################################
        ## Apply remainder of transformations
        fields, table  = CSV.ReadTable( options.stdin, with_header = options.has_headers, as_rows = False )
        # convert columns to list
        table = [ list(x) for x in table]

        ncols = len(fields)
        if len(table) == 0:
            raise ValueError( "table is empty" )
            
        nrows = len(table[0])

        E.info( "processing table with %i rows and %i columns" % (nrows, ncols) )

        options.columns = getColumns( fields, options.columns )
        
        ## convert all values to float
        for c in options.columns:
            for r in range(nrows):
                try:
                    table[c][r] = float(table[c][r])
                except ValueError:
                    continue

        for method in options.methods:

            if method == "normalize-by-value":

                value = float(options.parameters[0])
                del options.parameters[0]

                for c in options.columns:
                    table[c] = map( lambda x: x / value, table[c] )

            elif method == "multiply-by-value":

                value = float(options.parameters[0])
                del options.parameters[0]

                for c in options.columns:
                    table[c] = map( lambda x: x * value, table[c] )

            elif method == "normalize-by-max":

                for c in options.columns:
                    m = max( table[c] )
                    table[c] = map( lambda x: x / m, table[c] )

            elif method == "kullback-leibler":
                options.stdout.write("category1\tcategory2\tkl1\tkl2\tmean\n")
                for x in range(0,len(options.columns)-1):
                    for y in range(x+1, len(options.columns)):
                        c1 = options.columns[x]
                        c2 = options.columns[y]
                        e1 = 0
                        e2 = 0
                        for z in range(nrows):
                            p = table[c1][z]
                            q = table[c2][z]
                            e1 += p * math.log( p / q )
                            e2 += q * math.log( q / p )

                        options.stdout.write("%s\t%s\t%s\t%s\t%s\n" % (fields[c1], fields[c2],
                                                                       options.format % e1,
                                                                       options.format % e2,
                                                                       options.format % ((e1 + e2) / 2)) )
                E.Stop()
                sys.exit(0)

            elif method == "rank":

                for c in options.columns:
                    tt = table[c]
                    t = zip( tt, range(nrows) )
                    t.sort()
                    for i,n in zip( map(lambda x: x[1], t), range(nrows)):
                        tt[i] = n

            elif method in ("lower-bound", "upper-bound"):

                boundary = float(options.parameters[0])
                del options.parameters[0]
                new_value = float(options.parameters[0])
                del options.parameters[0]

                if method == "upper-bound":
                    for c in options.columns:                
                        for r in range(nrows):
                            if type(table[c][r]) == types.FloatType and \
                                   table[c][r] > boundary:
                                table[c][r] = new_value
                else:
                    for c in options.columns:                
                        for r in range(nrows):
                            if type(table[c][r]) == types.FloatType and \
                                    table[c][r] < boundary:
                                table[c][r] = new_value

            elif method == "fdr":
                pvalues = []
                for c in options.columns: pvalues.extend( table[c] )

                assert max(pvalues) <= 1.0, "pvalues > 1 in table: max=%s" % str(max(pvalues))
                assert min(pvalues) >= 0, "pvalue < 0 in table: min=%s" % str(min(pvalues))

                # convert to str to avoid test for float downstream
                qvalues = map(str, Stats.adjustPValues( pvalues, method = options.fdr_method ))

                if options.fdr_add_column is None:
                    x = 0
                    for c in options.columns: 
                        table[c] = qvalues[x:x+nrows]
                        x += nrows
                else:
                    # add new column headers

                    if len(options.columns) == 1:
                        fields.append( options.fdr_add_column )
                    else:
                        for co in options.columns:
                            fields.append( options.fdr_add_column + fields[co] )

                    x = 0
                    for c in options.columns:
                        # add a new column
                        table.append(qvalues[x:x+nrows])
                        x += nrows
                    ncols += len(options.columns)

            elif method == "normalize-by-table":

                other_table_name = options.parameters[0]
                del options.parameters[0]
                other_fields, other_table  = CSV.ReadTable( open(other_table_name, "r"), with_header = options.has_headers, as_rows = False )

                # convert all values to float
                for c in options.columns:
                    for r in range(nrows):
                        try:
                            other_table[c][r] = float (other_table[c][r] )
                        except ValueError:
                            continue

                ## divide by the other table; where either value is not
                ## numeric or the divisor is 0, set the missing value
                for c in options.columns:            
                    for r in range(nrows):
                        if type(table[c][r]) == types.FloatType and \
                               type(other_table[c][r]) == types.FloatType and \
                               other_table[c][r] != 0:
                               table[c][r] /= other_table[c][r]
                        else:
                            table[c][r] = options.missing_value

        ## convert back
        for c in options.columns:
            for r in range(nrows):
                if type(table[c][r]) == types.FloatType:
                    table[c][r] = options.format % table[c][r]

        options.stdout.write( "\t".join(fields) + "\n" )
        if options.sort_rows:
            old2new = {}
            for r in range(nrows):
                old2new[table[0][r]] = r
            for x in options.sort_rows.split(","):
                if x not in old2new: continue
                r = old2new[x]
                options.stdout.write( "\t".join( [ table[c][r] for c in range(ncols) ] ) + "\n")            
        else:
            for r in range(nrows):
                options.stdout.write( "\t".join( [ table[c][r] for c in range(ncols) ] ) + "\n")

    E.Stop()
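
The kullback-leibler method computes, for every column pair, D(P||Q) = sum_z p_z * log(p_z / q_z) and the reverse D(Q||P); it assumes strictly positive column entries, since log(p/q) is undefined at zero. A standalone check on hypothetical distributions:

import math

p = [0.5, 0.3, 0.2]
q = [0.4, 0.4, 0.2]
kl = sum(pi * math.log(pi / qi) for pi, qi in zip(p, q))
print("%5.2f" % kl)    # D(P||Q)
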
Example #14
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: optic/analyze_ribosomes.py 2781 2009-09-10 11:33:14Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-s",
                      "--schemas",
                      dest="schemas",
                      type="string",
                      help="schemas in the set.")

    parser.add_option("-e",
                      "--field-extract",
                      dest="field_extract",
                      type="string",
                      help="pattern for the field to extract.")

    parser.add_option("-c",
                      "--field-compare",
                      dest="field_compare",
                      type="string",
                      help="pattern for the field to compare.")

    parser.add_option("-i",
                      "--filename-identifiers",
                      dest="filename_identifiers",
                      type="string",
                      help="identifiers in the positive set.")

    parser.add_option("-u",
                      "--filename-subset",
                      dest="filename_subset",
                      type="string",
                      help="subset in the positive set.")

    parser.add_option("--filter-min-ratio",
                      dest="filter_min_ratio",
                      type="float",
                      help="minimum boundary for filter.")

    parser.add_option("--filter-max-ratio",
                      dest="filter_max_ratio",
                      type="float",
                      help="maximum boundary for filter.")

    parser.add_option(
        "-o",
        "--output-fields",
        dest="output_fields",
        type="string",
        help="output fields, choices are: zscore, val, nvals, sum, min, "
        "max, stddev, mean, median.")

    parser.add_option(
        "--output-pattern",
        dest="output_pattern",
        type="string",
        help="pattern for table headers, should contain %s for schema "
        "and %s for field name.")

    parser.add_option(
        "-f",
        "--output-format",
        dest="output_format",
        type="choice",
        choices=("table", "list", "values"),
        help="output format. Tabular form (one row per ortholog) or list form."
    )

    parser.add_option("--format",
                      dest="format",
                      type="string",
                      help="output format for numbers.")

    parser.add_option("--remove-na",
                      dest="remove_na",
                      action="store_true",
                      help="remove entries with any na values.")

    parser.set_defaults(
        field_extract="%s_length",
        field_compare="%s_length",
        filename_identifiers=None,
        filename_subset=None,
        filter_min_ratio=0.00,
        filter_max_ratio=0.00,
        schemas="",
        output_fields="",
        output_pattern="%s_%s",
        output_format="table",
        format="%6.4f",
        remove_na=False,
    )

    (options, args) = E.Start(parser, add_csv_options=True)

    options.schemas = options.schemas.split(",")
    if not options.schemas:
        raise "please supply schemas."

    if options.output_fields:
        options.output_fields = options.output_fields.split(",")
    else:
        options.output_fields = ()

    fields, table = CSV.ReadTable(sys.stdin)

    map_fields2column = {}
    for x in fields:
        map_fields2column[x] = len(map_fields2column)

    if options.loglevel >= 1:
        options.stdlog.write("# read a %i x %i table.\n" %
                             (len(table), len(fields)))

    if options.filename_subset:
        subset, nerrors = IOTools.ReadList(open(options.filename_subset, "r"))
        subset = set(subset)

        table = filter(lambda x: x[0] in subset, table)

        if options.loglevel >= 1:
            options.stdlog.write(
                "# subset of %i entries reduced table to a %i x %i table.\n" %
                (len(subset), len(table), len(fields)))

    if options.filename_identifiers:
        identifiers, nerrors = IOTools.ReadList(
            open(options.filename_identifiers, "r"))
    else:
        identifiers = []

    identifiers = set(identifiers)

    # extract rows with positive identifiers
    positive_rows = filter(lambda x: x[0] in identifiers, table)

    if options.loglevel >= 1:
        options.stdlog.write(
            "# subset of %i identifiers gives %i positive entries.\n" %
            (len(identifiers), len(positive_rows)))

    if options.output_format == "table":
        options.stdout.write("id")
        for schema in options.schemas:
            if options.output_fields:
                for field in options.output_fields:
                    options.stdout.write("\t" + options.output_pattern %
                                         (schema, field))
            else:
                options.stdout.write("\t%s" % (schema))

        options.stdout.write("\n")
    else:
        options.stdout.write("schema\tvalue\n")

    if identifiers:
        for row in positive_rows:

            if options.output_format == "table":
                options.stdout.write(row[0])

            for schema in options.schemas:

                # set fields for extraction
                f_extract = map_fields2column[options.field_extract % schema]
                f_compare = map_fields2column[options.field_compare % schema]

                # get region for extraction
                if row[f_compare] != "na":
                    r = float(row[f_compare])
                    if options.filter_min_ratio or options.filter_max_ratio:
                        mi = r * options.filter_min_ratio
                        ma = r * options.filter_max_ratio
                        f = lambda x: x[f_compare] != "na" and float(
                            x[f_compare]
                        ) >= mi and float(x[f_compare]) <= ma and x[
                            0] not in identifiers and x[f_extract] != "na"
                    else:
                        f = lambda x: x[0] not in identifiers and x[f_extract
                                                                    ] != "na"
                    # extract values: filter by minimum and maximum range and remove
                    # positive identifiers.
                    v = float(row[f_extract])
                    values = map(lambda x: float(x[f_extract]),
                                 filter(f, table))

                    stats = Stats.DistributionalParameters(values)
                else:
                    v = None

                for field in options.output_fields:

                    if v is not None:
                        if field == "zscore":
                            f = options.format % stats.getZScore(v)
                        elif field == "diff":
                            f = options.format % (v - stats["mean"])
                        elif field == "reldiff":
                            f = options.format % (
                                (v - stats["mean"]) / stats["mean"])
                        elif field == "val":
                            f = options.format % v
                        else:
                            f = options.format % stats[field]
                    else:
                        f = "na"

                    if options.output_format == "table":
                        options.stdout.write("\t%s" % f)
                    elif options.output_format == "list":
                        options.stdout.write("%s\t%s\n" % (schema, f))
                    elif options.output_format == "values":
                        options.stdout.write(
                            "%s\t%s\t%5.2f\t%s\n" %
                            (row[0], schema, v, ",".join(
                                map(lambda x: options.format % x, values))))

            if options.output_format == "table":
                options.stdout.write("\n")

    else:

        extract_columns = []

        for schema in options.schemas:
            extract_columns.append(map_fields2column[options.field_extract %
                                                     schema])

        # simply dump a subset of values
        for row in table:

            skip = False

            if options.filter_min_ratio or options.filter_max_ratio:

                master = options.schemas[0]

                v = row[map_fields2column[options.field_compare % master]]

                if v == "na":
                    continue

                v = float(v)

                mi = v * options.filter_min_ratio
                ma = v * options.filter_max_ratio

                for schema in options.schemas[1:]:

                    r = row[map_fields2column[options.field_compare % schema]]

                    if r == "na":
                        if options.remove_na:
                            skip = True
                        continue

                    r = float(r)

                    if r < mi or r > ma:
                        skip = True
                        if options.loglevel >= 3:
                            if options.format == "table":
                                options.stdout.write("* ")
                                options.stdout.write("%s\t" % row[0])
                                options.stdout.write("\t".join(
                                    [row[y] for y in extract_columns]))
                                options.stdout.write("\n")
                        break

            if skip:
                continue

            if options.output_format == "table":
                options.stdout.write("%s\t" % row[0])
                options.stdout.write("\t".join(
                    [row[y] for y in extract_columns]))
                options.stdout.write("\n")

            elif options.output_format == "list":
                has_na = False
                for x in range(len(options.schemas)):
                    v = row[extract_columns[x]]
                    if v == "na":
                        has_na = True

                if has_na and options.remove_na:
                    continue

                for x in range(len(options.schemas)):
                    options.stdout.write(
                        "%s\t%s\n" %
                        (options.schemas[x], row[extract_columns[x]]))

    E.Stop()
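
The zscore output field standardizes the extracted value against the distribution over the background rows. A minimal stand-in for Stats.DistributionalParameters().getZScore(), assuming the usual (v - mean) / stddev definition (an assumption, not the CGAT implementation):

def zscore(v, values):
    # standard score of v relative to the sample in values
    mean = sum(values) / float(len(values))
    stddev = (sum((x - mean) ** 2 for x in values) / float(len(values))) ** 0.5
    return (v - mean) / stddev
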