Beispiel #1
0
def readAndGroupTable(infile, options):
    """Read a table from infile, group its rows and write the result.

    The table is read with ``CSV.readTable``. Only the group column and the
    columns selected through ``options.columns`` are kept. Rows are grouped
    on the group column with ``CSV.groupTable`` and the grouped table is
    written tab-separated, header line first, to ``options.stdout``.

    Arguments
    ---------
    infile :
        Input passed unchanged to ``CSV.readTable``.
    options :
        Object providing the attributes ``has_headers``, ``columns``,
        ``group_column``, ``group_function``, ``missing_value`` and
        ``stdout``. ``options.columns`` is overwritten with the result of
        ``getColumns(fields, options.columns)``.

    Raises
    ------
    AssertionError
        If the group column is also one of the selected columns.
    ValueError
        If ``options.group_function`` is not one of ``min``, ``max``,
        ``sum``, ``mean``, ``cat``, ``uniq`` or ``stats``.
    """
    fields, table = CSV.readTable(infile,
                                  with_header=options.has_headers,
                                  as_rows=True)
    options.columns = getColumns(fields, options.columns)
    assert options.group_column not in options.columns

    # Values are parsed as floats unless the aggregator works on strings.
    converter = float
    new_fields = [fields[options.group_column]
                  ] + [fields[x] for x in options.columns]

    if options.group_function == "min":
        f = min
    elif options.group_function == "max":
        f = max
    elif options.group_function == "sum":
        # reduce() is not a builtin on Python 3; import it explicitly.
        from functools import reduce
        f = lambda z: reduce(lambda x, y: x + y, z)
    elif options.group_function == "mean":
        # The NumPy aliases in the scipy root namespace (scipy.mean etc.)
        # are deprecated; call numpy directly.
        import numpy
        f = numpy.mean
    elif options.group_function == "cat":
        f = lambda x: ";".join([y for y in x if y != ""])
        converter = str
    elif options.group_function == "uniq":
        f = lambda x: ";".join([y for y in set(x) if y != ""])
        converter = str
    elif options.group_function == "stats":
        f = lambda x: str(Stats.DistributionalParameters(x))
        # Each selected column expands into one header entry per statistic
        # reported by Stats.DistributionalParameters.
        stats_headers = Stats.DistributionalParameters().getHeaders()
        new_fields = [fields[options.group_column]]
        for c in options.columns:
            new_fields += ["%s_%s" % (fields[c], x) for x in stats_headers]
    else:
        # Previously an unknown name left ``f`` unbound and failed later
        # with an UnboundLocalError.
        raise ValueError(
            "unknown group function '%s'" % options.group_function)

    # Build a reduced table holding the group column followed by the
    # converted values of the selected columns. Missing values are passed
    # through unconverted; a row is dropped entirely if any of its selected
    # values cannot be converted.
    new_table = []
    for row in table:
        skip = False
        new_row = [row[options.group_column]]

        for c in options.columns:
            if row[c] == options.missing_value:
                new_row.append(row[c])
            else:
                try:
                    new_row.append(converter(row[c]))
                except ValueError:
                    skip = True
                    break
        if not skip:
            new_table.append(new_row)
    table = new_table

    # The group column is at index 0 of the reduced table.
    new_rows = CSV.groupTable(table, group_column=0, group_function=f)

    options.stdout.write("\t".join(new_fields) + "\n")
    for row in new_rows:
        options.stdout.write("\t".join(map(str, row)) + "\n")
Beispiel #2
0
def readAndGroupTable(infile, options):
    """Read a table from infile, group its rows and write the result.

    The table is read with ``CSV.readTable``. Only the group column and the
    columns selected through ``options.columns`` are kept. Rows are grouped
    on the group column with ``CSV.groupTable`` and the grouped table is
    written tab-separated, header line first, to ``options.stdout``.

    Arguments
    ---------
    infile :
        Input passed unchanged to ``CSV.readTable``.
    options :
        Object providing the attributes ``has_headers``, ``columns``,
        ``group_column``, ``group_function``, ``missing_value`` and
        ``stdout``. ``options.columns`` is overwritten with the result of
        ``getColumns(fields, options.columns)``.

    Raises
    ------
    AssertionError
        If the group column is also one of the selected columns.
    ValueError
        If ``options.group_function`` is not one of ``min``, ``max``,
        ``sum``, ``mean``, ``cat``, ``uniq`` or ``stats``.
    """
    fields, table = CSV.readTable(
        infile, with_header=options.has_headers, as_rows=True)
    options.columns = getColumns(fields, options.columns)
    assert options.group_column not in options.columns

    # Values are parsed as floats unless the aggregator works on strings.
    converter = float
    new_fields = [fields[options.group_column]] + [fields[x]
                                                   for x in options.columns]

    if options.group_function == "min":
        f = min
    elif options.group_function == "max":
        f = max
    elif options.group_function == "sum":
        # reduce() is not a builtin on Python 3; import it explicitly.
        from functools import reduce
        f = lambda z: reduce(lambda x, y: x + y, z)
    elif options.group_function == "mean":
        # The NumPy aliases in the scipy root namespace (scipy.mean etc.)
        # are deprecated; call numpy directly.
        import numpy
        f = numpy.mean
    elif options.group_function == "cat":
        f = lambda x: ";".join([y for y in x if y != ""])
        converter = str
    elif options.group_function == "uniq":
        f = lambda x: ";".join([y for y in set(x) if y != ""])
        converter = str
    elif options.group_function == "stats":
        f = lambda x: str(Stats.DistributionalParameters(x))
        # Each selected column expands into one header entry per statistic
        # reported by Stats.DistributionalParameters. A comprehension avoids
        # a lambda closing over the loop variable.
        stats_headers = Stats.DistributionalParameters().getHeaders()
        new_fields = [fields[options.group_column]]
        for c in options.columns:
            new_fields += ["%s_%s" % (fields[c], x) for x in stats_headers]
    else:
        # Previously an unknown name left ``f`` unbound and failed later
        # with an UnboundLocalError.
        raise ValueError(
            "unknown group function '%s'" % options.group_function)

    # Build a reduced table holding the group column followed by the
    # converted values of the selected columns. Missing values are passed
    # through unconverted; a row is dropped entirely if any of its selected
    # values cannot be converted.
    new_table = []
    for row in table:
        skip = False
        new_row = [row[options.group_column]]

        for c in options.columns:
            if row[c] == options.missing_value:
                new_row.append(row[c])
            else:
                try:
                    new_row.append(converter(row[c]))
                except ValueError:
                    skip = True
                    break
        if not skip:
            new_table.append(new_row)
    table = new_table

    # The group column is at index 0 of the reduced table.
    new_rows = CSV.groupTable(table,
                              group_column=0,
                              group_function=f)

    options.stdout.write("\t".join(new_fields) + "\n")
    for row in new_rows:
        options.stdout.write("\t".join(map(str, row)) + "\n")