Beispiel #1
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: csv_cut.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-r",
                      "--remove",
                      dest="remove",
                      action="store_true",
                      help="remove specified columns, keep all others.")

    parser.add_option("-u",
                      "--unique",
                      dest="unique",
                      action="store_true",
                      help="output rows are uniq.")

    parser.add_option(
        "-l",
        "--large",
        dest="large",
        action="store_true",
        help=
        "large columns. Do not use native python CSV module [default=%default]."
    )

    parser.add_option("-f",
                      "--filename-fields",
                      dest="filename_fields",
                      type="string",
                      help="filename with field information.")

    parser.set_defaults(
        remove=False,
        unique=False,
        filename_fields=None,
    )

    (options, args) = E.Start(parser, add_csv_options=True, quiet=True)

    input_fields = args

    if options.filename_fields:
        input_fields = map(
            lambda x: x[:-1].split("\t")[0],
            filter(lambda x: x[0] != "#",
                   open(options.filename_fields, "r").readlines()))

    if options.unique:
        outfile = UniqueBuffer(sys.stdout)
    else:
        outfile = options.stdout

    while 1:
        line = sys.stdin.readline()

        if not line:
            E.Stop()
            sys.exit(0)

        if line[0] == "#":
            continue

        first_line = line
        break

    old_fields = first_line[:-1].split("\t")

    fields = []
    for f in input_fields:
        # do pattern search
        if f[0] == "%" and f[-1] == "%":
            pattern = re.compile(f[1:-1])
            for o in old_fields:
                if pattern.search(o) and o not in fields:
                    fields.append(o)
        else:
            if f in old_fields:
                fields.append(f)

    if options.remove:
        fields = set(fields)
        fields = [x for x in old_fields if x not in fields]

    if options.large:
        reader = CSV.DictReaderLarge(CommentStripper(sys.stdin),
                                     fieldnames=old_fields,
                                     dialect=options.csv_dialect)
    else:
        reader = csv.DictReader(CommentStripper(sys.stdin),
                                fieldnames=old_fields,
                                dialect=options.csv_dialect)

    writer = csv.DictWriter(outfile,
                            fields,
                            dialect=options.csv_dialect,
                            lineterminator=options.csv_lineterminator,
                            extrasaction='ignore')

    print "\t".join(fields)

    first_row = True
    ninput, noutput, nerrors = 0, 0, 0

    while 1:
        ninput += 1
        try:
            row = reader.next()
        except _csv.Error, msg:
            options.stderr.write("# error while parsing: %s\n" % (msg))
            nerrors += 1
            continue
        except StopIteration:
            break
Beispiel #2
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id: csv_cut.py 2782 2009-09-10 11:40:29Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-r", "--remove", dest="remove", action="store_true",
                      help="remove specified columns, keep all others.")

    parser.add_option("-u", "--unique", dest="unique", action="store_true",
                      help="output rows are uniq.")

    parser.add_option("-l", "--large", dest="large", action="store_true",
                      help="large columns. Do not use native python CSV module [default=%default].")

    parser.add_option("-f", "--filename-fields", dest="filename_fields", type="string",
                      help="filename with field information.")

    parser.set_defaults(
        remove=False,
        unique=False,
        filename_fields=None,
    )

    (options, args) = E.Start(parser,
                              add_csv_options=True,
                              quiet=True)

    statement = " ".join(args)

    if options.large:
        reader = CSV.DictReaderLarge(CommentStripper(sys.stdin),
                                     dialect=options.csv_dialect)
    else:
        reader = csv.DictReader(CommentStripper(sys.stdin),
                                dialect=options.csv_dialect)

    exec "f = lambda r: %s" % statement in locals()

    counter = E.Counter()
    writer = csv.DictWriter(options.stdout,
                            reader.fieldnames,
                            dialect=options.csv_dialect,
                            lineterminator=options.csv_lineterminator)

    writer.writerow(dict((fn, fn) for fn in reader.fieldnames))

    while 1:
        counter.input += 1
        try:
            row = reader.next()
        except _csv.Error, msg:
            options.stderr.write("# error while parsing: %s\n" % (msg))
            counter.errors += 1
            continue
        except StopIteration:
            break