Beispiel #1
0
 def row_iter(rows, reader):
     """Generate quoted output rows from *rows*, then from *reader*.

     Items of *rows* are handed to ``quoteRow`` unchanged. Items of
     *reader* are first passed through ``CSV.ConvertDictionary`` with
     ``options.map`` and then quoted the same way.

     ``quoteRow``, ``take``, ``map_column2type``, ``options`` and ``CSV``
     are not parameters; they are resolved from the surrounding scope.
     """

     def _quote(record):
         # One place for the quoting arguments shared by both sources.
         return quoteRow(record,
                         take,
                         map_column2type,
                         options.missing_values,
                         null=options.null,
                         string_value=options.string_value)

     # presumably *rows* holds records that were already converted by the
     # caller -- TODO confirm; they are not converted again here.
     for buffered in rows:
         yield _quote(buffered)

     for raw in reader:
         yield _quote(CSV.ConvertDictionary(raw, map=options.map))
Beispiel #2
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Exactly two positional arguments (the two files to join) are
    required; both are read with ``CSV.ReadTable``.

    Raises:
        ValueError: if the number of positional arguments is not two.
    """

    if argv is None:
        argv = sys.argv

    # NOTE(review): argv is not forwarded to E.Start below -- confirm
    # whether E.Start picks it up by itself.
    parser = E.OptionParser(
        version=
        "%prog version: $Id: csv_intersection.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    parser.add_option("-u",
                      "--unique",
                      dest="unique",
                      action="store_true",
                      help="output rows are uniq.")

    parser.set_defaults(
        remove=False,
        unique=False,
    )

    (options, args) = E.Start(parser, add_csv_options=True)

    if len(args) != 2:
        # Raising a plain string is not a valid exception; use a real
        # exception type so the message actually reaches the user.
        raise ValueError("please specify two files to join.")

    options.filename1, options.filename2 = args

    # NOTE(review): the file handles are never closed explicitly and
    # table1/table2 are not used further down in this function.
    table1 = CSV.ReadTable(open(options.filename1, "r"))
    table2 = CSV.ReadTable(open(options.filename2, "r"))

    if options.unique:
        outfile = UniqueBuffer(sys.stdout)
    else:
        outfile = options.stdout

    # build new field list
    new_fields = []

    for x in options.join_fields1:
        new_fields.append(x)

    # NOTE(review): ``fields1`` is not defined in this function -- it has
    # to come from module scope, otherwise this loop raises NameError.
    for x in fields1:
        if x not in options.join_fields1:
            new_fields.append(x)
        if x not in options.join_fields2:
            new_fields.append(x)

        # NOTE(review): the writer is re-created on every iteration and
        # ``fields`` is only assigned further down -- looks misplaced,
        # confirm the intended indentation and field list.
        writer = csv.DictWriter(outfile,
                                fields,
                                dialect=options.csv_dialect,
                                lineterminator=options.csv_lineterminator,
                                extrasaction='ignore')

    # NOTE(review): ``lines`` and ``input_fields`` are not defined in this
    # function either -- verify where they are supposed to come from.
    if len(lines) > 0:

        # Header names are taken from the first line, split on tabs.
        old_fields = lines[0][:-1].split("\t")

        if options.remove:
            # Keep every header column that is not listed in input_fields.
            fields = []
            for x in old_fields:
                if x not in input_fields:
                    fields.append(x)
        else:
            fields = input_fields

        reader = csv.DictReader(lines, dialect=options.csv_dialect)

        # Parenthesised form prints the same text and is valid syntax on
        # both Python 2 and Python 3.
        print("\t".join(fields))

        for row in reader:
            row = CSV.ConvertDictionary(row)
            writer.writerow(row)

    E.Stop()
Beispiel #3
0
                                dialect=options.dialect,
                                fieldnames=options.header)

    if options.replace_header:
        reader.next()

    E.info("reading %i columns to guess column types" % options.guess_size)

    rows = []
    for row in reader:
        if None in row:
            raise ValueError("undefined columns in input file at row: %s" %
                             row)

        try:
            rows.append(CSV.ConvertDictionary(row, map=options.map))
        except TypeError, msg:
            E.warn("incomplete line? Type error in conversion: "
                   "'%s' with data: %s" % (msg, str(row)))
        except ValueError, msg:
            E.warn("incomplete line? Type error in conversion: "
                   "'%s' with data: %s" % (msg, str(row)))

        if len(rows) >= options.guess_size:
            break

    E.info("read %i rows for type guessing" % len(rows))
    E.info("creating table")

    if len(rows) == 0:
        if options.allow_empty:
Beispiel #4
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Each file named in the positional arguments becomes one worksheet
    (titled with the file's basename) of a single workbook, which is
    saved to the filename given by ``-o``. Lines starting with ``#`` are
    dropped; a file with no remaining lines is skipped. The first
    remaining line is split on tabs to obtain the column headers; data
    rows are parsed with ``csv.DictReader`` using ``options.csv_dialect``.

    Raises:
        ValueError: if no output filename was given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: csv2xls.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-o", "--outfile=", dest="output_filename", type="string",
                      help="write to output filename.")

    parser.set_defaults(
        output_filename=None,
    )

    (options, args) = E.Start(parser, add_csv_options=True)

    if not options.output_filename:
        raise ValueError("please specify an output filename.")

    w = openpyxl.Workbook(optimized_write=True)

    # create styles
    # NOTE(review): neither style is applied anywhere below -- confirm
    # whether they are still needed.
    header_style = GetHeaderStyle()
    data_style = GetDataStyle()

    for filename in args:

        # Read eagerly inside ``with`` so the handle is closed right away.
        with open(filename, "r") as infile:
            lines = [line for line in infile if not line.startswith("#")]

        if not lines:
            continue

        if options.loglevel >= 2:
            print("# read %i rows" % len(lines))
            sys.stdout.flush()

        # Strip only the line terminator: slicing off the last character
        # would truncate the final header when the line has no newline
        # and would leave a stray "\r" on CRLF input.
        headers = lines[0].rstrip("\r\n").split("\t")

        # create_sheet() makes and returns a new worksheet; add_sheet()
        # expects an already existing worksheet object.
        ws = w.create_sheet(title=os.path.basename(filename))

        ws.append(headers)

        reader = csv.DictReader(lines, dialect=options.csv_dialect)

        for row in reader:
            row = CSV.ConvertDictionary(row)

            # Emit the values in header order; absent columns become "".
            ws.append([row.get(header, "") for header in headers])

    w.save(options.output_filename)

    E.Stop()