コード例 #1
0
ファイル: sa_util.py プロジェクト: tobes/munge
def _results_to_row_function(function, data, results, verbose=0, limit=None):
    """Transform each result row with ``function`` and bulk insert the output.

    Every row of ``results`` is turned into a dict keyed by the result
    column names, passed to ``function(row_dict, verbose=verbose)``, and the
    returned value is queued for insertion into the temporary table named
    ``config.TEMP_TABLE_STR + data['name']``.  The table is created lazily,
    after the first row has been transformed, from ``data['fields']`` and the
    optional ``data['primary_key']``.  Queued rows are written every
    ``config.BATCH_SIZE`` rows and once more at the end.

    When ``limit`` is truthy, processing stops after that many rows.
    Indexes are built only if at least one row was processed.

    Returns the number of rows processed.
    """
    table_name = data['name']
    fields_data = data['fields']
    primary_key = data.get('primary_key')
    table_name_temp = config.TEMP_TABLE_STR + table_name

    table_ready = False
    pending = []
    count = 0
    for count, row in enumerate(results, 1):
        if not table_ready:
            # Column names of the incoming result set, used as dict keys.
            column_names = [
                column['name'] for column in get_result_fields(results)
            ]
        row_data = function(dict(zip(column_names, row)), verbose=verbose)
        if not table_ready:
            # The destination table is only created once a first row has
            # made it through the transform function.
            fields = process_header(fields_data)
            create_table(
                table_name_temp,
                fields,
                primary_key=primary_key,
                verbose=verbose,
            )
            insert_sql = insert_rows(table_name_temp, fields)
            table_ready = True
        pending.append(row_data)
        if not count % config.BATCH_SIZE:
            run_sql(insert_sql, pending)
            pending = []
            if verbose:
                print('{table}: {count:,}'.format(
                    table=table_name, count=count
                ))
        if limit and count == limit:
            break

    # Write whatever is left over from the last, partial batch.
    if pending:
        run_sql(insert_sql, pending)

    if verbose:
        print('{table}: {count:,} rows imported'.format(
            table=table_name, count=count
        ))
    if count:
        # Add indexes
        build_indexes(table_name_temp, fields, verbose=verbose)
    return count
コード例 #2
0
ファイル: csv_util.py プロジェクト: tobes/munge
def import_csv(
    reader,
    table_name,
    fields=None,
    skip_first=False,
    description=None,
    verbose=0,
    limit=None,
    keep_table=False,
    importer=None,
):
    """Import the rows produced by ``reader`` into a database table.

    Leading rows consisting of a single cell that starts with ``#`` are
    treated as comments; the first one supplies ``description`` when the
    caller did not pass one.  The first non-comment row triggers table
    set-up and is consumed as a header row when ``fields`` is None (it then
    provides the field definitions) or when ``skip_first`` is true.
    Otherwise it is imported as data.

    Args:
        reader: iterable yielding rows as sequences of strings.
        table_name: name of the destination table.
        fields: field definitions passed to ``process_header``; when None
            they are read from the first non-comment row.
        skip_first: treat the first non-comment row as a header to discard
            even though ``fields`` was supplied.
        description: table description passed to ``update_summary_table``.
        verbose: truthy to print progress; also forwarded to helpers.
        limit: when truthy, stop after this many rows have been imported.
        keep_table: reuse (truncate and refill) the existing table when it
            exists and its columns match; otherwise a table named
            ``config.TEMP_TABLE_STR + table_name`` is created.
        importer: forwarded to ``update_summary_table``.

    Rows for which a field conversion function raises are reported on
    stdout and left out of both the import and the row count.
    """
    if keep_table and table_name not in table_list():
        keep_table = False
    temp_table = config.TEMP_TABLE_STR + table_name
    count = 0
    t_fields = []
    data = []
    has_header_row = (fields is None) or skip_first
    first = True
    for row in reader:
        if first:
            if len(row) == 1 and row[0][:1] == "#":
                # Comment row: never data, whatever the description state.
                if not description:
                    description = row[0][1:].strip()
                continue
            if fields is None:
                fields = row
            t_fields = process_header(fields)
            t_fns = get_fns(t_fields)
            if keep_table:
                old_fields = table_columns(table_name)
                if fields_match(old_fields, t_fields):
                    truncate_table(table_name, verbose=verbose)
                    temp_table = table_name
                else:
                    keep_table = False
            if not keep_table:
                create_table(temp_table, t_fields, verbose=verbose)
            f = [field["name"] for field in t_fields if not field.get("missing")]
            insert_sql = insert_rows(temp_table, t_fields)
            first = False
            if has_header_row:
                # Only a genuine header row is discarded.  Having a
                # description must not cause a data row to be dropped.
                continue

        row_data = dict(zip(f, row))
        skip = False
        for fn in t_fns:
            fn_info = t_fns[fn]
            if fn_info[1]:
                fn_fields = fn_info[1].split("|")
            else:
                fn_fields = [fn]
            try:
                row_data[fn] = fn_info[0](*[row_data[x] for x in fn_fields])
            except Exception as e:
                # FIXME log error
                print(str(e))
                print(fn)
                print(row_data)
                skip = True
        if skip:
            # Bad rows are neither stored nor counted, and must not trigger
            # a batch flush or the limit check.
            continue

        data.append(row_data)
        count += 1
        # Flush after counting so every batch holds exactly BATCH_SIZE rows
        # and an empty batch is never sent.
        if count % config.BATCH_SIZE == 0:
            run_sql(insert_sql, data)
            data = []
            if verbose:
                print("{table}: {count:,}".format(table=table_name, count=count))
        if limit and count == limit:
            break
    if data:
        run_sql(insert_sql, data)

    if verbose:
        print("{table}: {count:,} rows imported".format(table=table_name, count=count))
    # Add indexes
    if not keep_table:
        build_indexes(temp_table, t_fields, verbose=verbose)
    update_summary_table(table_name, description, importer=importer, created=not keep_table)