def prepare_to_export(table, export_fields=None, *args, **kwargs):
    # TODO: optimize for more used cases (export_fields=None)
    table_type = type(table)
    if table_type not in (FlexibleTable, Table):
        raise ValueError("Table type not recognized")

    if export_fields is None:
        # we use already slugged-fieldnames
        export_fields = table.field_names
    else:
        # we need to slug all the field names
        export_fields = make_header(export_fields)

    table_field_names = table.field_names
    diff = set(export_fields) - set(table_field_names)
    if diff:
        field_names = ", ".join('"{}"'.format(field) for field in diff)
        raise ValueError("Invalid field names: {}".format(field_names))

    yield export_fields

    if table_type is Table:
        field_indexes = list(map(table_field_names.index, export_fields))
        for row in table._rows:
            yield [row[field_index] for field_index in field_indexes]
    elif table_type is FlexibleTable:
        for row in table._rows:
            yield [row[field_name] for field_name in export_fields]
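# Illustrative example (not part of the original code): a minimal sketch of what
# `prepare_to_export` yields - the slugged header first, then one list of values
# per row, restricted to `export_fields`. Assumes `Table`, `TextField` and
# `OrderedDict` are the same names used elsewhere in this module
# (rows.Table / rows.fields.TextField / collections.OrderedDict).
def _example_prepare_to_export():
    table = Table(fields=OrderedDict([("name", TextField), ("uf", TextField)]))
    table.append({"name": "Rio de Janeiro", "uf": "RJ"})
    generator = prepare_to_export(table, export_fields=["uf"])
    print(next(generator))  # expected: ['uf'] (header comes first)
    print(next(generator))  # expected: ['RJ'] (then each row, in export order)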
def command_csv_to_sqlite(
    batch_size, samples, input_encoding, dialect, schemas, sources, output
):
    inputs = [pathlib.Path(filename) for filename in sources]
    output = pathlib.Path(output)
    table_names = make_header([filename.name.split(".")[0] for filename in inputs])
    schemas = _get_schemas_for_inputs(schemas, inputs)

    for filename, table_name, schema in zip(inputs, table_names, schemas):
        prefix = "[{filename} -> {db_filename}#{tablename}]".format(
            db_filename=output.name, tablename=table_name, filename=filename.name
        )
        pre_prefix = "{} (detecting data types)".format(prefix)
        progress = ProgressBar(prefix=prefix, pre_prefix=pre_prefix)
        csv_to_sqlite(
            six.text_type(filename),
            six.text_type(output),
            dialect=dialect,
            table_name=table_name,
            samples=samples,
            batch_size=batch_size,
            callback=progress.update,
            encoding=input_encoding,
            schema=schema,
        )
        progress.close()
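# Illustrative example (not part of the original code): how the command above
# derives SQLite table names from the CSV file names - `make_header` slugs each
# base name so it becomes a valid identifier. The filenames and expected output
# below are made up.
def _example_table_names_from_filenames():
    filenames = ["Sales Report 2019.csv", "sales-report-2020.csv"]
    table_names = make_header([name.split(".")[0] for name in filenames])
    # Expected something like: ['sales_report_2019', 'sales_report_2020']
    return table_names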
def _get_import_fields(fields, fields_exclude):
    if fields is not None and fields_exclude is not None:
        click.echo("ERROR: `--fields` cannot be used with `--fields-exclude`", err=True)
        sys.exit(20)
    elif fields is not None:
        return make_header(fields.split(","), permit_not=False)
    else:
        return None
def deserialize(self, row):
    field_names = list(row.keys())
    field_mapping = {
        old: self.fields[new]
        for old, new in zip(field_names, make_header(field_names))
    }
    return {
        key: field_mapping[key].deserialize(value) for key, value in row.items()
    }
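# Illustrative example (not part of the original code): the mapping built in
# `deserialize` pairs each original key with the field registered under its
# slugged name, so rows keyed by "human" headers can still be deserialized.
# The row below is made up.
def _example_field_mapping():
    row = {"Data de Início": "2019-01-01", "Valor Total": "42.5"}
    slugged = make_header(list(row.keys()))
    # Expected pairs: ('Data de Início', 'data_de_inicio'),
    #                 ('Valor Total', 'valor_total')
    return list(zip(row.keys(), slugged))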
def join(
    input_encoding,
    output_encoding,
    input_locale,
    output_locale,
    verify_ssl,
    order_by,
    fields,
    fields_exclude,
    keys,
    sources,
    destination,
):
    export_fields = _get_import_fields(fields, fields_exclude)
    keys = make_header(keys.split(","), permit_not=False)

    if input_locale is not None:
        with rows.locale_context(input_locale):
            tables = [
                _import_table(source, encoding=input_encoding, verify_ssl=verify_ssl)
                for source in sources
            ]
    else:
        tables = [
            _import_table(source, encoding=input_encoding, verify_ssl=verify_ssl)
            for source in sources
        ]

    result = rows.join(keys, tables)
    if order_by is not None:
        order_by = _get_field_names(order_by, result.field_names, permit_not=True)
        # TODO: use complete list of `order_by` fields
        result.order_by(order_by[0].replace("^", "-"))
    if export_fields is None:
        export_fields = _get_export_fields(result.field_names, fields_exclude)
    # TODO: may use sys.stdout.encoding if output_file = '-'
    output_encoding = output_encoding or DEFAULT_OUTPUT_ENCODING

    if output_locale is not None:
        with rows.locale_context(output_locale):
            export_to_uri(
                result,
                destination,
                encoding=output_encoding,
                export_fields=export_fields,
            )
    else:
        export_to_uri(
            result, destination, encoding=output_encoding, export_fields=export_fields
        )
def _get_field_names(field_names, table_field_names, permit_not=False):
    new_field_names = make_header(field_names.split(","), permit_not=permit_not)
    if not permit_not:
        diff = set(new_field_names) - set(table_field_names)
    else:
        diff = set(
            field_name.replace("^", "") for field_name in new_field_names
        ) - set(table_field_names)

    if diff:
        missing = ", ".join(['"{}"'.format(field) for field in diff])
        click.echo("Table does not have fields: {}".format(missing), err=True)
        sys.exit(1)
    else:
        return new_field_names
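# Illustrative example (not part of the original code): with `permit_not=True`
# a leading "^" (used by the CLI to mean descending order) is kept in the
# slugged names and only stripped for the existence check against the table
# fields. The field names below are made up.
def _example_get_field_names():
    table_fields = ["name", "uf", "population"]
    # Expected: ['^population', 'name']
    return _get_field_names("^Population,Name", table_fields, permit_not=True)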
def create_table(
    data,
    meta=None,
    fields=None,
    skip_header=True,
    import_fields=None,
    samples=None,
    force_types=None,
    *args,
    **kwargs
):
    """Create a rows.Table object based on data rows and some configurations

    - `skip_header` is only used if `fields` is set
    - `samples` is only used if `fields` is `None`. If `samples` is `None`, all
      data is loaded into memory - use with caution.
    - `force_types` is only used if `fields` is `None`
    - `import_fields` can be used whether `fields` is set or not; the resulting
      fields will follow its order
    - `fields` must always be in the same order as the data
    """
    table_rows = iter(data)
    force_types = force_types or {}
    if import_fields is not None:
        import_fields = make_header(import_fields)

    if fields is None:  # autodetect field types
        # TODO: may add `type_hints` parameter so autodetection can be easier
        # (plugins may specify some possible field types).
        header = make_header(next(table_rows))

        if samples is not None:
            sample_rows = list(islice(table_rows, 0, samples))
            table_rows = chain(sample_rows, table_rows)
        else:
            sample_rows = table_rows = list(table_rows)

        # Detect field types using only the desired columns
        detected_fields = detect_types(
            header,
            sample_rows,
            skip_indexes=[
                index
                for index, field in enumerate(header)
                if field in force_types or field not in (import_fields or header)
            ],
            *args,
            **kwargs
        )
        # Check if any field was added during the detection process
        new_fields = [
            field_name
            for field_name in detected_fields.keys()
            if field_name not in header
        ]
        # Finally, create `fields` with both the header and the new field names,
        # based on the detected fields and `force_types`
        fields = OrderedDict(
            [
                (field_name, detected_fields.get(field_name, TextField))
                for field_name in header + new_fields
            ]
        )
        fields.update(force_types)

        # Update `header` and `import_fields` based on new `fields`
        header = list(fields.keys())
        if import_fields is None:
            import_fields = header

    else:  # using provided field types
        if not isinstance(fields, OrderedDict):
            raise ValueError("`fields` must be an `OrderedDict`")

        if skip_header:
            # If we're skipping the header, this row is probably not trustworthy
            # (it can be data or garbage).
            _ = next(table_rows)

        header = make_header(list(fields.keys()))
        if import_fields is None:
            import_fields = header

        fields = OrderedDict(
            [(field_name, fields[key]) for field_name, key in zip(header, fields)]
        )

    diff = set(import_fields) - set(header)
    if diff:
        field_names = ", ".join('"{}"'.format(field) for field in diff)
        raise ValueError("Invalid field names: {}".format(field_names))
    fields = OrderedDict(
        [(field_name, fields[field_name]) for field_name in import_fields]
    )

    get_row = get_items(*map(header.index, import_fields))
    table = Table(fields=fields, meta=meta)
    table.extend(dict(zip(import_fields, get_row(row))) for row in table_rows)

    return table
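# Illustrative example (not part of the original code): a minimal usage sketch
# with made-up data - the first row is the header, field types are detected from
# the remaining rows and `import_fields` selects and reorders the columns.
def _example_create_table():
    data = [
        ["Name", "Area", "Population"],
        ["Acre", "164123.04", "869265"],
        ["Alagoas", "27848.14", "3337357"],
    ]
    table = create_table(data, import_fields=["population", "name"])
    print(table.field_names)    # expected: ['population', 'name']
    print(table[0].population)  # expected: 869265 (detected as an integer field)
    return table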