Example #1
def _get_import_fields(fields, fields_exclude):
    if fields is not None and fields_exclude is not None:
        click.echo('ERROR: `--fields` cannot be used with `--fields-exclude`',
                   err=True)
        sys.exit(20)
    elif fields is not None:
        return make_header(fields.split(','), permit_not=False)
    else:
        return None
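The helper above is an excerpt from the rows command-line interface; click, sys and make_header come from that module. A hedged sketch of how it behaves, with made-up argument values (the outcomes follow directly from the code above):

_get_import_fields("name,age", None)   # only --fields given: returns make_header(["name", "age"], permit_not=False)
_get_import_fields(None, None)         # neither option given: returns None (no field filtering)
# passing both options prints an error to stderr and exits with status 20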
Example #2
def csv_to_sqlite(
    input_filename,
    output_filename,
    samples=None,
    dialect=None,
    batch_size=10000,
    encoding="utf-8",
    callback=None,
    force_types=None,
    chunk_size=8388608,
    table_name="table1",
    schema=None,
):
    "Export a CSV file to SQLite, based on field type detection from samples"

    # TODO: automatically detect encoding if encoding == `None`
    # TODO: should be able to specify fields

    if dialect is None:  # Get a sample to detect dialect
        fobj = open_compressed(input_filename, mode="rb")
        sample = fobj.read(chunk_size)
        dialect = rows.plugins.csv.discover_dialect(sample, encoding=encoding)
    elif isinstance(dialect, six.text_type):
        dialect = csv.get_dialect(dialect)

    if schema is None:  # Identify data types
        fobj = open_compressed(input_filename, encoding=encoding)
        data = list(islice(csv.DictReader(fobj, dialect=dialect), samples))
        schema = rows.import_from_dicts(data).fields
        if force_types is not None:
            schema.update(force_types)

    # Create lazy table object to be converted
    # TODO: this laziness feature will be incorporated into the library soon so
    #       we can call `rows.import_from_csv` here instead of `csv.reader`.
    reader = csv.reader(
        open_compressed(input_filename, encoding=encoding), dialect=dialect
    )
    header = make_header(next(reader))  # read and normalize the header row
    table = rows.Table(fields=OrderedDict([(field, schema[field]) for field in header]))
    table._rows = reader

    # Export to SQLite
    return rows.export_to_sqlite(
        table,
        output_filename,
        table_name=table_name,
        batch_size=batch_size,
        callback=callback,
    )
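A minimal usage sketch for the function above. The file and table names are hypothetical, only parameters from the signature shown are used, and force_types is assumed to map field names to rows field classes, as implied by schema.update(force_types):

csv_to_sqlite(
    "balances.csv.gz",      # hypothetical input; open_compressed takes care of the compression
    "balances.sqlite",      # hypothetical output database
    table_name="balances",
    samples=10000,          # rows read for type detection (None reads the whole file)
    force_types={"id": rows.fields.IntegerField},  # assumption: force a column to integer
)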
Example #3
def _get_field_names(field_names, table_field_names, permit_not=False):
    new_field_names = make_header(field_names.split(','),
                                  permit_not=permit_not)
    if not permit_not:
        diff = set(new_field_names) - set(table_field_names)
    else:
        diff = set(field_name.replace('^', '')
                   for field_name in new_field_names) - set(table_field_names)

    if diff:
        missing = ', '.join(['"{}"'.format(field) for field in diff])
        click.echo('Table does not have fields: {}'.format(missing), err=True)
        sys.exit(1)
    else:
        return new_field_names
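The permit_not flag matters for sort keys: in the join command (Example #5 below) the validated names may carry a leading "^", which is later rewritten to "-" before calling result.order_by. A hedged illustration with made-up field names:

order_by = _get_field_names("^name", ["id", "name"], permit_not=True)  # -> ["^name"]
order_by[0].replace("^", "-")                                          # -> "-name", i.e. descending order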
Example #4
def _get_field_names(field_names, table_field_names, permit_not=False):
    new_field_names = make_header(field_names.split(','),
                                  permit_not=permit_not)
    if not permit_not:
        diff = set(new_field_names) - set(table_field_names)
    else:
        diff = set(
            field_name.replace('^', '')
            for field_name in new_field_names) - set(table_field_names)

    if diff:
        missing = ', '.join(['"{}"'.format(field) for field in diff])
        click.echo('Table does not have fields: {}'.format(missing), err=True)
        sys.exit(1)
    else:
        return new_field_names
Example #5
def join(input_encoding, output_encoding, input_locale, output_locale,
         verify_ssl, order_by, fields, fields_exclude, keys, sources,
         destination):

    export_fields = _get_import_fields(fields, fields_exclude)
    keys = make_header(keys.split(','), permit_not=False)

    if input_locale is not None:
        with rows.locale_context(input_locale):
            tables = [
                _import_table(source,
                              encoding=input_encoding,
                              verify_ssl=verify_ssl) for source in sources
            ]
    else:
        tables = [
            _import_table(source,
                          encoding=input_encoding,
                          verify_ssl=verify_ssl) for source in sources
        ]

    result = rows.join(keys, tables)
    if order_by is not None:
        order_by = _get_field_names(order_by,
                                    result.field_names,
                                    permit_not=True)
        # TODO: use complete list of `order_by` fields
        result.order_by(order_by[0].replace('^', '-'))

    if export_fields is None:
        export_fields = _get_export_fields(result.field_names, fields_exclude)
    # TODO: may use sys.stdout.encoding if output_file = '-'
    output_encoding = output_encoding or DEFAULT_OUTPUT_ENCODING
    if output_locale is not None:
        with rows.locale_context(output_locale):
            export_to_uri(result,
                          destination,
                          encoding=output_encoding,
                          export_fields=export_fields)
    else:
        export_to_uri(result,
                      destination,
                      encoding=output_encoding,
                      export_fields=export_fields)
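For reference, a hedged sketch of the core calls the command wraps, assuming table_a and table_b are rows.Table objects that share an "id" field (the names are illustrative):

keys = make_header("id".split(","), permit_not=False)  # -> ["id"]
result = rows.join(keys, [table_a, table_b])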
Example #6
def command_csv2sqlite(batch_size, samples, sources, output):

    inputs = [pathlib.Path(filename) for filename in sources]
    output = pathlib.Path(output)
    table_names = make_header(
        [filename.name.split('.')[0] for filename in inputs])
    for filename, table_name in zip(inputs, table_names):
        prefix = '[{filename} -> {db_filename}#{tablename}]'.format(
            db_filename=output.name,
            tablename=table_name,
            filename=filename.name,
        )
        pre_prefix = '{} (detecting data types)'.format(prefix)
        updater = Updater(prefix=prefix, pre_prefix=pre_prefix)
        csv2sqlite(
            six.text_type(filename),
            six.text_type(output),
            table_name=table_name,
            samples=samples,
            batch_size=batch_size,
            callback=updater.update,
        )
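Example #6 reuses make_header to turn CSV file names into valid, de-duplicated SQLite table names. A hedged illustration using only behaviour exercised by the tests further down (the paths are made up):

names = [pathlib.Path(p).name.split('.')[0] for p in ('data/123.csv', 'data/456.csv', 'other/123.csv')]
make_header(names)  # expected, per the tests below: ['field_123', 'field_456', 'field_123_2']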
Example #7
 def test_make_header_should_not_ignore_permit_not(self):
     result = plugins_utils.make_header(["abc", "^qwe", "rty"], permit_not=True)
     expected_result = ["abc", "^qwe", "rty"]
     self.assertEqual(result, expected_result)
Example #8
 def test_make_header_should_add_underscore_if_starts_with_number(self):
     result = plugins_utils.make_header(["123", "456", "123"])
     expected_result = ["field_123", "field_456", "field_123_2"]
     self.assertEqual(result, expected_result)
Example #9
 def test_make_header_should_not_ignore_permit_not(self):
     result = plugins_utils.make_header(['abc', '^qwe', 'rty'],
                                        permit_not=True)
     expected_result = ['abc', '^qwe', 'rty']
     self.assertEqual(result, expected_result)
Example #10
 def test_make_header_should_add_underscore_if_starts_with_number(self):
     result = plugins_utils.make_header(['123', '456', '123'])
     expected_result = ['field_123', 'field_456', 'field_123_2']
     self.assertEqual(result, expected_result)
Example #11
 def test_make_header_should_add_underscore_if_starts_with_number(self):
     result = plugins_utils.make_header(['123', '456', '123'])
     expected_result = ['field_123', 'field_456', 'field_123_2']
     self.assertEqual(result, expected_result)
Example #12
 def test_make_header_should_not_ignore_permit_not(self):
     result = plugins_utils.make_header(['abc', '^qwe', 'rty'],
                                        permit_not=True)
     expected_result = ['abc', '^qwe', 'rty']
     self.assertEqual(result, expected_result)
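Taken together, the tests pin down a few behaviours of make_header: names starting with a digit get a field_ prefix, repeated names get a numeric suffix, and with permit_not=True a leading "^" is preserved. A simplified, self-contained sketch that reproduces just those cases; it is not the library's implementation and ignores everything else make_header may do (lowercasing rules, accent and punctuation handling, etc.):

import re
from collections import defaultdict

def make_header_sketch(field_names, permit_not=False):
    # Simplified stand-in covering only the behaviour shown in the tests above.
    result, seen = [], defaultdict(int)
    for name in field_names:
        prefix = "^" if permit_not and name.startswith("^") else ""
        slug = re.sub(r"[^a-z0-9_]", "_", name.lower().lstrip("^")).strip("_") or "field"
        if slug[0].isdigit():              # "123" -> "field_123"
            slug = "field_" + slug
        seen[slug] += 1
        if seen[slug] > 1:                 # second "field_123" -> "field_123_2"
            slug = "{}_{}".format(slug, seen[slug])
        result.append(prefix + slug)
    return result

assert make_header_sketch(["123", "456", "123"]) == ["field_123", "field_456", "field_123_2"]
assert make_header_sketch(["abc", "^qwe", "rty"], permit_not=True) == ["abc", "^qwe", "rty"]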