def csv2sqlite(input_filename, output_filename, table_name, samples=30000,
               batch_size=10000, encoding='utf-8', callback=None,
               force_types=None):
    # Identify data types
    fobj = open_compressed(input_filename, encoding)
    reader = csv.reader(fobj)
    header = next(reader)
    data = []
    for index, row in enumerate(reader):
        row = dict(zip(header, row))
        if index == samples:
            break
        data.append(row)
    fields = rows.import_from_dicts(data, import_fields=header).fields
    if force_types is not None:
        fields.update(force_types)

    # Create lazy table object to be converted
    table = rows.Table(fields=fields)
    reader = csv.reader(open_compressed(input_filename, encoding))
    next(reader)  # skip header
    table._rows = reader

    # Export to SQLite
    rows.export_to_sqlite(table, output_filename, table_name=table_name,
                          callback=callback, batch_size=batch_size)
def test_export_to_sqlite_filename(self):
    # TODO: may test file contents
    temp = tempfile.NamedTemporaryFile(delete=False)
    self.files_to_delete.append(temp.name)
    rows.export_to_sqlite(utils.table, temp.name)

    table = rows.import_from_sqlite(temp.name)
    self.assert_table_equal(table, utils.table)
def test_sqlite_injection(self):
    connection = rows.export_to_sqlite(utils.table, ':memory:')

    with self.assertRaises(ValueError):
        rows.import_from_sqlite(connection,
                                table_name='table1", "sqlite_master')

    with self.assertRaises(ValueError):
        rows.export_to_sqlite(utils.table, ':memory:',
                              table_name='table1", "sqlite_master')
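# The test above expects table names such as 'table1", "sqlite_master' to be
# rejected with ValueError, since table names cannot be bound as query
# parameters and must be interpolated into the SQL. A minimal sketch of such
# a guard, assuming a whitelist of plain SQL identifiers; the actual check
# inside rows may differ and `_is_valid_identifier` is a hypothetical helper,
# not part of the rows API:
import re

VALID_IDENTIFIER = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')

def _is_valid_identifier(table_name):
    # Accept only plain identifiers, so a malicious name can never close the
    # quoting of a dynamically built statement.
    return bool(VALID_IDENTIFIER.match(table_name))

assert _is_valid_identifier('table1')
assert not _is_valid_identifier('table1", "sqlite_master')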
def test_export_callback(self):
    table = rows.import_from_dicts([{"id": number} for number in range(10)])
    myfunc = mock.Mock()
    rows.export_to_sqlite(table, ":memory:", callback=myfunc, batch_size=3)
    self.assertEqual(myfunc.call_count, 4)
    self.assertEqual(
        [(x[0][0], x[0][1]) for x in myfunc.call_args_list],
        [(3, 3), (3, 6), (3, 9), (1, 10)],
    )
def test_export_callback(self):
    table = rows.import_from_dicts([{'id': number} for number in range(10)])
    myfunc = mock.Mock()
    rows.export_to_sqlite(table, ':memory:', callback=myfunc, batch_size=3)
    self.assertEqual(myfunc.call_count, 4)
    self.assertEqual(
        [x[0][0] for x in myfunc.call_args_list],
        [3, 6, 9, 10]
    )
def test_export_to_sqlite_connection(self):
    # TODO: may test file contents
    temp = tempfile.NamedTemporaryFile(delete=False, mode='wb')
    self.files_to_delete.append(temp.name)
    connection = sqlite3.connect(temp.name)
    rows.export_to_sqlite(utils.table, connection)
    connection.close()

    table = rows.import_from_sqlite(temp.name)
    self.assert_table_equal(table, utils.table)
def test_issue_168(self):
    temp = tempfile.NamedTemporaryFile(delete=False)
    filename = "{}.{}".format(temp.name, self.file_extension)
    self.files_to_delete.append(filename)

    table = rows.Table(fields=OrderedDict([("jsoncolumn", rows.fields.JSONField)]))
    table.append({"jsoncolumn": '{"python": 42}'})
    rows.export_to_sqlite(table, filename)

    table2 = rows.import_from_sqlite(filename)
    self.assert_table_equal(table, table2)
def test_export_to_sqlite_forcing_table_name_appends_rows(self):
    # TODO: may test file contents
    temp = tempfile.NamedTemporaryFile(delete=False)
    self.files_to_delete.append(temp.name)
    rows.export_to_sqlite(utils.table, temp.name, table_name='rows')
    rows.export_to_sqlite(utils.table, temp.name, table_name='rows')

    result_table = rows.import_from_sqlite(temp.name, table_name='rows')

    self.assertEqual(len(result_table), 2 * len(utils.table))
    self.assert_table_equal(result_table, utils.table + utils.table)
def test_issue_170(self):
    temp = tempfile.NamedTemporaryFile(delete=False)
    self.files_to_delete.append(temp.name)

    table = rows.Table(fields=OrderedDict([
        ('intvalue', rows.fields.IntegerField),
        ('floatvalue', rows.fields.FloatField),
    ]))
    table.append({'intvalue': 42, 'floatvalue': 3.14})
    table.append({'intvalue': None, 'floatvalue': None})

    # should not raise an exception
    rows.export_to_sqlite(table, temp.name)
def query(input_encoding, output_encoding, input_locale, output_locale,
          verify_ssl, fields, output, query, sources):
    # TODO: may move all 'destination' to '--output'
    # TODO: may use sys.stdout.encoding if output_file == '-'
    output_encoding = (output_encoding or sys.stdout.encoding or
                       DEFAULT_OUTPUT_ENCODING)

    if not query.lower().startswith('select'):
        field_names = '*' if fields is None else fields
        table_names = ', '.join(['table{}'.format(index)
                                 for index in range(1, len(sources) + 1)])
        query = 'SELECT {} FROM {} WHERE {}'.format(field_names, table_names,
                                                    query)

    if input_locale is not None:
        with rows.locale_context(input_locale):
            tables = [_import_table(source, encoding=input_encoding,
                                    verify_ssl=verify_ssl)
                      for source in sources]
    else:
        tables = [_import_table(source, encoding=input_encoding,
                                verify_ssl=verify_ssl)
                  for source in sources]

    sqlite_connection = rows.export_to_sqlite(tables[0], ':memory:',
                                              table_name='table1')
    for index, table in enumerate(tables[1:], start=2):
        rows.export_to_sqlite(table, sqlite_connection,
                              table_name='table{}'.format(index))

    result = rows.import_from_sqlite(sqlite_connection, query=query)

    if output is None:
        fobj = BytesIO()
        if output_locale is not None:
            with rows.locale_context(output_locale):
                rows.export_to_txt(result, fobj, encoding=output_encoding)
        else:
            rows.export_to_txt(result, fobj, encoding=output_encoding)
        fobj.seek(0)
        click.echo(fobj.read())
    else:
        if output_locale is not None:
            with rows.locale_context(output_locale):
                export_to_uri(output, result, encoding=output_encoding)
        else:
            export_to_uri(output, result, encoding=output_encoding)
def test_export_to_sqlite_create_unique_table_name(self):
    # TODO: may test file contents
    temp = tempfile.NamedTemporaryFile(delete=False)
    self.files_to_delete.append(temp.name)

    first_table = utils.table
    second_table = utils.table + utils.table
    third_table = utils.table + utils.table + utils.table
    fourth_table = utils.table + utils.table + utils.table

    rows.export_to_sqlite(first_table, temp.name, table_name='rows')
    rows.export_to_sqlite(second_table, temp.name, table_name='rows')
    rows.export_to_sqlite(third_table, temp.name, table_name='test')
    rows.export_to_sqlite(fourth_table, temp.name, table_name='test')

    result_first_table = rows.import_from_sqlite(temp.name, table_name='rows')
    result_second_table = rows.import_from_sqlite(temp.name, table_name='rows_2')
    result_third_table = rows.import_from_sqlite(temp.name, table_name='test')
    result_fourth_table = rows.import_from_sqlite(temp.name, table_name='test_2')

    self.assert_table_equal(result_first_table, first_table)
    self.assert_table_equal(result_second_table, second_table)
    self.assert_table_equal(result_third_table, third_table)
    self.assert_table_equal(result_fourth_table, fourth_table)
def test_import_from_sqlite_query_args(self):
    connection = rows.export_to_sqlite(utils.table, ':memory:')
    table = rows.import_from_sqlite(
        connection,
        query='SELECT * FROM table1 WHERE float_column > ?',
        query_args=(3, ))

    for row in table:
        self.assertTrue(row.float_column > 3)
def csv2sqlite(input_filename, output_filename, samples=None,
               batch_size=10000, encoding='utf-8', callback=None,
               force_types=None, table_name='table1'):
    'Export a CSV file to SQLite, based on field type detection from samples'

    # Identify data types
    fobj = open_compressed(input_filename, encoding=encoding)
    data = list(islice(csv.DictReader(fobj), samples))
    fields = rows.import_from_dicts(data).fields
    if force_types is not None:
        fields.update(force_types)

    # Create lazy table object to be converted
    # TODO: this laziness feature will be incorporated into the library soon
    reader = csv.reader(open_compressed(input_filename, encoding=encoding))
    header = next(reader)  # skip header
    table = rows.Table(fields=OrderedDict([(field, fields[field])
                                           for field in header]))
    table._rows = reader

    # Export to SQLite
    return rows.export_to_sqlite(table, output_filename, table_name=table_name,
                                 batch_size=batch_size, callback=callback)
def test_export_to_sqlite_uses_serialize(self, mocked_serialize):
    temp = tempfile.NamedTemporaryFile(delete=False)
    self.files_to_delete.append(temp.name)
    encoding = 'iso-8859-15'
    kwargs = {'test': 123, 'parameter': 3.14, }
    mocked_serialize.return_value = iter(
        rows.plugins.utils.serialize(utils.table))

    rows.export_to_sqlite(utils.table, temp.name, encoding=encoding, **kwargs)
    self.assertTrue(mocked_serialize.called)
    self.assertEqual(mocked_serialize.call_count, 1)

    call = mocked_serialize.call_args
    self.assertEqual(call[0], (utils.table, ))
    kwargs['encoding'] = encoding
    self.assertEqual(call[1], kwargs)
def test_export_to_sqlite_uses_prepare_to_export(self, mocked_prepare_to_export):
    temp = tempfile.NamedTemporaryFile(delete=False)
    self.files_to_delete.append(temp.name)
    encoding = "iso-8859-15"
    kwargs = {"test": 123, "parameter": 3.14}
    mocked_prepare_to_export.return_value = iter(
        rows.plugins.utils.prepare_to_export(utils.table)
    )

    rows.export_to_sqlite(utils.table, temp.name, encoding=encoding, **kwargs)
    self.assertTrue(mocked_prepare_to_export.called)
    self.assertEqual(mocked_prepare_to_export.call_count, 1)

    call = mocked_prepare_to_export.call_args
    self.assertEqual(call[0], (utils.table,))
    kwargs["encoding"] = encoding
    self.assertEqual(call[1], kwargs)
def csv_to_sqlite(
    input_filename,
    output_filename,
    samples=None,
    dialect=None,
    batch_size=10000,
    encoding="utf-8",
    callback=None,
    force_types=None,
    chunk_size=8388608,
    table_name="table1",
    schema=None,
):
    "Export a CSV file to SQLite, based on field type detection from samples"

    # TODO: automatically detect encoding if encoding is None
    # TODO: should be able to specify fields

    if dialect is None:  # Get a sample to detect dialect
        fobj = open_compressed(input_filename, mode="rb")
        sample = fobj.read(chunk_size)
        dialect = rows.plugins.csv.discover_dialect(sample, encoding=encoding)
    elif isinstance(dialect, six.text_type):
        dialect = csv.get_dialect(dialect)

    if schema is None:  # Identify data types
        fobj = open_compressed(input_filename, encoding=encoding)
        data = list(islice(csv.DictReader(fobj, dialect=dialect), samples))
        schema = rows.import_from_dicts(data).fields
        if force_types is not None:
            schema.update(force_types)

    # Create lazy table object to be converted
    # TODO: this laziness feature will be incorporated into the library soon,
    # so we can call `rows.import_from_csv` here instead of `csv.reader`.
    reader = csv.reader(
        open_compressed(input_filename, encoding=encoding), dialect=dialect
    )
    header = make_header(next(reader))  # skip header
    table = rows.Table(
        fields=OrderedDict([(field, schema[field]) for field in header])
    )
    table._rows = reader

    # Export to SQLite
    return rows.export_to_sqlite(
        table,
        output_filename,
        table_name=table_name,
        batch_size=batch_size,
        callback=callback,
    )
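# A minimal usage sketch of csv_to_sqlite above. The file names are
# illustrative only, and `open_compressed`/`make_header` are assumed to be
# helpers available in the same module. The two-argument callback matches the
# contract exercised by test_export_callback: (rows in batch, rows so far).
import rows

def print_progress(batch_rows, total_rows):
    # Called once per exported batch.
    print('{} rows exported so far'.format(total_rows))

connection = csv_to_sqlite(
    'data.csv.gz',    # hypothetical (possibly compressed) input file
    'data.sqlite',    # hypothetical output database
    samples=10000,    # rows read for field type detection
    force_types={'id': rows.fields.IntegerField},
    callback=print_progress,
)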
def query(input_encoding, output_encoding, input_locale, output_locale,
          verify_ssl, fields, output, query, sources):
    # TODO: may use sys.stdout.encoding if output_file == '-'
    output_encoding = (output_encoding or sys.stdout.encoding or
                       DEFAULT_OUTPUT_ENCODING)

    if not query.lower().startswith('select'):
        field_names = '*' if fields is None else fields
        table_names = ', '.join(['table{}'.format(index)
                                 for index in range(1, len(sources) + 1)])
        query = 'SELECT {} FROM {} WHERE {}'.format(field_names, table_names,
                                                    query)

    if len(sources) == 1:
        source = detect_source(sources[0], verify_ssl=verify_ssl)

        if source.plugin_name != 'sqlite':
            if input_locale is not None:
                with rows.locale_context(input_locale):
                    table = import_from_source(source, DEFAULT_INPUT_ENCODING)
            else:
                table = import_from_source(source, DEFAULT_INPUT_ENCODING)

            sqlite_connection = sqlite3.Connection(':memory:')
            rows.export_to_sqlite(table, sqlite_connection,
                                  table_name='table1')
            result = rows.import_from_sqlite(sqlite_connection, query=query)
        else:
            # Optimization: query the SQLite database directly
            result = import_from_source(source, DEFAULT_INPUT_ENCODING,
                                        query=query)
    else:
        if input_locale is not None:
            with rows.locale_context(input_locale):
                tables = [_import_table(source, encoding=input_encoding,
                                        verify_ssl=verify_ssl)
                          for source in sources]
        else:
            tables = [_import_table(source, encoding=input_encoding,
                                    verify_ssl=verify_ssl)
                      for source in sources]

        sqlite_connection = sqlite3.Connection(':memory:')
        for index, table in enumerate(tables, start=1):
            rows.export_to_sqlite(table, sqlite_connection,
                                  table_name='table{}'.format(index))

        result = rows.import_from_sqlite(sqlite_connection, query=query)

    if output is None:
        fobj = BytesIO()
        if output_locale is not None:
            with rows.locale_context(output_locale):
                rows.export_to_txt(result, fobj, encoding=output_encoding)
        else:
            rows.export_to_txt(result, fobj, encoding=output_encoding)
        fobj.seek(0)
        click.echo(fobj.read())
    else:
        if output_locale is not None:
            with rows.locale_context(output_locale):
                export_to_uri(result, output, encoding=output_encoding)
        else:
            export_to_uri(result, output, encoding=output_encoding)
def query(
    input_encoding,
    output_encoding,
    input_locale,
    output_locale,
    verify_ssl,
    samples,
    output,
    frame_style,
    query,
    sources,
):
    samples = samples if samples > 0 else None

    if not query.lower().startswith("select"):
        table_names = ", ".join(
            ["table{}".format(index) for index in range(1, len(sources) + 1)]
        )
        query = "SELECT * FROM {} WHERE {}".format(table_names, query)

    if len(sources) == 1:
        source = detect_source(sources[0], verify_ssl=verify_ssl, progress=True)

        if source.plugin_name in ("sqlite", "postgresql"):
            # Optimization: query the db directly
            result = import_from_source(
                source, DEFAULT_INPUT_ENCODING, query=query, samples=samples
            )
        else:
            if input_locale is not None:
                with rows.locale_context(input_locale):
                    table = import_from_source(
                        source, DEFAULT_INPUT_ENCODING, samples=samples
                    )
            else:
                table = import_from_source(
                    source, DEFAULT_INPUT_ENCODING, samples=samples
                )

            sqlite_connection = sqlite3.Connection(":memory:")
            rows.export_to_sqlite(table, sqlite_connection, table_name="table1")
            result = rows.import_from_sqlite(sqlite_connection, query=query)
    else:
        # TODO: if all sources are SQLite we can also optimize the import
        if input_locale is not None:
            with rows.locale_context(input_locale):
                tables = [
                    _import_table(
                        source,
                        encoding=input_encoding,
                        verify_ssl=verify_ssl,
                        samples=samples,
                    )
                    for source in sources
                ]
        else:
            tables = [
                _import_table(
                    source,
                    encoding=input_encoding,
                    verify_ssl=verify_ssl,
                    samples=samples,
                )
                for source in sources
            ]

        sqlite_connection = sqlite3.Connection(":memory:")
        for index, table in enumerate(tables, start=1):
            rows.export_to_sqlite(
                table, sqlite_connection, table_name="table{}".format(index)
            )

        result = rows.import_from_sqlite(sqlite_connection, query=query)

    # TODO: may use sys.stdout.encoding if output_file == '-'
    output_encoding = output_encoding or sys.stdout.encoding or DEFAULT_OUTPUT_ENCODING
    if output is None:
        fobj = BytesIO()
        if output_locale is not None:
            with rows.locale_context(output_locale):
                rows.export_to_txt(
                    result, fobj, encoding=output_encoding, frame_style=frame_style
                )
        else:
            rows.export_to_txt(
                result, fobj, encoding=output_encoding, frame_style=frame_style
            )
        fobj.seek(0)
        click.echo(fobj.read())
    else:
        if output_locale is not None:
            with rows.locale_context(output_locale):
                export_to_uri(result, output, encoding=output_encoding)
        else:
            export_to_uri(result, output, encoding=output_encoding)
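# The multi-source branch above boils down to: import every source, dump each
# one into an in-memory SQLite database as table1..tableN, then run the SQL
# there. A minimal standalone sketch of that flow using the same rows calls;
# the file names and the id/client_id columns are illustrative only:
import sqlite3

import rows

clients = rows.import_from_csv('clients.csv')  # becomes table1
orders = rows.import_from_csv('orders.csv')    # becomes table2

connection = sqlite3.Connection(':memory:')
for index, table in enumerate([clients, orders], start=1):
    rows.export_to_sqlite(table, connection,
                          table_name='table{}'.format(index))

result = rows.import_from_sqlite(
    connection,
    query='SELECT * FROM table1, table2 WHERE table1.id = table2.client_id',
)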