def test_rewrite_dquotes(self):
    """A double-quoted ``@"path"`` reference is rewritten to a quoted table name."""
    query = """SELECT * FROM @"./data/remap.csv" WHERE frm = 'y';"""
    remapping = {}

    rewritten, tables = rewrite_sql([query], remapping)

    # The file reference becomes a table named after the file's stem...
    expected_sql = """SELECT * FROM "remap" WHERE frm = 'y';"""
    self.assertEqual(expected_sql, rewritten)
    # ...and the returned mapping points that table at the expanded path.
    expected_tables = {
        'remap': expand_path_and_exists('./data/remap.csv')[0],
    }
    self.assertEqual(expected_tables, tables)
def test_rewrite_multiple(self):
    """Every ``@path`` reference in the SQL text is rewritten and recorded."""
    query = """SELECT * FROM @./data/remap.csv WHERE frm = 'y' SELECT * FROM @./data/test1.csv WHERE foo = 'bar';"""
    remapping = {}

    rewritten, tables = rewrite_sql([query], remapping)

    expected_sql = """SELECT * FROM "remap" WHERE frm = 'y' SELECT * FROM "test1" WHERE foo = 'bar';"""
    self.assertEqual(expected_sql, rewritten)

    # One table entry is expected per referenced file.
    expected_tables = {
        'remap': expand_path_and_exists('./data/remap.csv')[0],
        'test1': expand_path_and_exists('./data/test1.csv')[0],
    }
    self.assertDictEqual(expected_tables, tables)
def execute(sql: str,
            headers=None,
            filters=None,
            output='-',
            output_format='csv',
            skip_lines=0,
            output_delimiter=',',
            column_remapping=None,
            table_remapping=None,
            auto_filter=False,
            save_db=None,
            load_db=None,
            dialect='unix',
            input_delimiter=',',
            input_quotechar='"',
            debug_=False):
    """
    Load the files referenced by ``sql`` into SQLite tables and run the query.

    The SQL is first passed through ``rewrite_sql`` which returns the
    rewritten statement and a ``{table_name: path}`` mapping. Each file is
    read with ``csv.reader`` and inserted into a table of that name; the
    result of the query is then handed to ``do_output``.

    :param sql: SQL text, passed unchanged to ``rewrite_sql``
    :param headers: column names, either a list or a comma separated string;
        when empty the first row read from each file supplies the names
    :param filters: {"col": [["filter", ...args...], ...]
    :param output: passed to ``do_output``
    :param output_format: passed to ``do_output``
    :param skip_lines: number of leading lines to discard from each file
    :param output_delimiter: passed to ``do_output``
    :param column_remapping: {"col": "map_to_col", ...}
    :param table_remapping: {"table": "map_to_col", ...}, passed to ``rewrite_sql``
    :param auto_filter: when true, every column without an explicit filter
        gets the ``[['num']]`` filter
    :param save_db: path of a database file to create; it must not exist yet
    :param load_db: path of an existing database file to open
    :param dialect: ``csv.reader`` dialect
    :param input_delimiter: ``csv.reader`` delimiter
    :param input_quotechar: ``csv.reader`` quotechar
    :param debug_: value assigned to the module level ``DEBUG`` flag
    :return: None
    :raises Error: if ``save_db`` already exists or a table cannot be created
    :raises FileNotFoundError: if ``load_db`` does not exist
    """
    global DEBUG
    DEBUG = debug_

    column_remapping = column_remapping or {}
    headers = headers or []
    if headers and isinstance(headers, str):
        headers = [h.strip() for h in headers.split(',')]
    # Work on a copy: auto filtering adds entries and must not leak into the
    # dict owned by the caller.
    filters = dict(filters) if filters else {}

    # Re-write the SQL, replacing filenames with table names and apply table re-mapping(s)
    sql, tables = rewrite_sql(sql, table_remapping)
    debug(sql, 'sql=')
    debug(tables, 'tables=')

    # Open the database
    if save_db:
        path, exists = expand_path_and_exists(save_db)
        if exists:
            raise Error(f"Database file {path} already exists.")
        con = sqlite3.connect(path)
    elif load_db:
        path, exists = expand_path_and_exists(load_db)
        if not exists:
            raise FileNotFoundError(f"Database file {path} not found.")
        con = sqlite3.connect(path)
    else:
        con = sqlite3.connect(":memory:")

    # Make sure the connection is released even when loading or querying fails.
    try:
        cur = con.cursor()

        # Read each CSV or TSV file and insert into a SQLite table based on the filename of the file
        for tablename, path in tables.items():
            # newline='' lets the csv module do its own newline handling.
            with open(path, newline='') as f:
                for _ in range(skip_lines):
                    f.readline()

                reader = csv.reader(f,
                                    dialect=dialect,
                                    delimiter=input_delimiter,
                                    quotechar=input_quotechar)

                first, colnames, insert_sql = True, [], None
                for row in reader:
                    # NOTE: empty cells are dropped here, so a row with blank
                    # fields ends up shorter than the header.
                    row = [n.strip() for n in row if n]

                    if first:
                        # The first row is always consumed as the header row,
                        # even when explicit headers were supplied.
                        col_src = headers if headers else row
                        colnames = [
                            column_remapping.get(n.strip()) or n.strip()
                            for n in col_src
                        ]

                        # Apply auto filtering
                        if auto_filter:
                            for col in colnames:
                                if col not in filters:
                                    filters[col] = [['num']]
                            debug(filters, 'filters (auto)=')

                        debug(colnames, 'colnames=')
                        colnames_str = ','.join(f'"{c}"' for c in colnames)

                        s = f"""CREATE TABLE "{tablename}" ({colnames_str});"""
                        debug(s)
                        try:
                            cur.execute(s)
                        except sqlite3.OperationalError as e:
                            raise Error(
                                "Failed to create table. Most likely cause is missing headers. "
                                "Use --headers/-r and/or --skip-lines/-k to setup headers."
                            ) from e

                        # One placeholder per declared column; the statement
                        # is identical for every row so build it once.
                        placeholders = ', '.join(['?'] * len(colnames))
                        insert_sql = (
                            f"""INSERT INTO "{tablename}" ({colnames_str}) """
                            f"""VALUES ({placeholders});"""
                        )
                        first = False
                        continue

                    filtered_row = apply_filters(filters, colnames, row)
                    cur.execute(insert_sql, filtered_row)

        con.commit()

        debug(sql, 'sql=')
        do_output(sql, cur, output, output_format, output_delimiter)
    finally:
        con.close()
def test_rewrite_bad_syntax(self):
    """Run ``rewrite_sql`` on a bare ``@`` that names no file."""
    # TODO: This doesn't fail, but it should
    query = """SELECT * FROM @ WHERE frm = 'y';"""
    remapping = {}
    # with self.assertRaises(FileNotFoundError):
    _rewritten, _tables = rewrite_sql([query], remapping)
def test_rewrite_bad_quotes(self):
    """Mismatched quotes around the file reference raise ``FileNotFoundError``."""
    query = """SELECT * FROM @'foo" WHERE frm = 'y';"""
    remapping = {}
    self.assertRaises(FileNotFoundError, rewrite_sql, [query], remapping)
def test_rewrite_db_table(self):
    """SQL without any ``@`` file reference is returned untouched."""
    query = """SELECT * FROM foo WHERE frm = 'y';"""
    remapping = {}

    rewritten, tables = rewrite_sql([query], remapping)

    # Plain table names are left alone and no file mapping is produced.
    self.assertEqual("""SELECT * FROM foo WHERE frm = 'y';""", rewritten)
    self.assertEqual({}, tables)
def test_rewrite_stdin_with_at_and_quotes(self):
    """Expect a quoted ``'@-'`` reference to be rewritten to the stdin table."""
    # TODO: Doesn't work... maybe don't support this?
    query = """SELECT * FROM '@-' WHERE frm = 'y';"""
    remapping = {}

    rewritten, tables = rewrite_sql([query], remapping)

    expected_sql = """SELECT * FROM "stdin" WHERE frm = 'y';"""
    self.assertEqual(expected_sql, rewritten)
    self.assertEqual({'stdin': '-'}, tables)
def test_rewrite_stdin_with_at(self):
    """An unquoted ``@-`` reference is rewritten to the ``"stdin"`` table."""
    query = """SELECT * FROM @- WHERE frm = 'y';"""
    remapping = {}

    rewritten, tables = rewrite_sql([query], remapping)

    expected_sql = """SELECT * FROM "stdin" WHERE frm = 'y';"""
    self.assertEqual(expected_sql, rewritten)
    # The table maps back to '-' rather than to a filesystem path.
    self.assertEqual({'stdin': '-'}, tables)
def execute(
        sql: str,
        headers=None,
        filters=None,
        output='-',
        output_format='table',
        skip_lines=0,
        output_delimiter=',',
        column_remapping=None,
        table_remapping=None,
        auto_filter=False,
        save_db=None,
        load_db=None,
        input_format='csv',
        input_delimiter=',',
        input_encoding='utf-8',
        input_compression=None,
        debug_=False):
    """
    Load the sources referenced by ``sql`` into SQLite tables and run the query.

    The SQL is first passed through ``rewrite_sql`` which returns the
    rewritten statement and a ``{table_name: path}`` mapping. Each source is
    opened with ``Stream`` and its rows are inserted into a table of that
    name; the result of the query is then handed to ``do_output``.

    Rows that are empty, or whose filtered length differs from the number of
    columns, are reported through ``error`` and skipped.

    :param sql: SQL text, passed unchanged to ``rewrite_sql``
    :param headers: column names, either a list or a comma separated string;
        when empty ``Stream`` is told to take them from row 1
    :param filters: {"col": [["filter", ...args...], ...]
    :param output: passed to ``do_output``
    :param output_format: passed to ``do_output``
    :param skip_lines: number of leading rows ``Stream`` is told to skip
    :param output_delimiter: passed to ``do_output``
    :param column_remapping: {"col": "map_to_col", ...}
    :param table_remapping: {"table": "map_to_col", ...}, passed to ``rewrite_sql``
    :param auto_filter: when true, every column without an explicit filter
        gets the ``[['num']]`` filter
    :param save_db: path of a database file to create; it must not exist yet
    :param load_db: path of an existing database file to open
    :param input_format: passed to ``Stream`` as ``format``
    :param input_delimiter: passed to ``Stream`` as ``delimiter``
    :param input_encoding: passed to ``Stream`` as ``encoding``
    :param input_compression: passed to ``Stream`` as ``compression``
    :param debug_: value assigned to the module level ``DEBUG`` flag
    :return: None
    :raises Error: if ``save_db`` already exists, column names are
        duplicated, or a table cannot be created
    :raises FileNotFoundError: if ``load_db`` does not exist
    """
    global DEBUG
    DEBUG = debug_

    column_remapping = column_remapping or {}
    headers = headers or []
    if headers and isinstance(headers, str):
        headers = [h.strip() for h in headers.split(',')]
    # Work on a copy: auto filtering adds entries and must not leak into the
    # dict owned by the caller.
    filters = dict(filters) if filters else {}

    # Re-write the SQL, replacing filenames with table names and apply table re-mapping(s)
    sql, tables = rewrite_sql(sql, table_remapping)
    debug(sql, 'sql=')
    debug(tables, 'tables=')

    # Open the database
    if save_db:
        path, exists = expand_path_and_exists(save_db)
        if exists:
            raise Error(f"Database file {path} already exists.")
        con = sqlite3.connect(path)
    elif load_db:
        path, exists = expand_path_and_exists(load_db)
        if not exists:
            raise FileNotFoundError(f"Database file {path} not found.")
        con = sqlite3.connect(path)
    else:
        con = sqlite3.connect(":memory:")

    # TODO: when load_db is used, check for conflicts between tables already
    # in the database and the tables about to be created.

    # Make sure the connection is released even when loading or querying fails.
    try:
        cur = con.cursor()

        # Read each source and insert into a SQLite table based on the filename of the file
        for tablename, path in tables.items():
            # Diagnostics go through debug() so they never mix with query output.
            debug(path, 'path=')
            with Stream(
                    path,
                    format=input_format,
                    delimiter=input_delimiter,
                    skip_rows=range(1, skip_lines + 1),
                    custom_parsers={},
                    custom_loaders={
                        's3': S3Loader,
                        'gs': GSLoader
                    },
                    custom_writers={},
                    ignore_blank_headers=True,
                    encoding=input_encoding,
                    compression=input_compression,
                    headers=headers if headers else 1,
            ) as stream:
                debug(stream.headers, "headers=")
                debug(stream.encoding, "encoding=")

                first, colnames = True, []
                # enumerate() keeps the reported position correct even after
                # rows have been skipped.
                for line_num, row in enumerate(stream):
                    debug(row, "row=")
                    if not row:
                        error(f"Skipping blank line num. {line_num}\n")
                        continue

                    # Strip string cells and drop empty strings; non-string
                    # values are kept as they are.
                    row = [
                        n.strip() if isinstance(n, str) else n
                        for n in row
                        if not isinstance(n, str) or n
                    ]

                    if first:
                        colnames = [
                            column_remapping.get(n.strip()) or n.strip()
                            for n in stream.headers
                        ]

                        # Check for duplicate column names (single pass,
                        # reported in order of first repetition)
                        seen, dups = set(), []
                        for name in colnames:
                            if name in seen and name not in dups:
                                dups.append(name)
                            seen.add(name)
                        if dups:
                            raise Error(
                                f"Invalid duplicate column name(s): {', '.join(dups)}"
                            )

                        # One placeholder per declared column, independent of
                        # how many cells the first data row happens to have.
                        placeholders = ','.join(['?'] * len(colnames))
                        debug(placeholders, "placeholders=")

                        # Apply auto filtering
                        if auto_filter:
                            for col in colnames:
                                if col not in filters:
                                    filters[col] = [['num']]
                            debug(filters, 'filters (auto)=')

                        debug(colnames, 'colnames=')
                        colnames_str = ','.join(f'"{c}"' for c in colnames)

                        check_filters_against_columns(filters, colnames)

                        s = f"""CREATE TABLE "{tablename}" ({colnames_str});"""
                        debug(s)
                        try:
                            cur.execute(s)
                        except sqlite3.OperationalError as e:
                            raise Error(
                                "Failed to create table. Most likely cause is missing headers. "
                                "Use --headers/-r and/or --skip-lines/-k to setup headers."
                            ) from e

                        # Stream already consumed the header row, so the first
                        # row seen here is data and falls through to the insert.
                        first = False

                    filtered_row = apply_filters(filters, colnames, row)
                    if len(filtered_row) != len(colnames):
                        error(
                            f"Warning: Invalid row: {row!r} (line={line_num}). Skipping...\n"
                        )
                        continue

                    s = f"""INSERT INTO "{tablename}" ({colnames_str}) VALUES ({placeholders});"""
                    debug(f"{s}, {filtered_row}")
                    cur.execute(s, filtered_row)

        con.commit()

        debug(sql, 'sql=')
        do_output(sql, cur, output, output_format, output_delimiter)
    finally:
        con.close()