def process_column_remapping(remap_column):
    """
    Build the column re-mapping dictionary from ``SRC=TRG`` strings.

    Each entry is split on its first ``=``; both halves are passed through
    ``apply_char_replacements`` before being stored.

    :param remap_column: iterable of ``SRC=TRG`` strings; None or empty
        yields an empty mapping
    :return: dict mapping each processed source name to its processed target
    :raises ValueError: if an entry contains no ``=``
    """
    column_remapping = {}
    for remap in remap_column or []:
        src, sep, trg = remap.partition('=')
        if not sep:
            # A bare unpack of split() would fail here with an opaque
            # "not enough values to unpack"; name the offending entry.
            raise ValueError(
                f"Invalid column remapping (expected SRC=TRG): {remap!r}")
        column_remapping[apply_char_replacements(src)] = \
            apply_char_replacements(trg)
    return column_remapping
def process_table_remapping(remap_table):
    """
    Build the table re-mapping dictionary from ``SRC=TRG`` strings.

    Each entry is split on its first ``=``; both halves are passed through
    ``apply_char_replacements`` before being stored.

    :param remap_table: iterable of ``SRC=TRG`` strings; None or empty
        yields an empty mapping
    :return: dict mapping each processed source name to its processed target
    :raises ValueError: if an entry contains no ``=``
    """
    table_remapping = {}
    for remap in remap_table or []:
        src, sep, trg = remap.partition('=')
        if not sep:
            # A bare unpack of split() would fail here with an opaque
            # "not enough values to unpack"; name the offending entry.
            raise ValueError(
                f"Invalid table remapping (expected SRC=TRG): {remap!r}")
        table_remapping[apply_char_replacements(src)] = \
            apply_char_replacements(trg)
    return table_remapping
def preprocess_filters(filter_args):
    """
    Validate the filter arguments and group them by column.

    Every argument has the form ``column|filter[|filter...]`` and each
    filter is ``name`` or ``name:arg1,arg2,...``. The filter name must be
    present in ``FILTERS``; name and arguments are all passed through
    ``apply_char_replacements`` before being stored.

    :param filter_args: iterable of filter strings; None or empty yields
        an empty result
    :return: dict mapping column -> list of ``[name, arg, ...]`` lists
    :raises FilterError: when an argument names no filter, a column is
        given more than once, or a filter name is not in ``FILTERS``
    """
    organized = {}
    for combo in filter_args or ():
        column, *specs = combo.split('|')
        if not specs:
            raise FilterError(f"Invalid filter combo: {combo}")
        if column in organized:
            raise FilterError(f"Multiple filters for column: {column}")

        column_filters = organized[column] = []
        for spec in specs:
            # Only the first ':' separates the name from its arguments.
            name, colon, arg_text = spec.partition(":")
            tokens = [name]
            if colon:
                tokens += arg_text.split(",")
            if name not in FILTERS:
                raise FilterError(f"Invalid filter name: {name}")
            column_filters.append(
                [apply_char_replacements(token) for token in tokens])
    return organized
def main(args=None):
    """
    Command-line entry point: parse the arguments and run the SQL.

    Sets the module-level ``DEBUG`` flag from the parsed ``debug`` option.
    When the filter list or the replacements list is requested, the
    corresponding table is printed and nothing else is done.

    :param args: argument list to parse; ``sys.argv[1:]`` when None
    :return: 0
    :raises Error: if no SQL was given
    """
    global DEBUG

    argv = sys.argv[1:] if args is None else args
    opts = build_args_parser().parse_args(args=argv)

    DEBUG = opts.debug
    debug(opts, 'args=')

    # Informational listings short-circuit before any SQL is required.
    if opts.filters_list:
        print_filter_list_table(opts.output_format)
        return 0
    if opts.replacements_list:
        print_replacements_table(opts.output_format)
        return 0

    if not opts.sql:
        raise Error("You must specify the SQL to execute.")

    # Table re-mappings, if any
    table_remapping = process_table_remapping(opts.remap_table)
    debug(table_remapping, 'table_remapping=')

    # Filters
    filters = preprocess_filters(opts.filter)
    debug(filters, 'filters=')

    # Column re-mappings, if any
    column_remapping = process_column_remapping(opts.remap_column)
    debug(column_remapping, 'column_remapping=')

    # Input delimiter may be written with replacement tokens
    input_delimiter = apply_char_replacements(opts.input_delimiter)

    execute(
        opts.sql,
        headers=opts.headers,
        filters=filters,
        output=opts.output,
        output_format=opts.output_format,
        skip_lines=opts.skip_lines,
        output_delimiter=',',
        column_remapping=column_remapping,
        table_remapping=table_remapping,
        auto_filter=opts.auto_filter,
        save_db=opts.save_db,
        load_db=opts.load_db,
        input_format=opts.input_format,
        input_delimiter=input_delimiter,
        input_encoding=opts.input_encoding,
        debug_=opts.debug,
    )
    return 0
def rewrite_sql(sql, table_remap=None):
    """
    Re-write the SQL, replacing @filenames with table names.
    Leave non-@ prefixed table names as-is.
    Handle stdin - and @-

    Every statement is passed through ``apply_char_replacements`` and
    scanned with ``FROM_PATTERN``. Each matched path is replaced by a
    double-quoted table name: the file's base name without extension, or
    ``stdin`` for ``-``, unless ``table_remap`` has an entry for the path,
    the file name or the default table name (checked in that order).

    :param sql: iterable of SQL strings
    :param table_remap: optional dict of table name overrides
    :return: tuple ``(rewritten_sql, tables)`` where ``rewritten_sql`` is
        the concatenation of the rewritten statements and ``tables`` maps
        each table name to its path (``-`` for stdin)
    :raises FileNotFoundError: if a referenced file does not exist
    :raises Error: if a match carries no path in groups 2-4
    """
    table_remap = table_remap or {}
    tables, rewrite = {}, []
    for s in sql:
        s = apply_char_replacements(s)
        # The copy cursor is per statement; carrying it over from the
        # previous statement would slice this one at a stale offset.
        i = 0
        for m in FROM_PATTERN.finditer(s):
            if m.group(2):
                grp, path = 2, m.group(2)
            elif m.group(3):
                grp, path = 3, m.group(3)
            elif m.group(4):
                grp, path = 4, m.group(4)
            else:
                raise Error("Path parsing error.")

            if path != '-':
                path, exists = expand_path_and_exists(path)
                if not exists:
                    raise FileNotFoundError(f"File not found: {path}")

            # NOTE(review): the offsets imply group 2 is wrapped by a
            # 2-char prefix and a 1-char suffix (presumably @"..."),
            # group 3 by a 1-char prefix (presumably @...), and group 4
            # is bare -- confirm against FROM_PATTERN.
            rewrite.append(
                s[i:m.start(grp) - (2 if grp == 2 else 1 if grp == 3 else 0)])
            i = m.end(grp) + (1 if grp == 2 else 0)

            if path != '-':
                filename = os.path.basename(path)
                tablename = os.path.splitext(filename)[0]
            else:
                filename = '-'
                tablename = 'stdin'

            # Most specific override wins: full path, file name, table name
            if path in table_remap:
                tablename = table_remap[path]
            elif filename in table_remap:
                tablename = table_remap[filename]
            elif tablename in table_remap:
                tablename = table_remap[tablename]

            rewrite.append(f'"{tablename}"')
            tables[tablename] = path
        # Copy whatever follows the last match of this statement
        rewrite.append(s[i:])
    return ''.join(rewrite), tables
def rewrite_sql(sql, table_remap=None):
    """
    Re-write the SQL, replacing @filenames with table names.
    Leave non-@ prefixed table names as-is.
    Handle stdin - and @-

    Every statement is passed through ``apply_char_replacements`` and
    scanned with ``FROM_PATTERN``. Paths with an ``http``, ``https``,
    ``s3`` or ``gs`` scheme are kept as given; ``file`` or scheme-less
    paths are expanded and must exist. Each matched path is replaced by a
    double-quoted table name: the base name without extension, or
    ``stdin`` for ``-``, unless ``table_remap`` has an entry for the path,
    the file name or the default table name (checked in that order).
    A warning is written to stderr when the table name is in
    ``RESERVED_WORDS``.

    :param sql: iterable of SQL strings
    :param table_remap: optional dict of table name overrides
    :return: tuple ``(rewritten_sql, tables)`` where ``rewritten_sql`` is
        the concatenation of the rewritten statements and ``tables`` maps
        each table name to its path or URL (``-`` for stdin)
    :raises FileNotFoundError: if a referenced local file does not exist
    :raises Error: if a match carries no path in groups 2-4, or the path
        uses an unsupported URL scheme
    """
    table_remap = table_remap or {}
    tables, rewrite = {}, []
    for s in sql:
        s = apply_char_replacements(s)
        # The copy cursor is per statement; carrying it over from the
        # previous statement would slice this one at a stale offset.
        i = 0
        for m in FROM_PATTERN.finditer(s):
            if m.group(2):
                grp, path = 2, m.group(2)
            elif m.group(3):
                grp, path = 3, m.group(3)
            elif m.group(4):
                grp, path = 4, m.group(4)
            else:
                raise Error("Path parsing error.")

            if path != '-':
                parse_result = urlparse(path)
                scheme = parse_result.scheme
                if scheme in {'file', ''}:
                    # Local file: drop any file:// prefix and verify it
                    path = parse_result.path
                    path, exists = expand_path_and_exists(path)
                    if not exists:
                        raise FileNotFoundError(f"File not found: {path}")
                elif scheme not in {'http', 'https', 's3', 'gs'}:
                    # f-string so the offending scheme shows in the message
                    raise Error(f"Invalid URL scheme: {scheme}")
                # Remote schemes are passed through unchanged.

            # NOTE(review): the offsets imply group 2 is wrapped by a
            # 2-char prefix and a 1-char suffix (presumably @"..."),
            # group 3 by a 1-char prefix (presumably @...), and group 4
            # is bare -- confirm against FROM_PATTERN.
            rewrite.append(
                s[i:m.start(grp) - (2 if grp == 2 else 1 if grp == 3 else 0)])
            i = m.end(grp) + (1 if grp == 2 else 0)

            if path != '-':
                filename = os.path.basename(path)
                tablename = os.path.splitext(filename)[0]
            else:
                filename = '-'
                tablename = 'stdin'

            # Most specific override wins: full path, file name, table name
            if path in table_remap:
                tablename = table_remap[path]
            elif filename in table_remap:
                tablename = table_remap[filename]
            elif tablename in table_remap:
                tablename = table_remap[tablename]

            if tablename.upper() in RESERVED_WORDS:
                # Newline-terminated so consecutive warnings do not run
                # together on stderr.
                sys.stderr.write(
                    f"Warning: Table name {tablename} is a SQLite reserved word.\n"
                )

            rewrite.append(f'"{tablename}"')
            tables[tablename] = path
        # Copy whatever follows the last match of this statement
        rewrite.append(s[i:])
    return ''.join(rewrite), tables