def check_dropped_table(line, dump_tables, schema_level_restore_list, drop_table_expr):
    """
    Decide whether a DROP (EXTERNAL) TABLE line refers to a relation we restore.

    line: the dump line beginning with drop_table_expr
    dump_tables: set of (schema, table) tuples selected for restore
    schema_level_restore_list: schemas restored in full; membership alone qualifies
    drop_table_expr: the statement prefix to strip from the line

    Returns True when the dropped table belongs to the restore set.
    """
    def unquote(name):
        # Strip enclosing double quotes, then collapse escaped quotes.
        return removeEscapingDoubleQuoteInSQLString(checkAndRemoveEnclosingDoubleQuote(name), False)

    # Drop the statement prefix and the trailing ';' to get the bare schema.table.
    fqn = line[len(drop_table_expr):].strip()[:-1]
    schema, table = split_fqn(fqn)
    schema = unquote(schema)
    table = unquote(table)

    if schema_level_restore_list and schema in schema_level_restore_list:
        return True
    return (schema, table) in dump_tables
def _handle_further_investigation(state, line, arguments):
    # Resolve a pending CONSTRAINT comment: the preceding comment line could not
    # decide output on its own, so the matching ALTER TABLE line names the table
    # and settles whether the buffered comment (state.line_buff) is emitted.
    # Returns (handled, state, line): handled is True when this line was an
    # ALTER TABLE that consumed the pending investigation.
    tables_in_table_file = arguments.tables
    schemas_in_schema_file = arguments.schemas_in_schema_file
    if state.further_investigation_required:
        if line.startswith(alter_table_expr):
            state.further_investigation_required = False
            # Get the full qualified table name with the correct split
            if line.startswith(alter_table_only_expr):
                tablename = get_table_from_alter_table(line, alter_table_only_expr)
            else:
                tablename = get_table_from_alter_table(line, alter_table_expr)
            tablename = checkAndRemoveEnclosingDoubleQuote(tablename)
            tablename = removeEscapingDoubleQuoteInSQLString(tablename, False)
            state.output = check_valid_relname(state.schema, tablename, tables_in_table_file, schemas_in_schema_file)
            if state.output:
                if state.line_buff:
                    # Prepend the buffered comment so it is written together
                    # with this ALTER TABLE line.
                    line = state.line_buff + line
                    state.line_buff = ''
            return True, state, line
    return False, state, line
def check_table(schema, line, search_str, dump_tables, schema_level_restore_list=None, is_rule=False):
    """
    Decide whether the relation referenced on a dump line should be restored.

    schema: schema the statement belongs to (already unescaped)
    line: the dump line containing the relation name after search_str
    search_str: the statement prefix immediately preceding the relation name
    dump_tables: set of (schema, table) tuples selected for restore
    schema_level_restore_list: schemas restored in full; membership short-circuits to True
    is_rule: True for CREATE RULE statements, which need extra trimming

    Returns True when the relation is selected; False otherwise, including
    when the line cannot be parsed.
    """
    if schema_level_restore_list and schema in schema_level_restore_list:
        return True
    if not dump_tables:
        return False
    try:
        start = line.index(search_str) + len(search_str)
        if is_rule:
            # cut the line nicely based on extra keyword for create rule statement
            # in case [WHERE condition] clause contains any special chars, cut before WHERE
            end = locate_unquoted_keyword(line, extra_rule_keyword[0])
            if end == -1:
                end = locate_unquoted_keyword(line, extra_rule_keyword[1])
            line = line[:end]
        dot_separator_idx = line.find('.')
        last_double_quote_idx = line.rfind('"')
        has_schema_table_fmt = dot_separator_idx != -1
        has_special_chars = last_double_quote_idx != -1
        if not has_schema_table_fmt and not has_special_chars:
            # plain unqualified name: "name ..."
            table = line[start:].split()[0]
        elif has_schema_table_fmt and not has_special_chars:
            # plain qualified name: "schema.name ..."
            full_table_name = line[start:].split()[0]
            _, table = split_fqn(full_table_name)
        elif not has_schema_table_fmt and has_special_chars:
            # unqualified, double-quoted name: keep the closing quote
            table = line[start:last_double_quote_idx + 1]
        else:
            if dot_separator_idx < last_double_quote_idx:
                # table name is double quoted
                # BUG FIX: the original sliced with the undefined name
                # 'last_double_idx', raising NameError that the bare except
                # swallowed, so this branch always reported False.
                full_table_name = line[start:last_double_quote_idx + 1]
            else:
                # only schema name double quoted
                ending_space_idx = line.find(' ', dot_separator_idx)
                full_table_name = line[start:ending_space_idx]
            _, table = split_fqn(full_table_name)
        table = checkAndRemoveEnclosingDoubleQuote(table)
        table = removeEscapingDoubleQuoteInSQLString(table, False)
        # Single-tuple issubset in the original is just a membership test.
        return (schema, table) in dump_tables
    except Exception:
        # Malformed lines are treated as "not selected" rather than aborting
        # the whole restore filter.
        return False
def extract_table(line):
    """
    Instead of looking for table name ending index based on empty space,
    find it in the reverse way based on the ' (' whereas the column definition
    starts. Removing the enclosing double quote only, don't do strip('"')
    in case table name has double quote
    """
    remainder = line[len_copy_expr:]
    # Try the column-list delimiter first, then the COPY ... FROM form.
    for delimiter in (" (", " FROM"):
        cut = remainder.rfind(delimiter)
        if cut != -1:
            return checkAndRemoveEnclosingDoubleQuote(remainder[:cut])
    raise Exception('Failed to extract table name from line %s' % line)
def extract_table(line):
    """
    Instead of looking for table name ending index based on empty space,
    find it in the reverse way based on the ' (' whereas the column definition
    starts. Removing the enclosing double quote only, don't do strip('"')
    in case table name has double quote
    """
    body = line[len_copy_expr:]
    split_at = body.rfind(" (")
    if split_at == -1:
        raise Exception('Failed to extract table name from line %s' % line)
    # Everything before the column list is the (possibly quoted) table name.
    return checkAndRemoveEnclosingDoubleQuote(body[:split_at])
def extract_schema(line):
    """
    Instead of searching ',' in forwarding way, search ', pg_catalog;'
    reversely, in case schema name contains comma.
    Remove enclosing double quotes only, in case quote is part of the
    schema name
    """
    remainder = line[len_search_path_expr:]
    anchor = remainder.rfind(", pg_catalog;")
    if anchor == -1:
        raise Exception('Failed to extract schema name from line %s' % line)
    # The schema name is everything before the pg_catalog suffix.
    return checkAndRemoveEnclosingDoubleQuote(remainder[:anchor])
def check_table(schema, line, search_str, dump_tables, schema_level_restore_list=None, is_rule=False):
    """
    Decide whether the relation referenced on a dump line should be restored.

    schema: schema the statement belongs to (already unescaped)
    line: the dump line containing the relation name after search_str
    search_str: the statement prefix immediately preceding the relation name
    dump_tables: set of (schema, table) tuples selected for restore
    schema_level_restore_list: schemas restored in full; membership short-circuits to True
    is_rule: True for CREATE RULE statements, which need extra trimming

    Returns True when the relation is selected; False otherwise, including
    when the line cannot be parsed.
    """
    if schema_level_restore_list and schema in schema_level_restore_list:
        return True
    if not dump_tables:
        return False
    try:
        start = line.index(search_str) + len(search_str)
        if is_rule:
            # cut the line nicely based on extra keyword for create rule statement
            # in case [WHERE condition] clause contains any special chars, cut before WHERE
            end = locate_unquoted_keyword(line, extra_rule_keyword[0])
            if end == -1:
                end = locate_unquoted_keyword(line, extra_rule_keyword[1])
            line = line[:end]
        dot_separator_idx = line.find(".")
        last_double_quote_idx = line.rfind('"')
        has_schema_table_fmt = dot_separator_idx != -1
        has_special_chars = last_double_quote_idx != -1
        if not has_schema_table_fmt and not has_special_chars:
            # plain unqualified name: "name ..."
            table = line[start:].split()[0]
        elif has_schema_table_fmt and not has_special_chars:
            # plain qualified name: "schema.name ..."
            full_table_name = line[start:].split()[0]
            _, table = split_fqn(full_table_name)
        elif not has_schema_table_fmt and has_special_chars:
            # unqualified, double-quoted name: keep the closing quote
            table = line[start : last_double_quote_idx + 1]
        else:
            if dot_separator_idx < last_double_quote_idx:
                # table name is double quoted
                # BUG FIX: the original sliced with the undefined name
                # 'last_double_idx', raising NameError that the bare except
                # swallowed, so this branch always reported False.
                full_table_name = line[start : last_double_quote_idx + 1]
            else:
                # only schema name double quoted
                ending_space_idx = line.find(" ", dot_separator_idx)
                full_table_name = line[start:ending_space_idx]
            _, table = split_fqn(full_table_name)
        table = checkAndRemoveEnclosingDoubleQuote(table)
        table = removeEscapingDoubleQuoteInSQLString(table, False)
        # Single-tuple issubset in the original is just a membership test.
        return (schema, table) in dump_tables
    except Exception:
        # Malformed lines are treated as "not selected" rather than aborting
        # the whole restore filter.
        return False
def process_schema(dump_schemas, dump_tables, fdin, fdout, change_schema=None, schema_level_restore_list=None):
    """
    Filter the dump file line by line from restore
    dump_schemas: set of schemas to restore
    dump_tables: set of (schema, table) tuple to restore
    fdin: stdin from dump file
    fdout: to write filtered content to stdout
    change_schema_name: different schema name to restore
    schema_level_restore_list: list of schemas to restore all tables under them
    """
    # State machine over the dump text: 'output' decides whether the current
    # line (and any buffered comment) is written to fdout.
    schema, table = None, None  # schema from the most recent "SET search_path" line
    line_buff = ''  # buffered CONSTRAINT comment awaiting its ALTER TABLE line
    # to help decide whether or not to filter out
    output = False
    # to help exclude SET clause within a function's ddl statement
    function_ddl = False
    further_investigation_required = False  # set by CONSTRAINT comments; resolved by a later ALTER TABLE
    search_path = True  # True while a "SET search_path" line is still expected for the current section
    passedDropSchemaSection = False  # once comments appear, DROP TABLE lines belong to later sections
    for line in fdin:
        # "SET search_path" lines decide which schema subsequent statements belong to.
        if search_path and (line[0] == set_start) and line.startswith(search_path_expr):
            further_investigation_required = False
            # schema in set search_path line is already escaped in dump file
            schema = extract_schema(line)
            schema_wo_escaping = removeEscapingDoubleQuoteInSQLString(schema, False)
            if (dump_schemas and schema_wo_escaping in dump_schemas or
                    schema_level_restore_list and schema_wo_escaping in schema_level_restore_list):
                if change_schema and len(change_schema) > 0:
                    # change schema name can contain special chars including white space, double quote that.
                    # if original schema name is already quoted, replaced it with quoted change schema name
                    quoted_schema = '"' + schema + '"'
                    if quoted_schema in line:
                        line = line.replace(quoted_schema, escapeDoubleQuoteInSQLString(change_schema))
                    else:
                        line = line.replace(schema, escapeDoubleQuoteInSQLString(change_schema))
                output = True
                search_path = False
            else:
                output = False
        # set_assignment must be in the line to filter out dump line: SET SUBPARTITION TEMPLATE
        elif (line[0] == set_start) and line.startswith(set_expr) and set_assignment in line and not function_ddl:
            output = True
        elif (line[0] == drop_start) and line.startswith(drop_expr):
            if line.startswith(drop_table_expr) or line.startswith(drop_external_table_expr):
                if passedDropSchemaSection:
                    output = False
                else:
                    if line.startswith(drop_table_expr):
                        output = check_dropped_table(line, dump_tables, schema_level_restore_list, drop_table_expr)
                    else:
                        output = check_dropped_table(line, dump_tables, schema_level_restore_list, drop_external_table_expr)
            else:
                output = False
        elif line[:2] == comment_start_expr and line.startswith(comment_expr):
            # Parse the line using get_table_info for SCHEMA relation type as well,
            # if type is SCHEMA, then the value of name returned is schema's name, and returned schema is represented by '-'
            name, type, schema = get_table_info(line, comment_expr)
            output = False
            function_ddl = False
            passedDropSchemaSection = True
            if type in ['TABLE', 'EXTERNAL TABLE']:
                further_investigation_required = False
                output = check_valid_table(schema, name, dump_tables, schema_level_restore_list)
                if output:
                    search_path = True
            elif type in ['CONSTRAINT']:
                # Cannot decide yet; buffer the comment until the ALTER TABLE
                # line names the table it applies to.
                further_investigation_required = True
                if (dump_schemas and schema in dump_schemas) or (schema_level_restore_list and schema in schema_level_restore_list):
                    line_buff = line
            elif type in ['ACL']:
                output = check_valid_table(schema, name, dump_tables, schema_level_restore_list)
                if output:
                    search_path = True
            elif type in ['SCHEMA']:
                output = check_valid_schema(name, dump_schemas, schema_level_restore_list)
                if output:
                    search_path = True
            elif type in ['FUNCTION']:
                function_ddl = True
        elif (line[:2] == comment_start_expr) and (line.startswith(comment_data_expr_a) or line.startswith(comment_data_expr_b)):
            passedDropSchemaSection = True
            further_investigation_required = False
            if line.startswith(comment_data_expr_a):
                name, type, schema = get_table_info(line, comment_data_expr_a)
            else:
                name, type, schema = get_table_info(line, comment_data_expr_b)
            if type == 'TABLE DATA':
                output = check_valid_table(schema, name, dump_tables, schema_level_restore_list)
                if output:
                    search_path = True
            else:
                output = False
        elif further_investigation_required:
            # Pending CONSTRAINT comment: the ALTER TABLE line settles it.
            if line.startswith(alter_table_only_expr) or line.startswith(alter_table_expr):
                further_investigation_required = False
                # Get the full qualified table name with the correct split
                if line.startswith(alter_table_only_expr):
                    tablename = get_table_from_alter_table(line, alter_table_only_expr)
                else:
                    tablename = get_table_from_alter_table(line, alter_table_expr)
                tablename = checkAndRemoveEnclosingDoubleQuote(tablename)
                tablename = removeEscapingDoubleQuoteInSQLString(tablename, False)
                output = check_valid_table(schema, tablename, dump_tables, schema_level_restore_list)
                if output:
                    if line_buff:
                        # Flush the buffered comment ahead of this ALTER line.
                        fdout.write(line_buff)
                        line_buff = ''
                    search_path = True
        else:
            further_investigation_required = False
        if output:
            fdout.write(line)
def process_schema(dump_schemas, dump_tables, fdin, fdout, change_schema=None, schema_level_restore_list=None):
    """
    Filter the dump file line by line from restore
    dump_schemas: set of schemas to restore
    dump_tables: set of (schema, table) tuple to restore
    fdin: stdin from dump file
    fdout: to write filtered content to stdout
    change_schema_name: different schema name to restore
    schema_level_restore_list: list of schemas to restore all tables under them
    """
    # State machine over the dump text: 'output' decides whether the current
    # line (and any buffered comment) is written to fdout.
    schema, table = None, None  # schema from the most recent "SET search_path" line
    line_buff = ''  # buffered CONSTRAINT comment awaiting its ALTER TABLE line
    # to help decide whether or not to filter out
    output = False
    # to help exclude SET clause within a function's ddl statement
    function_ddl = False
    further_investigation_required = False  # set by CONSTRAINT comments; resolved by a later ALTER TABLE
    search_path = True  # True while a "SET search_path" line is still expected for the current section
    passedDropSchemaSection = False  # once comments appear, DROP TABLE lines belong to later sections
    for line in fdin:
        # "SET search_path" lines decide which schema subsequent statements belong to.
        if search_path and (line[0] == set_start) and line.startswith(search_path_expr):
            further_investigation_required = False
            # schema in set search_path line is already escaped in dump file
            schema = extract_schema(line)
            schema_wo_escaping = removeEscapingDoubleQuoteInSQLString(schema, False)
            if (dump_schemas and schema_wo_escaping in dump_schemas or
                    schema_level_restore_list and schema_wo_escaping in schema_level_restore_list):
                if change_schema and len(change_schema) > 0:
                    # change schema name can contain special chars including white space, double quote that.
                    # if original schema name is already quoted, replaced it with quoted change schema name
                    quoted_schema = '"' + schema + '"'
                    if quoted_schema in line:
                        line = line.replace(quoted_schema, escapeDoubleQuoteInSQLString(change_schema))
                    else:
                        line = line.replace(schema, escapeDoubleQuoteInSQLString(change_schema))
                output = True
                search_path = False
            else:
                output = False
        # set_assignment must be in the line to filter out dump line: SET SUBPARTITION TEMPLATE
        elif (line[0] == set_start) and line.startswith(set_expr) and set_assignment in line and not function_ddl:
            output = True
        elif (line[0] == drop_start) and line.startswith(drop_expr):
            if line.startswith(drop_table_expr) or line.startswith(drop_external_table_expr):
                if passedDropSchemaSection:
                    output = False
                else:
                    if line.startswith(drop_table_expr):
                        output = check_dropped_table(line, dump_tables, schema_level_restore_list, drop_table_expr)
                    else:
                        output = check_dropped_table(line, dump_tables, schema_level_restore_list, drop_external_table_expr)
            else:
                output = False
        elif line[:2] == comment_start_expr and line.startswith(comment_expr):
            # Parse the line using get_table_info for SCHEMA relation type as well,
            # if type is SCHEMA, then the value of name returned is schema's name, and returned schema is represented by '-'
            name, type, schema = get_table_info(line, comment_expr)
            output = False
            function_ddl = False
            passedDropSchemaSection = True
            if type in ['TABLE', 'EXTERNAL TABLE']:
                further_investigation_required = False
                output = check_valid_table(schema, name, dump_tables, schema_level_restore_list)
                if output:
                    search_path = True
            elif type in ['CONSTRAINT']:
                # Cannot decide yet; buffer the comment until the ALTER TABLE
                # line names the table it applies to.
                further_investigation_required = True
                if (dump_schemas and schema in dump_schemas) or (schema_level_restore_list and schema in schema_level_restore_list):
                    line_buff = line
            elif type in ['ACL']:
                output = check_valid_table(schema, name, dump_tables, schema_level_restore_list)
                if output:
                    search_path = True
            elif type in ['SCHEMA']:
                output = check_valid_schema(name, dump_schemas, schema_level_restore_list)
                if output:
                    search_path = True
            elif type in ['FUNCTION']:
                function_ddl = True
        elif (line[:2] == comment_start_expr) and (line.startswith(comment_data_expr_a) or line.startswith(comment_data_expr_b)):
            passedDropSchemaSection = True
            further_investigation_required = False
            if line.startswith(comment_data_expr_a):
                name, type, schema = get_table_info(line, comment_data_expr_a)
            else:
                name, type, schema = get_table_info(line, comment_data_expr_b)
            if type == 'TABLE DATA':
                output = check_valid_table(schema, name, dump_tables, schema_level_restore_list)
                if output:
                    search_path = True
            else:
                output = False
        elif further_investigation_required:
            # Pending CONSTRAINT comment: the ALTER TABLE line settles it.
            if line.startswith(alter_table_only_expr) or line.startswith(alter_table_expr):
                further_investigation_required = False
                # Get the full qualified table name with the correct split
                if line.startswith(alter_table_only_expr):
                    tablename = get_table_from_alter_table(line, alter_table_only_expr)
                else:
                    tablename = get_table_from_alter_table(line, alter_table_expr)
                tablename = checkAndRemoveEnclosingDoubleQuote(tablename)
                tablename = removeEscapingDoubleQuoteInSQLString(tablename, False)
                output = check_valid_table(schema, tablename, dump_tables, schema_level_restore_list)
                if output:
                    if line_buff:
                        # Flush the buffered comment ahead of this ALTER line.
                        fdout.write(line_buff)
                        line_buff = ''
                    search_path = True
        else:
            further_investigation_required = False
        if output:
            fdout.write(line)
def process_schema(dump_schemas, dump_tables, fdin, fdout, change_schema=None, schema_level_restore_list=None):
    """
    Filter the dump file line by line from restore
    dump_schemas: set of schemas to restore
    dump_tables: set of (schema, table) tuple to restore
    fdin: stdin from dump file
    fdout: to write filtered content to stdout
    change_schema_name: different schema name to restore
    schema_level_restore_list: list of schemas to restore all tables under them
    """
    # State machine over the dump text: 'output' decides whether the current
    # line (and any buffered comment) is written to fdout.
    schema, table = None, None  # schema from the most recent "SET search_path" line
    line_buff = ''  # buffered CONSTRAINT comment awaiting its ALTER TABLE line
    # to help decide whether or not to filter out
    output = False
    # to help exclude SET clause within a function's ddl statement
    function_ddl = False
    further_investigation_required = False  # set by CONSTRAINT comments; resolved by a later ALTER TABLE
    # we need to set search_path to true after every ddl change due to the
    # fact that the schema "set search_path" may change on the next ddl command
    search_path = True
    passedDropSchemaSection = False  # once comments appear, DROP TABLE lines belong to later sections
    cast_func_schema = None  # schema to substitute into a later CREATE CAST ... WITH FUNCTION line
    change_cast_func_schema = False  # set when a CAST/PROCEDURAL LANGUAGE comment was seen
    in_block = False  # inside a BEGIN/END block: pass every line through untouched
    for line in fdin:
        # NOTE: We are checking the first character before actually verifying
        # the line with "startswith" due to the performance gain.
        if in_block:
            output = True
        elif (line[0] == begin_start) and line.startswith(begin_expr):
            in_block = True
            output = True
        elif (line[0] == end_start) and line.startswith(end_expr):
            in_block = False
            output = True
        elif search_path and (line[0] == set_start) and line.startswith(search_path_expr):
            # NOTE: The goal is to output the correct mapping to the search path
            # for the schema
            further_investigation_required = False
            # schema in set search_path line is already escaped in dump file
            schema = extract_schema(line)
            schema_wo_escaping = removeEscapingDoubleQuoteInSQLString(schema, False)
            if schema == "pg_catalog":
                output = True
            elif (dump_schemas and schema_wo_escaping in dump_schemas or
                    schema_level_restore_list and schema_wo_escaping in schema_level_restore_list):
                if change_schema and len(change_schema) > 0:
                    # change schema name can contain special chars including white space, double quote that.
                    # if original schema name is already quoted, replaced it with quoted change schema name
                    quoted_schema = '"' + schema + '"'
                    if quoted_schema in line:
                        line = line.replace(quoted_schema, escapeDoubleQuoteInSQLString(change_schema))
                    else:
                        line = line.replace(schema, escapeDoubleQuoteInSQLString(change_schema))
                cast_func_schema = schema  # Save the schema in case we need to replace a cast's function's schema later
                output = True
                search_path = False
            else:
                output = False
        # set_assignment must be in the line to filter out dump line: SET SUBPARTITION TEMPLATE
        elif (line[0] == set_start) and line.startswith(set_expr) and set_assignment in line and not function_ddl:
            output = True
        elif (line[0] == drop_start) and line.startswith(drop_expr):
            if line.startswith(drop_table_expr) or line.startswith(drop_external_table_expr):
                if passedDropSchemaSection:
                    output = False
                else:
                    if line.startswith(drop_table_expr):
                        output = check_dropped_table(line, dump_tables, schema_level_restore_list, drop_table_expr)
                    else:
                        output = check_dropped_table(line, dump_tables, schema_level_restore_list, drop_external_table_expr)
            else:
                output = False
        elif line[:2] == comment_start_expr and line.startswith(comment_expr):
            # Parse the line using get_table_info for SCHEMA relation type as well,
            # if type is SCHEMA, then the value of name returned is schema's name, and returned schema is represented by '-'
            name, type, schema = get_table_info(line, comment_expr)
            output = False
            function_ddl = False
            passedDropSchemaSection = True
            if type in ['SCHEMA']:
                # Make sure that schemas are created before restoring the desired tables.
                output = check_valid_schema(name, dump_schemas, schema_level_restore_list)
            elif type in ['TABLE', 'EXTERNAL TABLE', 'VIEW', 'SEQUENCE']:
                further_investigation_required = False
                output = check_valid_relname(schema, name, dump_tables, schema_level_restore_list)
            elif type in ['CONSTRAINT']:
                # Cannot decide yet; buffer the comment until the ALTER TABLE
                # line names the table it applies to.
                further_investigation_required = True
                if check_valid_schema(schema, dump_schemas, schema_level_restore_list):
                    line_buff = line
            elif type in ['ACL']:
                output = check_valid_relname(schema, name, dump_tables, schema_level_restore_list)
            elif type in ['FUNCTION']:
                function_ddl = True
                output = check_valid_schema(schema, dump_schemas, schema_level_restore_list)
            elif type in ['CAST', 'PROCEDURAL LANGUAGE']:
                # Restored to pg_catalog, so always filtered in
                output = True
                change_cast_func_schema = True  # When changing schemas, we need to ensure that functions used in casts reference the new schema
            if output:
                search_path = True
        elif (line[:2] == comment_start_expr) and (line.startswith(comment_data_expr_a) or line.startswith(comment_data_expr_b)):
            passedDropSchemaSection = True
            further_investigation_required = False
            if line.startswith(comment_data_expr_a):
                name, type, schema = get_table_info(line, comment_data_expr_a)
            else:
                name, type, schema = get_table_info(line, comment_data_expr_b)
            if type == 'TABLE DATA':
                output = check_valid_relname(schema, name, dump_tables, schema_level_restore_list)
                if output:
                    search_path = True
            else:
                output = False
        elif further_investigation_required:
            # Pending CONSTRAINT comment: the ALTER TABLE line settles it.
            if line.startswith(alter_table_only_expr) or line.startswith(alter_table_expr):
                further_investigation_required = False
                # Get the full qualified table name with the correct split
                if line.startswith(alter_table_only_expr):
                    tablename = get_table_from_alter_table(line, alter_table_only_expr)
                else:
                    tablename = get_table_from_alter_table(line, alter_table_expr)
                tablename = checkAndRemoveEnclosingDoubleQuote(tablename)
                tablename = removeEscapingDoubleQuoteInSQLString(tablename, False)
                output = check_valid_relname(schema, tablename, dump_tables, schema_level_restore_list)
                if output:
                    if line_buff:
                        # Flush the buffered comment ahead of this ALTER line.
                        fdout.write(line_buff)
                        line_buff = ''
                    search_path = True
        elif change_cast_func_schema:
            # Rewrite the schema of the cast's underlying function when the
            # restore target schema differs from the dumped one.
            if "CREATE CAST" in line and "WITH FUNCTION" in line:
                change_cast_func_schema = False
                if change_schema and len(change_schema) > 0:
                    quoted_schema = '"' + cast_func_schema + '"'
                    if quoted_schema in line:
                        line = line.replace(quoted_schema, escapeDoubleQuoteInSQLString(change_schema))
                    else:
                        line = line.replace(cast_func_schema, escapeDoubleQuoteInSQLString(change_schema))
                cast_func_schema = None
        else:
            further_investigation_required = False
        if output:
            fdout.write(line)