def _handle_target_table_token(self, sub_token: TokenList) -> None:
    """
    Add the table named by ``sub_token`` to ``self._lineage_result.write``.

    :param sub_token: a Function, a Comparison or an Identifier carrying the table name
    :raises SQLLineageException: when an Identifier is not found where one is required
    """

    def unexpected():
        # Every failure path reports the outer token's type and text.
        return SQLLineageException(
            "An Identifier is expected, got %s[value: %s] instead"
            % (type(sub_token).__name__, sub_token)
        )

    if isinstance(sub_token, Function):
        # insert into tab (col1, col2) values (val1, val2);
        # here "tab (col1, col2)" is parsed as a Function, see
        # https://github.com/andialbrecht/sqlparse/issues/483
        table_token = sub_token.token_first(skip_cm=True)
        if not isinstance(table_token, Identifier):
            raise unexpected()
        self._lineage_result.write.add(Table.create(table_token))
        return

    if isinstance(sub_token, Comparison):
        # create table tab1 like tab2;
        # "tab1 like tab2" is parsed as a Comparison, see
        # https://github.com/andialbrecht/sqlparse/issues/543
        if not isinstance(sub_token.left, Identifier) or not isinstance(
            sub_token.right, Identifier
        ):
            raise unexpected()
        # left side is written, right side is read
        self._lineage_result.write.add(Table.create(sub_token.left))
        self._lineage_result.read.add(Table.create(sub_token.right))
        return

    if not isinstance(sub_token, Identifier):
        raise unexpected()
    self._lineage_result.write.add(Table.create(sub_token))
def _handle_temp_table_token(self, sub_token: TokenList) -> None:
    """
    Add ``sub_token`` to ``self._lineage_result.intermediate`` and pass it on for extraction.

    :param sub_token: token that must be an Identifier
    :raises SQLLineageException: when ``sub_token`` is not an Identifier
    """
    if isinstance(sub_token, Identifier):
        self._lineage_result.intermediate.add(Table.create(sub_token))
        # the same token is then handed to _extract_from_dml
        self._extract_from_dml(sub_token)
        return

    raise SQLLineageException(
        "An Identifier is expected, got %s[value: %s] instead"
        % (type(sub_token).__name__, sub_token)
    )
def __init__(self, name: str, schema: Schema = Schema()):
    """
    Initialise the table from a bare or schema-qualified name.

    :param name: either ``table`` or ``schema.table``
    :param schema: schema used when ``name`` has no dot; when ``name`` is already
        qualified it is ignored, and a warning is issued if it is truthy
    :raises SQLLineageException: when ``name`` contains more than one dot
    """
    parts = name.split(".")
    if len(parts) == 2:
        schema_name, table_name = parts
        self.schema = Schema(schema_name)
        self.raw_name = table_name
        if schema:
            warnings.warn("Name is in schema.table format, schema param is ignored")
    elif len(parts) == 1:
        # no dot in the name: fall back to the schema argument
        self.schema = schema
        self.raw_name = name
    else:
        # Interpolate the name into the message; passing it as a second
        # exception argument would leave the "%s" placeholder unformatted.
        raise SQLLineageException("Invalid format for table name: %s" % name)
def __init__(self, name: str, schema: Schema = Schema()):
    """
    Data Class for Table

    :param name: table name, optionally qualified as ``schema.table`` or ``db.schema.table``
    :param schema: schema as defined by :class:`Schema`; used only when ``name`` has no dot,
        otherwise ignored (with a warning if it is truthy)
    :raises SQLLineageException: when the qualifier in front of the table name has more than two parts
    """
    if "." not in name:
        self.schema = schema
        self.raw_name = escape_identifier_name(name)
        return

    # everything before the last dot is the schema qualifier
    schema_name, table_name = name.rsplit(".", 1)
    if len(schema_name.split(".")) > 2:
        # allow db.schema as schema_name, but a.b.c as schema_name is forbidden
        # Interpolate the name into the message; passing it as a second
        # exception argument would leave the "%s" placeholder unformatted.
        raise SQLLineageException("Invalid format for table name: %s." % name)
    self.schema = Schema(schema_name)
    self.raw_name = escape_identifier_name(table_name)
    if schema:
        warnings.warn("Name is in schema.table format, schema param is ignored")
def _handle_source_table_token(self, sub_token: TokenList) -> None:
    """
    Add the table(s) named by ``sub_token`` to ``self._lineage_result.read``.

    :param sub_token: an Identifier, an IdentifierList or a Parenthesis
    :raises SQLLineageException: when ``sub_token`` is none of those types
    """
    if isinstance(sub_token, Identifier):
        # SELECT col1 FROM (SELECT col2 FROM tab1) dt
        # The aliased subquery is parsed as an Identifier whose get_real_name
        # would return the alias "dt", so it is not recorded as a table. See
        # https://github.com/andialbrecht/sqlparse/issues/218
        leading = sub_token.token_first(skip_cm=True)
        if not isinstance(leading, Parenthesis):
            self._lineage_result.read.add(Table.create(sub_token))
        return

    if isinstance(sub_token, IdentifierList):
        # This is to support join in ANSI-89 syntax
        for member in sub_token.tokens:
            if isinstance(member, Identifier):
                self._lineage_result.read.add(Table.create(member))
        return

    if isinstance(sub_token, Parenthesis):
        # SELECT col1 FROM (SELECT col2 FROM tab1)
        # A subquery without alias is parsed as a Parenthesis; this syntax is
        # invalid in MySQL but valid for SparkSQL. Nothing is recorded here.
        return

    raise SQLLineageException(
        "An Identifier is expected, got %s[value: %s] instead"
        % (type(sub_token).__name__, sub_token)
    )
def _extract_from_DML(self, token: Token) -> None:
    """
    Scan ``token.tokens`` and record read, write and intermediate tables.

    A keyword that matches SOURCE_TABLE_TOKENS, or is listed in
    TARGET_TABLE_TOKENS / TEMP_TABLE_TOKENS, arms the corresponding flag. The
    next token that is not negligible is then taken as the table reference and
    the flag is cleared again. Every TokenList child is also walked recursively.

    :param token: token whose ``tokens`` are scanned
    :raises SQLLineageException: when the token after such a keyword has an unexpected type
    """
    expect_source = expect_target = expect_temp = False

    def unexpected(found):
        # Shared error for every "wrong token type" path.
        return SQLLineageException(
            "An Identifier is expected, got %s[value: %s] instead"
            % (type(found).__name__, found)
        )

    for child in token.tokens:
        if isinstance(child, TokenList):
            self._extract_from_DML(child)

        if child.ttype in Keyword:
            keyword = child.normalized
            if any(re.match(pattern, keyword) for pattern in SOURCE_TABLE_TOKENS):
                expect_source = True
            elif keyword in TARGET_TABLE_TOKENS:
                expect_target = True
            elif keyword in TEMP_TABLE_TOKENS:
                expect_temp = True
            # a keyword is never a table reference itself
            continue

        if expect_source:
            if self.__token_negligible_before_tablename(child):
                continue
            if isinstance(child, Identifier):
                # SELECT col1 FROM (SELECT col2 FROM tab1) dt
                # The aliased subquery is parsed as an Identifier whose
                # get_real_name would return the alias "dt"; skip it. See
                # https://github.com/andialbrecht/sqlparse/issues/218
                if not isinstance(child.token_first(skip_cm=True), Parenthesis):
                    self._lineage_result.read.add(Table.create(child))
            elif not isinstance(child, Parenthesis):
                # A bare Parenthesis is a subquery without alias (invalid in
                # MySQL, valid for SparkSQL) and is accepted silently;
                # anything else is an error.
                raise unexpected(child)
            expect_source = False
        elif expect_target:
            if self.__token_negligible_before_tablename(child):
                continue
            if isinstance(child, Function):
                # insert into tab (col1, col2) values (val1, val2);
                # "tab (col1, col2)" is parsed as a Function, see
                # https://github.com/andialbrecht/sqlparse/issues/483
                head = child.token_first(skip_cm=True)
                if not isinstance(head, Identifier):
                    raise unexpected(child)
                self._lineage_result.write.add(Table.create(head))
            elif isinstance(child, Comparison):
                # create table tab1 like tab2;
                # "tab1 like tab2" is parsed as a Comparison, see
                # https://github.com/andialbrecht/sqlparse/issues/543
                if not isinstance(child.left, Identifier) or not isinstance(
                    child.right, Identifier
                ):
                    raise unexpected(child)
                self._lineage_result.write.add(Table.create(child.left))
                self._lineage_result.read.add(Table.create(child.right))
            elif isinstance(child, Identifier):
                self._lineage_result.write.add(Table.create(child))
            else:
                raise unexpected(child)
            expect_target = False
        elif expect_temp:
            if self.__token_negligible_before_tablename(child):
                continue
            if not isinstance(child, Identifier):
                raise unexpected(child)
            self._lineage_result.intermediate.add(Table.create(child))
            # walk the identifier's own tokens as well
            self._extract_from_DML(child)
            expect_temp = False