예제 #1
0
 def _handle_target_table_token(self, sub_token: TokenList) -> None:
     """Register the table(s) named by ``sub_token`` as write targets.

     Three token shapes are accepted:

     * ``Function`` -- its first token must be an ``Identifier``, which is
       added to ``self._lineage_result.write``.
     * ``Comparison`` -- both sides must be ``Identifier``; the left side is
       added to ``write`` and the right side to ``read``.
     * ``Identifier`` -- added to ``write`` directly.

     :param sub_token: the token found where a target table is expected
     :raises SQLLineageException: if ``sub_token`` matches none of the
         shapes above
     """
     mismatch_template = "An Identifier is expected, got %s[value: %s] instead"

     if isinstance(sub_token, Function):
         # insert into tab (col1, col2) values (val1, val2); Here tab (col1, col2) will be parsed as Function
         # referring https://github.com/andialbrecht/sqlparse/issues/483 for further information
         leading_token = sub_token.token_first(skip_cm=True)
         if not isinstance(leading_token, Identifier):
             raise SQLLineageException(
                 mismatch_template % (type(sub_token).__name__, sub_token))
         self._lineage_result.write.add(Table.create(leading_token))
         return

     if isinstance(sub_token, Comparison):
         # create table tab1 like tab2, tab1 like tab2 will be parsed as Comparison
         # referring https://github.com/andialbrecht/sqlparse/issues/543 for further information
         if (not isinstance(sub_token.left, Identifier)
                 or not isinstance(sub_token.right, Identifier)):
             raise SQLLineageException(
                 mismatch_template % (type(sub_token).__name__, sub_token))
         self._lineage_result.write.add(Table.create(sub_token.left))
         self._lineage_result.read.add(Table.create(sub_token.right))
         return

     # Anything else has to be a plain Identifier.
     if not isinstance(sub_token, Identifier):
         raise SQLLineageException(
             mismatch_template % (type(sub_token).__name__, sub_token))
     self._lineage_result.write.add(Table.create(sub_token))
예제 #2
0
 def _handle_temp_table_token(self, sub_token: TokenList) -> None:
     """Register the table(s) named by ``sub_token`` as intermediate tables.

     A single ``Identifier`` is handled directly; for an ``IdentifierList``
     every member that is an ``Identifier`` is handled and all other members
     are ignored. Each handled identifier is added to
     ``self._lineage_result.intermediate`` and then passed to
     ``self._extract_from_dml``.

     :param sub_token: the token found where a temp table is expected
     :raises SQLLineageException: if ``sub_token`` is neither an
         ``Identifier`` nor an ``IdentifierList``
     """
     if isinstance(sub_token, Identifier):
         temp_table_tokens = (sub_token, )
     elif isinstance(sub_token, IdentifierList):
         # Lazy on purpose: each member is filtered right before it is
         # processed, exactly like a plain loop over the list would do.
         temp_table_tokens = (member for member in sub_token
                              if isinstance(member, Identifier))
     else:
         raise SQLLineageException(
             "An Identifier or IdentifierList is expected, got %s[value: %s] instead"
             % (type(sub_token).__name__, sub_token))

     for identifier in temp_table_tokens:
         self._lineage_result.intermediate.add(Table.create(identifier))
         self._extract_from_dml(identifier)
예제 #3
0
 def _extract_from_ddl_alter(self, stmt: Statement) -> None:
     """Record a rename pair for a statement containing a RENAME keyword.

     Every top-level ``Identifier`` of ``stmt`` is converted with
     ``Table.create``. When the statement has a keyword whose normalized
     value is ``RENAME`` and exactly two such tables were found, the pair
     ``(first, second)`` is added to ``self._lineage_result.rename``.
     Otherwise nothing is recorded.

     :param stmt: the parsed statement to inspect
     """
     tables = [
         Table.create(tok) for tok in stmt.tokens
         if isinstance(tok, Identifier)
     ]
     has_rename_keyword = any(
         tok.normalized == "RENAME" for tok in stmt.tokens
         if tok.is_keyword)
     if has_rename_keyword and len(tables) == 2:
         first_table, second_table = tables
         self._lineage_result.rename.add((first_table, second_table))
예제 #4
0
 def _handle_source_table_token(self, sub_token: TokenList) -> None:
     """Register the table(s) named by ``sub_token`` as read sources.

     Accepted token shapes:

     * ``Identifier`` -- added to ``self._lineage_result.read`` unless it
       wraps a subquery (see below).
     * ``IdentifierList`` -- each ``Identifier`` member is treated like a
       single ``Identifier``; other members are ignored.
     * ``Parenthesis`` -- a subquery without alias; nothing is recorded
       here.

     An ``Identifier`` whose first token is a ``Parenthesis`` is an aliased
     subquery, not a table, so it is skipped. This check is applied to the
     members of an ``IdentifierList`` as well, so that
     ``FROM (SELECT ...) dt, tab2`` no longer records the alias ``dt`` as a
     source table.

     :param sub_token: the token found where a source table is expected
     :raises SQLLineageException: if ``sub_token`` is none of the shapes
         above
     """
     if isinstance(sub_token, Identifier):
         identifiers = [sub_token]
     elif isinstance(sub_token, IdentifierList):
         # This is to support join in ANSI-89 syntax
         identifiers = [
             token for token in sub_token.tokens
             if isinstance(token, Identifier)
         ]
     elif isinstance(sub_token, Parenthesis):
         # SELECT col1 FROM (SELECT col2 FROM tab1), the subquery will be parsed as Parenthesis
         # This syntax without alias for subquery is invalid in MySQL, while valid for SparkSQL
         return
     else:
         raise SQLLineageException(
             "An Identifier is expected, got %s[value: %s] instead" %
             (type(sub_token).__name__, sub_token))

     for identifier in identifiers:
         if isinstance(identifier.token_first(skip_cm=True), Parenthesis):
             # SELECT col1 FROM (SELECT col2 FROM tab1) dt, the subquery will be parsed as Identifier
             # and this Identifier's get_real_name method would return alias name dt
             # referring https://github.com/andialbrecht/sqlparse/issues/218 for further information
             continue
         self._lineage_result.read.add(Table.create(identifier))
예제 #5
0
 def _extract_from_ddl_drop(self, stmt: Statement) -> None:
     """Record every table named at the top level of ``stmt`` as dropped.

     Each top-level ``Identifier`` is converted with ``Table.create``; the
     results are de-duplicated through a set and then added one by one to
     ``self._lineage_result.drop``.

     :param stmt: the parsed statement to inspect
     """
     identifier_tokens = (tok for tok in stmt.tokens
                          if isinstance(tok, Identifier))
     dropped_tables = set(map(Table.create, identifier_tokens))
     for dropped in dropped_tables:
         self._lineage_result.drop.add(dropped)
예제 #6
0
 def _extract_from_DML(self, token: Token) -> None:
     """Walk the children of ``token`` and record the tables they reference.

     The method is a small state machine over ``token.tokens``:

     * every child that is a ``TokenList`` is first traversed recursively;
     * a keyword child sets the source, target or temp flag when it matches
       ``SOURCE_TABLE_TOKENS`` (regex match), ``TARGET_TABLE_TOKENS`` or
       ``TEMP_TABLE_TOKENS`` (membership) respectively, and is then skipped;
     * while a flag is set, children reported as negligible by
       ``__token_negligible_before_tablename`` are skipped and the first
       other child is interpreted as the table for that flag, added to
       ``self._lineage_result.read`` / ``.write`` / ``.intermediate``, and
       the flag is cleared.

     The flags are tested in the order source, target, temp, so only the
     first one that is set is acted on for a given child.

     :param token: the token whose ``tokens`` are inspected
     :raises SQLLineageException: when the child found where a table is
         expected has an unsupported type
     """
     source_table_token_flag = (
         target_table_token_flag) = temp_table_token_flag = False
     for sub_token in token.tokens:
         # Recurse into every grouped child before examining it at this level.
         if isinstance(sub_token, TokenList):
             self._extract_from_DML(sub_token)
         if sub_token.ttype in Keyword:
             if any(
                     re.match(regex, sub_token.normalized)
                     for regex in SOURCE_TABLE_TOKENS):
                 source_table_token_flag = True
             elif sub_token.normalized in TARGET_TABLE_TOKENS:
                 target_table_token_flag = True
             elif sub_token.normalized in TEMP_TABLE_TOKENS:
                 temp_table_token_flag = True
             # A keyword only updates the flags; it is never read as a table name.
             continue
         if source_table_token_flag:
             if self.__token_negligible_before_tablename(sub_token):
                 # Flag stays set: keep looking for the table token.
                 continue
             else:
                 if isinstance(sub_token, Identifier):
                     if isinstance(sub_token.token_first(skip_cm=True),
                                   Parenthesis):
                         # SELECT col1 FROM (SELECT col2 FROM tab1) dt, the subquery will be parsed as Identifier
                         # and this Identifier's get_real_name method would return alias name dt
                         # referring https://github.com/andialbrecht/sqlparse/issues/218 for further information
                         pass
                     else:
                         self._lineage_result.read.add(
                             Table.create(sub_token))
                 elif isinstance(sub_token, Parenthesis):
                     # SELECT col1 FROM (SELECT col2 FROM tab1), the subquery will be parsed as Parenthesis
                     # This syntax without alias for subquery is invalid in MySQL, while valid for SparkSQL
                     pass
                 else:
                     raise SQLLineageException(
                         "An Identifier is expected, got %s[value: %s] instead"
                         % (type(sub_token).__name__, sub_token))
                 # The token after the source keyword has been consumed.
                 source_table_token_flag = False
         elif target_table_token_flag:
             if self.__token_negligible_before_tablename(sub_token):
                 continue
             elif isinstance(sub_token, Function):
                 # insert into tab (col1, col2) values (val1, val2); Here tab (col1, col2) will be parsed as Function
                 # referring https://github.com/andialbrecht/sqlparse/issues/483 for further information
                 if not isinstance(sub_token.token_first(skip_cm=True),
                                   Identifier):
                     raise SQLLineageException(
                         "An Identifier is expected, got %s[value: %s] instead"
                         % (type(sub_token).__name__, sub_token))
                 self._lineage_result.write.add(
                     Table.create(sub_token.token_first(skip_cm=True)))
             elif isinstance(sub_token, Comparison):
                 # create table tab1 like tab2, tab1 like tab2 will be parsed as Comparison
                 # referring https://github.com/andialbrecht/sqlparse/issues/543 for further information
                 if not (isinstance(sub_token.left, Identifier)
                         and isinstance(sub_token.right, Identifier)):
                     raise SQLLineageException(
                         "An Identifier is expected, got %s[value: %s] instead"
                         % (type(sub_token).__name__, sub_token))
                 # Left side is written to, right side is read from.
                 self._lineage_result.write.add(Table.create(
                     sub_token.left))
                 self._lineage_result.read.add(Table.create(
                     sub_token.right))
             else:
                 if not isinstance(sub_token, Identifier):
                     raise SQLLineageException(
                         "An Identifier is expected, got %s[value: %s] instead"
                         % (type(sub_token).__name__, sub_token))
                 self._lineage_result.write.add(Table.create(sub_token))
             # Reached for every non-negligible token (the negligible case continues above).
             target_table_token_flag = False
         elif temp_table_token_flag:
             if self.__token_negligible_before_tablename(sub_token):
                 continue
             else:
                 if not isinstance(sub_token, Identifier):
                     raise SQLLineageException(
                         "An Identifier is expected, got %s[value: %s] instead"
                         % (type(sub_token).__name__, sub_token))
                 self._lineage_result.intermediate.add(
                     Table.create(sub_token))
                 # NOTE(review): if Identifier is a TokenList subclass (as in sqlparse), sub_token was
                 # already traversed at the top of this loop iteration -- confirm whether this second
                 # traversal is still needed.
                 self._extract_from_DML(sub_token)
                 temp_table_token_flag = False