Beispiel #1
0
 def _handle_target_table_token(self, sub_token: TokenList) -> None:
     """
     Register the table(s) named by a token in target-table position.

     ``Function`` tokens contribute their first non-comment child as a written table,
     ``Comparison`` tokens contribute their left side as written and right side as read,
     and any other token must itself be an ``Identifier`` and is recorded as written.

     :param sub_token: the token to interpret as a target table
     :raises SQLLineageException: when the expected ``Identifier`` is not found
     """

     def unexpected_token() -> SQLLineageException:
         # Built lazily so the token is only stringified when an error is really raised.
         return SQLLineageException(
             "An Identifier is expected, got %s[value: %s] instead" %
             (type(sub_token).__name__, sub_token))

     if isinstance(sub_token, Function):
         # insert into tab (col1, col2) values (val1, val2); Here tab (col1, col2) will be parsed as Function
         # referring https://github.com/andialbrecht/sqlparse/issues/483 for further information
         table_token = sub_token.token_first(skip_cm=True)
         if not isinstance(table_token, Identifier):
             raise unexpected_token()
         self._lineage_result.write.add(Table.create(table_token))
         return

     if isinstance(sub_token, Comparison):
         # create table tab1 like tab2, tab1 like tab2 will be parsed as Comparison
         # referring https://github.com/andialbrecht/sqlparse/issues/543 for further information
         if not isinstance(sub_token.left, Identifier) or not isinstance(
                 sub_token.right, Identifier):
             raise unexpected_token()
         self._lineage_result.write.add(Table.create(sub_token.left))
         self._lineage_result.read.add(Table.create(sub_token.right))
         return

     # Anything else has to be a plain Identifier naming the written table.
     if not isinstance(sub_token, Identifier):
         raise unexpected_token()
     self._lineage_result.write.add(Table.create(sub_token))
Beispiel #2
0
 def _handle_temp_table_token(self, sub_token: TokenList) -> None:
     """
     Register an intermediate table and then scan the same token for nested lineage.

     :param sub_token: the token to interpret as an intermediate table; must be an ``Identifier``
     :raises SQLLineageException: when ``sub_token`` is not an ``Identifier``
     """
     if isinstance(sub_token, Identifier):
         intermediate_table = Table.create(sub_token)
         self._lineage_result.intermediate.add(intermediate_table)
         # The identifier itself is handed back to the DML extractor for further processing.
         self._extract_from_dml(sub_token)
         return
     raise SQLLineageException(
         "An Identifier is expected, got %s[value: %s] instead" %
         (type(sub_token).__name__, sub_token))
Beispiel #3
0
 def __init__(self, name: str, schema: Schema = Schema()):
     """
     Build a table from either a bare name or a ``schema.table`` qualified name.

     :param name: table name, optionally qualified with exactly one schema prefix
     :param schema: schema used when ``name`` is unqualified; ignored (with a warning
         when truthy) if ``name`` already carries a schema
     :raises SQLLineageException: when ``name`` contains more than one dot
     """
     # Split once and branch on the number of parts instead of re-splitting per check.
     parts = name.split(".")
     if len(parts) == 2:
         schema_name, table_name = parts
         self.schema = Schema(schema_name)
         self.raw_name = table_name
         if schema:
             warnings.warn("Name is in schema.table format, schema param is ignored")
     elif len(parts) == 1:
         self.schema = schema
         self.raw_name = name
     else:
         # Interpolate the name into the message; passing it as a second exception
         # argument would leave a literal "%s" in the rendered error.
         raise SQLLineageException("Invalid format for table name: %s" % (name,))
Beispiel #4
0
    def __init__(self, name: str, schema: Schema = Schema()):
        """
        Data Class for Table

        An unqualified ``name`` is combined with ``schema``. A qualified name is split at its
        last dot into a schema part and a table part; the schema part may itself contain at
        most one dot (``db.schema``).

        :param name: table name
        :param schema: schema as defined by :class:`Schema`; ignored (with a warning when
            truthy) if ``name`` is already qualified
        :raises SQLLineageException: when the schema part of ``name`` has more than two components
        """
        if "." not in name:
            self.schema = schema
            self.raw_name = escape_identifier_name(name)
            return

        schema_name, table_name = name.rsplit(".", 1)
        if len(schema_name.split(".")) > 2:
            # allow db.schema as schema_name, but a.b.c as schema_name is forbidden
            # Interpolate the name into the message; passing it as a second exception
            # argument would leave a literal "%s" in the rendered error.
            raise SQLLineageException("Invalid format for table name: %s." % (name,))
        self.schema = Schema(schema_name)
        self.raw_name = escape_identifier_name(table_name)
        if schema:
            warnings.warn("Name is in schema.table format, schema param is ignored")
Beispiel #5
0
 def _handle_source_table_token(self, sub_token: TokenList) -> None:
     """
     Register the table(s) named by a token in source-table position.

     ``Identifier`` tokens are recorded as read tables unless they wrap a subquery,
     ``IdentifierList`` tokens contribute each ``Identifier`` member, and bare
     ``Parenthesis`` tokens are accepted without recording anything.

     :param sub_token: the token to interpret as a source table
     :raises SQLLineageException: when ``sub_token`` is none of the accepted token types
     """
     if isinstance(sub_token, Identifier):
         # SELECT col1 FROM (SELECT col2 FROM tab1) dt, the subquery will be parsed as Identifier
         # and this Identifier's get_real_name method would return alias name dt
         # referring https://github.com/andialbrecht/sqlparse/issues/218 for further information
         wraps_subquery = isinstance(
             sub_token.token_first(skip_cm=True), Parenthesis)
         if not wraps_subquery:
             self._lineage_result.read.add(Table.create(sub_token))
         return

     if isinstance(sub_token, IdentifierList):
         # This is to support join in ANSI-89 syntax
         for member in sub_token.tokens:
             if not isinstance(member, Identifier):
                 continue
             self._lineage_result.read.add(Table.create(member))
         return

     if isinstance(sub_token, Parenthesis):
         # SELECT col1 FROM (SELECT col2 FROM tab1), the subquery will be parsed as Parenthesis
         # This syntax without alias for subquery is invalid in MySQL, while valid for SparkSQL
         return

     raise SQLLineageException(
         "An Identifier is expected, got %s[value: %s] instead" %
         (type(sub_token).__name__, sub_token))
Beispiel #6
0
 def _extract_from_DML(self, token: Token) -> None:
     """
     Walk ``token.tokens`` and record tables that follow table-introducing keywords.

     Three local flags remember which kind of keyword was seen last at this nesting level:
     a keyword matching a regex in ``SOURCE_TABLE_TOKENS`` arms the source flag, one listed in
     ``TARGET_TABLE_TOKENS`` arms the target flag, and one listed in ``TEMP_TABLE_TOKENS`` arms
     the temp flag. The next token that is not skipped by
     ``__token_negligible_before_tablename`` is then added to the ``read``, ``write`` or
     ``intermediate`` collection of ``self._lineage_result`` and the flag is cleared.

     :param token: a token exposing ``tokens``; every ``TokenList`` child is processed recursively
     :raises SQLLineageException: when the token in table position is not of an accepted type
     """
     source_table_token_flag = (
         target_table_token_flag) = temp_table_token_flag = False
     for sub_token in token.tokens:
         # Descend first; the same sub_token is still examined at this level afterwards
         # (this is a plain ``if``, not part of the chain below).
         if isinstance(sub_token, TokenList):
             self._extract_from_DML(sub_token)
         if sub_token.ttype in Keyword:
             # A keyword only arms a flag; it is never itself treated as a table name.
             if any(
                     re.match(regex, sub_token.normalized)
                     for regex in SOURCE_TABLE_TOKENS):
                 source_table_token_flag = True
             elif sub_token.normalized in TARGET_TABLE_TOKENS:
                 target_table_token_flag = True
             elif sub_token.normalized in TEMP_TABLE_TOKENS:
                 temp_table_token_flag = True
             continue
         # Flags are checked in source > target > temp order, so only one is consumed per token.
         if source_table_token_flag:
             # presumably skips whitespace/comment-like tokens; the helper is not visible here
             if self.__token_negligible_before_tablename(sub_token):
                 continue
             else:
                 if isinstance(sub_token, Identifier):
                     if isinstance(sub_token.token_first(skip_cm=True),
                                   Parenthesis):
                         # SELECT col1 FROM (SELECT col2 FROM tab1) dt, the subquery will be parsed as Identifier
                         # and this Identifier's get_real_name method would return alias name dt
                         # referring https://github.com/andialbrecht/sqlparse/issues/218 for further information
                         pass
                     else:
                         self._lineage_result.read.add(
                             Table.create(sub_token))
                 elif isinstance(sub_token, Parenthesis):
                     # SELECT col1 FROM (SELECT col2 FROM tab1), the subquery will be parsed as Parenthesis
                     # This syntax without alias for subquery is invalid in MySQL, while valid for SparkSQL
                     pass
                 else:
                     raise SQLLineageException(
                         "An Identifier is expected, got %s[value: %s] instead"
                         % (type(sub_token).__name__, sub_token))
                 # The source keyword has been satisfied; wait for the next keyword.
                 source_table_token_flag = False
         elif target_table_token_flag:
             if self.__token_negligible_before_tablename(sub_token):
                 continue
             elif isinstance(sub_token, Function):
                 # insert into tab (col1, col2) values (val1, val2); Here tab (col1, col2) will be parsed as Function
                 # referring https://github.com/andialbrecht/sqlparse/issues/483 for further information
                 if not isinstance(sub_token.token_first(skip_cm=True),
                                   Identifier):
                     raise SQLLineageException(
                         "An Identifier is expected, got %s[value: %s] instead"
                         % (type(sub_token).__name__, sub_token))
                 self._lineage_result.write.add(
                     Table.create(sub_token.token_first(skip_cm=True)))
             elif isinstance(sub_token, Comparison):
                 # create table tab1 like tab2, tab1 like tab2 will be parsed as Comparison
                 # referring https://github.com/andialbrecht/sqlparse/issues/543 for further information
                 if not (isinstance(sub_token.left, Identifier)
                         and isinstance(sub_token.right, Identifier)):
                     raise SQLLineageException(
                         "An Identifier is expected, got %s[value: %s] instead"
                         % (type(sub_token).__name__, sub_token))
                 # Left side is the table being written, right side the table being read.
                 self._lineage_result.write.add(Table.create(
                     sub_token.left))
                 self._lineage_result.read.add(Table.create(
                     sub_token.right))
             else:
                 if not isinstance(sub_token, Identifier):
                     raise SQLLineageException(
                         "An Identifier is expected, got %s[value: %s] instead"
                         % (type(sub_token).__name__, sub_token))
                 self._lineage_result.write.add(Table.create(sub_token))
             # Reached by every non-skipped branch above; the negligible case left via ``continue``.
             target_table_token_flag = False
         elif temp_table_token_flag:
             if self.__token_negligible_before_tablename(sub_token):
                 continue
             else:
                 if not isinstance(sub_token, Identifier):
                     raise SQLLineageException(
                         "An Identifier is expected, got %s[value: %s] instead"
                         % (type(sub_token).__name__, sub_token))
                 self._lineage_result.intermediate.add(
                     Table.create(sub_token))
                 # NOTE(review): sub_token was already recursed into at the top of this
                 # iteration, so this is a second traversal of the same token - confirm intended.
                 self._extract_from_DML(sub_token)
                 temp_table_token_flag = False