Example #1
0
File: parser.py  Project: szhorizon/marquez
    def parse(sql: str) -> SqlMeta:
        """Parse a single SQL statement and extract its input/output tables.

        :param sql: the SQL text to parse; must contain at least one statement
        :returns: a ``SqlMeta`` with the input and output tables found
        :raises ValueError: if ``sql`` is ``None`` or contains no parsable
            statement (e.g. empty or whitespace-only input)
        """
        if sql is None:
            raise ValueError("A sql statement must be provided.")

        # Tokenize the SQL statement
        statements = sqlparse.parse(sql)
        if not statements:
            # Empty/whitespace-only input used to fall through to an opaque
            # IndexError on statements[0]; fail with a clear message instead.
            raise ValueError("A sql statement must be provided.")

        # We assume only one statement in SQL
        tokens = TokenList(statements[0].tokens)
        log.debug(f"Successfully tokenized sql statement: {tokens}")

        in_tables = []
        out_tables = []

        # Walk every keyword token; classification of each keyword is
        # delegated to _is_in_table / _is_out_table.
        idx, token = tokens.token_next_by(t=T.Keyword)
        while token:
            if _is_in_table(token):
                idx, in_table = _get_table(tokens, idx)
                in_tables.append(in_table)
            elif _is_out_table(token):
                idx, out_table = _get_table(tokens, idx)
                out_tables.append(out_table)

            idx, token = tokens.token_next_by(t=T.Keyword, idx=idx)

        return SqlMeta(in_tables, out_tables)
Example #2
0
    def recurse(self, tokens: TokenList) -> SqlMeta:
        """Walk the token stream and collect the tables it reads and writes.

        :param tokens: the tokenized SQL statement
        :returns: a ``SqlMeta`` of input tables and output tables
        """
        sources, targets = set(), set()

        position, keyword = tokens.token_next_by(t=T.Keyword)
        while keyword:

            # Main parser switch
            if self.is_cte(keyword):
                # CTE bodies may read from real tables; record those while
                # skipping names that are themselves CTEs.
                cte_name, cte_sources = self.parse_cte(position, tokens)
                sources.update(
                    name for name in cte_sources if name not in self.ctes)
            elif _is_in_table(keyword):
                position, found = _get_tables(tokens, position,
                                              self.default_schema)
                sources.update(
                    table for table in found if table.name not in self.ctes)
            elif _is_out_table(keyword):
                position, found = _get_tables(tokens, position,
                                              self.default_schema)
                # assuming only one out_table
                targets.add(found[0])

            position, keyword = tokens.token_next_by(t=T.Keyword, idx=position)

        return SqlMeta(list(sources), list(targets))
Example #3
0
    def __get_full_name(tlist: TokenList) -> Optional[str]:
        """
        Return the full unquoted table name if valid, i.e., conforms to the
        [[cluster.]schema.]table construct.

        :param tlist: The SQL tokens
        :returns: The valid full table name
        """
        # Everything from the first whitespace token onward is the alias;
        # cut it off so only the dotted name remains.
        end = len(tlist.tokens)
        if tlist.has_alias():
            ws_idx, _ = tlist.token_next_by(t=Whitespace)
            if ws_idx != -1:
                end = ws_idx

        tokens = tlist.tokens[:end]

        # A valid name is 1, 3 or 5 tokens long: name/string tokens at even
        # positions with "." punctuation between them.
        if len(tokens) not in (1, 3, 5):
            return None
        if not all(imt(token, t=[Name, String]) for token in tokens[0::2]):
            return None
        if not all(imt(token, m=(Punctuation, ".")) for token in tokens[1::2]):
            return None

        return ".".join(remove_quotes(token.value) for token in tokens[0::2])
    def _get_table(tlist: TokenList) -> Optional[Table]:
        """
        Return the table if valid, i.e., conforms to the
        [[catalog.]schema.]table construct.

        :param tlist: The SQL tokens
        :returns: The table if the name conforms
        """
        # Everything from the first whitespace token onward is the alias;
        # cut it off so only the dotted name remains.
        end = len(tlist.tokens)
        if tlist.has_alias():
            ws_idx, _ = tlist.token_next_by(t=Whitespace)
            if ws_idx != -1:
                end = ws_idx

        tokens = tlist.tokens[:end]

        # A valid name is 1, 3 or 5 tokens long: name/string tokens at even
        # positions with "." punctuation between them.
        if len(tokens) not in (1, 3, 5):
            return None
        if not all(imt(token, t=[Name, String]) for token in tokens[::2]):
            return None
        if not all(imt(token, m=(Punctuation, ".")) for token in tokens[1::2]):
            return None

        # NOTE(review): the original passed tokens[::-2], i.e. the name parts
        # in reverse order (presumably Table expects table-first) — for the
        # allowed odd lengths this equals reversed(tokens[::2]), preserved
        # here explicitly. Confirm against Table's constructor.
        parts = [remove_quotes(token.value) for token in tokens[::2]]
        return Table(*reversed(parts))
Example #5
0
def _define_primary_key(
    metadata: AllFieldMetadata,
    column_definition_group: token_groups.TokenList,
) -> typing.Optional[AllFieldMetadata]:
    """Extract a table-level PRIMARY KEY constraint into field metadata.

    :param metadata: the field metadata accumulated so far (not mutated)
    :param column_definition_group: tokens of one column-definition clause
    :returns: a copy of ``metadata`` with each primary-key column marked
        unique and not-null, or ``None`` when the clause defines no
        table-level primary key.
    :raises exceptions.NotSupportedError: for a CONSTRAINT clause that is
        not a PRIMARY KEY constraint.
    """
    pos, constraint_kw = column_definition_group.token_next_by(
        m=(token_types.Keyword, "CONSTRAINT"))
    pos, primary_kw = column_definition_group.token_next_by(
        m=(token_types.Keyword, "PRIMARY"), idx=(pos or -1))

    if primary_kw is None:
        if constraint_kw is not None:
            raise exceptions.NotSupportedError(
                "When a column definition clause begins with CONSTRAINT, "
                "only a PRIMARY KEY constraint is supported")
        return None

    # If the keyword isn't followed by column name(s), then it's part of
    # a regular column definition and should be handled by _define_column
    if not _contains_column_name(column_definition_group, pos):
        return None

    updated: AllFieldMetadata = deepcopy(metadata)

    while True:
        pos, key_column = column_definition_group.token_next_by(
            t=token_types.Name, idx=pos)

        # 'id' is defined and managed by Fauna, so we ignore any attempts
        # to manage it from SQLAlchemy
        if key_column is None or key_column.value == "id":
            break

        name = key_column.value
        updated[name] = {
            **DEFAULT_FIELD_METADATA,  # type: ignore
            **updated.get(name, {}),  # type: ignore
            "unique": True,
            "not_null": True,
        }

    return updated
Example #6
0
def _define_unique_constraint(
    metadata: AllFieldMetadata,
    column_definition_group: token_groups.TokenList,
) -> typing.Optional[AllFieldMetadata]:
    """Extract a table-level UNIQUE constraint into field metadata.

    :param metadata: the field metadata accumulated so far (not mutated)
    :param column_definition_group: tokens of one column-definition clause
    :returns: a copy of ``metadata`` with each listed column marked unique,
        or ``None`` when the clause holds no table-level UNIQUE constraint.
    """
    pos, unique_kw = column_definition_group.token_next_by(
        m=(token_types.Keyword, "UNIQUE"))
    if unique_kw is None:
        return None

    # If the keyword isn't followed by column name(s), then it's part of
    # a regular column definition and should be handled by _define_column
    if not _contains_column_name(column_definition_group, pos):
        return None

    updated = deepcopy(metadata)

    while True:
        pos, key_column = column_definition_group.token_next_by(
            t=token_types.Name, idx=pos)

        # 'id' is defined and managed by Fauna, so we ignore any attempts
        # to manage it from SQLAlchemy
        if key_column is None or key_column.value == "id":
            break

        name = key_column.value
        updated[name] = {
            **DEFAULT_FIELD_METADATA,  # type: ignore
            **updated.get(name, {}),  # type: ignore
            "unique": True,
        }

    return updated
Example #7
0
def _define_column(
    metadata: AllFieldMetadata,
    column_definition_group: token_groups.TokenList,
) -> AllFieldMetadata:
    """Merge one inline column definition into the field metadata map.

    :param metadata: the field metadata accumulated so far (not mutated)
    :param column_definition_group: tokens of one column-definition clause
    :returns: a new metadata map including this column's entry
    :raises exceptions.NotSupportedError: if the definition uses CHECK.
    """
    pos, column = column_definition_group.token_next_by(t=token_types.Name)
    column_name = column.value

    # "id" is auto-generated by Fauna, so we ignore it in SQL column definitions
    if column_name == "id":
        return metadata

    pos, data_type = column_definition_group.token_next_by(
        t=token_types.Name, idx=pos)

    def find_keyword(keyword):
        # Locate a keyword anywhere in the clause (each probe starts
        # from the beginning, matching the original lookups).
        _, match = column_definition_group.token_next_by(
            m=(token_types.Keyword, keyword))
        return match

    not_null_keyword = find_keyword("NOT NULL")
    unique_keyword = find_keyword("UNIQUE")
    primary_key_keyword = find_keyword("PRIMARY KEY")
    default_keyword = find_keyword("DEFAULT")
    check_keyword = find_keyword("CHECK")

    if check_keyword is not None:
        raise exceptions.NotSupportedError("CHECK keyword is not supported.")

    existing: typing.Union[FieldMetadata,
                           typing.Dict[str, str]] = metadata.get(
                               column_name, {})

    # PRIMARY KEY implies both NOT NULL and UNIQUE; otherwise fall back to
    # any previously recorded metadata for this column.
    is_primary_key = primary_key_keyword is not None
    is_not_null = (not_null_keyword is not None or is_primary_key
                   or existing.get("not_null") or False)
    is_unique = (unique_keyword is not None or is_primary_key
                 or existing.get("unique") or False)

    if default_keyword is None:
        default_value = None
    else:
        default_value = sql.extract_value(default_keyword.value)

    return {
        **metadata,
        column_name: {
            **DEFAULT_FIELD_METADATA,  # type: ignore
            **metadata.get(column_name, EMPTY_DICT),  # type: ignore
            "unique": is_unique,
            "not_null": is_not_null,
            "default": default_value,
            "type": DATA_TYPE_MAP[data_type.value],
        },
    }
Example #8
0
def _extract_limit_from_query(statement: TokenList) -> Optional[int]:
    """
    Extract limit clause from SQL statement.

    :param statement: SQL statement
    :return: Limit extracted from query, None if no limit present in statement
    """
    limit_idx, _ = statement.token_next_by(m=(Keyword, "LIMIT"))
    if limit_idx is None:
        return None

    _, value_token = statement.token_next(idx=limit_idx)
    if not value_token:
        return None

    if isinstance(value_token, IdentifierList):
        # In case of "LIMIT <offset>, <limit>", find comma and extract
        # first succeeding non-whitespace token
        comma_idx, _ = value_token.token_next_by(
            m=(sqlparse.tokens.Punctuation, ","))
        _, value_token = value_token.token_next(idx=comma_idx)

    if value_token and \
            value_token.ttype == sqlparse.tokens.Literal.Number.Integer:
        return int(value_token.value)
    return None
Example #9
0
def _define_foreign_key_constraint(
    metadata: AllFieldMetadata, column_definition_group: token_groups.TokenList
) -> typing.Optional[AllFieldMetadata]:
    """Extract a FOREIGN KEY ... REFERENCES clause into field metadata.

    :param metadata: the field metadata accumulated so far (not mutated)
    :param column_definition_group: tokens of one column-definition clause
    :returns: a copy of ``metadata`` with a ``references`` entry for the
        foreign-key column, or ``None`` when the clause is not a FOREIGN KEY
        constraint.
    :raises exceptions.NotSupportedError: if the column already has a
        reference, or if the referenced column is not ``id``.
    """
    pos, foreign_kw = column_definition_group.token_next_by(
        m=(token_types.Keyword, "FOREIGN"))
    if foreign_kw is None:
        return None

    # Expected shape: FOREIGN KEY (<column>) REFERENCES <table> (<column>)
    pos, _ = column_definition_group.token_next_by(
        m=(token_types.Name, "KEY"), idx=pos)
    pos, fk_column = column_definition_group.token_next_by(
        t=token_types.Name, idx=pos)
    column_name = fk_column.value

    pos, _ = column_definition_group.token_next_by(
        m=(token_types.Keyword, "REFERENCES"), idx=pos)
    pos, ref_table = column_definition_group.token_next_by(
        t=token_types.Name, idx=pos)
    reference_table_name = ref_table.value
    pos, ref_column = column_definition_group.token_next_by(
        t=token_types.Name, idx=pos)
    reference_column_name = ref_column.value

    # A column may carry at most one reference.
    existing_references = metadata.get(column_name, EMPTY_DICT).get(
        "references", EMPTY_DICT)
    if any(existing_references):
        raise exceptions.NotSupportedError(
            "Foreign keys with multiple references are not currently supported."
        )

    if reference_column_name != "id":
        raise exceptions.NotSupportedError(
            "Foreign keys referring to fields other than ID are not currently supported."
        )

    return {
        **metadata,
        column_name: {
            **DEFAULT_FIELD_METADATA,  # type: ignore
            **metadata.get(column_name, EMPTY_DICT),
            "references": {
                reference_table_name: reference_column_name
            },
        },
    }