Example #1
    def _process_select_clause(self, stmt, token_group, token, tokengroup_set):
        select_clause_grp = TokenGroup([
            token,
        ], ST.SelectClause)
        if token_group.ttype in (ST.RoundBracket, ST.CollectionSet):
            # TODO: should we check the preceding token to make sure SELECT is the first keyword in brackets?
            token_group.ttype = ST.SubQuery
        elif (token_group.ttype
              in (ST.Select, ST.SelectInto, ST.InsertIntoSelect, ST.SubQuery)
              and (token_group.last_token().match_type_value(
                       Token(T.Keyword, 'UNION'))
                   or (token_group.last_token().match_type_value(
                           Token(T.Keyword, 'ALL'))
                       and token_group.token_before(
                           next_token_index=token_group.last_token_index()
                       ).match_type_value(Token(T.Keyword, 'UNION'))))):
            # SELECT continues a UNION / UNION ALL, so stay in the current group.
            # Alternative check: token_group.has_token_as_immediate_child(Token(T.Keyword, 'UNION'))
            pass
        else:
            # If the Select clause is preceded by e.g. an Insert Into clause,
            # climb out of that clause's Token Group.
            while token_group.ttype is not None and token_group.ttype not in tokengroup_set:
                token_group = self._switch_to_parent(token_group)
            if token_group.ttype in tokengroup_set:
                token_group = self._switch_to_parent(token_group)

        token_group.append(select_clause_grp)

        # If we are at the Statement Token Group, set the Statement Type
        if token_group == stmt:
            if token_group.ttype is None:
                token_group.ttype = ST.Select
            elif token_group.ttype is ST.Insert:
                token_group.ttype = ST.InsertIntoSelect
        return select_clause_grp
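
The three branches route the SELECT keyword by syntactic position. The following snippets (illustrative only, not taken from the source) show which branch each query would take:

    # Branch 1: SELECT opens inside brackets, so the bracket group becomes a SubQuery.
    sql_subquery = "SELECT * FROM (SELECT id FROM t) x"
    # Branch 2: SELECT continues a UNION / UNION ALL, so it stays in the current group.
    sql_union = "SELECT id FROM a UNION ALL SELECT id FROM b"
    # Branch 3: otherwise climb to the statement level, e.g. INSERT ... SELECT,
    # where the statement type is then set to InsertIntoSelect.
    sql_insert_select = "INSERT INTO t (id) SELECT id FROM s"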
Example #2
 def _process_name(self, stmt, token_group, token, tokengroup_set):
     if token_group.last_token().match_type_value(Token(T.Keyword, 'AS')):
         # Alias follows AS Keyword
         token.ttype = ST.AliasName
         token_group.append(token)
     elif (token_group.parent.ttype in (ST.SelectClause, ST.FromClause)
           and ((token_group.ttype == ST.Identifier
                 and token_group.last_token().ttype == T.Name)
                or (token_group.ttype == ST.Function
                    and token_group.last_token().ttype == ST.ArgumentList)
                or (token_group.ttype == ST.SubQuery
                    and token_group.last_token().match_type_value(
                        Token(T.Punctuation, ')')))
                or (token_group.ttype == ST.ComputedIdentifier
                    and token_group.last_token().ttype != T.Operator)
                or token_group.ttype == ST.SelectConstantIdentifier)):
         # Alias without AS keyword. Here the TokenGroup has to be a direct
         # child of the Select/From clause and satisfy the other conditions.
         token.ttype = ST.AliasName
         token_group.append(token)
     elif (token_group.parent.ttype == ST.ComputedIdentifier
           and token_group.ttype == ST.Identifier
           and token_group.last_token().ttype == T.Name):
         # Case: SELECT A.x+B.y SomeAlias FROM ...
         # Here the ComputedIdentifier is A.x+B.y and the Identifier B.y is
         # followed by the alias, so re-process at the parent level.
         return self._process_name(stmt,
                                   self._switch_to_parent(token_group),
                                   token, tokengroup_set)
     elif (token_group.ttype == ST.SelectClause
           and token_group.last_token().ttype
           not in (T.Keyword, T.Punctuation)):
         # Case: SELECT CASE ... END AS case_alias
         token_group = token_group.merge_into_token_group(
             ST.ComputedIdentifier,
             token_list_start_index_included=token_group.last_token_index())
         token.ttype = ST.AliasName
         token_group.append(token)
     elif token_group.ttype == ST.Identifier:
         token_group.append(token)
     elif token_group.ttype == PT.WithClause:
         # E.g. WITH aliasname AS (SQL)
         token.ttype = PT.WithQueryAliasName
         with_identifier_grp = TokenGroup([
             token,
         ], PT.WithIdentifier)
         token_group.append(with_identifier_grp)
         token_group = with_identifier_grp
     else:
         identifier_grp = TokenGroup([
             token,
         ], ST.Identifier)
         token_group.append(identifier_grp)
         token_group = identifier_grp
     return token_group
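
Each branch of _process_name corresponds to a different alias or identifier position. A few illustrative queries (assumed examples, not from the source) mapped to the branches above:

    sql_as_alias = "SELECT a AS x FROM t"                 # alias after the AS keyword
    sql_bare_alias = "SELECT a x FROM t"                  # alias without AS on an Identifier
    sql_computed_alias = "SELECT a.x + b.y total FROM t"  # alias closing a ComputedIdentifier
    sql_case_alias = "SELECT CASE WHEN a > 0 THEN 1 END c FROM t"  # CASE ... END merged into a ComputedIdentifier
    sql_with_alias = "WITH cte AS (SELECT 1) SELECT * FROM cte"    # WITH query alias name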
Example #3
 def __init__(self,
              lexer_object=SqlLexer,
              end_marker_token=Token(T.Punctuation, ';')):
     self._lexer = lexer_object(stripall=True)
     # self._lexer.add_filter('keywordcase', case='upper')
     self._lexer.add_filter(text_to_whitespace_token())
     self._end_marker_token = end_marker_token
     self._parse_rules = self._set_rules_()
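
The constructor wires a Pygments-style lexer with a custom filter (text_to_whitespace_token is project-specific). A minimal standalone sketch of the same setup using plain Pygments, with the built-in keywordcase filter standing in for the custom one:

    from pygments.lexers import SqlLexer

    lexer = SqlLexer(stripall=True)                # stripall trims surrounding whitespace
    lexer.add_filter('keywordcase', case='upper')  # built-in filter, referenced by name
    for ttype, value in lexer.get_tokens("select id from t;"):
        print(ttype, repr(value))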
Example #4
 def _process_in(self, stmt, token_group, token, tokengroup_set):
     while token_group.ttype not in (ST.JoinOnClause, ST.WhereClause,
                                     ST.HavingClause, ST.Condition,
                                     ST.RoundBracket, ST.ConditionGroup,
                                     ST.Not, ST.CaseExpression,
                                     ST.WhenExpression, ST.ThenExpression,
                                     ST.ElseExpression):
         # Get out of the Token Group until you find Condition Clause
         token_group = self._switch_to_parent(token_group)
     if token_group.ttype == ST.RoundBracket:
         token_group.ttype = ST.ConditionGroup
     if token_group.ttype == ST.Condition:
         # This will mostly be a NOT IN condition.
         is_not = token_group.last_token().match_type_value(
             Token(ST.LogicalOperator, 'NOT'))
         token_group.ttype = ST.NotIn if is_not else ST.In
     else:
         token_group = token_group.merge_into_token_group(
             ST.In,
             token_list_start_index_included=token_group.last_token_index())
     token_group.append(token)
     return token_group
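
Two outcomes are possible here, depending on whether a NOT condition is already open (illustrative queries, not from the source):

    # The left operand is merged into a new In group:
    sql_in = "SELECT * FROM t WHERE status IN ('new', 'open')"
    # An existing Condition ending in NOT is retyped to NotIn:
    sql_not_in = "SELECT * FROM t WHERE id NOT IN (SELECT id FROM archived)"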
Example #5
 def _process_exists(self, stmt, token_group, token, tokengroup_set):
     while token_group.ttype not in (
             ST.WhereClause,  # ST.JoinOnClause, ST.HavingClause,
             ST.RoundBracket,
             ST.ConditionGroup,
             ST.Not,
     ):  # ST.Condition,
         # ST.CaseExpression, ST.WhenExpression, ST.ThenExpression, ST.ElseExpression):
         # Get out of the Token Group until you find Condition Clause
         token_group = self._switch_to_parent(token_group)
     if token_group.ttype == ST.RoundBracket:
         token_group.ttype = ST.ConditionGroup
     if (token_group.ttype == ST.Not
             and token_group.last_token().match_type_value(
                 Token(ST.LogicalOperator, 'NOT'))):
         token_group.ttype = ST.NotExists
         token_group.append(token)
         return token_group
     exists_grp = TokenGroup([
         token,
     ], ST.Exists)
     token_group.append(exists_grp)
     return exists_grp
Example #6
    def datafields_involved(self):
        """ Returns a list of datafields involved in the Postgres
        SQL Statement.
        Datafield information is generated from the parsed token list
        when the function is called for the first time and stored.
        On subsequent calls, the stored information is returned.

        Returns:
            [List] -- List of Datafields involved in the Postgres
            SQL Statement. Each entry is a dict of the form
            {type, datafield, datafield_alias, dataset, schema, catalog,
            dataset_type, dataset_alias, rw_ind, defined_at}
        """
        if self._datafields is None:
            # Datafields info needs to be generated.
            self.datasets_involved()
            self._datafields = []
            for token in self.get_identifiers():
                if token.parent.ttype not in (ST.FromClause, ):
                    _datafield = {
                        'type': 'NotKnown',
                        'datafield': '',
                        'datafield_alias': None,
                        'dataset': None,
                        'schema': None,
                        'catalog': None,
                        'dataset_type': None,
                        'dataset_alias': None,
                        'rw_ind': 'r',
                        'defined_at': token,
                    }
                    if token.ttype == ST.Identifier:
                        _datafield['type'] = 'Datafield'
                        for sub_token in token.token_list:
                            if sub_token.ttype in (T.Name, ST.AllColumnsIdentifier):
                                _datafield['datafield'] = sub_token.value()
                            elif sub_token.ttype == ST.AliasName:
                                _datafield['datafield_alias'] = sub_token.value()
                            elif sub_token.ttype == ST.QualifierName:
                                _datafield['dataset_alias'] = sub_token.value()
                        # Get Dataset info
                        if _datafield['dataset_alias']:
                            dataset_alias_found = False
                            for dset in self._datasets:
                                # Using Alias
                                if _datafield['dataset_alias'] == dset['alias']:
                                    _datafield['dataset'] = dset['dataset']
                                    _datafield['dataset_type'] = dset['type']
                                    _datafield['schema'] = dset['schema']
                                    _datafield['catalog'] = dset['catalog']
                                    dataset_alias_found = True
                                    break
                            if not dataset_alias_found:
                                for dset in self._datasets:
                                    # Using Dataset Name
                                    if _datafield['dataset_alias'] == dset['dataset']:
                                        _datafield['dataset'] = dset['dataset']
                                        _datafield['dataset_type'] = dset['type']
                                        _datafield['schema'] = dset['schema']
                                        _datafield['catalog'] = dset['catalog']
                                        break
                    elif token.ttype in (ST.ComputedIdentifier,
                                         ST.SelectConstantIdentifier,
                                         ST.Function):
                        _datafield['type'] = (
                            'Computed Field'
                            if token.ttype == ST.ComputedIdentifier else
                            'Function Field'
                            if token.ttype == ST.Function else 'Constant Field')
                        not_an_alias_ind = True
                        for sub_token in token.token_list:
                            if sub_token.ttype == ST.AliasName:
                                _datafield['datafield_alias'] = sub_token.value()
                                not_an_alias_ind = False
                            elif sub_token.match_type_value(Token(T.Keyword, 'AS')):
                                not_an_alias_ind = False
                            if not_an_alias_ind:
                                # Accumulate the expression text until the
                                # alias (or the AS keyword) is reached.
                                _datafield['datafield'] += sub_token.value(True)
                    else:
                        # No need to process
                        continue
                    self._datafields.append(_datafield)
        return self._datafields
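
The dataset lookup at the heart of this method is a two-pass search: first by declared alias, then falling back to the dataset name. A small standalone sketch that mirrors that loop (resolve_dataset and the sample dicts are hypothetical, written only for illustration):

    def resolve_dataset(dataset_alias, datasets):
        # First pass: match on the declared alias.
        for dset in datasets:
            if dataset_alias == dset['alias']:
                return dset
        # Second pass: fall back to matching the dataset name itself.
        for dset in datasets:
            if dataset_alias == dset['dataset']:
                return dset
        return None

    datasets = [{'dataset': 'orders', 'alias': 'o'},
                {'dataset': 'users', 'alias': None}]
    print(resolve_dataset('o', datasets)['dataset'])      # orders (via alias)
    print(resolve_dataset('users', datasets)['dataset'])  # users (via name)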
Example #7
    def datasets_involved(self):
        """ Returns a list of datasets involved in the Postgres
        SQL Statement.
        Dataset information is generated from the parsed token list
        when the function is called for the first time and stored.
        On subsequent calls, the stored information is returned.

        Returns:
            [List] -- List of Datasets involved in the Postgres
            SQL Statement. Each entry is a dict of the form
            {type, dataset, schema, catalog, alias, rw_ind, defined_at}
        """
        if self._datasets is None:
            # Datasets info needs to be generated.
            self._datasets = []
            for token in self.get_identifiers():
                if token.parent.ttype in (ST.FromClause, PT.WithClause):
                    _dataset = {
                        'type': 'NotKnown',
                        'dataset': '',
                        'schema': self._default_schema,
                        'catalog': self._default_catalog,
                        'alias': None,
                        'rw_ind': 'r',
                        'defined_at': token,
                    }
                    if token.ttype == ST.SubQuery:
                        _dataset['type'] = 'Sub Query'
                        subquery_ind = True
                        for subquery_token in token.token_list:
                            if subquery_token.ttype == ST.AliasName:
                                _dataset['alias'] = subquery_token.value()
                                subquery_ind = False
                            elif subquery_token.match_type_value(Token(T.Keyword, 'AS')):
                                subquery_ind = False
                            if subquery_ind:
                                # Accumulate the subquery text until the alias
                                # (or the AS keyword) is reached.
                                _dataset['dataset'] += subquery_token.value(True)
                    elif token.ttype == PT.WithIdentifier:
                        _dataset['type'] = 'With Query'
                        for subtoken in token.token_list:
                            if subtoken.ttype == PT.WithQueryAliasName:
                                _dataset['alias'] = subtoken.value()
                            elif subtoken.ttype == PT.WithQueryAliasIdentifier:
                                for subsubtoken in subtoken.token_list:
                                    if subsubtoken.ttype == PT.WithQueryAliasName:
                                        _dataset['alias'] = subsubtoken.value()
                            elif subtoken.ttype == ST.SubQuery:
                                _dataset['dataset'] = subtoken.value()
                    else:
                        _dataset['type'] = 'Dataset'
                        qualifier = []
                        for sub_token in token.token_list:
                            if sub_token.ttype == T.Name:
                                _dataset['dataset'] = sub_token.value()
                            elif sub_token.ttype == ST.AliasName:
                                _dataset['alias'] = sub_token.value()
                            elif sub_token.ttype == ST.QualifierName:
                                qualifier.append(sub_token.value())
                        if len(qualifier) >= 1:
                            _dataset['schema'] = qualifier[-1]
                        if len(qualifier) == 2:
                            _dataset['catalog'] = qualifier[-2]
                    self._datasets.append(_dataset)
        return self._datasets
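
The qualifier handling at the end maps dotted prefixes onto schema and catalog: for catalog.schema.table the qualifiers arrive in order, so the last one is the schema and the one before it the catalog. A standalone sketch of just that mapping (split_qualifiers is a hypothetical helper):

    def split_qualifiers(qualifier, default_schema=None, default_catalog=None):
        schema, catalog = default_schema, default_catalog
        if len(qualifier) >= 1:
            schema = qualifier[-1]
        if len(qualifier) == 2:
            catalog = qualifier[-2]
        return catalog, schema

    print(split_qualifiers(['mydb', 'public']))  # ('mydb', 'public')
    print(split_qualifiers(['public']))          # (None, 'public')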
Example #8
 def _switch_to_parent(self, token_group):
     if token_group.pop_whitespace_token():
         # If the Token Group ends with whitespace, move it into its parent group
         token_group.parent.append(Token(T.Whitespace, ' '))
     return token_group.parent
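
A minimal stand-in (MiniGroup below is a simplified assumption, not the project's TokenGroup) showing how a trailing whitespace token migrates to the parent when climbing out of a group:

    class MiniGroup:
        def __init__(self, parent=None):
            self.parent = parent
            self.tokens = []

        def append(self, tok):
            self.tokens.append(tok)

        def pop_whitespace_token(self):
            # Drop a trailing whitespace token, reporting whether one was removed.
            if self.tokens and self.tokens[-1] == ' ':
                self.tokens.pop()
                return True
            return False

    def switch_to_parent(group):
        if group.pop_whitespace_token():
            group.parent.append(' ')
        return group.parent

    root = MiniGroup()
    child = MiniGroup(parent=root)
    child.append('SELECT')
    child.append(' ')
    assert switch_to_parent(child) is root
    assert root.tokens == [' '] and child.tokens == ['SELECT']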
Example #9
 def token_stream(self, sql_text):
     # Wrap each (ttype, value) pair from the lexer in the project's Token class.
     for ttype, value in self._lexer.get_tokens(sql_text):
         yield Token(ttype, value)