Ejemplo n.º 1
0
    def _parse_order(self, sql):
        """
        parsing the sql by the grammar
        Order ::= asc A | desc A
        A ::= agg column table
        :return: [Order(), states]
        """
        result = []

        if 'order' not in sql['query_toks_no_value'] or 'by' not in sql[
                'query_toks_no_value']:
            return result, None
        elif 'limit' in sql['query_toks_no_value']:
            return result, None
        else:
            if sql['sql']['orderBy'] == []:
                return result, None
            else:
                select = sql['sql']['select'][1]
                if sql['sql']['orderBy'][0] == 'desc':
                    result.append(Order(0))
                else:
                    result.append(Order(1))
                result.append(A(sql['sql']['orderBy'][1][0][1][0]))
                result.append(
                    C(sql['col_set'].index(
                        sql['names'][sql['sql']['orderBy'][1][0][1][1]])))
                if sql['sql']['orderBy'][1][0][1][1] == 0:
                    result.append(self._parser_column0(sql, select))
                else:
                    result.append(
                        T(sql['col_table'][sql['sql']['orderBy'][1][0][1][1]]))
        return result, None
Ejemplo n.º 2
0
    def _parse_sup(self, sql):
        """
        parsing the sql by the grammar
        Sup ::= Most A | Least A
        A ::= agg column table
        :return: [Sup(), states]
        """
        result = []
        select = sql['sql']['select'][1]
        if sql['sql']['limit'] == None:
            return result, None
        if sql['sql']['orderBy'][0] == 'desc':
            result.append(Sup(0))
        else:
            result.append(Sup(1))

        result.append(A(sql['sql']['orderBy'][1][0][1][0]))
        result.append(
            C(sql['col_set'].index(
                sql['names'][sql['sql']['orderBy'][1][0][1][1]])))
        if sql['sql']['orderBy'][1][0][1][1] == 0:
            result.append(self._parser_column0(sql, select))
        else:
            result.append(
                T(sql['col_table'][sql['sql']['orderBy'][1][0][1][1]]))
        return result, None
Ejemplo n.º 3
0
    def test_get_available_class__values_included_if_not_sketch(self):
        """With is_sketch=False, a Filter(2) followed by its A, C, T must be followed by a V."""
        # GIVEN
        beam = Beams(is_sketch=False)
        # Root/select prefix with a single selected column ...
        select_actions = [Root1(3), Root(3), Sel(1), N(0), A(0), C(0), T(0)]
        # ... followed by a filter whose column leaves are already present.
        filter_actions = [Filter(2), A(0), C(0), T(0)]
        beam.actions = select_actions + filter_actions

        # WHEN
        next_class = beam.get_availableClass()

        # THEN
        self.assertEqual(V, next_class)
Ejemplo n.º 4
0
    def padding_sketch(self, sketch):
        """
        Expand a sketch by inserting placeholder leaf actions (A, C and T).

        The concrete ids of the leaf actions are not known yet, so every
        placeholder is created with id 0; the grammar alone determines where
        they have to go.
        @param sketch: iterable of sketch actions
        @return: a new list holding each sketch action followed by its placeholders
        """
        padded = []
        for action in sketch:
            padded.append(action)
            kind = type(action)

            if kind == N:
                # N(k) gets k + 1 placeholder triples.
                for _ in range(action.id_c + 1):
                    padded.extend([A(0), C(0), T(0)])
            elif kind == Filter:
                # What a Filter needs depends on its production.
                padded.extend(self._padd_filter(action))
            elif kind in (Order, Sup):
                # Order and Sup each get exactly one placeholder triple.
                padded.extend([A(0), C(0), T(0)])

        return padded
Ejemplo n.º 5
0
    def _parse_select(self, sql):
        """
        Build the SemQL actions for the SELECT clause.

        Grammar:
            Select ::= A | AA | AAA | ... |
            A ::= agg column table

        :return: [Sel(), states] -- the action list and always None as states
        """
        select_clause = sql['sql']['select']
        distinct = select_clause[0]  # DISTINCT on the whole select
        columns = select_clause[1]

        # Simplification: one distinct column makes the whole query distinct.
        # It is hard to phrase a real question where only some columns are
        # distinct, and the DEV set has no such example, so SemQL was reduced
        # to a single flag.
        if not distinct:
            distinct = any(item[1][1][2] for item in columns)

        actions = [
            Sel(1) if distinct else Sel(0),
            # N() encodes the number of columns. The -1 makes a single column
            # map to #0 of grammar_dict, which is 'N A'.
            N(len(columns) - 1),
        ]

        for item in columns:
            # A() is the aggregator, e.g. #0 is 'none', #3 is 'count'.
            actions.append(A(item[0]))

            col_id = item[1][1][1]
            actions.append(C(sql['col_set'].index(sql['names'][col_id])))

            if col_id == 0:
                # "*" belongs to no "normal" table and needs extra handling.
                actions.append(self._parser_column0(sql, columns))
            else:
                # Any other column simply gets a T() for its own table.
                actions.append(T(sql['col_table'][col_id]))

        return actions, None
Ejemplo n.º 6
0
    def _padd_filter(action):
        if 'A' in action.production:
            filter_paddings = []
            start_idx = action.production.index('A')
            all_padding_objects = action.production[start_idx:].split(' ')

            for e in all_padding_objects:
                if e == 'A':
                    filter_paddings.append(A(0))
                    filter_paddings.append(C(0))
                    filter_paddings.append(T(0))
                elif e == 'V':
                    filter_paddings.append(V(0))
                elif e == 'Root':
                    # we don't need to do anything for 'Root' -> it will be padded later.
                    continue
                else:
                    raise ValueError("Unknown Action: " + e)

            return filter_paddings
        else:
            return []
Ejemplo n.º 7
0
    def parse_one_condition(self, sql_condit, names, sql):
        """
        Translate a single condition into a list of SemQL actions.

        The result is, in order: the Filter action, the A, C and T actions of
        the filtered column, the value action(s) for filters with ids 2..10
        (two of them for Filter(8), the BETWEEN case), and finally the actions
        of the nested query when sql_condit[3] is a dict.

        @param sql_condit: condition as an indexable: [0] flag selecting the
            "not like" / "not in" filters, [1] operator index, [2] value unit
            with the column unit at [2][1], [3] value or nested query dict,
            [4] second value (read for BETWEEN only)
        @param names: column names handed on to a nested query
        @param sql: the enclosing example dict
        @return: list of actions
        Raises NotImplementedError for operator indexes without a Filter mapping.
        """
        # A dict at position 3 means the condition compares against a sub-query.
        is_nested = type(sql_condit[3]) == dict

        # Mapping between the index of the WHERE_OPS in spider and the Filter() index in SemQL:
        # WHERE_OPS = ('not', 'between', '=', '>', '<', '>=', '<=', '!=', 'in', 'like', 'is', 'exists')
        # Filter --> see Filter-class
        # Example: 1:8 --> the filter type "between" is a 1 in the spider notation, but a 8 in SemQL.
        single_map = {1: 8, 2: 2, 3: 5, 4: 4, 5: 7, 6: 6, 7: 3}
        nested_map = {1: 15, 2: 11, 3: 13, 4: 12, 5: 16, 6: 17, 7: 14}

        negated = sql_condit[0] == True
        op_id = sql_condit[1]

        if negated:
            if op_id == 9:
                # not like only with values
                filter_id = 10
            elif op_id == 8:
                # not in with Root
                filter_id = 19
            else:
                print(op_id)
                raise NotImplementedError("not implement for the others FIL")
        elif op_id in (1, 2, 3, 4, 5, 6, 7):
            # check for Filter (<,=,>,!=,between, >=,  <=, ...)
            filter_id = nested_map[op_id] if is_nested else single_map[op_id]
        elif op_id == 9:
            filter_id = 9
        elif op_id == 8:
            filter_id = 18
        else:
            print(op_id)
            raise NotImplementedError("not implement for the others FIL")

        fil = Filter(filter_id)

        # The filtered column: aggregator at [0], column index at [1].
        col_unit = sql_condit[2][1]
        col_id = col_unit[1]
        actions = [
            fil,
            A(col_unit[0]),
            C(sql['col_set'].index(sql['names'][col_id])),
        ]
        if col_id == 0:
            actions.append(self._parser_column0(sql, sql['sql']['select'][1]))
        else:
            actions.append(T(sql['col_table'][col_id]))

        # These are the filter statements which contain values - the SemQL AST is extended with a "V" action
        # built from the value (important: the value needs to exist in the provided value list!)
        if 2 <= fil.id_c <= 10:
            actions.append(self._build_value_action(sql_condit[3]))

            # Filter(8) is the "X.Y BETWEEN A AND B" case - here an additional value has to be stored.
            if fil.id_c == 8:
                actions.append(self._build_value_action(sql_condit[4]))

        # Nested value: parse the sub-query as an example of its own.
        if is_nested:
            nested_example = {
                'names': names,
                'query_toks_no_value': "",
                'sql': sql_condit[3],
                'col_table': sql['col_table'],
                'col_set': sql['col_set'],
                'table_names': sql['table_names'],
                'question': sql['question'],
                'query': sql['query'],
                'keys': sql['keys'],
            }
            actions.extend(self.parser(nested_example))

        return actions