Example #1
def run(query, frum=Null):
    """
    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer
    """
    if frum == None:
        query_op = QueryOp.wrap(query)
        frum = query_op.frum
    else:
        query_op = QueryOp.wrap(query, frum.schema)

    if isinstance(frum, Container):
        return frum.query(query_op)
    elif isinstance(frum, (list, set, GeneratorType)):
        frum = wrap(list(frum))
    elif isinstance(frum, Cube):
        if is_aggs(query_op):
            return cube_aggs(frum, query_op)

    elif isinstance(frum, QueryOp):
        frum = run(frum)
    else:
        Log.error("Do not know how to handle {{type}}",
                  type=frum.__class__.__name__)

    if is_aggs(query_op):
        frum = list_aggs(frum, query_op)
    else:  # SETOP
        # try:
        #     if query.filter != None or query.esfilter != None:
        #         Log.error("use 'where' clause")
        # except AttributeError:
        #     pass

        if query_op.where is not TRUE_FILTER:
            frum = filter(frum, query_op.where)

        if query_op.sort:
            frum = sort(frum, query_op.sort, already_normalized=True)

        if query_op.select:
            frum = select(frum, query_op.select)

    if query_op.window:
        if isinstance(frum, Cube):
            frum = list(frum.values())

        for param in query_op.window:
            window(frum, param)

    # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query_op.format == "cube":
        frum = convert.list2cube(frum)
    elif query_op.format == "table":
        frum = convert.list2table(frum)
        frum.meta.format = "table"
    else:
        frum = wrap({"meta": {"format": "list"}, "data": frum})

    return frum
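The function above is a type switch on the "from" clause. Below is a minimal, self-contained sketch of that dispatch in plain Python; the Container, Cube and QueryOp classes here are empty stand-ins for illustration, not the library's own types.

from types import GeneratorType

# Stand-in classes; the real library defines its own Container, Cube and QueryOp.
class Container(object): pass
class Cube(object): pass
class QueryOp(object): pass

def dispatch(frum):
    """Illustrate the type switch run() performs on its "from" clause."""
    if isinstance(frum, Container):
        return "delegate to the container's own query()"
    elif isinstance(frum, (list, set, GeneratorType)):
        return "materialize to a list and process in-process"
    elif isinstance(frum, Cube):
        return "aggregate the cube directly"
    elif isinstance(frum, QueryOp):
        return "run the inner query first, then query its result"
    else:
        raise TypeError("do not know how to handle %s" % type(frum).__name__)

print(dispatch([{"a": 1}, {"a": 2}]))  # -> materialize to a list and process in-process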
Example #2
    def _convert_query(self, query):
        output = QueryOp(None)
        output.select = self._convert_clause(query.select)
        output.where = self.convert(query.where)
        output.frum = self._convert_from(query.frum)
        output.edges = convert_list(self._convert_edge, query.edges)
        output.having = convert_list(self._convert_having, query.having)
        output.window = convert_list(self._convert_window, query.window)
        output.sort = self._convert_clause(query.sort)
        output.format = query.format

        return output
Example #3
    def _convert_query(self, query):
        output = QueryOp(None)
        output.select = self._convert_clause(query.select)
        output.where = self.convert(query.where)
        output.frum = self._convert_from(query.frum)
        output.edges = convert_list(self._convert_edge, query.edges)
        output.having = convert_list(self._convert_having, query.having)
        output.window = convert_list(self._convert_window, query.window)
        output.sort = self._convert_clause(query.sort)
        output.format = query.format

        return output
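Examples #2 and #3 rebuild each clause of the query by running a converter over it. A hedged sketch of what a convert_list-style helper might do; convert_each is hypothetical and not the library's convert_list.

def convert_each(converter, clause):
    """Hypothetical helper: apply a converter to every item of a clause that
    may be missing, a single value, or a list."""
    if clause is None:
        return None
    if isinstance(clause, (list, tuple)):
        return [converter(item) for item in clause]
    return [converter(clause)]

print(convert_each(str.upper, ["name", "value"]))  # ['NAME', 'VALUE']
print(convert_each(str.upper, None))               # None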
Example #4
 def query(self, _query):
     return self.meta.columns.query(
         QueryOp(
             set_default(
                 {
                     "from": self.meta.columns,
                     "sort": ["table", "name"]
                 }, _query.as_dict())))
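Example #4 pins the "from" and "sort" clauses before handing the caller's query to the columns container. A rough sketch of that kind of defaulting with plain dicts; with_defaults is hypothetical, and its precedence is an assumption rather than a description of what set_default() actually does.

def with_defaults(query, defaults):
    """Hypothetical helper: return a copy of `query` with missing clauses
    filled in from `defaults` (shallow; the real set_default() is richer)."""
    out = dict(defaults)
    out.update({k: v for k, v in query.items() if v is not None})
    return out

print(with_defaults(
    {"select": "name", "where": {"eq": {"table": "task"}}},
    {"from": "meta.columns", "sort": ["table", "name"]},
))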
Example #5
def run(query, frum=None):
    """
    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer
    """
    query = QueryOp.wrap(query, frum.schema)
    frum = coalesce(frum, query["from"])
    if isinstance(frum, Container):
        return frum.query(query)
    elif isinstance(frum, (list, set, GeneratorType)):
        frum = wrap(list(frum))
    elif isinstance(frum, Cube):
        if is_aggs(query):
            return cube_aggs(frum, query)

    elif isinstance(frum, QueryOp):
        frum = run(frum)
    else:
        Log.error("Do not know how to handle {{type}}",  type=frum.__class__.__name__)

    if is_aggs(query):
        frum = list_aggs(frum, query)
    else:  # SETOP
        # try:
        #     if query.filter != None or query.esfilter != None:
        #         Log.error("use 'where' clause")
        # except AttributeError:
        #     pass

        if query.where is not TRUE_FILTER:
            frum = filter(frum, query.where)

        if query.sort:
            frum = sort(frum, query.sort, already_normalized=True)

        if query.select:
            frum = select(frum, query.select)

    if query.window:
        if isinstance(frum, Cube):
            frum = list(frum.values())

        for param in query.window:
            window(frum, param)

    # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query.format == "cube":
        frum = convert.list2cube(frum)
    elif query.format == "table":
        frum = convert.list2table(frum)
        frum.meta.format = "table"
    else:
        frum = wrap({
            "meta": {"format": "list"},
            "data": frum
        })

    return frum
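The tail of run() packages the result list according to query.format. Here is a sketch of only that packaging step, assuming plain dicts in place of the library's wrappers; the real code delegates to convert.list2cube() and convert.list2table().

def package(rows, format="list"):
    """Sketch of the final packaging step: a naive 'table' shape and the
    'list' envelope; the 'cube' case is omitted."""
    if format == "table":
        header = sorted({k for r in rows for k in r})
        data = [[r.get(h) for h in header] for r in rows]
        return {"meta": {"format": "table"}, "header": header, "data": data}
    return {"meta": {"format": "list"}, "data": rows}

print(package([{"a": 1, "b": 2}, {"a": 3}], format="table"))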
Example #6
    def query(self, query, stacked=False):
        """
        TRANSLATE JSON QUERY EXPRESSION ON SINGLE TABLE TO SQL QUERY
        """
        from pyLibrary.queries.query import QueryOp

        query = QueryOp.wrap(query)

        sql, post = self._subquery(query, isolate=False, stacked=stacked)
        query.data = post(sql)
        return query.data
Example #7
    def query(self, query, stacked=False):
        """
        TRANSLATE JSON QUERY EXPRESSION ON SINGLE TABLE TO SQL QUERY
        """
        from pyLibrary.queries.query import QueryOp

        query = QueryOp.wrap(query)

        sql, post = self._subquery(query, isolate=False, stacked=stacked)
        query.data = post(sql)
        return query.data
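Examples #6 and #7 rely on _subquery() returning a SQL string plus a post-processing callable. A hypothetical illustration of that (sql, post) contract; subquery and fake_db are made up for the sketch.

def subquery(table, where_sql, db):
    """Hypothetical (sql, post) pair: the SQL text plus a callable that runs
    it and shapes the rows into the usual result envelope."""
    sql = "SELECT id, status FROM %s WHERE %s" % (table, where_sql)
    def post(sql):
        rows = db(sql)  # `db` is any callable that executes SQL and returns rows
        return {"meta": {"format": "list"}, "data": rows}
    return sql, post

fake_db = lambda sql: [{"id": 1, "status": "open"}]
sql, post = subquery("task", "status = 'open'", fake_db)
print(post(sql))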
Example #8
def wrap_from(frum, schema=None):
    """
    :param frum:
    :param schema:
    :return:
    """
    if not _containers:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        if not _containers.config.default.settings:
            Log.error(
                "expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info"
            )

        type_ = None
        index = frum
        if frum.startswith("meta."):
            if frum == "meta.columns":
                return _meta.singlton.meta.columns.denormalized()
            elif frum == "meta.tables":
                return _meta.singlton.meta.tables
            else:
                Log.error("{{name}} not a recognized table", name=frum)
        else:
            type_ = _containers.config.default.type
            index = split_field(frum)[0]

        settings = set_default({
            "index": index,
            "name": frum,
            "exists": True,
        }, _containers.config.default.settings)
        settings.type = None
        return _containers.type2container[type_](settings)
    elif isinstance(
            frum,
            Mapping) and frum.type and _containers.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return _containers.type2container[frum.type](frum.settings)
    elif isinstance(frum, Mapping) and (frum["from"]
                                        or isinstance(frum["from"],
                                                      (list, set))):
        from pyLibrary.queries.query import QueryOp
        return QueryOp.wrap(frum, schema=schema)
    elif isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)
    else:
        return frum
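wrap_from() routes a string "from" clause either to the metadata tables or to the default backend keyed by its first path segment. A hedged, self-contained sketch of that routing; route_from and its return values are illustrative only.

def route_from(frum):
    """Hypothetical routing in the spirit of wrap_from(): names beginning with
    'meta.' resolve to metadata tables, anything else is treated as an index
    name for the default backend."""
    if not isinstance(frum, str):
        return frum
    if frum.startswith("meta."):
        if frum not in ("meta.columns", "meta.tables"):
            raise ValueError("%s not a recognized table" % frum)
        return ("metadata", frum)
    index = frum.split(".")[0]  # like split_field(frum)[0]: keep the first segment
    return ("elasticsearch", index)

print(route_from("unittest.run.stats"))  # ('elasticsearch', 'unittest')
print(route_from("meta.columns"))        # ('metadata', 'meta.columns')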
Example #9
    def query(self, query):
        query = QueryOp.wrap(query)
        short_list = self._filter(query.where)
        if query.sort:
            short_list = self._sort(query.sort)

        if isinstance(query.select, list):
            accessors = map(jx.get, query.select.value)

        if query.window:
            for w in query.window:
                window_list = self._filter(w.where)
Example #10
def compare_to_expected(query, result, expect):
    query = wrap(query)
    expect = wrap(expect)

    if result.meta.format == "table":
        assertAlmostEqual(set(result.header), set(expect.header))

        # MAP FROM expected COLUMN TO result COLUMN
        mapping = zip(*zip(*filter(
            lambda v: v[0][1] == v[1][1],
            itertools.product(enumerate(expect.header), enumerate(
                result.header))))[1])[0]
        result.header = [result.header[m] for m in mapping]

        if result.data:
            columns = zip(*unwrap(result.data))
            result.data = zip(*[columns[m] for m in mapping])

        if not query.sort:
            sort_table(result)
            sort_table(expect)
    elif result.meta.format == "list":
        if query["from"].startswith("meta."):
            pass
        else:
            query = QueryOp.wrap(query)

        if not query.sort:
            try:
                #result.data MAY BE A LIST OF VALUES, NOT OBJECTS
                data_columns = jx.sort(
                    set(jx.get_columns(result.data, leaves=True))
                    | set(jx.get_columns(expect.data, leaves=True)), "name")
            except Exception:
                data_columns = [{"name": "."}]

            sort_order = listwrap(coalesce(query.edges,
                                           query.groupby)) + data_columns

            if isinstance(expect.data, list):
                try:
                    expect.data = jx.sort(expect.data, sort_order.name)
                except Exception, _:
                    pass

            if isinstance(result.data, list):
                try:
                    result.data = jx.sort(result.data, sort_order.name)
                except Exception, _:
                    pass
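When the query has no explicit sort, compare_to_expected() orders both sides by the same columns before comparing. The core of that idea in isolation; normalized is a hypothetical helper, not part of the test utilities.

def normalized(rows, keys):
    """Sketch of the un-sorted branch: order both the actual and the expected
    rows by the same columns so the comparison is order-insensitive."""
    return sorted(rows, key=lambda r: tuple(r.get(k) for k in keys))

result = [{"a": 2}, {"a": 1}]
expect = [{"a": 1}, {"a": 2}]
assert normalized(result, ["a"]) == normalized(expect, ["a"])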
Example #11
def wrap_from(frum, schema=None):
    """
    :param frum:
    :param schema:
    :return:
    """
    if not _containers:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        if not _containers.config.default.settings:
            Log.error("expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info")

        type_ = None
        index = frum
        if frum.startswith("meta."):
            if frum == "meta.columns":
                return _meta.singlton.meta.columns.denormalized()
            elif frum == "meta.tables":
                return _meta.singlton.meta.tables
            else:
                Log.error("{{name}} not a recognized table", name=frum)
        else:
            type_ = _containers.config.default.type
            index = split_field(frum)[0]

        settings = set_default(
            {
                "index": index,
                "name": frum,
                "exists": True,
            },
            _containers.config.default.settings
        )
        settings.type = None
        return _containers.type2container[type_](settings)
    elif isinstance(frum, Mapping) and frum.type and _containers.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return _containers.type2container[frum.type](frum.settings)
    elif isinstance(frum, Mapping) and (frum["from"] or isinstance(frum["from"], (list, set))):
        from pyLibrary.queries.query import QueryOp
        return QueryOp.wrap(frum, schema=schema)
    elif isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)
    else:
        return frum
Example #12
    def new_instance(type, frum, schema=None):
        """
        Factory!
        """
        if not type2container:
            _delayed_imports()

        if isinstance(frum, Container):
            return frum
        elif isinstance(frum, _Cube):
            return frum
        elif isinstance(frum, _Query):
            return _run(frum)
        elif isinstance(frum, (list, set, GeneratorType)):
            return _ListContainer(frum)
        elif isinstance(frum, basestring):
            # USE DEFAULT STORAGE TO FIND Container
            if not config.default.settings:
                Log.error(
                    "expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info"
                )

            settings = set_default(
                {
                    "index": join_field(split_field(frum)[:1:]),
                    "name": frum,
                }, config.default.settings)
            settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
            return type2container["elasticsearch"](settings)
        elif isinstance(frum, Mapping):
            frum = wrap(frum)
            if frum.type and type2container[frum.type]:
                return type2container[frum.type](frum.settings)
            elif frum["from"]:
                frum = copy(frum)
                frum["from"] = Container(frum["from"])
                return _Query.wrap(frum)
            else:
                Log.error("Do not know how to handle {{frum|json}}", frum=frum)
        else:
            Log.error("Do not know how to handle {{type}}",
                      type=frum.__class__.__name__)
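new_instance() is a factory keyed on the type2container registry. A minimal sketch of that pattern with a plain dict registry; the backend names and constructors here are placeholders, not the library's.

# Hypothetical registry: map a backend name to a constructor, then build a
# container from a settings dict.
registry = {
    "elasticsearch": lambda settings: ("ESContainer", settings["index"]),
    "sqlite": lambda settings: ("SQLiteContainer", settings["index"]),
}

def new_container(settings):
    kind = settings.get("type", "elasticsearch")
    if kind not in registry:
        raise ValueError("do not know how to handle %r" % kind)
    return registry[kind](settings)

print(new_container({"type": "sqlite", "index": "task"}))  # ('SQLiteContainer', 'task')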
Example #13
    def query(self, _query):
        try:
            query = QueryOp.wrap(_query, schema=self)

            for n in self.namespaces:
                query = n.convert(query)
            if self.typed:
                query = Typed().convert(query)

            for s in listwrap(query.select):
                if not aggregates1_4.get(s.aggregate):
                    Log.error(
                        "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                        name=s.name,
                        aggregate=s.aggregate
                    )

            frum = query["from"]
            if isinstance(frum, QueryOp):
                result = self.query(frum)
                q2 = query.copy()
                q2.frum = result
                return jx.run(q2)

            if is_deepop(self._es, query):
                return es_deepop(self._es, query)
            if is_aggsop(self._es, query):
                return es_aggsop(self._es, frum, query)
            if is_setop(self._es, query):
                return es_setop(self._es, query)
            if es09_setop.is_setop(query):
                return es09_setop.es_setop(self._es, None, query)
            if es09_aggop.is_aggop(query):
                return es09_aggop.es_aggop(self._es, None, query)
            Log.error("Can not handle")
        except Exception, e:
            e = Except.wrap(e)
            if "Data too large, data for" in e:
                http.post(self._es.cluster.path+"/_cache/clear")
                Log.error("Problem (Tried to clear Elasticsearch cache)", e)
            Log.error("problem", e)
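The ES query() method above tries a series of specialized handlers (deep, aggs, set-op, then the 0.9 fallbacks) and raises if none apply. A sketch of that handler-chain shape; first_handler and the predicates are illustrative, not the library's is_* functions.

def first_handler(query, handlers):
    """Sketch of a handler chain: each (predicate, runner) pair is tried in
    order and the first predicate that accepts the query wins."""
    for can_handle, run in handlers:
        if can_handle(query):
            return run(query)
    raise ValueError("Can not handle")

handlers = [
    (lambda q: bool(q.get("edges")), lambda q: "aggregation path"),
    (lambda q: True,                 lambda q: "set-operation path"),
]
print(first_handler({"edges": ["build.platform"]}, handlers))  # aggregation path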
Example #14
    def new_instance(type, frum, schema=None):
        """
        Factory!
        """
        if not type2container:
            _delayed_imports()

        if isinstance(frum, Container):
            return frum
        elif isinstance(frum, _Cube):
            return frum
        elif isinstance(frum, _Query):
            return _run(frum)
        elif isinstance(frum, (list, set, GeneratorType)):
            return _ListContainer(frum)
        elif isinstance(frum, basestring):
            # USE DEFAULT STORAGE TO FIND Container
            if not config.default.settings:
                Log.error("expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info")

            settings = set_default(
                {
                    "index": join_field(split_field(frum)[:1:]),
                    "name": frum,
                },
                config.default.settings
            )
            settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
            return type2container["elasticsearch"](settings)
        elif isinstance(frum, Mapping):
            frum = wrap(frum)
            if frum.type and type2container[frum.type]:
                return type2container[frum.type](frum.settings)
            elif frum["from"]:
                frum = copy(frum)
                frum["from"] = Container(frum["from"])
                return _Query.wrap(frum)
            else:
                Log.error("Do not know how to handle {{frum|json}}", frum=frum)
        else:
            Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__)
Example #15
    def query(self, _query):
        try:
            query = QueryOp.wrap(_query, schema=self)

            for n in self.namespaces:
                query = n.convert(query)
            if self.typed:
                query = Typed().convert(query)

            for s in listwrap(query.select):
                if not aggregates1_4.get(s.aggregate):
                    Log.error(
                        "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                        name=s.name,
                        aggregate=s.aggregate,
                    )

            frum = query["from"]
            if isinstance(frum, QueryOp):
                result = self.query(frum)
                q2 = query.copy()
                q2.frum = result
                return jx.run(q2)

            if is_deepop(self._es, query):
                return es_deepop(self._es, query)
            if is_aggsop(self._es, query):
                return es_aggsop(self._es, frum, query)
            if is_setop(self._es, query):
                return es_setop(self._es, query)
            if es09_setop.is_setop(query):
                return es09_setop.es_setop(self._es, None, query)
            if es09_aggop.is_aggop(query):
                return es09_aggop.es_aggop(self._es, None, query)
            Log.error("Can not handle")
        except Exception, e:
            e = Except.wrap(e)
            if "Data too large, data for" in e:
                http.post(self._es.cluster.path + "/_cache/clear")
                Log.error("Problem (Tried to clear Elasticsearch cache)", e)
            Log.error("problem", e)
Example #16
    def query(self, query):
        """
        :param query:  JSON Query Expression, SET `format="container"` TO MAKE NEW TABLE OF RESULT
        :return:
        """
        if not startswith_field(query['from'], self.name):
            Log.error("Expecting table, or some nested table")
        frum, query['from'] = query['from'], self
        query = QueryOp.wrap(query, self.columns)

        # TYPE CONFLICTS MUST NOW BE RESOLVED DURING
        # TYPE-SPECIFIC QUERY NORMALIZATION
        # vars_ = query.vars(exclude_select=True)
        # type_map = {
        #     v: c.es_column
        #     for v in vars_
        #     if v in self.columns and len([c for c in self.columns[v] if c.type != "nested"]) == 1
        #     for c in self.columns[v]
        #     if c.type != "nested"
        # }
        #
        # sql_query = query.map(type_map)
        query = query

        new_table = "temp_" + unique_name()

        if query.format == "container":
            create_table = "CREATE TABLE " + quote_table(new_table) + " AS "
        else:
            create_table = ""

        if query.groupby:
            op, index_to_columns = self._groupby_op(query, frum)
            command = create_table + op
        elif query.edges or any(a != "none"
                                for a in listwrap(query.select).aggregate):
            op, index_to_columns = self._edges_op(query, frum)
            command = create_table + op
        else:
            op = self._set_op(query, frum)
            return op

        if query.sort:
            command += "\nORDER BY " + ",\n".join(
                "(" + sql[t] + ") IS NULL" +
                (" DESC" if s.sort == -1 else "") + ",\n" + sql[t] +
                (" DESC" if s.sort == -1 else "")
                for s, sql in [(s, s.value.to_sql(self)[0].sql)
                               for s in query.sort] for t in "bns" if sql[t])

        result = self.db.query(command)

        column_names = query.edges.name + query.groupby.name + listwrap(
            query.select).name
        if query.format == "container":
            output = QueryTable(new_table,
                                db=self.db,
                                uid=self.uid,
                                exists=True)
        elif query.format == "cube" or (not query.format and query.edges):
            if len(query.edges) == 0 and len(query.groupby) == 0:
                data = {n: Data() for n in column_names}
                for s in index_to_columns.values():
                    data[s.push_name][s.push_child] = unwrap(
                        s.pull(result.data[0]))
                return Data(data=unwrap(data), meta={"format": "cube"})

            if not result.data:
                edges = []
                dims = []
                for i, e in enumerate(query.edges + query.groupby):
                    allowNulls = coalesce(e.allowNulls, True)

                    if e.domain.type == "set" and e.domain.partitions:
                        domain = SimpleSetDomain(
                            partitions=e.domain.partitions.name)
                    elif e.domain.type == "range":
                        domain = e.domain
                    elif isinstance(e.value, TupleOp):
                        pulls = jx.sort([
                            c for c in index_to_columns.values()
                            if c.push_name == e.name
                        ], "push_child").pull
                        parts = [
                            tuple(p(d) for p in pulls) for d in result.data
                        ]
                        domain = SimpleSetDomain(
                            partitions=jx.sort(set(parts)))
                    else:
                        domain = SimpleSetDomain(partitions=[])

                    dims.append(1 if allowNulls else 0)
                    edges.append(
                        Data(name=e.name, allowNulls=allowNulls,
                             domain=domain))

                zeros = [
                    0 if s.aggregate == "count"
                    and index_to_columns[si].push_child == "." else Data
                    for si, s in enumerate(listwrap(query.select))
                ]
                data = {
                    s.name: Matrix(dims=dims, zeros=zeros[si])
                    for si, s in enumerate(listwrap(query.select))
                }

                if isinstance(query.select, list):
                    select = [{"name": s.name} for s in query.select]
                else:
                    select = {"name": query.select.name}

                return Data(meta={"format": "cube"},
                            edges=edges,
                            select=select,
                            data={k: v.cube
                                  for k, v in data.items()})

            columns = None

            edges = []
            dims = []
            for g in query.groupby:
                g.is_groupby = True

            for i, e in enumerate(query.edges + query.groupby):
                allowNulls = coalesce(e.allowNulls, True)

                if e.domain.type == "set" and e.domain.partitions:
                    domain = SimpleSetDomain(
                        partitions=e.domain.partitions.name)
                elif e.domain.type == "range":
                    domain = e.domain
                elif e.domain.type == "time":
                    domain = wrap(mo_json.scrub(e.domain))
                elif e.domain.type == "duration":
                    domain = wrap(mo_json.scrub(e.domain))
                elif isinstance(e.value, TupleOp):
                    pulls = jx.sort([
                        c for c in index_to_columns.values()
                        if c.push_name == e.name
                    ], "push_child").pull
                    parts = [tuple(p(d) for p in pulls) for d in result.data]
                    domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
                else:
                    if not columns:
                        columns = zip(*result.data)
                    parts = set(columns[i])
                    if e.is_groupby and None in parts:
                        allowNulls = True
                    parts -= {None}
                    domain = SimpleSetDomain(partitions=jx.sort(parts))

                dims.append(len(domain.partitions) + (1 if allowNulls else 0))
                edges.append(
                    Data(name=e.name, allowNulls=allowNulls, domain=domain))

            zeros = [
                0 if s.aggregate == "count"
                and index_to_columns[si].push_child == "." else Data
                for si, s in enumerate(listwrap(query.select))
            ]
            data_cubes = {
                s.name: Matrix(dims=dims, zeros=zeros[si])
                for si, s in enumerate(listwrap(query.select))
            }
            r2c = index_to_coordinate(
                dims)  # WORKS BECAUSE THE DATABASE SORTED THE EDGES TO CONFORM
            for rownum, row in enumerate(result.data):
                coord = r2c(rownum)

                for i, s in enumerate(index_to_columns.values()):
                    if s.is_edge:
                        continue
                    if s.push_child == ".":
                        data_cubes[s.push_name][coord] = s.pull(row)
                    else:
                        data_cubes[s.push_name][coord][s.push_child] = s.pull(
                            row)

            if isinstance(query.select, list):
                select = [{"name": s.name} for s in query.select]
            else:
                select = {"name": query.select.name}

            return Data(meta={"format": "cube"},
                        edges=edges,
                        select=select,
                        data={k: v.cube
                              for k, v in data_cubes.items()})
        elif query.format == "table" or (not query.format and query.groupby):
            data = []
            for d in result.data:
                row = [None for _ in column_names]
                for s in index_to_columns.values():
                    if s.push_child == ".":
                        row[s.push_column] = s.pull(d)
                    elif s.num_push_columns:
                        tuple_value = row[s.push_column]
                        if tuple_value == None:
                            tuple_value = row[
                                s.push_column] = [None] * s.num_push_columns
                        tuple_value[s.push_child] = s.pull(d)
                    elif row[s.push_column] == None:
                        row[s.push_column] = Data()
                        row[s.push_column][s.push_child] = s.pull(d)
                    else:
                        row[s.push_column][s.push_child] = s.pull(d)
                data.append(tuple(unwrap(r) for r in row))

            output = Data(meta={"format": "table"},
                          header=column_names,
                          data=data)
        elif query.format == "list" or (not query.edges and not query.groupby):

            if not query.edges and not query.groupby and any(
                    listwrap(query.select).aggregate):
                if isinstance(query.select, list):
                    data = Data()
                    for c in index_to_columns.values():
                        if c.push_child == ".":
                            data[c.push_name] = c.pull(result.data[0])
                        else:
                            data[c.push_name][c.push_child] = c.pull(
                                result.data[0])

                    output = Data(meta={"format": "value"}, data=data)
                else:
                    data = Data()
                    for s in index_to_columns.values():
                        data[s.push_child] = s.pull(result.data[0])

                    output = Data(meta={"format": "value"}, data=unwrap(data))
            else:
                data = []
                for rownum in result.data:
                    row = Data()
                    for c in index_to_columns.values():
                        if c.push_child == ".":
                            row[c.push_name] = c.pull(rownum)
                        elif c.num_push_columns:
                            tuple_value = row[c.push_name]
                            if not tuple_value:
                                tuple_value = row[
                                    c.push_name] = [None] * c.num_push_columns
                            tuple_value[c.push_child] = c.pull(rownum)
                        else:
                            row[c.push_name][c.push_child] = c.pull(rownum)

                    data.append(row)

                output = Data(meta={"format": "list"}, data=data)
        else:
            Log.error("unknown format {{format}}", format=query.format)

        return output
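The ORDER BY built above sorts on an IS NULL test before the value itself, so NULLs land at the end of the ordering. The same construction in isolation; order_by is a simplified stand-in for the expression built in the method.

def order_by(columns):
    """Sketch of the ORDER BY construction: sort on 'IS NULL' first so NULL
    values land last, then on the expression itself, honouring direction."""
    parts = []
    for expr, direction in columns:  # direction: 1 ascending, -1 descending
        desc = " DESC" if direction == -1 else ""
        parts.append("(%s) IS NULL%s" % (expr, desc))
        parts.append("%s%s" % (expr, desc))
    return "ORDER BY " + ",\n".join(parts)

print(order_by([("name", 1), ("size", -1)]))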
Example #17
    def query(self, query):
        """
        :param query:  JSON Query Expression, SET `format="container"` TO MAKE NEW TABLE OF RESULT
        :return:
        """
        query["from"] = self
        query = QueryOp.wrap(query)

        # TYPE CONFLICTS MUST NOW BE RESOLVED DURING
        # TYPE-SPECIFIC QUERY NORMALIZATION
        vars_ = query.vars(exclude_select=True)
        type_map = {
            v: c.es_column
            for v in vars_
            if v in self.columns and len([c for c in self.columns[v] if c.type != "nested"]) == 1
            for c in self.columns[v]
            if c.type != "nested"
            }

        sql_query = query.map(type_map)

        new_table = "temp_"+unique_name()

        if query.format == "container":
            create_table = "CREATE TABLE " + quote_table(new_table) + " AS "
        else:
            create_table = ""

        if sql_query.edges:
            command = create_table + self._edges_op(sql_query)
        elif sql_query.groupby:
            command = create_table + self._groupby_op(sql_query)
        else:
            command = create_table + self._set_op(sql_query)

        if sql_query.sort:
            command += "\nORDER BY " + ",\n".join(
                s.value.to_sql() + (" DESC" if s.sort == -1 else "")
                for s in sql_query.sort
            )

        result = self.db.query(command)

        column_names = query.column_names
        if query.format == "container":
            output = Table_usingSQLite(new_table, db=self.db, uid=self.uid, exists=True)
        elif query.format == "cube" or query.edges:
            if len(query.edges) > 1:
                Log.error("Only support one dimension right now")

            if not result.data:
                return Dict(
                    data={}
                )

            columns = zip(*result.data)

            edges = []
            ci = []
            for i, e in enumerate(query.edges):
                if e.domain.type != "default":
                    Log.error("Can only handle default domains")
                ci.append(i - len(query.edges))
                parts = columns[ci[i]]
                allowNulls=False
                if parts[0]==None:
                    allowNulls=True
                    # ONLY ONE EDGE, SO WE CAN DO THIS TO PUT NULL LAST
                    for ii, c in enumerate(copy(columns)):
                        columns[ii] = list(c[1:]) + [c[0]]
                    parts = parts[1:]

                edges.append(Dict(
                    name=e.name,
                    allowNulls=allowNulls,
                    domain=SimpleSetDomain(partitions=parts)
                ))

            data = {s.name: columns[i] for i, s in enumerate(sql_query.select)}

            return Dict(
                edges=edges,
                data=data
            )
        elif query.format == "list" or (not query.edges and not query.groupby):
            output = Dict(
                meta={"format": "list"},
                header=column_names,
                data=[{c: v for c, v in zip(column_names, r)} for r in result.data]
            )
        else:
            Log.error("unknown format {{format}}", format=query.format)

        return output
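The "list" branch above zips the column names with each result row to build one dict per row; that step on its own looks like this.

column_names = ["name", "size"]
rows = [("a", 1), ("b", 2)]
data = [{c: v for c, v in zip(column_names, r)} for r in rows]
print(data)  # [{'name': 'a', 'size': 1}, {'name': 'b', 'size': 2}]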
Example #18
    def _convert_query(self, query):
        # if not isinstance(query["from"], Container):
        #     Log.error('Expecting from clause to be a Container')
        query = wrap(query)

        output = QueryOp("from", None)
        output["from"] = self._convert_from(query["from"])

        output.format = query.format

        if query.select:
            output.select = convert_list(self._convert_select, query.select)
        else:
            if query.edges or query.groupby:
                output.select = {"name": "count", "value": ".", "aggregate": "count", "default": 0}
            else:
                output.select = {"name": "__all__", "value": "*", "aggregate": "none"}

        if query.groupby and query.edges:
            Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
        elif query.edges:
            output.edges = convert_list(self._convert_edge, query.edges)
            output.groupby = None
        elif query.groupby:
            output.edges = None
            output.groupby = convert_list(self._convert_group, query.groupby)
        else:
            output.edges = []
            output.groupby = None

        output.where = self.convert(query.where)
        output.window = convert_list(self._convert_window, query.window)
        output.sort = self._convert_sort(query.sort)

        output.limit = coalesce(query.limit, DEFAULT_LIMIT)
        if not Math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
        # THE from SOURCE IS.
        vars = get_all_vars(output, exclude_where=True)  # WE WILL EXCLUDE where VARIABLES
        for c in query.columns:
            if c.name in vars and c.nested_path:
                Log.error("This query, with variable {{var_name}} is too deep", var_name=c.name)

        output.having = convert_list(self._convert_having, query.having)

        return output
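When no select clause is given, _convert_query() defaults it based on whether the query aggregates. That rule, extracted into a standalone sketch; default_select is hypothetical and works on plain dicts.

def default_select(query):
    """Sketch of the defaulting rule: an aggregating query (edges or groupby
    present) gets a count column, a plain set-op query selects everything."""
    if query.get("select"):
        return query["select"]
    if query.get("edges") or query.get("groupby"):
        return {"name": "count", "value": ".", "aggregate": "count", "default": 0}
    return {"name": "__all__", "value": "*", "aggregate": "none"}

print(default_select({"edges": ["build.platform"]}))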
Example #19
    def query(self, query):
        """
        :param query:  JSON Query Expression, SET `format="container"` TO MAKE NEW TABLE OF RESULT
        :return:
        """
        query["from"] = self
        query = QueryOp.wrap(query)

        # TYPE CONFLICTS MUST NOW BE RESOLVED DURING
        # TYPE-SPECIFIC QUERY NORMALIZATION
        vars_ = query.vars(exclude_select=True)
        type_map = {
            v: c.es_column
            for v in vars_ if v in self.columns
            and len([c for c in self.columns[v] if c.type != "nested"]) == 1
            for c in self.columns[v] if c.type != "nested"
        }

        sql_query = query.map(type_map)

        new_table = "temp_" + unique_name()

        if query.format == "container":
            create_table = "CREATE TABLE " + quote_table(new_table) + " AS "
        else:
            create_table = ""

        if sql_query.edges:
            command = create_table + self._edges_op(sql_query)
        elif sql_query.groupby:
            command = create_table + self._groupby_op(sql_query)
        else:
            command = create_table + self._set_op(sql_query)

        if sql_query.sort:
            command += "\nORDER BY " + ",\n".join(
                s.value.to_sql() + (" DESC" if s.sort == -1 else "")
                for s in sql_query.sort)

        result = self.db.query(command)

        column_names = query.column_names
        if query.format == "container":
            output = Table_usingSQLite(new_table,
                                       db=self.db,
                                       uid=self.uid,
                                       exists=True)
        elif query.format == "cube" or query.edges:
            if len(query.edges) > 1:
                Log.error("Only support one dimension right now")

            if not result.data:
                return Dict(data={})

            columns = zip(*result.data)

            edges = []
            ci = []
            for i, e in enumerate(query.edges):
                if e.domain.type != "default":
                    Log.error("Can only handle default domains")
                ci.append(i - len(query.edges))
                parts = columns[ci[i]]
                allowNulls = False
                if parts[0] == None:
                    allowNulls = True
                    # ONLY ONE EDGE, SO WE CAN DO THIS TO PUT NULL LAST
                    for ii, c in enumerate(copy(columns)):
                        columns[ii] = list(c[1:]) + [c[0]]
                    parts = parts[1:]

                edges.append(
                    Dict(name=e.name,
                         allowNulls=allowNulls,
                         domain=SimpleSetDomain(partitions=parts)))

            data = {s.name: columns[i] for i, s in enumerate(sql_query.select)}

            return Dict(edges=edges, data=data)
        elif query.format == "list" or (not query.edges and not query.groupby):
            output = Dict(meta={"format": "list"},
                          header=column_names,
                          data=[{c: v
                                 for c, v in zip(column_names, r)}
                                for r in result.data])
        else:
            Log.error("unknown format {{format}}", format=query.format)

        return output
Example #20
    def _convert_query(self, query):
        # if not isinstance(query["from"], Container):
        #     Log.error('Expecting from clause to be a Container')
        query = wrap(query)

        output = QueryOp("from", None)
        output["from"] = self._convert_from(query["from"])

        output.format = query.format

        if query.select:
            output.select = convert_list(self._convert_select, query.select)
        else:
            if query.edges or query.groupby:
                output.select = {
                    "name": "count",
                    "value": ".",
                    "aggregate": "count",
                    "default": 0
                }
            else:
                output.select = {
                    "name": "__all__",
                    "value": "*",
                    "aggregate": "none"
                }

        if query.groupby and query.edges:
            Log.error(
                "You can not use both the `groupby` and `edges` clauses in the same query!"
            )
        elif query.edges:
            output.edges = convert_list(self._convert_edge, query.edges)
            output.groupby = None
        elif query.groupby:
            output.edges = None
            output.groupby = convert_list(self._convert_group, query.groupby)
        else:
            output.edges = []
            output.groupby = None

        output.where = self.convert(query.where)
        output.window = convert_list(self._convert_window, query.window)
        output.sort = self._convert_sort(query.sort)

        output.limit = coalesce(query.limit, DEFAULT_LIMIT)
        if not Math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
        # THE from SOURCE IS.
        vars = get_all_vars(
            output, exclude_where=True)  # WE WILL EXCLUDE where VARIABLES
        for c in query.columns:
            if c.name in vars and c.nested_path:
                Log.error("This query, with variable {{var_name}} is too deep",
                          var_name=c.name)

        output.having = convert_list(self._convert_having, query.having)

        return output