Exemple #1
0
def run(query, container=Null):
    """
    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer
    """
    if container == None:
        container = wrap(query)['from']
        query_op = QueryOp.wrap(query,
                                container=container,
                                namespace=container.schema)
    else:
        query_op = QueryOp.wrap(query, container, container.namespace)

    if container == None:
        from jx_python.containers.list_usingPythonList import DUAL
        return DUAL.query(query_op)
    elif isinstance(container, Container):
        return container.query(query_op)
    elif isinstance(container, (list, set) + generator_types):
        container = wrap(list(container))
    elif isinstance(container, Cube):
        if is_aggs(query_op):
            return cube_aggs(container, query_op)
    elif isinstance(container, QueryOp):
        container = run(container)
    else:
        Log.error("Do not know how to handle {{type}}",
                  type=container.__class__.__name__)

    if is_aggs(query_op):
        container = list_aggs(container, query_op)
    else:  # SETOP
        if query_op.where is not TRUE:
            container = filter(container, query_op.where)

        if query_op.sort:
            container = sort(container, query_op.sort, already_normalized=True)

        if query_op.select:
            container = select(container, query_op.select)

    if query_op.window:
        if isinstance(container, Cube):
            container = list(container.values())

        for param in query_op.window:
            window(container, param)

    # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query_op.format == "cube":
        container = convert.list2cube(container)
    elif query_op.format == "table":
        container = convert.list2table(container)
        container.meta.format = "table"
    else:
        container = wrap({"meta": {"format": "list"}, "data": container})

    return container
Exemple #2
0
def run(query, container=Null):
    """
    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer
    """
    if container == None:
        container = wrap(query)['from']
        query_op = QueryOp.wrap(query, container=container, namespace=container.schema)
    else:
        query_op = QueryOp.wrap(query, container, container.namespace)

    if container == None:
        from jx_python.containers.list_usingPythonList import DUAL
        return DUAL.query(query_op)
    elif isinstance(container, Container):
        return container.query(query_op)
    elif isinstance(container, (list, set) + generator_types):
        container = wrap(list(container))
    elif isinstance(container, Cube):
        if is_aggs(query_op):
            return cube_aggs(container, query_op)
    elif isinstance(container, QueryOp):
        container = run(container)
    else:
        Log.error("Do not know how to handle {{type}}", type=container.__class__.__name__)

    if is_aggs(query_op):
        container = list_aggs(container, query_op)
    else:  # SETOP
        if query_op.where is not TRUE:
            container = filter(container, query_op.where)

        if query_op.sort:
            container = sort(container, query_op.sort, already_normalized=True)

        if query_op.select:
            container = select(container, query_op.select)

    if query_op.window:
        if isinstance(container, Cube):
            container = list(container.values())

        for param in query_op.window:
            window(container, param)

    # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query_op.format == "cube":
        container = convert.list2cube(container)
    elif query_op.format == "table":
        container = convert.list2table(container)
        container.meta.format = "table"
    else:
        container = wrap({
            "meta": {"format": "list"},
            "data": container
        })

    return container
Exemple #3
0
    def _convert_query(self, query):
        output = QueryOp(None)
        output.select = self._convert_clause(query.select)
        output.where = self.convert(query.where)
        output.frum = self._convert_from(query.frum)
        output.edges = convert_list(self._convert_edge, query.edges)
        output.having = convert_list(self._convert_having, query.having)
        output.window = convert_list(self._convert_window, query.window)
        output.sort = self._convert_clause(query.sort)
        output.format = query.format

        return output
Exemple #4
0
    def _convert_query(self, query):
        output = QueryOp(None)
        output.select = self._convert_clause(query.select)
        output.where = self.convert(query.where)
        output.frum = self._convert_from(query.frum)
        output.edges = convert_list(self._convert_edge, query.edges)
        output.having = convert_list(self._convert_having, query.having)
        output.window = convert_list(self._convert_window, query.window)
        output.sort = self._convert_clause(query.sort)
        output.format = query.format

        return output
Exemple #5
0
def find_container(frum, after):
    """
    :param frum:
    :return:
    """
    global namespace
    if not namespace:
        if not container.config.default.settings:
            Log.error(
                "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
            )
        namespace = ElasticsearchMetadata(container.config.default.settings)
    if not frum:
        Log.error("expecting json query expression with from clause")

    # FORCE A RELOAD
    namespace.get_columns(frum, after=after)

    if is_text(frum):
        if frum in container_cache:
            return container_cache[frum]

        path = split_field(frum)
        if path[0] == "meta":
            if path[1] == "columns":
                return namespace.meta.columns.denormalized()
            elif path[1] == "tables":
                return namespace.meta.tables
            else:
                fact_table_name = join_field(path[:2])
        else:
            fact_table_name = path[0]

        type_ = container.config.default.type

        settings = set_default(
            {
                "alias": fact_table_name,
                "name": frum,
                "exists": True
            },
            container.config.default.settings,
        )
        settings.type = None
        output = container.type2container[type_](settings)
        container_cache[frum] = output
        return output
    elif is_data(frum) and frum.type and container.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return container.type2container[frum.type](frum.settings)
    elif is_data(frum) and (frum["from"] or is_container(frum["from"])):
        from jx_base.query import QueryOp

        return QueryOp.wrap(frum)
    elif is_container(frum):
        return ListContainer("test_list", frum)
    else:
        return frum
Exemple #6
0
    def query(self, _query):
        try:
            query = QueryOp.wrap(_query, container=self, namespace=self.namespace)

            for s in listwrap(query.select):
                if s.aggregate != None and not aggregates.get(s.aggregate):
                    Log.error(
                        "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                        name=s.name,
                        aggregate=s.aggregate
                    )

            frum = query["from"]
            if isinstance(frum, QueryOp):
                result = self.query(frum)
                q2 = query.copy()
                q2.frum = result
                return jx.run(q2)

            if is_deepop(self.es, query):
                return es_deepop(self.es, query)
            if is_aggsop(self.es, query):
                return es_aggsop(self.es, frum, query)
            if is_setop(self.es, query):
                return es_setop(self.es, query)
            Log.error("Can not handle")
        except Exception as e:
            e = Except.wrap(e)
            if "Data too large, data for" in e:
                http.post(self.es.cluster.url / "_cache/clear")
                Log.error("Problem (Tried to clear Elasticsearch cache)", e)
            Log.error("problem", e)
Exemple #7
0
    def query(self, _query):
        try:
            query = QueryOp.wrap(_query, table=self)

            for n in self.namespaces:
                query = n.convert(query)

            for s in listwrap(query.select):
                if not aggregates.get(s.aggregate):
                    Log.error(
                        "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                        name=s.name,
                        aggregate=s.aggregate
                    )

            frum = query["from"]
            if isinstance(frum, QueryOp):
                result = self.query(frum)
                q2 = query.copy()
                q2.frum = result
                return jx.run(q2)

            if is_deepop(self._es, query):
                return es_deepop(self._es, query)
            if is_aggsop(self._es, query):
                return es_aggsop(self._es, frum, query)
            if is_setop(self._es, query):
                return es_setop(self._es, query)
            Log.error("Can not handle")
        except Exception as e:
            e = Except.wrap(e)
            if "Data too large, data for" in e:
                http.post(self._es.cluster.path+"/_cache/clear")
                Log.error("Problem (Tried to clear Elasticsearch cache)", e)
            Log.error("problem", e)
Exemple #8
0
def compare_to_expected(query, result, expect):
    query = wrap(query)
    expect = wrap(expect)

    if result.meta.format == "table":
        assertAlmostEqual(set(result.header), set(expect.header))

        # MAP FROM expected COLUMN TO result COLUMN
        mapping = list(zip(*list(zip(*filter(
            lambda v: v[0][1] == v[1][1],
            itertools.product(enumerate(expect.header), enumerate(result.header))
        )))[1]))[0]
        result.header = [result.header[m] for m in mapping]

        if result.data:
            columns = list(zip(*unwrap(result.data)))
            result.data = zip(*[columns[m] for m in mapping])

        if not query.sort:
            sort_table(result)
            sort_table(expect)
    elif result.meta.format == "list":
        if query["from"].startswith("meta."):
            pass
        else:
            query = QueryOp.wrap(query, query.frum, query.schema)

        if not query.sort:
            try:
                #result.data MAY BE A LIST OF VALUES, NOT OBJECTS
                data_columns = jx.sort(set(jx.get_columns(result.data, leaves=True)) | set(jx.get_columns(expect.data, leaves=True)), "name")
            except Exception as _:
                data_columns = [{"name": "."}]

            sort_order = listwrap(coalesce(query.edges, query.groupby)) + data_columns

            if isinstance(expect.data, list):
                try:
                    expect.data = jx.sort(expect.data, sort_order.name)
                except Exception:
                    pass

            if isinstance(result.data, list):
                try:
                    result.data = jx.sort(result.data, sort_order.name)
                except Exception:
                    pass

    elif result.meta.format == "cube" and len(result.edges) == 1 and result.edges[0].name == "rownum" and not query.sort:
        result_data, result_header = cube2list(result.data)
        result_data = unwrap(jx.sort(result_data, result_header))
        result.data = list2cube(result_data, result_header)

        expect_data, expect_header = cube2list(expect.data)
        expect_data = jx.sort(expect_data, expect_header)
        expect.data = list2cube(expect_data, expect_header)

    # CONFIRM MATCH
    assertAlmostEqual(result, expect, places=6)
Exemple #9
0
 def query(self, _query):
     return self.meta.columns.query(
         QueryOp(
             set_default(
                 {
                     "from": self.meta.columns,
                     "sort": ["table", "name"]
                 }, _query.__data__())))
Exemple #10
0
    def _convert_query(self, query):
        output = QueryOp(None)
        output.select = self._convert_clause(query.select)
        output.where = self.convert(query.where)
        output["from"] = self._convert_from(query["from"])
        output.edges = self._convert_clause(query.edges)
        output.window = convert_list(self._convert_window, query.window)
        output.sort = self._convert_clause(query.sort)
        output.format = query.format

        return output
Exemple #11
0
    def send_queries(self, subtest, places=6):
        subtest = wrap(subtest)

        try:
            # EXECUTE QUERY
            num_expectations = 0
            for i, (k, v) in enumerate(subtest.items()):
                if k in ["expecting", "expecting_error"]:  # NO FORMAT REQUESTED (TO TEST DEFAULT FORMATS)
                    format = None
                elif k.startswith("expecting_"):  # WHAT FORMAT ARE WE REQUESTING
                    format = k[len("expecting_"):]
                else:
                    continue

                num_expectations += 1
                expected = v

                subtest.query.format = format
                subtest.query.meta.testing = (num_expectations == 1)  # MARK FIRST QUERY FOR TESTING SO FULL METADATA IS AVAILABLE BEFORE QUERY EXECUTION
                query = value2json(subtest.query).encode('utf8')
                # EXECUTE QUERY
                response = self.try_till_response(self.testing.query, data=query)

                if k == "expecting_error":
                    if response.status_code != 200:
                        message = response.content.decode('utf8')
                        if v in message:
                            Log.note("PASS {{name|quote}} (expected error)", name=subtest.name)
                            continue
                        else:
                            Log.error("expecting {{expecting}} not {{error}}", expecting=v, error=message)
                    else:
                        Log.error("expecting a failure")
                else:
                    if response.status_code != 200:
                        error(response)
                    result = json2value(response.all_content.decode('utf8'))

                container = jx_elasticsearch.new_instance(self._es_test_settings)
                query = QueryOp.wrap(subtest.query, container, container.namespace)
                if is_many(expected.data) and len(result.data) != len(expected.data):
                    Log.error(
                        "expecting data (len={{rlen}}) to have length of {{elen}}",
                        rlen=len(result.data),
                        elen=len(expected.data)
                    )

                compare_to_expected(query, result, expected, places)
                Log.note("PASS {{name|quote}} (format={{format}})", name=subtest.name, format=format)
            if num_expectations == 0:
                Log.error(
                    "Expecting test {{name|quote}} to have property named 'expecting_*' for testing the various format clauses",
                    name=subtest.name
                )
        except Exception as e:
            Log.error("Failed test {{name|quote}}", name=subtest.name, cause=e)
Exemple #12
0
    def query(self, query, stacked=False):
        """
        TRANSLATE JSON QUERY EXPRESSION ON SINGLE TABLE TO SQL QUERY
        """
        from jx_base.query import QueryOp

        query = QueryOp.wrap(query)

        sql, post = self._subquery(query, isolate=False, stacked=stacked)
        query.data = post(sql)
        return query.data
Exemple #13
0
    def query(self, query, stacked=False):
        """
        TRANSLATE JSON QUERY EXPRESSION ON SINGLE TABLE TO SQL QUERY
        """
        from jx_base.query import QueryOp

        query = QueryOp.wrap(query)

        sql, post = self._subquery(query, isolate=False, stacked=stacked)
        query.data = post(sql)
        return query.data
Exemple #14
0
    def query(self, query):
        query = QueryOp.wrap(query)
        short_list = self._filter(query.where)
        if query.sort:
            short_list = self._sort(query.sort)

        if isinstance(query.select, list):
            accessors = map(jx.get, query.select.value)

        if query.window:
            for w in query.window:
                window_list = self._filter(w.where)
def find_container(frum, schema=None):
    """
    :param frum:
    :param schema:
    :return:
    """
    if not _meta:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, text_type):
        if not container.config.default.settings:
            Log.error(
                "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
            )

        type_ = None
        if frum.startswith("meta."):
            if frum == "meta.columns":
                return _meta.singlton.meta.columns.denormalized()
            elif frum == "meta.tables":
                return _meta.singlton.meta.tables
            else:
                Log.error("{{name}} not a recognized table", name=frum)

        type_ = container.config.default.type
        fact_table_name = split_field(frum)[0]

        settings = set_default(
            {
                "index": fact_table_name,
                "name": frum,
                "exists": True,
            }, container.config.default.settings)
        settings.type = None
        return container.type2container[type_](settings)
    elif isinstance(
            frum,
            Mapping) and frum.type and container.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return container.type2container[frum.type](frum.settings)
    elif isinstance(frum, Mapping) and (frum["from"]
                                        or isinstance(frum["from"],
                                                      (list, set))):
        from jx_base.query import QueryOp
        return QueryOp.wrap(frum, namespace=schema)
    elif isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)
    else:
        return frum
Exemple #16
0
    def query(self, query):
        query = QueryOp.wrap(query)
        short_list = self._filter(query.where)
        if query.sort:
            short_list = self._sort(query.sort)

        if isinstance(query.select, list):
            accessors = map(jx.get, query.select.value)

        if query.window:
            for w in query.window:
                window_list = self._filter(w.where)
Exemple #17
0
    def send_queries(self, subtest, places=6):
        subtest = wrap(subtest)

        try:
            # EXECUTE QUERY
            num_expectations = 0
            for i, (k, v) in enumerate(subtest.items()):
                if k.startswith("expecting_"):  # WHAT FORMAT ARE WE REQUESTING
                    format = k[len("expecting_"):]
                elif k == "expecting":  # NO FORMAT REQUESTED (TO TEST DEFAULT FORMATS)
                    format = None
                else:
                    continue

                num_expectations += 1
                expected = v

                subtest.query.format = format
                subtest.query.meta.testing = (
                    num_expectations == 1
                )  # MARK FIRST QUERY FOR TESTING SO FULL METADATA IS AVAILABLE BEFORE QUERY EXECUTION
                query = unicode2utf8(value2json(subtest.query))
                # EXECUTE QUERY
                response = self.try_till_response(self.testing.query,
                                                  data=query)

                if response.status_code != 200:
                    error(response)
                result = json2value(utf82unicode(response.all_content))

                container = jx_elasticsearch.new_instance(
                    self._es_test_settings)
                query = QueryOp.wrap(subtest.query, container,
                                     container.namespace)
                compare_to_expected(query, result, expected, places)
                Log.note("PASS {{name|quote}} (format={{format}})",
                         name=subtest.name,
                         format=format)
            if num_expectations == 0:
                Log.error(
                    "Expecting test {{name|quote}} to have property named 'expecting_*' for testing the various format clauses",
                    name=subtest.name)
        except Exception as e:
            Log.error("Failed test {{name|quote}}", {"name": subtest.name}, e)
Exemple #18
0
    def new_instance(type, frum, schema=None):
        """
        Factory!
        """
        if not type2container:
            _delayed_imports()

        if isinstance(frum, Container):
            return frum
        elif isinstance(frum, _Cube):
            return frum
        elif isinstance(frum, _Query):
            return _run(frum)
        elif isinstance(frum, (list, set) + generator_types):
            return _ListContainer(frum)
        elif isinstance(frum, text_type):
            # USE DEFAULT STORAGE TO FIND Container
            if not config.default.settings:
                Log.error(
                    "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
                )

            settings = set_default(
                {
                    "index": join_field(split_field(frum)[:1:]),
                    "name": frum,
                }, config.default.settings)
            settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
            return type2container["elasticsearch"](settings)
        elif isinstance(frum, Mapping):
            frum = wrap(frum)
            if frum.type and type2container[frum.type]:
                return type2container[frum.type](frum.settings)
            elif frum["from"]:
                frum = copy(frum)
                frum["from"] = Container(frum["from"])
                return _Query.wrap(frum)
            else:
                Log.error("Do not know how to handle {{frum|json}}", frum=frum)
        else:
            Log.error("Do not know how to handle {{type}}",
                      type=frum.__class__.__name__)
Exemple #19
0
    def query(self, _query):
        try:
            query = QueryOp.wrap(_query,
                                 container=self,
                                 namespace=self.namespace)

            self.stats.record(query)

            for s in listwrap(query.select):
                if s.aggregate != None and not aggregates.get(s.aggregate):
                    Log.error(
                        "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                        name=s.name,
                        aggregate=s.aggregate)

            frum = query["from"]
            if is_op(frum, QueryOp):
                result = self.query(frum)
                q2 = query.copy()
                q2.frum = result
                return jx.run(q2)

            if is_bulk_agg(self.es, query):
                return es_bulkaggsop(self, frum, query)
            if is_bulk_set(self.es, query):
                return es_bulksetop(self, frum, query)

            query.limit = temper_limit(query.limit, query)

            if is_deepop(self.es, query):
                return es_deepop(self.es, query)
            if is_aggsop(self.es, query):
                return es_aggsop(self.es, frum, query)
            if is_setop(self.es, query):
                return es_setop(self.es, query)
            Log.error("Can not handle")
        except Exception as e:
            e = Except.wrap(e)
            if "Data too large, data for" in e:
                http.post(self.es.cluster.url / "_cache/clear")
                Log.error("Problem (Tried to clear Elasticsearch cache)", e)
            Log.error("problem", e)
Exemple #20
0
    def new_instance(type, frum, schema=None):
        """
        Factory!
        """
        if not type2container:
            _delayed_imports()

        if isinstance(frum, Container):
            return frum
        elif isinstance(frum, _Cube):
            return frum
        elif isinstance(frum, _Query):
            return _run(frum)
        elif is_many(frum):
            return _ListContainer(frum)
        elif is_text(frum):
            # USE DEFAULT STORAGE TO FIND Container
            if not config.default.settings:
                Log.error("expecting jx_base.container.config.default.settings to contain default elasticsearch connection info")

            settings = set_default(
                {
                    "index": join_field(split_field(frum)[:1:]),
                    "name": frum,
                },
                config.default.settings
            )
            settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
            return type2container["elasticsearch"](settings)
        elif is_data(frum):
            frum = wrap(frum)
            if frum.type and type2container[frum.type]:
                return type2container[frum.type](frum.settings)
            elif frum["from"]:
                frum = copy(frum)
                frum["from"] = Container(frum["from"])
                return _Query.wrap(frum)
            else:
                Log.error("Do not know how to handle {{frum|json}}", frum=frum)
        else:
            Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__)
Exemple #21
0
    def query(self, query):
        """
        :param query:  JSON Query Expression, SET `format="container"` TO MAKE NEW TABLE OF RESULT
        :return:
        """
        if not startswith_field(query['from'], self.sf.fact):
            Log.error("Expecting table, or some nested table")
        frum, query['from'] = query['from'], self
        table = self.sf.tables[relative_field(frum, self.sf.fact)]
        schema = table.schema
        query = QueryOp.wrap(query, table=table, schema=schema)
        new_table = "temp_" + unique_name()

        if query.format == "container":
            create_table = "CREATE TABLE " + quote_column(new_table) + " AS "
        else:
            create_table = ""

        if query.groupby and query.format != "cube":
            op, index_to_columns = self._groupby_op(query, frum)
            command = create_table + op
        elif query.groupby:
            query.edges, query.groupby = query.groupby, query.edges
            op, index_to_columns = self._edges_op(query, frum)
            command = create_table + op
            query.edges, query.groupby = query.groupby, query.edges
        elif query.edges or any(a != "none" for a in listwrap(query.select).aggregate):
            op, index_to_columns = self._edges_op(query, frum)
            command = create_table + op
        else:
            op = self._set_op(query, frum)
            return op

        result = self.db.query(command)

        if query.format == "container":
            output = QueryTable(new_table, db=self.db, uid=self.uid, exists=True)
        elif query.format == "cube" or (not query.format and query.edges):
            column_names = [None] * (max(c.push_column for c in index_to_columns.values()) + 1)
            for c in index_to_columns.values():
                column_names[c.push_column] = c.push_column_name

            if len(query.edges) == 0 and len(query.groupby) == 0:
                data = {n: Data() for n in column_names}
                for s in index_to_columns.values():
                    data[s.push_name][s.push_child] = unwrap(s.pull(result.data[0]))
                if isinstance(query.select, list):
                    select = [{"name": s.name} for s in query.select]
                else:
                    select = {"name": query.select.name}

                return Data(
                    data=unwrap(data),
                    select=select,
                    meta={"format": "cube"}
                )

            if not result.data:
                edges = []
                dims = []
                for i, e in enumerate(query.edges + query.groupby):
                    allowNulls = coalesce(e.allowNulls, True)

                    if e.domain.type == "set" and e.domain.partitions:
                        domain = SimpleSetDomain(partitions=e.domain.partitions.name)
                    elif e.domain.type == "range":
                        domain = e.domain
                    elif isinstance(e.value, TupleOp):
                        pulls = jx.sort([c for c in index_to_columns.values() if c.push_name == e.name],
                                        "push_child").pull
                        parts = [tuple(p(d) for p in pulls) for d in result.data]
                        domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
                    else:
                        domain = SimpleSetDomain(partitions=[])

                    dims.append(1 if allowNulls else 0)
                    edges.append(Data(
                        name=e.name,
                        allowNulls=allowNulls,
                        domain=domain
                    ))

                data = {}
                for si, s in enumerate(listwrap(query.select)):
                    if s.aggregate == "count":
                        data[s.name] = Matrix(dims=dims, zeros=0)
                    else:
                        data[s.name] = Matrix(dims=dims)

                if isinstance(query.select, list):
                    select = [{"name": s.name} for s in query.select]
                else:
                    select = {"name": query.select.name}

                return Data(
                    meta={"format": "cube"},
                    edges=edges,
                    select=select,
                    data={k: v.cube for k, v in data.items()}
                )

            columns = None

            edges = []
            dims = []
            for g in query.groupby:
                g.is_groupby = True

            for i, e in enumerate(query.edges + query.groupby):
                allowNulls = coalesce(e.allowNulls, True)

                if e.domain.type == "set" and e.domain.partitions:
                    domain = SimpleSetDomain(partitions=e.domain.partitions.name)
                elif e.domain.type == "range":
                    domain = e.domain
                elif e.domain.type == "time":
                    domain = wrap(mo_json.scrub(e.domain))
                elif e.domain.type == "duration":
                    domain = wrap(mo_json.scrub(e.domain))
                elif isinstance(e.value, TupleOp):
                    pulls = jx.sort([c for c in index_to_columns.values() if c.push_name == e.name], "push_child").pull
                    parts = [tuple(p(d) for p in pulls) for d in result.data]
                    domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
                else:
                    if not columns:
                        columns = zip(*result.data)
                    parts = set(columns[i])
                    if e.is_groupby and None in parts:
                        allowNulls = True
                    parts -= {None}

                    if query.sort[i].sort == -1:
                        domain = SimpleSetDomain(partitions=wrap(sorted(parts, reverse=True)))
                    else:
                        domain = SimpleSetDomain(partitions=jx.sort(parts))

                dims.append(len(domain.partitions) + (1 if allowNulls else 0))
                edges.append(Data(
                    name=e.name,
                    allowNulls=allowNulls,
                    domain=domain
                ))

            data_cubes = {}
            for si, s in enumerate(listwrap(query.select)):
                if s.aggregate == "count":
                    data_cubes[s.name] = Matrix(dims=dims, zeros=0)
                else:
                    data_cubes[s.name] = Matrix(dims=dims)

            r2c = index_to_coordinate(dims)  # WORKS BECAUSE THE DATABASE SORTED THE EDGES TO CONFORM
            for rownum, row in enumerate(result.data):
                coord = r2c(rownum)

                for i, s in enumerate(index_to_columns.values()):
                    if s.is_edge:
                        continue
                    if s.push_child == ".":
                        data_cubes[s.push_name][coord] = s.pull(row)
                    else:
                        data_cubes[s.push_name][coord][s.push_child] = s.pull(row)

            if query.select == None:
                select = Null
            elif isinstance(query.select, list):
                select = [{"name": s.name} for s in query.select]
            else:
                select = {"name": query.select.name}

            return Data(
                meta={"format": "cube"},
                edges=edges,
                select=select,
                data={k: v.cube for k, v in data_cubes.items()}
            )
        elif query.format == "table" or (not query.format and query.groupby):
            column_names = [None] * (max(c.push_column for c in index_to_columns.values()) + 1)
            for c in index_to_columns.values():
                column_names[c.push_column] = c.push_column_name
            data = []
            for d in result.data:
                row = [None for _ in column_names]
                for s in index_to_columns.values():
                    if s.push_child == ".":
                        row[s.push_column] = s.pull(d)
                    elif s.num_push_columns:
                        tuple_value = row[s.push_column]
                        if tuple_value == None:
                            tuple_value = row[s.push_column] = [None] * s.num_push_columns
                        tuple_value[s.push_child] = s.pull(d)
                    elif row[s.push_column] == None:
                        row[s.push_column] = Data()
                        row[s.push_column][s.push_child] = s.pull(d)
                    else:
                        row[s.push_column][s.push_child] = s.pull(d)
                data.append(tuple(unwrap(r) for r in row))

            output = Data(
                meta={"format": "table"},
                header=column_names,
                data=data
            )
        elif query.format == "list" or (not query.edges and not query.groupby):
            if not query.edges and not query.groupby and any(listwrap(query.select).aggregate):
                if isinstance(query.select, list):
                    data = Data()
                    for c in index_to_columns.values():
                        if c.push_child == ".":
                            if data[c.push_name] == None:
                                data[c.push_name] = c.pull(result.data[0])
                            elif isinstance(data[c.push_name], list):
                                data[c.push_name].append(c.pull(result.data[0]))
                            else:
                                data[c.push_name] = [data[c.push_name], c.pull(result.data[0])]
                        else:
                            data[c.push_name][c.push_child] = c.pull(result.data[0])

                    output = Data(
                        meta={"format": "value"},
                        data=data
                    )
                else:
                    data = Data()
                    for s in index_to_columns.values():
                        if not data[s.push_child]:
                            data[s.push_child] = s.pull(result.data[0])
                        else:
                            data[s.push_child] += [s.pull(result.data[0])]
                    output = Data(
                        meta={"format": "value"},
                        data=unwrap(data)
                    )
            else:
                data = []
                for rownum in result.data:
                    row = Data()
                    for c in index_to_columns.values():
                        if c.push_child == ".":
                            row[c.push_name] = c.pull(rownum)
                        elif c.num_push_columns:
                            tuple_value = row[c.push_name]
                            if not tuple_value:
                                tuple_value = row[c.push_name] = [None] * c.num_push_columns
                            tuple_value[c.push_child] = c.pull(rownum)
                        else:
                            row[c.push_name][c.push_child] = c.pull(rownum)

                    data.append(row)

                output = Data(
                    meta={"format": "list"},
                    data=data
                )
        else:
            Log.error("unknown format {{format}}", format=query.format)

        return output
Exemple #22
0
    def query_metadata(self, query):
        frum, query['from'] = query['from'], self
        schema = self.sf.tables["."].schema
        query = QueryOp.wrap(query, schema)
        columns = self.sf.columns
        where = query.where
        table_name = None
        column_name = None

        if query.edges or query.groupby:
            Log.error("Aggregates(groupby or edge) are not supported")

        if where.op == "eq" and where.lhs.var == "table":
            table_name = mo_json.json2value(where.rhs.json)
        elif where.op == "eq" and where.lhs.var == "name":
            column_name = mo_json.json2value(where.rhs.json)
        else:
            Log.error("Only simple filters are expected like: \"eq\" on table and column name")

        t = [i for i in columns[0].names.keys()]
        tables = [concat_field(self.sf.fact, i) for i in t]

        metadata = []
        if columns[-1].es_column != GUID:
            columns.append(Column(
                names={i: relative_field(GUID, i) for i in t},
                type="string",
                es_column=GUID,
                es_index=self.sf.fact,
                nested_path=["."]
            ))

        for tname, table in zip(t, tables):
            if table_name != None and table_name != table:
                continue

            for col in columns:
                cname, ctype = untyped_column(col.es_column)
                if column_name != None and column_name != cname:
                    continue

                metadata.append((table, col.names[tname], col.type, unwraplist(col.nested_path)))

        if query.format == "cube":
            num_rows = len(metadata)
            header = ["table", "name", "type", "nested_path"]
            temp_data = dict(zip(header, zip(*metadata)))
            return Data(
                meta={"format": "cube"},
                data=temp_data,
                edges=[{
                    "name": "rownum",
                    "domain": {
                        "type": "rownum",
                        "min": 0,
                        "max": num_rows,
                        "interval": 1
                    }
                }]
            )
        elif query.format == "table":
            header = ["table", "name", "type", "nested_path"]
            return Data(
                meta={"format": "table"},
                header=header,
                data=metadata
            )
        else:
            header = ["table", "name", "type", "nested_path"]
            return Data(
                meta={"format": "list"},
                data=[dict(zip(header, r)) for r in metadata]
            )
Exemple #23
0
    def _convert_query(self, query):
        # if not isinstance(query["from"], Container):
        #     Log.error('Expecting from clause to be a Container')
        query = wrap(query)

        output = QueryOp(None)
        output["from"] = self._convert_from(query["from"])

        output.format = query.format

        if query.select:
            output.select = convert_list(self._convert_select, query.select)
        else:
            if query.edges or query.groupby:
                output.select = {"name": "count", "value": ".", "aggregate": "count", "default": 0}
            else:
                output.select = {"name": "__all__", "value": "*", "aggregate": "none"}

        if query.groupby and query.edges:
            Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
        elif query.edges:
            output.edges = convert_list(self._convert_edge, query.edges)
            output.groupby = None
        elif query.groupby:
            output.edges = None
            output.groupby = convert_list(self._convert_group, query.groupby)
        else:
            output.edges = []
            output.groupby = None

        output.where = self.convert(query.where)
        output.window = convert_list(self._convert_window, query.window)
        output.sort = self._convert_sort(query.sort)

        output.limit = coalesce(query.limit, DEFAULT_LIMIT)
        if not mo_math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
        # THE from SOURCE IS.
        vars = get_all_vars(output, exclude_where=True)  # WE WILL EXCLUDE where VARIABLES
        for c in query.columns:
            if c.name in vars and len(c.nested_path) != 1:
                Log.error("This query, with variable {{var_name}} is too deep", var_name=c.name)

        output.having = convert_list(self._convert_having, query.having)

        return output
Exemple #24
0
    def _convert_query(self, query):
        # if not isinstance(query["from"], Container):
        #     Log.error('Expecting from clause to be a Container')
        query = wrap(query)

        output = QueryOp("from", None)
        output["from"] = self._convert_from(query["from"])

        output.format = query.format

        if query.select:
            output.select = convert_list(self._convert_select, query.select)
        else:
            if query.edges or query.groupby:
                output.select = {
                    "name": "count",
                    "value": ".",
                    "aggregate": "count",
                    "default": 0
                }
            else:
                output.select = {
                    "name": "__all__",
                    "value": "*",
                    "aggregate": "none"
                }

        if query.groupby and query.edges:
            Log.error(
                "You can not use both the `groupby` and `edges` clauses in the same query!"
            )
        elif query.edges:
            output.edges = convert_list(self._convert_edge, query.edges)
            output.groupby = None
        elif query.groupby:
            output.edges = None
            output.groupby = convert_list(self._convert_group, query.groupby)
        else:
            output.edges = []
            output.groupby = None

        output.where = self.convert(query.where)
        output.window = convert_list(self._convert_window, query.window)
        output.sort = self._convert_sort(query.sort)

        output.limit = coalesce(query.limit, DEFAULT_LIMIT)
        if not Math.is_integer(output.limit) or output.limit < 0:
            Log.error("Expecting limit >= 0")

        output.isLean = query.isLean

        # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
        # THE from SOURCE IS.
        vars = get_all_vars(
            output, exclude_where=True)  # WE WILL EXCLUDE where VARIABLES
        for c in query.columns:
            if c.name in vars and len(c.nested_path) != 1:
                Log.error("This query, with variable {{var_name}} is too deep",
                          var_name=c.name)

        output.having = convert_list(self._convert_having, query.having)

        return output
Exemple #25
0
def run(query, frum=Null):
    """
    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer
    """
    if frum == None:
        query_op = QueryOp.wrap(query)
        frum = query_op.frum
    else:
        query_op = QueryOp.wrap(query, frum.schema)

    if frum == None:
        from jx_python.containers.list_usingPythonList import DUAL
        return DUAL.query(query_op)
    elif isinstance(frum, Container):
        return frum.query(query_op)
    elif isinstance(frum, (list, set) + generator_types):
        frum = wrap(list(frum))
    elif isinstance(frum, Cube):
        if is_aggs(query_op):
            return cube_aggs(frum, query_op)
    elif isinstance(frum, QueryOp):
        frum = run(frum)
    else:
        Log.error("Do not know how to handle {{type}}",
                  type=frum.__class__.__name__)

    if is_aggs(query_op):
        frum = list_aggs(frum, query_op)
    else:  # SETOP
        # try:
        #     if query.filter != None or query.esfilter != None:
        #         Log.error("use 'where' clause")
        # except AttributeError:
        #     pass

        if query_op.where is not TRUE:
            frum = filter(frum, query_op.where)

        if query_op.sort:
            frum = sort(frum, query_op.sort, already_normalized=True)

        if query_op.select:
            frum = select(frum, query_op.select)

    if query_op.window:
        if isinstance(frum, Cube):
            frum = list(frum.values())

        for param in query_op.window:
            window(frum, param)

    # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query_op.format == "cube":
        frum = convert.list2cube(frum)
    elif query_op.format == "table":
        frum = convert.list2table(frum)
        frum.meta.format = "table"
    else:
        frum = wrap({"meta": {"format": "list"}, "data": frum})

    return frum