def run(query, container=Null):
    """
    Run `query` against `container` and return the packaged result.

    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer

    :param query: query expression; normalized with `QueryOp.wrap`
    :param container: data to query; when it compares equal to None the
        container is taken from `wrap(query)['from']`
    :return: whatever the delegated container/cube returns, or the locally
        processed data packaged as "cube", "table" or "list" format
    """
    # NOTE: `== None` (not `is None`) is kept on purpose; the default is
    # `Null`, which presumably compares equal to None - confirm before changing
    if container == None:
        container = wrap(query)['from']
        query_op = QueryOp.wrap(query, container=container, namespace=container.schema)
    else:
        query_op = QueryOp.wrap(query, container, container.namespace)

    # DELEGATE WHEN SOMETHING ELSE KNOWS HOW TO ANSWER THE QUERY
    if container == None:
        from jx_python.containers.list_usingPythonList import DUAL

        return DUAL.query(query_op)
    if isinstance(container, Container):
        return container.query(query_op)

    # OTHERWISE NORMALIZE THE CONTAINER FOR LOCAL PROCESSING
    if isinstance(container, (list, set) + generator_types):
        container = wrap(list(container))
    elif isinstance(container, Cube):
        if is_aggs(query_op):
            return cube_aggs(container, query_op)
    elif isinstance(container, QueryOp):
        container = run(container)
    else:
        Log.error("Do not know how to handle {{type}}", type=container.__class__.__name__)

    if is_aggs(query_op):
        container = list_aggs(container, query_op)
    else:  # SETOP
        if query_op.where is not TRUE:
            container = filter(container, query_op.where)
        if query_op.sort:
            container = sort(container, query_op.sort, already_normalized=True)
        if query_op.select:
            container = select(container, query_op.select)

    if query_op.window:
        if isinstance(container, Cube):
            container = list(container.values())
        for window_param in query_op.window:
            window(container, window_param)

    # AT THIS POINT container IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query_op.format == "cube":
        return convert.list2cube(container)
    if query_op.format == "table":
        container = convert.list2table(container)
        container.meta.format = "table"
        return container
    return wrap({
        "meta": {"format": "list"},
        "data": container
    })
def run(query, container=Null):
    """
    Evaluate a query expression, delegating to the container when possible.

    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer

    :param query: query expression, normalized via `QueryOp.wrap`
    :param container: optional data source; falls back to
        `wrap(query)['from']` when it compares equal to None
    :return: result of the delegated `query()` / `cube_aggs()` call, or the
        locally computed rows packaged according to `query_op.format`
    """
    # `== None` IS DELIBERATE: the default argument is `Null`, which
    # presumably equals None without being None - confirm before changing
    no_container_given = container == None
    if no_container_given:
        container = wrap(query)['from']
        query_op = QueryOp.wrap(query, container=container, namespace=container.schema)
    else:
        query_op = QueryOp.wrap(query, container, container.namespace)

    if container == None:
        from jx_python.containers.list_usingPythonList import DUAL
        return DUAL.query(query_op)
    elif isinstance(container, Container):
        return container.query(query_op)
    elif isinstance(container, (list, set) + generator_types):
        container = wrap(list(container))
    elif isinstance(container, Cube):
        # A NON-AGGREGATE QUERY ON A Cube FALLS THROUGH TO THE SET OPERATIONS
        if is_aggs(query_op):
            return cube_aggs(container, query_op)
    elif isinstance(container, QueryOp):
        container = run(container)
    else:
        Log.error("Do not know how to handle {{type}}", type=container.__class__.__name__)

    if not is_aggs(query_op):
        # SETOP
        if query_op.where is not TRUE:
            container = filter(container, query_op.where)
        if query_op.sort:
            container = sort(container, query_op.sort, already_normalized=True)
        if query_op.select:
            container = select(container, query_op.select)
    else:
        container = list_aggs(container, query_op)

    if query_op.window:
        if isinstance(container, Cube):
            container = list(container.values())
        for spec in query_op.window:
            window(container, spec)

    # AT THIS POINT container IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query_op.format == "cube":
        container = convert.list2cube(container)
    elif query_op.format == "table":
        container = convert.list2table(container)
        container.meta.format = "table"
    else:
        container = wrap({"meta": {"format": "list"}, "data": container})

    return container
def find_container(frum, after):
    """
    Resolve a `from` clause to the object that can answer queries on it.

    :param frum: table name (text), a data object with `type`/`settings`,
        a nested query (data with a `from`), a plain container, or anything
        else (returned unchanged)
    :param after: passed through to `namespace.get_columns(frum, after=after)`
    :return: a cached or newly built container, a metadata table,
        a `QueryOp`, a `ListContainer`, or `frum` itself
    """
    global namespace

    if not namespace:
        default_settings = container.config.default.settings
        if not default_settings:
            Log.error(
                "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
            )
        namespace = ElasticsearchMetadata(container.config.default.settings)

    if not frum:
        Log.error("expecting json query expression with from clause")

    # FORCE A RELOAD
    namespace.get_columns(frum, after=after)

    if is_text(frum):
        if frum in container_cache:
            return container_cache[frum]

        path = split_field(frum)
        if path[0] != "meta":
            fact_table_name = path[0]
        elif path[1] == "columns":
            return namespace.meta.columns.denormalized()
        elif path[1] == "tables":
            return namespace.meta.tables
        else:
            fact_table_name = join_field(path[:2])

        type_ = container.config.default.type
        overrides = {
            "alias": fact_table_name,
            "name": frum,
            "exists": True
        }
        settings = set_default(overrides, container.config.default.settings)
        settings.type = None

        # REMEMBER THE CONTAINER SO THE NEXT LOOKUP BY NAME IS A CACHE HIT
        built = container.type2container[type_](settings)
        container_cache[frum] = built
        return built

    if is_data(frum) and frum.type and container.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        # NOTE(review): this guard looks unreachable, the condition above
        # already requires a truthy frum.type
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return container.type2container[frum.type](frum.settings)

    if is_data(frum) and (frum["from"] or is_container(frum["from"])):
        from jx_base.query import QueryOp

        return QueryOp.wrap(frum)

    if is_container(frum):
        return ListContainer("test_list", frum)

    return frum
def query(self, _query):
    """
    Normalize `_query` and dispatch it to the matching ES operation.

    :param _query: query expression; wrapped with this object as container
        and `self.namespace` as namespace
    :return: result of `jx.run` (nested query), `es_deepop`, `es_aggsop`
        or `es_setop`
    :raises: via `Log.error` when an aggregate is unrecognized, when no
        operation applies, or when anything inside fails (re-reported as
        "problem")
    """
    try:
        query = QueryOp.wrap(_query, container=self, namespace=self.namespace)

        # `!= None` IS KEPT AS-IS; these values may be Null-like, not just None
        for clause in listwrap(query.select):
            if s_is_unknown := False:
                pass
            if clause.aggregate != None and not aggregates.get(clause.aggregate):
                Log.error(
                    "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                    name=clause.name,
                    aggregate=clause.aggregate
                )

        frum = query["from"]
        if isinstance(frum, QueryOp):
            # RUN THE INNER QUERY FIRST, THEN RUN THE OUTER QUERY OVER ITS RESULT
            outer = query.copy()
            outer.frum = self.query(frum)
            return jx.run(outer)

        es = self.es
        if is_deepop(es, query):
            return es_deepop(es, query)
        if is_aggsop(es, query):
            return es_aggsop(es, frum, query)
        if is_setop(es, query):
            return es_setop(es, query)
        Log.error("Can not handle")
    except Exception as cause:
        cause = Except.wrap(cause)
        if "Data too large, data for" in cause:
            http.post(self.es.cluster.url / "_cache/clear")
            Log.error("Problem (Tried to clear Elasticsearch cache)", cause)
        Log.error("problem", cause)
def query(self, _query):
    """
    Normalize `_query`, convert it through every namespace, and dispatch it.

    :param _query: query expression; wrapped with `table=self`, then passed
        through `n.convert()` for each entry of `self.namespaces`
    :return: result of `jx.run` (nested query), `es_deepop`, `es_aggsop`
        or `es_setop`
    :raises: via `Log.error` when an aggregate is unrecognized, when no
        operation applies, or when anything inside fails (re-reported as
        "problem")
    """
    try:
        query = QueryOp.wrap(_query, table=self)
        for converter in self.namespaces:
            query = converter.convert(query)

        for clause in listwrap(query.select):
            known = aggregates.get(clause.aggregate)
            if not known:
                Log.error(
                    "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                    name=clause.name,
                    aggregate=clause.aggregate
                )

        frum = query["from"]
        if isinstance(frum, QueryOp):
            # ANSWER THE INNER QUERY, THEN RUN THE OUTER QUERY OVER THAT RESULT
            inner_result = self.query(frum)
            outer = query.copy()
            outer.frum = inner_result
            return jx.run(outer)

        if is_deepop(self._es, query):
            return es_deepop(self._es, query)
        elif is_aggsop(self._es, query):
            return es_aggsop(self._es, frum, query)
        elif is_setop(self._es, query):
            return es_setop(self._es, query)
        Log.error("Can not handle")
    except Exception as cause:
        cause = Except.wrap(cause)
        if "Data too large, data for" in cause:
            http.post(self._es.cluster.path+"/_cache/clear")
            Log.error("Problem (Tried to clear Elasticsearch cache)", cause)
        Log.error("problem", cause)
def compare_to_expected(query, result, expect, places=6):
    """
    Put `result` and `expect` into a comparable order, then assert they match.

    When the query has no explicit sort, row order is not meaningful, so both
    sides are sorted the same way before comparison. `result` and `expect`
    are modified in place.

    :param query: the query that produced `result`
    :param result: response to verify; `result.meta.format` selects the
        normalization ("table", "list", or single-edge "rownum" cube)
    :param expect: the expected response
    :param places: forwarded to `assertAlmostEqual` as `places`; defaults to
        6, the value that used to be hard-coded, so three-argument callers
        behave as before
    """
    query = wrap(query)
    expect = wrap(expect)

    if result.meta.format == "table":
        assertAlmostEqual(set(result.header), set(expect.header))

        # MAP FROM expected COLUMN TO result COLUMN
        # (result column indexes, listed in the order of the expected header)
        mapping = [
            result_index
            for (_, expect_name), (result_index, result_name) in itertools.product(
                enumerate(expect.header), enumerate(result.header)
            )
            if expect_name == result_name
        ]
        result.header = [result.header[m] for m in mapping]

        if result.data:
            # TRANSPOSE TO COLUMNS, REORDER THEM, AND TRANSPOSE BACK TO ROWS
            columns = list(zip(*unwrap(result.data)))
            result.data = zip(*[columns[m] for m in mapping])

        if not query.sort:
            sort_table(result)
            sort_table(expect)
    elif result.meta.format == "list":
        if query["from"].startswith("meta."):
            pass
        else:
            query = QueryOp.wrap(query, query.frum, query.schema)

        if not query.sort:
            try:
                # result.data MAY BE A LIST OF VALUES, NOT OBJECTS
                data_columns = jx.sort(
                    set(jx.get_columns(result.data, leaves=True))
                    | set(jx.get_columns(expect.data, leaves=True)),
                    "name"
                )
            except Exception as _:
                data_columns = [{"name": "."}]

            sort_order = listwrap(coalesce(query.edges, query.groupby)) + data_columns

            # SORTING IS BEST-EFFORT: if it fails the data is compared unsorted
            if isinstance(expect.data, list):
                try:
                    expect.data = jx.sort(expect.data, sort_order.name)
                except Exception:
                    pass

            if isinstance(result.data, list):
                try:
                    result.data = jx.sort(result.data, sort_order.name)
                except Exception:
                    pass
    elif result.meta.format == "cube" and len(result.edges) == 1 and result.edges[0].name == "rownum" and not query.sort:
        result_data, result_header = cube2list(result.data)
        result_data = unwrap(jx.sort(result_data, result_header))
        result.data = list2cube(result_data, result_header)

        expect_data, expect_header = cube2list(expect.data)
        expect_data = jx.sort(expect_data, expect_header)
        expect.data = list2cube(expect_data, expect_header)

    # CONFIRM MATCH
    assertAlmostEqual(result, expect, places=places)
def send_queries(self, subtest, places=6):
    """
    Send `subtest.query` once per `expecting*` property and verify each reply.

    :param subtest: test description; `expecting` and `expecting_error` use no
        explicit format, `expecting_<format>` requests that format
    :param places: passed through to `compare_to_expected`
    :raises: via `Log.error` ("Failed test ...") when any step fails, or when
        the subtest has no `expecting*` property at all
    """
    subtest = wrap(subtest)

    try:
        # EXECUTE QUERY
        num_expectations = 0
        for key, expected in subtest.items():
            if key in ("expecting", "expecting_error"):
                # NO FORMAT REQUESTED (TO TEST DEFAULT FORMATS)
                fmt = None
            elif key.startswith("expecting_"):
                # WHAT FORMAT ARE WE REQUESTING
                fmt = key[len("expecting_"):]
            else:
                continue

            num_expectations += 1

            subtest.query.format = fmt
            # MARK FIRST QUERY FOR TESTING SO FULL METADATA IS AVAILABLE BEFORE QUERY EXECUTION
            subtest.query.meta.testing = (num_expectations == 1)
            payload = value2json(subtest.query).encode('utf8')

            # EXECUTE QUERY
            response = self.try_till_response(self.testing.query, data=payload)

            if key == "expecting_error":
                if response.status_code == 200:
                    Log.error("expecting a failure")
                else:
                    message = response.content.decode('utf8')
                    if expected in message:
                        Log.note("PASS {{name|quote}} (expected error)", name=subtest.name)
                        continue
                    else:
                        Log.error("expecting {{expecting}} not {{error}}", expecting=expected, error=message)
            else:
                if response.status_code != 200:
                    error(response)
                result = json2value(response.all_content.decode('utf8'))

            container = jx_elasticsearch.new_instance(self._es_test_settings)
            query = QueryOp.wrap(subtest.query, container, container.namespace)
            if is_many(expected.data) and len(result.data) != len(expected.data):
                Log.error(
                    "expecting data (len={{rlen}}) to have length of {{elen}}",
                    rlen=len(result.data),
                    elen=len(expected.data)
                )

            compare_to_expected(query, result, expected, places)
            Log.note("PASS {{name|quote}} (format={{format}})", name=subtest.name, format=fmt)

        if num_expectations == 0:
            Log.error(
                "Expecting test {{name|quote}} to have property named 'expecting_*' for testing the various format clauses",
                name=subtest.name
            )
    except Exception as cause:
        Log.error("Failed test {{name|quote}}", name=subtest.name, cause=cause)
def query(self, query, stacked=False):
    """
    TRANSLATE JSON QUERY EXPRESSION ON SINGLE TABLE TO SQL QUERY

    :param query: JSON query expression; normalized with `QueryOp.wrap`
    :param stacked: passed through to `self._subquery`
    :return: `query.data`, after it has been set to the post-processed
        result of the generated SQL
    """
    from jx_base.query import QueryOp

    query = QueryOp.wrap(query)
    # _subquery RETURNS THE SQL PLUS THE FUNCTION THAT TURNS IT INTO DATA
    statement, post_process = self._subquery(query, isolate=False, stacked=stacked)
    query.data = post_process(statement)
    return query.data
def query(self, query):
    """
    Apply the filter, sort and window filters of `query` to this store.

    NOTE(review): every intermediate value below is computed and then
    dropped, and nothing is returned (the result is None); this looks like an
    unfinished implementation - confirm before relying on it.

    :param query: query expression; normalized with `QueryOp.wrap`
    """
    query = QueryOp.wrap(query)

    matching = self._filter(query.where)
    if query.sort:
        matching = self._sort(query.sort)

    if isinstance(query.select, list):
        getters = map(jx.get, query.select.value)

    if query.window:
        for window_spec in query.window:
            windowed = self._filter(window_spec.where)
def find_container(frum, schema=None):
    """
    Resolve a `from` clause to the object that can answer queries on it.

    :param frum: table name (text), a mapping with `type`/`settings`,
        a nested query (mapping with a `from`), a list/set, or anything else
        (returned unchanged, after `wrap`)
    :param schema: used as `namespace` when `frum` is a nested query
    :return: a container, a metadata table, a `QueryOp`, a `_ListContainer`,
        or the wrapped `frum`
    """
    if not _meta:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, text_type):
        if not container.config.default.settings:
            Log.error(
                "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
            )

        if frum.startswith("meta."):
            if frum == "meta.columns":
                return _meta.singlton.meta.columns.denormalized()
            if frum == "meta.tables":
                return _meta.singlton.meta.tables
            Log.error("{{name}} not a recognized table", name=frum)

        type_ = container.config.default.type
        fact_table_name = split_field(frum)[0]

        overrides = {
            "index": fact_table_name,
            "name": frum,
            "exists": True,
        }
        settings = set_default(overrides, container.config.default.settings)
        settings.type = None
        return container.type2container[type_](settings)

    if isinstance(frum, Mapping) and frum.type and container.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        # NOTE(review): this guard looks unreachable, the condition above
        # already requires a truthy frum.type
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return container.type2container[frum.type](frum.settings)

    if isinstance(frum, Mapping) and (frum["from"] or isinstance(frum["from"], (list, set))):
        from jx_base.query import QueryOp

        return QueryOp.wrap(frum, namespace=schema)

    if isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)

    return frum
def send_queries(self, subtest, places=6):
    """
    Send `subtest.query` once per `expecting*` property and verify each reply.

    :param subtest: test description; `expecting` uses no explicit format,
        `expecting_<format>` requests that format
    :param places: passed through to `compare_to_expected`
    :raises: via `Log.error` ("Failed test ...") when any step fails, or when
        the subtest has no `expecting*` property at all
    """
    subtest = wrap(subtest)

    try:
        # EXECUTE QUERY
        num_expectations = 0
        for key, expected in subtest.items():
            if key == "expecting":
                # NO FORMAT REQUESTED (TO TEST DEFAULT FORMATS)
                fmt = None
            elif key.startswith("expecting_"):
                # WHAT FORMAT ARE WE REQUESTING
                fmt = key[len("expecting_"):]
            else:
                continue

            num_expectations += 1

            subtest.query.format = fmt
            # MARK FIRST QUERY FOR TESTING SO FULL METADATA IS AVAILABLE BEFORE QUERY EXECUTION
            subtest.query.meta.testing = (num_expectations == 1)
            payload = unicode2utf8(value2json(subtest.query))

            # EXECUTE QUERY
            response = self.try_till_response(self.testing.query, data=payload)
            if response.status_code != 200:
                error(response)
            result = json2value(utf82unicode(response.all_content))

            container = jx_elasticsearch.new_instance(self._es_test_settings)
            query = QueryOp.wrap(subtest.query, container, container.namespace)
            compare_to_expected(query, result, expected, places)
            Log.note(
                "PASS {{name|quote}} (format={{format}})",
                name=subtest.name,
                format=fmt
            )

        if num_expectations == 0:
            Log.error(
                "Expecting test {{name|quote}} to have property named 'expecting_*' for testing the various format clauses",
                name=subtest.name
            )
    except Exception as cause:
        Log.error("Failed test {{name|quote}}", {"name": subtest.name}, cause)
def new_instance(type, frum, schema=None):
    """
    Factory! Build (or pass through) the container described by `frum`.

    :param type: not used by this body
    :param frum: an existing Container/_Cube (returned as-is), a _Query
        (executed with `_run`), a list/set/generator, a table name, or a
        mapping with either `type`/`settings` or a `from` clause
    :param schema: not used by this body
    :return: a container, the result of running a query, or a `_Query`
    :raises: via `Log.error` when `frum` cannot be handled
    """
    if not type2container:
        _delayed_imports()

    # ALREADY USABLE
    if isinstance(frum, Container):
        return frum
    if isinstance(frum, _Cube):
        return frum

    if isinstance(frum, _Query):
        return _run(frum)

    if isinstance(frum, (list, set) + generator_types):
        return _ListContainer(frum)

    if isinstance(frum, text_type):
        # USE DEFAULT STORAGE TO FIND Container
        if not config.default.settings:
            Log.error(
                "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
            )

        overrides = {
            "index": join_field(split_field(frum)[:1:]),
            "name": frum,
        }
        settings = set_default(overrides, config.default.settings)
        # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
        settings.type = None
        return type2container["elasticsearch"](settings)

    if not isinstance(frum, Mapping):
        Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__)
    else:
        frum = wrap(frum)
        if frum.type and type2container[frum.type]:
            return type2container[frum.type](frum.settings)
        elif frum["from"]:
            # WORK ON A COPY SO THE CALLER'S QUERY IS NOT MODIFIED
            frum = copy(frum)
            frum["from"] = Container(frum["from"])
            return _Query.wrap(frum)
        else:
            Log.error("Do not know how to handle {{frum|json}}", frum=frum)
def query(self, _query):
    """
    Normalize `_query`, record it in the stats, and dispatch it.

    Bulk operations are tried first; after that the limit is adjusted with
    `temper_limit` and the deep/aggs/set operations are tried in that order.

    :param _query: query expression; wrapped with this object as container
        and `self.namespace` as namespace
    :return: result of `jx.run` (nested query) or of the first ES operation
        that applies
    :raises: via `Log.error` when an aggregate is unrecognized, when no
        operation applies, or when anything inside fails (re-reported as
        "problem")
    """
    try:
        query = QueryOp.wrap(_query, container=self, namespace=self.namespace)
        self.stats.record(query)

        # `!= None` IS KEPT AS-IS; these values may be Null-like, not just None
        for clause in listwrap(query.select):
            if clause.aggregate != None and not aggregates.get(clause.aggregate):
                Log.error(
                    "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                    name=clause.name,
                    aggregate=clause.aggregate
                )

        frum = query["from"]
        if is_op(frum, QueryOp):
            # ANSWER THE INNER QUERY, THEN RUN THE OUTER QUERY OVER THAT RESULT
            outer = query.copy()
            outer.frum = self.query(frum)
            return jx.run(outer)

        if is_bulk_agg(self.es, query):
            return es_bulkaggsop(self, frum, query)
        if is_bulk_set(self.es, query):
            return es_bulksetop(self, frum, query)

        query.limit = temper_limit(query.limit, query)

        if is_deepop(self.es, query):
            return es_deepop(self.es, query)
        elif is_aggsop(self.es, query):
            return es_aggsop(self.es, frum, query)
        elif is_setop(self.es, query):
            return es_setop(self.es, query)
        Log.error("Can not handle")
    except Exception as cause:
        cause = Except.wrap(cause)
        if "Data too large, data for" in cause:
            http.post(self.es.cluster.url / "_cache/clear")
            Log.error("Problem (Tried to clear Elasticsearch cache)", cause)
        Log.error("problem", cause)
def new_instance(type, frum, schema=None):
    """
    Factory! Build (or pass through) the container described by `frum`.

    :param type: not used by this body
    :param frum: an existing Container/_Cube (returned as-is), a _Query
        (executed with `_run`), a many-valued collection, a table name, or
        data with either `type`/`settings` or a `from` clause
    :param schema: not used by this body
    :return: a container, the result of running a query, or a `_Query`
    :raises: via `Log.error` when `frum` cannot be handled
    """
    if not type2container:
        _delayed_imports()

    if isinstance(frum, Container):
        return frum
    if isinstance(frum, _Cube):
        return frum
    if isinstance(frum, _Query):
        return _run(frum)
    if is_many(frum):
        return _ListContainer(frum)

    if is_text(frum):
        # USE DEFAULT STORAGE TO FIND Container
        if not config.default.settings:
            Log.error("expecting jx_base.container.config.default.settings to contain default elasticsearch connection info")

        settings = set_default(
            {"index": join_field(split_field(frum)[:1:]), "name": frum},
            config.default.settings
        )
        # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
        settings.type = None
        make_container = type2container["elasticsearch"]
        return make_container(settings)

    if is_data(frum):
        frum = wrap(frum)
        if frum.type and type2container[frum.type]:
            return type2container[frum.type](frum.settings)
        if frum["from"]:
            # WORK ON A COPY SO THE CALLER'S QUERY IS NOT MODIFIED
            frum = copy(frum)
            frum["from"] = Container(frum["from"])
            return _Query.wrap(frum)
        Log.error("Do not know how to handle {{frum|json}}", frum=frum)
    else:
        Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__)
def run(query, frum=Null):
    """
    Run `query` against `frum` and return the packaged result.

    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer

    :param query: query expression; normalized with `QueryOp.wrap`
    :param frum: data to query; when it compares equal to None the source is
        taken from the normalized query (`query_op.frum`)
    :return: whatever the delegated container/cube returns, or the locally
        processed data packaged as "cube", "table" or "list" format
    """
    # NOTE: `== None` (not `is None`) is kept on purpose; the default is
    # `Null`, which presumably compares equal to None - confirm before changing
    if frum == None:
        query_op = QueryOp.wrap(query)
        frum = query_op.frum
    else:
        query_op = QueryOp.wrap(query, frum.schema)

    # DELEGATE WHEN SOMETHING ELSE KNOWS HOW TO ANSWER THE QUERY
    if frum == None:
        from jx_python.containers.list_usingPythonList import DUAL

        return DUAL.query(query_op)
    if isinstance(frum, Container):
        return frum.query(query_op)

    # OTHERWISE NORMALIZE THE SOURCE FOR LOCAL PROCESSING
    if isinstance(frum, (list, set) + generator_types):
        frum = wrap(list(frum))
    elif isinstance(frum, Cube):
        if is_aggs(query_op):
            return cube_aggs(frum, query_op)
    elif isinstance(frum, QueryOp):
        frum = run(frum)
    else:
        Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__)

    if is_aggs(query_op):
        frum = list_aggs(frum, query_op)
    else:  # SETOP
        if query_op.where is not TRUE:
            frum = filter(frum, query_op.where)
        if query_op.sort:
            frum = sort(frum, query_op.sort, already_normalized=True)
        if query_op.select:
            frum = select(frum, query_op.select)

    if query_op.window:
        if isinstance(frum, Cube):
            frum = list(frum.values())
        for window_param in query_op.window:
            window(frum, window_param)

    # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query_op.format == "cube":
        return convert.list2cube(frum)
    if query_op.format == "table":
        frum = convert.list2table(frum)
        frum.meta.format = "table"
        return frum
    return wrap({"meta": {"format": "list"}, "data": frum})
def query(self, query):
    """
    Translate a JSON query expression into SQL, run it, and shape the rows
    into the requested format.

    :param query: JSON Query Expression, SET `format="container"` TO MAKE NEW TABLE OF RESULT
    :return: for plain set operations, whatever `self._set_op` returns;
        otherwise a `QueryTable` (format "container") or a `Data` holding
        cube, table, list or single-value output
    """
    if not startswith_field(query['from'], self.sf.fact):
        Log.error("Expecting table, or some nested table")
    # remember the requested table name, then point the query at this object
    frum, query['from'] = query['from'], self
    table = self.sf.tables[relative_field(frum, self.sf.fact)]
    schema = table.schema
    query = QueryOp.wrap(query, table=table, schema=schema)
    new_table = "temp_" + unique_name()

    # for format="container" the SELECT is materialized as a new table
    if query.format == "container":
        create_table = "CREATE TABLE " + quote_column(new_table) + " AS "
    else:
        create_table = ""

    # pick the SQL generator; plain set operations return immediately
    if query.groupby and query.format != "cube":
        op, index_to_columns = self._groupby_op(query, frum)
        command = create_table + op
    elif query.groupby:
        # groupby requested as a cube: swap groupby into edges for
        # _edges_op, then swap back afterwards
        query.edges, query.groupby = query.groupby, query.edges
        op, index_to_columns = self._edges_op(query, frum)
        command = create_table + op
        query.edges, query.groupby = query.groupby, query.edges
    elif query.edges or any(a != "none" for a in listwrap(query.select).aggregate):
        op, index_to_columns = self._edges_op(query, frum)
        command = create_table + op
    else:
        op = self._set_op(query, frum)
        return op

    result = self.db.query(command)

    if query.format == "container":
        output = QueryTable(new_table, db=self.db, uid=self.uid, exists=True)
    elif query.format == "cube" or (not query.format and query.edges):
        # output column names, positioned by their push_column index
        column_names = [None] * (max(c.push_column for c in index_to_columns.values()) + 1)
        for c in index_to_columns.values():
            column_names[c.push_column] = c.push_column_name

        # no edges and no groupby: a single row of aggregates
        if len(query.edges) == 0 and len(query.groupby) == 0:
            data = {n: Data() for n in column_names}
            for s in index_to_columns.values():
                data[s.push_name][s.push_child] = unwrap(s.pull(result.data[0]))
            if isinstance(query.select, list):
                select = [{"name": s.name} for s in query.select]
            else:
                select = {"name": query.select.name}

            return Data(
                data=unwrap(data),
                select=select,
                meta={"format": "cube"}
            )

        # no rows came back: build an empty cube with the declared domains
        if not result.data:
            edges = []
            dims = []
            for i, e in enumerate(query.edges + query.groupby):
                allowNulls = coalesce(e.allowNulls, True)

                if e.domain.type == "set" and e.domain.partitions:
                    domain = SimpleSetDomain(partitions=e.domain.partitions.name)
                elif e.domain.type == "range":
                    domain = e.domain
                elif isinstance(e.value, TupleOp):
                    pulls = jx.sort([c for c in index_to_columns.values() if c.push_name == e.name], "push_child").pull
                    parts = [tuple(p(d) for p in pulls) for d in result.data]
                    domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
                else:
                    domain = SimpleSetDomain(partitions=[])

                # only the null part (if allowed) contributes a coordinate
                dims.append(1 if allowNulls else 0)
                edges.append(Data(
                    name=e.name,
                    allowNulls=allowNulls,
                    domain=domain
                ))

            data = {}
            for si, s in enumerate(listwrap(query.select)):
                # counts are created with zeros=0, everything else without it
                if s.aggregate == "count":
                    data[s.name] = Matrix(dims=dims, zeros=0)
                else:
                    data[s.name] = Matrix(dims=dims)

            if isinstance(query.select, list):
                select = [{"name": s.name} for s in query.select]
            else:
                select = {"name": query.select.name}

            return Data(
                meta={"format": "cube"},
                edges=edges,
                select=select,
                data={k: v.cube for k, v in data.items()}
            )

        # transposed result rows, built lazily the first time they are needed
        columns = None

        edges = []
        dims = []
        for g in query.groupby:
            g.is_groupby = True

        for i, e in enumerate(query.edges + query.groupby):
            allowNulls = coalesce(e.allowNulls, True)

            if e.domain.type == "set" and e.domain.partitions:
                domain = SimpleSetDomain(partitions=e.domain.partitions.name)
            elif e.domain.type == "range":
                domain = e.domain
            elif e.domain.type == "time":
                domain = wrap(mo_json.scrub(e.domain))
            elif e.domain.type == "duration":
                domain = wrap(mo_json.scrub(e.domain))
            elif isinstance(e.value, TupleOp):
                pulls = jx.sort([c for c in index_to_columns.values() if c.push_name == e.name], "push_child").pull
                parts = [tuple(p(d) for p in pulls) for d in result.data]
                domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
            else:
                # no declared domain: derive the parts from the values
                # found in column i of the result
                # NOTE(review): on Python 3 `zip` returns an iterator, which
                # is not subscriptable; `columns[i]` assumes a list - confirm
                if not columns:
                    columns = zip(*result.data)
                parts = set(columns[i])
                if e.is_groupby and None in parts:
                    allowNulls = True
                parts -= {None}

                if query.sort[i].sort == -1:
                    domain = SimpleSetDomain(partitions=wrap(sorted(parts, reverse=True)))
                else:
                    domain = SimpleSetDomain(partitions=jx.sort(parts))

            dims.append(len(domain.partitions) + (1 if allowNulls else 0))
            edges.append(Data(
                name=e.name,
                allowNulls=allowNulls,
                domain=domain
            ))

        data_cubes = {}
        for si, s in enumerate(listwrap(query.select)):
            if s.aggregate == "count":
                data_cubes[s.name] = Matrix(dims=dims, zeros=0)
            else:
                data_cubes[s.name] = Matrix(dims=dims)

        r2c = index_to_coordinate(dims)  # WORKS BECAUSE THE DATABASE SORTED THE EDGES TO CONFORM
        for rownum, row in enumerate(result.data):
            coord = r2c(rownum)

            for i, s in enumerate(index_to_columns.values()):
                # edge columns only define the coordinate, they carry no data
                if s.is_edge:
                    continue
                if s.push_child == ".":
                    data_cubes[s.push_name][coord] = s.pull(row)
                else:
                    data_cubes[s.push_name][coord][s.push_child] = s.pull(row)

        # `== None` is kept as written (it is not replaced with `is None`)
        if query.select == None:
            select = Null
        elif isinstance(query.select, list):
            select = [{"name": s.name} for s in query.select]
        else:
            select = {"name": query.select.name}

        return Data(
            meta={"format": "cube"},
            edges=edges,
            select=select,
            data={k: v.cube for k, v in data_cubes.items()}
        )
    elif query.format == "table" or (not query.format and query.groupby):
        column_names = [None] * (max(c.push_column for c in index_to_columns.values()) + 1)
        for c in index_to_columns.values():
            column_names[c.push_column] = c.push_column_name
        data = []
        for d in result.data:
            row = [None for _ in column_names]
            for s in index_to_columns.values():
                if s.push_child == ".":
                    # the value fills the whole cell
                    row[s.push_column] = s.pull(d)
                elif s.num_push_columns:
                    # the cell is a fixed-length list, filled by position
                    tuple_value = row[s.push_column]
                    if tuple_value == None:
                        tuple_value = row[s.push_column] = [None] * s.num_push_columns
                    tuple_value[s.push_child] = s.pull(d)
                elif row[s.push_column] == None:
                    # first child value for this cell: start a Data object
                    row[s.push_column] = Data()
                    row[s.push_column][s.push_child] = s.pull(d)
                else:
                    row[s.push_column][s.push_child] = s.pull(d)
            data.append(tuple(unwrap(r) for r in row))

        output = Data(
            meta={"format": "table"},
            header=column_names,
            data=data
        )
    elif query.format == "list" or (not query.edges and not query.groupby):
        if not query.edges and not query.groupby and any(listwrap(query.select).aggregate):
            # aggregates without edges/groupby: one row, emitted as "value"
            if isinstance(query.select, list):
                data = Data()
                for c in index_to_columns.values():
                    if c.push_child == ".":
                        # repeated names are collected into a list
                        if data[c.push_name] == None:
                            data[c.push_name] = c.pull(result.data[0])
                        elif isinstance(data[c.push_name], list):
                            data[c.push_name].append(c.pull(result.data[0]))
                        else:
                            data[c.push_name] = [data[c.push_name], c.pull(result.data[0])]
                    else:
                        data[c.push_name][c.push_child] = c.pull(result.data[0])

                output = Data(
                    meta={"format": "value"},
                    data=data
                )
            else:
                data = Data()
                for s in index_to_columns.values():
                    if not data[s.push_child]:
                        data[s.push_child] = s.pull(result.data[0])
                    else:
                        data[s.push_child] += [s.pull(result.data[0])]
                output = Data(
                    meta={"format": "value"},
                    data=unwrap(data)
                )
        else:
            data = []
            # note: `rownum` holds a result row here, not an index
            for rownum in result.data:
                row = Data()
                for c in index_to_columns.values():
                    if c.push_child == ".":
                        row[c.push_name] = c.pull(rownum)
                    elif c.num_push_columns:
                        tuple_value = row[c.push_name]
                        if not tuple_value:
                            tuple_value = row[c.push_name] = [None] * c.num_push_columns
                        tuple_value[c.push_child] = c.pull(rownum)
                    else:
                        row[c.push_name][c.push_child] = c.pull(rownum)

                data.append(row)

            output = Data(
                meta={"format": "list"},
                data=data
            )
    else:
        Log.error("unknown format {{format}}", format=query.format)

    return output
def query_metadata(self, query):
    """
    Answer a metadata query: list (table, name, type, nested_path) per column.

    Only a single "eq" filter on `table` or on `name` is accepted; edges and
    groupby are rejected.

    :param query: query expression; its `from` is replaced with this object
        before it is normalized with `QueryOp.wrap`
    :return: `Data` in "cube", "table" or "list" format, chosen by
        `query.format` ("list" is the fallback)
    """
    _requested_from, query['from'] = query['from'], self
    schema = self.sf.tables["."].schema
    query = QueryOp.wrap(query, schema)
    columns = self.sf.columns
    where = query.where

    if query.edges or query.groupby:
        Log.error("Aggregates(groupby or edge) are not supported")

    # AT MOST ONE OF THESE GETS SET BY THE FILTER
    table_name = None
    column_name = None
    if where.op == "eq" and where.lhs.var == "table":
        table_name = mo_json.json2value(where.rhs.json)
    elif where.op == "eq" and where.lhs.var == "name":
        column_name = mo_json.json2value(where.rhs.json)
    else:
        Log.error("Only simple filters are expected like: \"eq\" on table and column name")

    # table names are taken from the `names` keys of the first column
    relative_names = list(columns[0].names.keys())
    full_names = [concat_field(self.sf.fact, name) for name in relative_names]

    # make sure the GUID column is listed (added when it is not the last one)
    if columns[-1].es_column != GUID:
        columns.append(Column(
            names={name: relative_field(GUID, name) for name in relative_names},
            type="string",
            es_column=GUID,
            es_index=self.sf.fact,
            nested_path=["."]
        ))

    # `!= None` IS KEPT AS-IS (not replaced with `is not None`)
    metadata = []
    for tname, table in zip(relative_names, full_names):
        if table_name != None and table_name != table:
            continue

        for col in columns:
            cname, _ctype = untyped_column(col.es_column)
            if column_name != None and column_name != cname:
                continue

            metadata.append((table, col.names[tname], col.type, unwraplist(col.nested_path)))

    header = ["table", "name", "type", "nested_path"]

    if query.format == "cube":
        num_rows = len(metadata)
        # transpose the rows into one sequence per header name
        temp_data = dict(zip(header, zip(*metadata)))
        rownum_edge = {
            "name": "rownum",
            "domain": {
                "type": "rownum",
                "min": 0,
                "max": num_rows,
                "interval": 1
            }
        }
        return Data(
            meta={"format": "cube"},
            data=temp_data,
            edges=[rownum_edge]
        )

    if query.format == "table":
        return Data(
            meta={"format": "table"},
            header=header,
            data=metadata
        )

    return Data(
        meta={"format": "list"},
        data=[dict(zip(header, r)) for r in metadata]
    )