def search(self, query):
    """
    Emulate an ES "filtered" search over the in-memory documents.

    :param query: ES-style query; only query.query.filtered.filter and
        query.fields are honored
    :return: Data shaped like an ES response: {"hits": {"total", "hits"}}
    """
    query = to_data(query)
    predicate = jx.get(query.query.filtered.filter)
    matches = list_to_data([
        {"_id": doc_id, "_source": doc}
        for doc_id, doc in self.data.items()
        if predicate(doc)
    ])

    if not query.fields:
        # NO FIELD SELECTION: RETURN FULL DOCUMENTS
        return dict_to_data({"hits": {"total": len(matches), "hits": matches}})

    # PROJECT EACH MATCH DOWN TO THE REQUESTED FIELDS
    hits = []
    for match in matches:
        selected = jx.select([unwrap(match._source)], query.fields)[0]
        hits.append({"_id": match._id, "fields": unwrap(selected)})
    return dict_to_data({"hits": {"total": len(matches), "hits": hits}})
def _convert_group(self, column):
    """
    Normalize a groupby clause to {"name", "value", "domain"} form.

    Raises (via Log.error) when the column carries a non-default domain,
    allowNulls, or is a compound edge without a name.
    """
    if is_text(column):
        # BARE FIELD NAME: name AND value ARE THE SAME
        return dict_to_data({
            "name": column,
            "value": column,
            "domain": {"type": "default"}
        })

    column = to_data(column)
    has_custom_domain = column.domain and column.domain.type != "default"
    if has_custom_domain or column.allowNulls != None:
        Log.error("groupby does not accept complicated domains")
    if not column.name and not is_text(column.value):
        Log.error("You must name compound edges: {{edge}}", edge=column)

    return dict_to_data({
        "name": coalesce(column.name, column.value),
        "value": column.value,
        "domain": {"type": "default"}
    })
def __data__(self):
    """Serialize to {"meta": {"format": "list"}, "data": rows}."""
    if first(self.schema.columns).name != '.':
        # UNWRAP EACH CELL BEFORE EMITTING
        rows = [
            {name: unwraplist(value) for name, value in row.items()}
            for row in self.data
        ]
        return dict_to_data({"meta": {"format": "list"}, "data": rows})
    # SINGLE "." COLUMN: ROWS ARE ALREADY PLAIN VALUES
    return dict_to_data({"meta": {"format": "list"}, "data": self.data})
def _select_a_field(field):
    """
    Normalize one select clause to {"name", "value"} where value is a
    path list (via split_field) when the source value is text.
    """
    if is_text(field):
        # BARE FIELD NAME
        return dict_to_data({"name": field, "value": split_field(field)})
    elif is_text(to_data(field).value):
        wrapped = to_data(field)
        return dict_to_data({
            "name": wrapped.name,
            "value": split_field(wrapped.value)
        })
    else:
        # value IS ALREADY STRUCTURED; PASS IT THROUGH
        return dict_to_data({"name": field.name, "value": field.value})
def to_es(self, schema):
    """
    Render this nested query as an ES "nested" clause; include
    inner_hits only when there is a select.
    """
    nested = {
        "path": self.path.var,
        "query": ES52[self.where].to_es(schema),
    }
    if self.select is not NULL:
        # NOTE(review): the select's to_es() gets no schema here, unlike
        # the where clause above — confirm that is intended
        nested["inner_hits"] = ES52[self.select].to_es() | {"size": 100000}
    return dict_to_data({"nested": nested})
def get(url):
    """
    USE json.net CONVENTIONS TO LINK TO INLINE OTHER JSON

    :param url: ABSOLUTE URL (file://, http://, ...) OF THE JSON RESOURCE
    :return: Data WITH ALL $ref LINKS EXPANDED
    """
    url = text(url)
    if url.find("://") == -1:
        # FIXED TYPO: "prototcol" -> "protocol"
        Log.error("{{url}} must have a protocol (eg http://) declared", url=url)

    base = URL("")
    if url.startswith("file://") and url[7] != "/":
        # RELATIVE file URL: ANCHOR TO THE CURRENT WORKING DIRECTORY
        if os.sep == "\\":
            base = URL("file:///" + os.getcwd().replace(os.sep, "/").rstrip("/") + "/.")
        else:
            base = URL("file://" + os.getcwd().rstrip("/") + "/.")
    elif url[url.find("://") + 3] != "/":
        Log.error("{{url}} must be absolute", url=url)

    # BLANK URL ONLY WORKS IF url IS ABSOLUTE
    phase1 = _replace_ref(dict_to_data({"$ref": url}), base)
    try:
        phase2 = _replace_locals(phase1, [phase1])
        return to_data(phase2)
    except Exception as e:
        Log.error("problem replacing locals in\n{{phase1}}", phase1=phase1, cause=e)
def convert(self, expr):
    """
    EXPAND INSTANCES OF name TO value
    """
    # DISPATCH ON THE TYPE OF expr; THE ORDER OF THESE TESTS MATTERS:
    # is_variable_name MUST BE TESTED BEFORE is_text, SINCE VARIABLE
    # NAMES ARE TEXT TOO
    if expr is True or expr == None or expr is False:
        return expr
    elif is_number(expr):
        return expr
    elif expr == ".":
        return "."
    elif is_variable_name(expr):
        # REPLACE A KNOWN DIMENSION NAME WITH ITS DEFINITION
        return coalesce(self.dimensions[expr], expr)
    elif is_text(expr):
        # TEXT THAT IS NOT A VALID VARIABLE NAME IS AN ERROR
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, Date):
        return expr
    elif is_op(expr, QueryOp):
        return self._convert_query(expr)
    elif is_data(expr):
        if expr["from"]:
            # A MAPPING WITH "from" IS A FULL QUERY
            return self._convert_query(expr)
        elif len(expr) >= 2:
            # ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            return dict_to_data({name: self.convert(value) for name, value in expr.leaves()})
        else:
            # ASSUME SINGLE-CLAUSE EXPRESSION
            k, v = expr.items()[0]
            return converter_map.get(k, self._convert_bop)(self, k, v)
    elif is_many(expr):
        # CONVERT EACH ELEMENT OF A SEQUENCE
        return list_to_data([self.convert(value) for value in expr])
    else:
        return expr
def list2cube(rows, column_names=None):
    """
    Convert a list of rows to "cube" format: one value-array per column,
    plus a single "rownum" edge covering the rows.

    :param rows: sequence of row mappings
    :param column_names: explicit column order; discovered from the rows
        when omitted
    """
    if column_names:
        keys = column_names
    else:
        # UNION OF ALL KEYS SEEN ACROSS ROWS
        seen = set()
        for row in rows:
            seen |= set(row.keys())
        keys = list(seen)

    # ONE COLUMN-ARRAY PER KEY, FILLED ROW BY ROW
    data = {k: [] for k in keys}
    for row in rows:
        for k in keys:
            data[k].append(unwraplist(row[k]))

    return dict_to_data({
        "meta": {"format": "cube"},
        "edges": [{
            "name": "rownum",
            "domain": {
                "type": "rownum",
                "min": 0,
                "max": len(rows),
                "interval": 1
            }
        }],
        "data": data
    })
def __getitem__(self, item):
    """
    Select from this cube.

    With a mapping {edge_name: part_value}: pin each named edge to the
    matching partition and collapse it, returning either a plain Data of
    values (all edges pinned) or a smaller Cube over the remaining edges.
    With a text item: return the value cube for that named select.
    """
    # TODO: SOLVE FUNDAMENTAL QUESTION OF IF SELECTING A PART OF AN
    # EDGE REMOVES THAT EDGE FROM THIS RESULT, OR ADDS THE PART
    # AS A select {"name":edge.name, "value":edge.domain.partitions[coord]}
    # PROBABLY NOT, THE value IS IDENTICAL OVER THE REMAINING
    if is_data(item):
        # ONE SLOT PER EDGE; None MEANS "EDGE NOT PINNED"
        coordinates = [None] * len(self.edges)

        # MAP DICT TO NUMERIC INDICES
        for name, v in item.items():
            ei, parts = first((i, e.domain.partitions) for i, e in enumerate(self.edges) if e.name == name)
            if not parts:
                Log.error(
                    "Can not find {{name}}=={{value|quote}} in list of edges, maybe this feature is not implemented yet",
                    name=name,
                    value=v)
            part = first(p for p in parts if p.value == v)
            if not part:
                # REQUESTED PART DOES NOT EXIST IN THE DOMAIN
                return Null
            else:
                coordinates[ei] = part.dataIndex

        # EDGES THAT REMAIN UNPINNED AFTER MAPPING
        edges = [e for e, v in zip(self.edges, coordinates) if v is None]
        if not edges:
            # ZERO DIMENSIONAL VALUE
            return dict_to_data({
                k: v.__getitem__(coordinates)
                for k, v in self.data.items()
            })
        else:
            # SLICE EACH MATRIX ALONG THE PINNED COORDINATES
            output = Cube(select=self.select,
                          edges=list_to_data([
                              e for e, v in zip(self.edges, coordinates)
                              if v is None
                          ]),
                          data={
                              k: Matrix(values=c.__getitem__(coordinates))
                              for k, c in self.data.items()
                          })
            return output
    elif is_text(item):
        # RETURN A VALUE CUBE
        if self.is_value:
            if item != self.select.name:
                Log.error("{{name}} not found in cube", name=item)
            return self

        if item not in self.select.name:
            Log.error("{{name}} not found in cube", name=item)

        # KEEP ONLY THE REQUESTED select AND ITS DATA
        output = Cube(select=first(s for s in self.select if s.name == item),
                      edges=self.edges,
                      data={item: self.data[item]})
        return output
    else:
        Log.error("not implemented yet")
def get_instance_metadata(timeout=None):
    """
    Fetch AWS instance metadata via boto, with "-" in key names
    replaced by "_" for attribute-style access.

    :param timeout: seconds, or anything Duration() accepts; defaults to 5
    """
    if not isinstance(timeout, (int, float)):
        # CONVERT DURATION-LIKE VALUES (OR None) TO SECONDS
        timeout = Duration(timeout).seconds

    raw = boto_utils.get_instance_metadata(
        timeout=coalesce(timeout, 5), num_retries=2)
    return dict_to_data({
        key.replace("-", "_"): value
        for key, value in raw.items()
    })
def __data__(self):
    """Serialize this dimension's defining attributes to a Data struct."""
    attributes = ("name", "type", "value", "key", "isFacet", "where", "dimension")
    return dict_to_data({a: getattr(self, a) for a in attributes})
def es_query_template(path):
    """
    RETURN TEMPLATE AND PATH-TO-FILTER AS A 2-TUPLE
    :param path: THE NESTED PATH (NOT INCLUDING TABLE NAME)
    :return: (es_query, es_filters) TUPLE
    """
    if not is_text(path):
        Log.error("expecting path to be a string")

    if path == ".":
        # NO NESTING: SINGLE FILTER SLOT
        f0 = {}
        template = dict_to_data({
            "query": es_and([f0]),
            "from": 0,
            "size": 0,
            "sort": []
        })
        return template, list_to_data([f0])

    # NESTED PATH: f0 FILTERS THE PARENT, f1 FILTERS INSIDE THE NESTED DOCS
    f0 = {}
    f1 = {}
    nested_clause = {
        "nested": {
            "path": path,
            "query": f1,
            "inner_hits": {"size": 100000}
        }
    }
    template = dict_to_data({
        "query": es_and([f0, nested_clause]),
        "from": 0,
        "size": 0,
        "sort": []
    })
    return template, list_to_data([f0, f1])
def jx_query(self, jx_query):
    """
    Answer a JX query by translating it to SQL over this table and
    un-typing the resulting rows.
    """
    command = sql_query(
        dict_to_data({"from": text(self.full_name)}) | jx_query,
        self.schema)
    docs = self.sql_query(command)
    rows = [untyped(from_data(leaves_to_data(doc))) for doc in docs]
    return Data(data=rows, format="list")
def query(self, q):
    """
    Answer a JX query against this cube; aggregations are handled
    directly, everything else is deferred to a ListContainer.
    """
    if is_aggs(q):
        return cube_aggs(self, q)

    # DEFER TO ListContainer
    from jx_python.containers.list import ListContainer

    schema = dot.dict_to_data({s.name: s for s in self.select + self.edges})
    rows = ListContainer(name="", data=self.values(), schema=schema)
    return rows.query(q)
def sign(message, private_key):
    """
    SIGN message WITH private_key, RETURN A SELF-DESCRIBING STRUCTURE

    :param message: JSON-serializable value to sign
    :param private_key: RSA private key providing .sign(data, padding, algorithm)
    :return: Data with base64 data and signature, plus the scheme used
    """
    data = value2json(message).encode("utf8")

    # SIGN DATA/STRING
    signature = private_key.sign(data=data, padding=PSS, algorithm=SHA256)

    return dict_to_data({
        "data": bytes2base64(data),
        "signature": bytes2base64(signature),
        "padding": "PSS",
        # FIXED: key was "algorithm=" (stray "="), which no verifier would find
        "algorithm": "SHA256"
    })
def test_cardinality(self):
    """The metadata table reports cardinality==2 for a column with two distinct values."""
    # SEED A TABLE WHERE COLUMN "a" HAS TWO DISTINCT VALUES
    pre_test = dict_to_data({
        "data": [{
            "a": "b"
        }, {
            "a": "c"
        }],
        "query": {
            "from": TEST_TABLE
        },  # DUMMY QUERY
        "expecting_list": {
            "meta": {
                "format": "list"
            },
            "data": [{
                "a": "b"
            }, {
                "a": "c"
            }]
        }
    })
    settings = self.utils.fill_container(pre_test)
    # RUN THE DUMMY QUERY SO METADATA (INCLUDING CARDINALITY) IS COLLECTED
    self.utils.send_queries(pre_test)

    # ASK THE META-COLUMNS TABLE FOR THE CARDINALITY OF "a"
    test = {
        "query": {
            "from": META_COLUMNS_NAME,
            "select": "cardinality",
            "where": {
                "and": [{
                    "eq": {
                        "table": settings.alias
                    }
                }, {
                    "eq": {
                        "name": "a"
                    }
                }]
            }
        },
        "expecting_list": {
            "meta": {
                "format": "list"
            },
            "data": [2]
        }
    }
    Log.note("table = {{table}}", table=pre_test.query['from'])
    subtest = to_data(test)
    self.utils.send_queries(subtest)
def test_delete(self):
    """An update with clear="." deletes whole documents matching the filter."""
    # FIVE ROWS; THREE HAVE a < 4 AND WILL BE DELETED
    settings = self.utils.fill_container(dict_to_data({
        "data": [{
            "a": 1,
            "b": 5
        }, {
            "a": 3,
            "b": 4
        }, {
            "a": 4,
            "b": 3
        }, {
            "a": 6,
            "b": 2
        }, {
            "a": 2
        }]
    }), typed=True)

    import jx_elasticsearch
    container = jx_elasticsearch.new_instance(
        read_only=False, kwargs=self.utils._es_test_settings)
    # clear="." MEANS REMOVE THE WHOLE DOCUMENT, NOT A SINGLE FIELD
    container.update({
        "update": settings.alias,
        "clear": ".",
        "where": {
            "lt": {
                "a": 4
            }
        }
    })

    # ONLY ROWS WITH a >= 4 SHOULD REMAIN
    self.utils.send_queries({
        "query": {
            "from": settings.alias,
            "sort": "a"
        },
        "expecting_list": {
            "data": [{
                "a": 4,
                "b": 3
            }, {
                "a": 6,
                "b": 2
            }]
        }
    })
def list2table(rows, column_names=None):
    """
    Convert a list of rows to "table" format: a header plus row-arrays.

    NOTE(review): when column_names is given it is passed through set(),
    so the header order may not match the caller's order — confirm intended.
    """
    if column_names:
        keys = list(set(column_names))
    else:
        # DISCOVER COLUMNS AS THE UNION OF ALL ROW KEYS
        discovered = set()
        for row in rows:
            discovered |= set(row.keys())
        keys = list(discovered)

    table_rows = [[unwraplist(row.get(k)) for k in keys] for row in rows]

    return dict_to_data({
        "meta": {"format": "table"},
        "header": keys,
        "data": table_rows
    })
def __init__(self, edge, query, limit):
    """
    Decoder for an edge spanning many leaf columns (eg a LeavesOp or a
    field prefix); collects the (name, es_column) pairs under the prefix.
    """
    AggsDecoder.__init__(self, edge, query, limit)

    if is_op(edge.value, LeavesOp):
        prefix = edge.value.term.var
        # LEAVES ARE RE-QUOTED AS LITERAL FIELD NAMES
        rename = lambda key: literal_field(relative_field(key, prefix))
    else:
        prefix = edge.value.var
        rename = lambda key: relative_field(key, prefix)

    pairs = [
        (rename(untype_path(column.name)), column.es_column)
        for column in query.frum.schema.leaves(prefix)
    ]
    self.put, self.fields = transpose(*pairs)

    self.domain = self.edge.domain = dict_to_data(
        {"dimension": {"fields": self.fields}})
    self.domain.limit = temper_limit(self.domain.limit, query)
    self.parts = []
    self.key2index = {}
    self.computed_domain = False
def test_new_field(self):
    """An update with "set" adds a computed column; a missing operand yields NULL."""
    # LAST ROW HAS NO "b", SO ITS COMPUTED "c" SHOULD BE NULL
    settings = self.utils.fill_container(dict_to_data({
        "data": [{
            "a": 1,
            "b": 5
        }, {
            "a": 3,
            "b": 4
        }, {
            "a": 4,
            "b": 3
        }, {
            "a": 6,
            "b": 2
        }, {
            "a": 2
        }]
    }), typed=False)

    import jx_elasticsearch
    container = jx_elasticsearch.new_instance(
        read_only=False, kwargs=self.utils._es_test_settings)
    # SET c = a + b FOR EVERY ROW
    container.update({
        "update": settings.alias,
        "set": {
            "c": {
                "add": ["a", "b"]
            }
        }
    })

    self.utils.send_queries({
        "query": {
            "from": settings.alias,
            "select": ["c", "a"]
        },
        "expecting_table": {
            "header": ["a", "c"],
            "data": [[1, 6], [3, 7], [4, 7], [6, 8], [2, NULL]]
        }
    })
def es_script(term):
    """Wrap painless source text as an ES script clause."""
    body = {"lang": "painless", "source": term}
    return dict_to_data({"script": body})
Log.warning("Problem with {{name}}! Shutting down.", name=self.app_name, cause=exc_val) Log.stop() def _same_frame(frameA, frameB): return (frameA.line, frameA.file) == (frameB.line, frameB.file) # GET THE MACHINE METADATA machine_metadata = dict_to_data({ "pid": os.getpid(), "python": text(platform.python_implementation()), "os": text(platform.system() + platform.release()).strip(), "name": text(platform.node()) }) def raise_from_none(e): raise e if PY3: exec("def raise_from_none(e):\n raise e from None\n", globals(), locals()) export("mo_logs.startup", Log)
def run(query, container=Null):
    """
    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer
    """
    # RESOLVE THE CONTAINER AND NORMALIZE THE QUERY INTO A QueryOp
    if container == None:
        container = to_data(query)["from"]
        query_op = QueryOp.wrap(query, container=container, namespace=container.schema)
    else:
        query_op = QueryOp.wrap(query, container=container, namespace=container.namespace)

    # DISPATCH ON CONTAINER TYPE; SOME BRANCHES RETURN DIRECTLY, OTHERS
    # REBIND container TO A LIST-LIKE VALUE AND FALL THROUGH BELOW
    if container == None:
        from jx_python.containers.list import DUAL
        return DUAL.query(query_op)
    elif isinstance(container, Container):
        return container.query(query_op)
    elif is_many(container):
        container = ListContainer(name=None, data=list(container))
    elif isinstance(container, Cube):
        if is_aggs(query_op):
            return cube_aggs(container, query_op)
    elif is_op(container, QueryOp):
        # THE "CONTAINER" IS ITSELF A QUERY: RUN IT FIRST
        container = run(container)
    elif is_data(container):
        # A MAPPING IS A QUERY DEFINITION; RUN IT OVER ITS OWN "from"
        query = container
        container = query["from"]
        container = run(QueryOp.wrap(query, container, container.namespace), container)
    else:
        Log.error("Do not know how to handle {{type}}", type=container.__class__.__name__)

    if is_aggs(query_op):
        container = list_aggs(container, query_op)
    else:  # SETOP
        if query_op.where is not TRUE:
            container = filter(container, query_op.where)
        if query_op.sort:
            container = sort(container, query_op.sort, already_normalized=True)
        if query_op.select:
            container = select(container, query_op.select)

    if query_op.window:
        if isinstance(container, Cube):
            container = list(container.values())
        for param in query_op.window:
            window(container, param)

    # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query_op.format == "cube":
        container = list2cube(container)
    elif query_op.format == "table":
        container = list2table(container)
        container.meta.format = "table"
    else:
        container = dict_to_data({
            "meta": {"format": "list"},
            "data": container
        })

    return container
# EXPECTING CONCAT OF <sign><integer><type> plist = value.split("+") for p, pplist in enumerate(plist): mlist = pplist.split("-") output = output + _string2Duration(mlist[0]) for m in mlist[1::]: output = output - _string2Duration(m) return output MILLI_VALUES = dict_to_data({ "year": float(52 * 7 * 24 * 60 * 60 * 1000), # 52weeks "quarter": float(13 * 7 * 24 * 60 * 60 * 1000), # 13weeks "month": float(28 * 24 * 60 * 60 * 1000), # 4weeks "week": float(7 * 24 * 60 * 60 * 1000), "day": float(24 * 60 * 60 * 1000), "hour": float(60 * 60 * 1000), "minute": float(60 * 1000), "second": float(1000), "milli": float(1), "zero": float(0) }) MONTH_VALUES = dict_to_data({ "year": 12, "quarter": 3, "month": 1, "week": 0, "day": 0, "hour": 0, "minute": 0, "second": 0,
def __data__(self):
    """Serialize this QueryOp: one entry per declared slot."""
    return dict_to_data({
        slot: getattr(self, slot)
        for slot in QueryOp.__slots__
    })
pass canonical_aggregates = dict_to_data({ "cardinality": { "name": "cardinality", "default": 0 }, "count": { "name": "count", "default": 0 }, "min": { "name": "minimum" }, "max": { "name": "maximum" }, "add": { "name": "sum" }, "avg": { "name": "average" }, "mean": { "name": "average" }, }) def _normalize_selects(
def es_and(terms):
    """Combine ES clauses with a non-scoring conjunction (bool.filter)."""
    body = {"filter": terms}
    return dict_to_data({"bool": body})
def test_meta(self):
    """The meta-columns table lists _id and a (both string, root nested_path) for a simple table."""
    # SEED A ONE-ROW, ONE-COLUMN TABLE
    test = dict_to_data({
        "query": {
            "from": TEST_TABLE
        },
        "data": [{
            "a": "b"
        }]
    })

    settings = self.utils.fill_container(test, typed=False)

    table_name = settings.alias

    # WE REQUIRE A QUERY TO FORCE LOADING OF METADATA
    pre_test = {
        "query": {
            "from": table_name
        },
        "expecting_list": {
            "meta": {
                "format": "list"
            },
            "data": [{
                "a": "b"
            }]
        }
    }
    self.utils.send_queries(pre_test)

    # QUERY THE META-COLUMNS TABLE IN ALL THREE OUTPUT FORMATS
    test = {
        "query": {
            "select": ["name", "table", "type", "nested_path"],
            "from": META_COLUMNS_NAME,
            "where": {
                "eq": {
                    "table": table_name
                }
            }
        },
        "expecting_list": {
            "meta": {
                "format": "list"
            },
            "data": [{
                "table": table_name,
                "name": "_id",
                "type": "string",
                "nested_path": "."
            }, {
                "table": table_name,
                "name": "a",
                "type": "string",
                "nested_path": "."
            }]
        },
        "expecting_table": {
            "meta": {
                "format": "table"
            },
            "header": ["table", "name", "type", "nested_path"],
            "data": [[table_name, "_id", "string", "."],
                     [table_name, "a", "string", "."]]
        },
        "expecting_cube": {
            "meta": {
                "format": "cube"
            },
            "edges": [{
                "name": "rownum",
                "domain": {
                    "type": "rownum",
                    "min": 0,
                    "max": 2,
                    "interval": 1
                }
            }],
            "data": {
                "table": [table_name, table_name],
                "name": ["_id", "a"],
                "type": ["string", "string"],
                "nested_path": [".", "."]
            }
        }
    }
    self.utils.send_queries(test)
def es_not(term):
    """Negate an ES clause (bool.must_not)."""
    body = {"must_not": term}
    return dict_to_data({"bool": body})
def es_or(terms):
    """Combine ES clauses with a disjunction (bool.should)."""
    body = {"should": terms}
    return dict_to_data({"bool": body})