def _es_terms2(es, mvel, query): """ WE ASSUME THERE ARE JUST TWO EDGES, AND EACH HAS A SIMPLE value """ # REQUEST VALUES IN FIRST DIMENSION q1 = query.copy() q1.edges = query.edges[0:1:] values1 = es_terms(es, mvel, q1).edges[0].domain.partitions.value select = listwrap(query.select) FromES = build_es_query(query) for s in select: for i, v in enumerate(values1): FromES.facets[s.name + "," + str(i)] = { "terms": { "field": query.edges[1].value, "size": coalesce(query.limit, 200000) }, "facet_filter": simplify_esfilter({"and": [ query.where, {"term": {query.edges[0].value: v}} ]}) } data = es_post(es, FromES, query.limit) # UNION ALL TERMS FROM SECOND DIMENSION values2 = set() for k, f in data.facets.items(): values2.update(f.terms.term) values2 = jx.sort(values2) term2index = {v: i for i, v in enumerate(values2)} query.edges[1].domain.partitions = FlatList([{"name": v, "value": v} for v in values2]) # MAKE CUBE output = {} dims = [len(values1), len(values2)] for s in select: output[s.name] = Matrix(*dims) # FILL CUBE # EXPECTING ONLY SELECT CLAUSE FACETS for facetName, facet in data.facets.items(): coord = facetName.split(",") s = [s for s in select if s.name == coord[0]][0] i1 = int(coord[1]) for term in facet.terms: i2 = term2index[term.term] output[s.name][(i1, i2)] = term[aggregates[s.aggregate]] cube = Cube(query.select, query.edges, output) cube.query = query return cube
def es_countop(es, mvel, query): """ RETURN SINGLE COUNT """ select = listwrap(query.select) FromES = build_es_query(query) for s in select: if is_variable_name(s.value): FromES.facets[s.name] = { "terms": { "field": s.value, "size": query.limit, }, "facet_filter": { "exists": { "field": s.value } } } else: # COMPLICATED value IS PROBABLY A SCRIPT, USE IT FromES.facets[s.name] = { "terms": { "script_field": es09.expressions.compile_expression(s.value, query), "size": 200000 } } data = es_post(es, FromES, query.limit) matricies = {} for s in select: matricies[s.name] = Matrix(value=data.hits.facets[s.name].total) cube = Cube(query.select, query.edges, matricies) cube.frum = query return cube
def es_aggop(es, mvel, query): select = listwrap(query.select) FromES = build_es_query(query) isSimple = AND(aggregates[s.aggregate] == "count" for s in select) if isSimple: return es_countop(es, query) # SIMPLE, USE TERMS FACET INSTEAD value2facet = dict() # ONLY ONE FACET NEEDED PER name2facet = dict() # MAP name TO FACET WITH STATS for s in select: if s.value not in value2facet: if isinstance(s.value, Variable): unwrap(FromES.facets)[s.name] = { "statistical": { "field": s.value.var }, "facet_filter": query.where.to_esfilter() } else: unwrap(FromES.facets)[s.name] = { "statistical": { "script": jx_expression_to_function(s.value) }, "facet_filter": query.where.to_es_filter() } value2facet[s.value] = s.name name2facet[s.name] = value2facet[s.value] data = es_post(es, FromES, query.limit) matricies = { s.name: Matrix(value=fix_es_stats(data.facets[literal_field(s.name)])[ aggregates[s.aggregate]]) for s in select } cube = Cube(query.select, [], matricies) cube.frum = query return cube
def es_terms_stats(esq, mvel, query): select = listwrap(query.select) facetEdges = [] # EDGES THAT WILL REQUIRE A FACET FOR EACH PART termsEdges = FlatList() specialEdge = None special_index = -1 # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME # FIND THE specialEdge, IF ONE for f, tedge in enumerate(query.edges): if tedge.domain.type in domains.KNOWN: for p, part in enumerate(tedge.domain.partitions): part.dataIndex = p # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY) # OR IF WE ARE NOT SIMPLY COUNTING # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet) # OR IF WE JUST WANT TO FORCE IT :) # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM facetEdges.append(tedge) else: if specialEdge: Log.error( "There is more than one open-ended edge: self can not be handled" ) specialEdge = tedge special_index = f termsEdges.append(tedge) if not specialEdge: # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE? num_parts = 0 special_index = -1 for i, e in enumerate(facetEdges): l = len(e.domain.partitions) if ((e.value and is_variable_name(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts: num_parts = l specialEdge = e special_index = i facetEdges.pop(special_index) termsEdges.append(specialEdge) total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges) * len(select) if total_facets > 100: # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY: counts = esq.query({ "from": query.frum, "select": { "aggregate": "count" }, "edges": facetEdges, "where": query.where, "limit": query.limit }) esFacets = [] def add_facet(value, parts, cube): if value: esFacets.append(parts) counts["count"].forall(add_facet) Log.note( "{{theory_count}} theoretical combinations, {{real_count}} actual combos found", real_count=len(esFacets), theory_count=total_facets) if not esFacets: # MAKE EMPTY CUBE matricies = {} dims = [ len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges ] for s in select: matricies[s.name] = Matrix(*dims) cube = Cube(query.select, query.edges, matricies) cube.frum = query return cube else: # GENERATE ALL COMBOS esFacets = getAllEdges(facetEdges) calcTerm = compileEdges2Term(mvel, termsEdges, FlatList()) term2parts = calcTerm.term2parts if len(esFacets) * len(select) > 1000: Log.error( "not implemented yet" ) # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES pass FromES = build_es_query(query) for s in select: for parts in esFacets: condition = FlatList() constants = FlatList() name = [literal_field(s.name)] for f, fedge in enumerate(facetEdges): name.append(str(parts[f].dataIndex)) condition.append(buildCondition(mvel, fedge, parts[f])) constants.append({ "name": fedge.domain.name, "value": parts[f] }) condition.append(query.where) name = ",".join(name) FromES.facets[name] = { "terms_stats": { "key_field": calcTerm.field, "value_field": s.value if is_variable_name(s.value) else None, "value_script": mvel.compile_expression(s.value) if not is_variable_name(s.value) else None, "size": coalesce(query.limit, 200000) } } if condition: FromES.facets[name].facet_filter = simplify_esfilter( {"and": condition}) data = es_post(esq.es, FromES, query.limit) if specialEdge.domain.type not in domains.KNOWN: # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED partitions = FlatList() map = {} for facetName, parts in data.facets.items(): for stats in parts.terms: if not map[stats]: part = {"value": stats, "name": stats} partitions.append(part) map[stats] = part partitions.sort(specialEdge.domain.compare) for p, part in enumerate(partitions): part.dataIndex = p specialEdge.domain.map = map specialEdge.domain.partitions = partitions # MAKE CUBE matricies = {} dims = [ len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges ] for s in select: matricies[s.name] = Matrix(*dims) name2agg = {s.name: aggregates[s.aggregate] for s in select} # FILL CUBE for edgeName, parts in data.facets.items(): temp = edgeName.split(",") pre_coord = tuple(int(c) for c in temp[1:]) sname = temp[0] for stats in parts.terms: if specialEdge: special = term2parts(stats.term)[0] coord = pre_coord[:special_index] + ( special.dataIndex, ) + pre_coord[special_index:] else: coord = pre_coord matricies[sname][coord] = stats[name2agg[sname]] cube = Cube(query.select, query.edges, matricies) cube.frum = query return cube
def es_terms(es, mvel, query): """ RETURN LIST OF ALL EDGE QUERIES EVERY FACET IS NAMED <select.name>, <c1>, ... <cN> WHERE <ci> ARE THE ELEMENT COORDINATES WE TRY TO PACK DIMENSIONS INTO THE TERMS TO MINIMIZE THE CROSS-PRODUCT EXPLOSION """ if len(query.edges) == 2: return _es_terms2(es, mvel, query) select = listwrap(query.select) FromES = build_es_query(query) packed_term = compileEdges2Term(mvel, query.edges, wrap([])) for s in select: FromES.facets[s.name] = { "terms": { "field": packed_term.field, "script_field": packed_term.expression, "size": coalesce(query.limit, 200000) }, "facet_filter": simplify_esfilter(query.where) } term2Parts = packed_term.term2parts data = es_post(es, FromES, query.limit) # GETTING ALL PARTS WILL EXPAND THE EDGES' DOMAINS # BUT HOW TO UNPACK IT FROM THE term FASTER IS UNKNOWN for k, f in data.facets.items(): for t in f.terms: term2Parts(t.term) # NUMBER ALL EDGES FOR jx INDEXING for f, e in enumerate(query.edges): e.index = f if e.domain.type in ["uid", "default"]: # e.domain.partitions = jx.sort(e.domain.partitions, "value") for p, part in enumerate(e.domain.partitions): part.dataIndex = p e.domain.NULL.dataIndex = len(e.domain.partitions) # MAKE CUBE output = {} dims = [ len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges ] for s in select: output[s.name] = Matrix(*dims) # FILL CUBE # EXPECTING ONLY SELECT CLAUSE FACETS for facetName, facet in data.facets.items(): for term in facet.terms: term_coord = term2Parts(term.term).dataIndex for s in select: try: output[s.name][term_coord] = term[aggregates[s.aggregate]] except Exception as e: # USUALLY CAUSED BY output[s.name] NOT BEING BIG ENOUGH TO HANDLE NULL COUNTS pass cube = Cube(query.select, query.edges, output) cube.query = query return cube
def es_terms(es, mvel, query): """ RETURN LIST OF ALL EDGE QUERIES EVERY FACET IS NAMED <select.name>, <c1>, ... <cN> WHERE <ci> ARE THE ELEMENT COORDINATES WE TRY TO PACK DIMENSIONS INTO THE TERMS TO MINIMIZE THE CROSS-PRODUCT EXPLOSION """ if len(query.edges) == 2: return _es_terms2(es, mvel, query) select = listwrap(query.select) FromES = build_es_query(query) packed_term = compileEdges2Term(mvel, query.edges, wrap([])) for s in select: FromES.facets[s.name] = { "terms": { "field": packed_term.field, "script_field": packed_term.expression, "size": coalesce(query.limit, 200000) }, "facet_filter": simplify_esfilter(query.where) } term2Parts = packed_term.term2parts data = es_post(es, FromES, query.limit) # GETTING ALL PARTS WILL EXPAND THE EDGES' DOMAINS # BUT HOW TO UNPACK IT FROM THE term FASTER IS UNKNOWN for k, f in data.facets.items(): for t in f.terms: term2Parts(t.term) # NUMBER ALL EDGES FOR jx INDEXING for f, e in enumerate(query.edges): e.index = f if e.domain.type in ["uid", "default"]: # e.domain.partitions = jx.sort(e.domain.partitions, "value") for p, part in enumerate(e.domain.partitions): part.dataIndex = p e.domain.NULL.dataIndex = len(e.domain.partitions) # MAKE CUBE output = {} dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges] for s in select: output[s.name] = Matrix(*dims) # FILL CUBE # EXPECTING ONLY SELECT CLAUSE FACETS for facetName, facet in data.facets.items(): for term in facet.terms: term_coord = term2Parts(term.term).dataIndex for s in select: try: output[s.name][term_coord] = term[aggregates[s.aggregate]] except Exception as e: # USUALLY CAUSED BY output[s.name] NOT BEING BIG ENOUGH TO HANDLE NULL COUNTS pass cube = Cube(query.select, query.edges, output) cube.query = query return cube
def es_terms_stats(esq, mvel, query): select = listwrap(query.select) facetEdges = [] # EDGES THAT WILL REQUIRE A FACET FOR EACH PART termsEdges = FlatList() specialEdge = None special_index = -1 # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME # FIND THE specialEdge, IF ONE for f, tedge in enumerate(query.edges): if tedge.domain.type in domains.KNOWN: for p, part in enumerate(tedge.domain.partitions): part.dataIndex = p # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY) # OR IF WE ARE NOT SIMPLY COUNTING # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet) # OR IF WE JUST WANT TO FORCE IT :) # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM facetEdges.append(tedge) else: if specialEdge: Log.error("There is more than one open-ended edge: self can not be handled") specialEdge = tedge special_index = f termsEdges.append(tedge) if not specialEdge: # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE? num_parts = 0 special_index = -1 for i, e in enumerate(facetEdges): l = len(e.domain.partitions) if ((e.value and is_variable_name(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts: num_parts = l specialEdge = e special_index = i facetEdges.pop(special_index) termsEdges.append(specialEdge) total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges)*len(select) if total_facets > 100: # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY: counts = esq.query({ "from": query.frum, "select": {"aggregate": "count"}, "edges": facetEdges, "where": query.where, "limit": query.limit }) esFacets = [] def add_facet(value, parts, cube): if value: esFacets.append(parts) counts["count"].forall(add_facet) Log.note("{{theory_count}} theoretical combinations, {{real_count}} actual combos found", real_count= len(esFacets), theory_count=total_facets) if not esFacets: # MAKE EMPTY CUBE matricies = {} dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges] for s in select: matricies[s.name] = Matrix(*dims) cube = Cube(query.select, query.edges, matricies) cube.frum = query return cube else: # GENERATE ALL COMBOS esFacets = getAllEdges(facetEdges) calcTerm = compileEdges2Term(mvel, termsEdges, FlatList()) term2parts = calcTerm.term2parts if len(esFacets) * len(select) > 1000: Log.error("not implemented yet") # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES pass FromES = build_es_query(query) for s in select: for parts in esFacets: condition = FlatList() constants = FlatList() name = [literal_field(s.name)] for f, fedge in enumerate(facetEdges): name.append(str(parts[f].dataIndex)) condition.append(buildCondition(mvel, fedge, parts[f])) constants.append({"name": fedge.domain.name, "value": parts[f]}) condition.append(query.where) name = ",".join(name) FromES.facets[name] = { "terms_stats": { "key_field": calcTerm.field, "value_field": s.value if is_variable_name(s.value) else None, "value_script": mvel.compile_expression(s.value) if not is_variable_name(s.value) else None, "size": coalesce(query.limit, 200000) } } if condition: FromES.facets[name].facet_filter = simplify_esfilter({"and": condition}) data = es_post(esq.es, FromES, query.limit) if specialEdge.domain.type not in domains.KNOWN: # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED partitions = FlatList() map = {} for facetName, parts in data.facets.items(): for stats in parts.terms: if not map[stats]: part = {"value": stats, "name": stats} partitions.append(part) map[stats] = part partitions.sort(specialEdge.domain.compare) for p, part in enumerate(partitions): part.dataIndex = p specialEdge.domain.map = map specialEdge.domain.partitions = partitions # MAKE CUBE matricies = {} dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges] for s in select: matricies[s.name] = Matrix(*dims) name2agg = {s.name: aggregates[s.aggregate] for s in select} # FILL CUBE for edgeName, parts in data.facets.items(): temp = edgeName.split(",") pre_coord = tuple(int(c) for c in temp[1:]) sname = temp[0] for stats in parts.terms: if specialEdge: special = term2parts(stats.term)[0] coord = pre_coord[:special_index]+(special.dataIndex, )+pre_coord[special_index:] else: coord = pre_coord matricies[sname][coord] = stats[name2agg[sname]] cube = Cube(query.select, query.edges, matricies) cube.frum = query return cube