def format_cube(decoders, aggs, start, query, select):
    """
    BUILD A Cube FROM THE AGGREGATION RESULT: ONE Matrix PER SELECT COLUMN,
    EACH ADDRESSED BY THE COORDINATES THAT aggs_iterator() PROVIDES

    :param decoders: HANDED TO count_dim() AND aggs_iterator()
    :param aggs: THE AGGREGATION RESULT TO WALK
    :param start: NOT USED BY THIS FUNCTION
    :param query: query.select IS GIVEN TO THE Cube; query IS STORED AS cube.frum
    :param select: THE COLUMNS; EACH MUST HAVE name, default AND pull(agg)
    :return: Cube WITH ITS EDGES SORTED BY dim
    """
    new_edges = count_dim(aggs, decoders)

    shape = []
    for edge in new_edges:
        if isinstance(edge.value, TupleOp):
            # TUPLE EDGES ARE FORCED TO HAVE NO NULL PART (THE EDGE IS MUTATED)
            edge.allowNulls = False
        null_slot = 1 if edge.allowNulls is not False else 0
        shape.append(len(edge.domain.partitions) + null_slot)
    shape = tuple(shape)

    # ONE MATRIX PER COLUMN, PRE-FILLED WITH THAT COLUMN'S DEFAULT
    columns = [(s, Matrix(dims=shape, zeros=s.default)) for s in select]

    for _row, coord, agg in aggs_iterator(aggs, decoders):
        for column, matrix in columns:
            try:
                matrix[coord] = column.pull(agg)
            except Exception as cause:
                # THIS HAPPENS WHEN ES RETURNS MORE TUPLE COMBINATIONS THAN DOCUMENTS
                if agg.get('doc_count') != 0:
                    Log.error("Programmer error", cause=cause)

    # ENSURE EDGES ARE IN SAME ORDER AS QUERY
    ordered_edges = sort_using_key(new_edges, key=lambda edge: edge.dim)
    cube = Cube(query.select, ordered_edges, {s.name: m for s, m in columns})
    cube.frum = query
    return cube
def format_list_from_groupby(aggs, es_query, query, decoders, all_selects):
    """
    RETURN THE AGGREGATION RESULT AS A LIST OF RECORDS, ONE PER COORDINATE
    SEEN, KEYED BY THE GROUPBY OUTPUT NAMES AND THE SELECT NAMES

    :param aggs: THE AGGREGATION RESULT TO WALK
    :param es_query: HANDED TO count_dim() AND aggs_iterator()
    :param query: PROVIDES groupby AND sort; EACH groupby ITEM GETS put.name FILLED IN
    :param decoders: ONE PER GROUPBY COLUMN; get_value(c) TURNS A COORDINATE INTO A VALUE
    :param all_selects: EVERY SELECT COLUMN; EACH STARTS AS None IN A NEW RECORD
    :return: Data WITH meta.format == "list" AND data AS THE LIST OF RECORDS
    """
    new_edges = wrap(count_dim(aggs, es_query, decoders))

    # MAKE SURE EVERY GROUPBY COLUMN HAS AN OUTPUT NAME BEFORE ANY RECORD IS BUILT
    for g in query.groupby:
        g.put.name = coalesce(g.put.name, g.name)

    def records():
        groupby = query.groupby
        shape = tuple(
            len(edge.domain.partitions) + (1 if edge.allowNulls is not False else 0)
            for edge in new_edges
        )
        seen = Matrix(dims=shape)
        want_zeros = query.sort and not query.groupby

        for _row, coord, agg, found in aggs_iterator(aggs, es_query, decoders, give_me_zeros=want_zeros):
            record = seen[coord]
            # `== None` (NOT `is None`) IS KEPT ON PURPOSE; PRESUMABLY THE CELL
            # CAN BE A Null-LIKE OBJECT - CONFIRM BEFORE CHANGING
            if record == None:
                record = Data()
                seen[coord] = record
                for g, decoder, c in zip(groupby, decoders, coord):
                    record[g.put.name] = decoder.get_value(c)
                for s in all_selects:
                    record[s.name] = None
                yield record

            # THIS IS A TRICK!  WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED
            for s in found:
                union(record, s.name, s.pull(agg), s.aggregate)

    return Data(meta={"format": "list"}, data=list(records()))
def format_cube(decoders, aggs, start, query, select):
    """
    BUILD A Cube FROM THE AGGREGATION RESULT: ONE Matrix PER SELECT COLUMN,
    EACH ADDRESSED BY THE COORDINATES THAT aggs_iterator() PROVIDES

    :param decoders: HANDED TO count_dim() AND aggs_iterator()
    :param aggs: THE AGGREGATION RESULT TO WALK
    :param start: NOT USED BY THIS FUNCTION
    :param query: query.select IS GIVEN TO THE Cube; query IS STORED AS cube.frum
    :param select: THE COLUMNS; EACH MUST HAVE name, default AND pull(agg)
    :return: Cube WITH ITS EDGES SORTED BY dim
    """
    new_edges = count_dim(aggs, decoders)

    shape = []
    for edge in new_edges:
        if isinstance(edge.value, TupleOp):
            # TUPLE EDGES ARE FORCED TO HAVE NO NULL PART (THE EDGE IS MUTATED)
            edge.allowNulls = False
        null_slot = 1 if edge.allowNulls is not False else 0
        shape.append(len(edge.domain.partitions) + null_slot)
    shape = tuple(shape)

    # ONE MATRIX PER COLUMN, PRE-FILLED WITH THAT COLUMN'S DEFAULT
    columns = [(s, Matrix(dims=shape, zeros=s.default)) for s in select]

    for _row, coord, agg in aggs_iterator(aggs, decoders):
        for column, matrix in columns:
            try:
                matrix[coord] = column.pull(agg)
            except Exception as cause:
                # THIS HAPPENS WHEN ES RETURNS MORE TUPLE COMBINATIONS THAN DOCUMENTS
                if agg.get('doc_count') != 0:
                    Log.error("Programmer error", cause=cause)

    # ENSURE EDGES ARE IN SAME ORDER AS QUERY
    ordered_edges = sort_using_key(new_edges, key=lambda edge: edge.dim)
    cube = Cube(query.select, ordered_edges, {s.name: m for s, m in columns})
    cube.frum = query
    return cube
def format_table(decoders, aggs, start, query, select):
    """
    RETURN THE AGGREGATION RESULT AS A TABLE: A HEADER PLUS ONE LIST PER ROW
    (EDGE VALUES FIRST, THEN ONE VALUE PER SELECT COLUMN)

    :param decoders: ONE PER EDGE; get_value(c) TURNS A COORDINATE INTO A VALUE
    :param aggs: THE AGGREGATION RESULT TO WALK
    :param start: NOT USED BY THIS FUNCTION
    :param query: PROVIDES sort AND groupby, WHICH DECIDE HOW MISSING CELLS ARE EMITTED
    :param select: THE COLUMNS; EACH MUST HAVE name, aggregate AND pull(agg)
    :return: Data WITH meta.format == "table", header AND data (LIST OF ROWS)
    """
    new_edges = count_dim(aggs, decoders)
    header = new_edges.name + select.name

    def empty_record(coord):
        # ROW FOR A CELL ES DID NOT RETURN: COUNTS ARE 0, EVERYTHING ELSE IS None
        return (
            [d.get_value(coord[i]) for i, d in enumerate(decoders)]
            + [0 if s.aggregate == "count" else None for s in select]
        )

    def found_record(coord, agg):
        # ROW FOR A CELL ES DID RETURN
        return (
            [d.get_value(c) for c, d in zip(coord, decoders)]
            + [s.pull(agg) for s in select]
        )

    def data():
        dims = tuple(
            len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
            for e in new_edges
        )
        is_sent = Matrix(dims=dims, zeros=0)

        if query.sort and not query.groupby:
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            for _row, coord, agg in aggs_iterator(aggs, decoders):
                # BUILTIN next() WORKS ON PYTHON 2 AND 3; THE .next() METHOD
                # DOES NOT EXIST ON PYTHON 3 ITERATORS
                missing_coord = next(all_coord)
                while coord != missing_coord:
                    # FILL IN EVERY EXPECTED CELL THAT COMES BEFORE THIS ONE
                    yield empty_record(missing_coord)
                    missing_coord = next(all_coord)
                yield found_record(coord, agg)
            # NOTE(review): EXPECTED CELLS AFTER THE LAST RETURNED coord ARE
            # NOT EMITTED IN THIS BRANCH - CONFIRM THAT IS INTENDED
        else:
            for _row, coord, agg in aggs_iterator(aggs, decoders):
                is_sent[coord] = 1
                yield found_record(coord, agg)

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for c, v in is_sent:
                    if not v:
                        yield empty_record(c)

    return Data(meta={"format": "table"}, header=header, data=list(data()))
def format_cube(aggs, es_query, query, decoders, all_selects):
    """
    BUILD A Cube FROM THE AGGREGATION RESULT: ONE Matrix PER SELECT NAME,
    FILLED BY MERGING EVERY PULLED VALUE WITH union()

    :param aggs: THE AGGREGATION RESULT TO WALK
    :param es_query: HANDED TO count_dim() AND aggs_iterator()
    :param query: query.select IS GIVEN TO THE Cube; query IS STORED AS cube.frum
    :param decoders: HANDED TO count_dim() AND aggs_iterator()
    :param all_selects: EVERY SELECT COLUMN; EACH MUST HAVE name, default AND aggregate
    :return: Cube WITH ITS EDGES SORTED BY dim
    """
    new_edges = count_dim(aggs, es_query, decoders)

    shape = []
    for edge in new_edges:
        if isinstance(edge.value, TupleOp):
            # TUPLE EDGES ARE FORCED TO HAVE NO NULL PART (THE EDGE IS MUTATED)
            edge.allowNulls = False
        null_slot = 1 if edge.allowNulls is not False else 0
        shape.append(len(edge.domain.partitions) + null_slot)
    shape = tuple(shape)

    has_unusual_default = any(
        s.default != canonical_aggregates[s.aggregate].default
        for s in all_selects
    )

    if not has_unusual_default:
        # EVERY DEFAULT IS THE CANONICAL ONE: START FROM IT AND MERGE DIRECTLY
        matricies = {
            s.name: Matrix(dims=shape, zeros=s.default)
            for s in all_selects
        }
        for _row, coord, agg, selects in aggs_iterator(aggs, es_query, decoders):
            for select in selects:
                union(matricies[select.name], coord, select.pull(agg), select.aggregate)
    else:
        # UNUSUAL DEFAULT VALUES MESS THE union() FUNCTION
        # SO MERGE INTO MATRICES WITH NO ZEROS, AND REMEMBER WHICH CELLS GOT NOTHING
        untouched = Matrix(dims=shape, zeros=True)
        matricies = {s.name: Matrix(dims=shape) for s in all_selects}
        for _row, coord, agg, selects in aggs_iterator(aggs, es_query, decoders):
            for select in selects:
                target = matricies[select.name]
                value = select.pull(agg)
                if value == None:
                    continue
                untouched[coord] = False
                union(target, coord, value, select.aggregate)

        # FILL THE DEFAULT VALUES
        for c, is_untouched in untouched:
            if is_untouched:
                for s in all_selects:
                    matricies[s.name][c] = s.default

    # ENSURE EDGES ARE IN SAME ORDER AS QUERY
    ordered_edges = sort_using_key(new_edges, key=lambda edge: edge.dim)
    cube = Cube(query.select, ordered_edges, matricies)
    cube.frum = query
    return cube
def format_table(decoders, aggs, start, query, select):
    """
    RETURN THE AGGREGATION RESULT AS A TABLE: A HEADER PLUS ONE LIST PER ROW
    (EDGE VALUES FIRST, THEN ONE VALUE PER SELECT COLUMN)

    :param decoders: ONE PER EDGE; get_value(c) TURNS A COORDINATE INTO A VALUE
    :param aggs: THE AGGREGATION RESULT TO WALK
    :param start: NOT USED BY THIS FUNCTION
    :param query: PROVIDES sort AND groupby, WHICH DECIDE HOW MISSING CELLS ARE EMITTED
    :param select: THE COLUMNS; EACH MUST HAVE name, aggregate AND pull(agg)
    :return: Data WITH meta.format == "table", header AND data (LIST OF ROWS)
    """
    new_edges = count_dim(aggs, decoders)
    header = new_edges.name + select.name

    def empty_record(coord):
        # ROW FOR A CELL ES DID NOT RETURN: COUNTS ARE 0, EVERYTHING ELSE IS None
        return (
            [d.get_value(coord[i]) for i, d in enumerate(decoders)]
            + [0 if s.aggregate == "count" else None for s in select]
        )

    def found_record(coord, agg):
        # ROW FOR A CELL ES DID RETURN
        return (
            [d.get_value(c) for c, d in zip(coord, decoders)]
            + [s.pull(agg) for s in select]
        )

    def data():
        dims = tuple(
            len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
            for e in new_edges
        )
        is_sent = Matrix(dims=dims, zeros=0)

        if query.sort and not query.groupby:
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            for _row, coord, agg in aggs_iterator(aggs, decoders):
                # BUILTIN next() WORKS ON PYTHON 2 AND 3; THE .next() METHOD
                # DOES NOT EXIST ON PYTHON 3 ITERATORS
                missing_coord = next(all_coord)
                while coord != missing_coord:
                    # FILL IN EVERY EXPECTED CELL THAT COMES BEFORE THIS ONE
                    yield empty_record(missing_coord)
                    missing_coord = next(all_coord)
                yield found_record(coord, agg)
            # NOTE(review): EXPECTED CELLS AFTER THE LAST RETURNED coord ARE
            # NOT EMITTED IN THIS BRANCH - CONFIRM THAT IS INTENDED
        else:
            for _row, coord, agg in aggs_iterator(aggs, decoders):
                is_sent[coord] = 1
                yield found_record(coord, agg)

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for c, v in is_sent:
                    if not v:
                        yield empty_record(c)

    return Data(meta={"format": "table"}, header=header, data=list(data()))
def format_list_from_groupby(aggs, es_query, query, decoders, all_selects):
    """
    RETURN THE AGGREGATION RESULT AS A LIST OF RECORDS, ONE PER COORDINATE
    SEEN, KEYED BY THE GROUPBY OUTPUT NAMES AND THE SELECT NAMES

    :param aggs: THE AGGREGATION RESULT TO WALK
    :param es_query: HANDED TO count_dim() AND aggs_iterator()
    :param query: PROVIDES groupby AND sort; EACH groupby ITEM GETS put.name FILLED IN
    :param decoders: ONE PER GROUPBY COLUMN; get_value(c) TURNS A COORDINATE INTO A VALUE
    :param all_selects: EVERY SELECT COLUMN; THOSE WITH A NON-CANONICAL default
                        ARE MUTATED (default MOVED TO finish, default SET TO None)
    :return: Data WITH meta.format == "list" AND data AS THE LIST OF RECORDS
    """
    new_edges = wrap(count_dim(aggs, es_query, decoders))

    # MAKE SURE EVERY GROUPBY COLUMN HAS AN OUTPUT NAME BEFORE ANY RECORD IS BUILT
    for g in query.groupby:
        g.put.name = coalesce(g.put.name, g.name)

    def records():
        groupby = query.groupby
        shape = tuple(
            len(edge.domain.partitions) + (1 if edge.allowNulls is not False else 0)
            for edge in new_edges
        )
        seen = Matrix(dims=shape)
        want_zeros = query.sort and not query.groupby

        # IRREGULAR DEFAULTS MESS WITH union(), SET THEM AT END, IF ANY
        # NOTE(review): THIS CHANGES THE SELECT OBJECTS IN PLACE, SO THE
        # ORIGINAL default IS ONLY AVAILABLE AS finish AFTERWARDS
        deferred = []
        for s in all_selects:
            if s.default != canonical_aggregates[s.aggregate].default:
                s.finish = s.default
                s.default = None
                deferred.append(s)

        for _row, coord, agg, found in aggs_iterator(aggs, es_query, decoders, give_me_zeros=want_zeros):
            record = seen[coord]
            # `== None` (NOT `is None`) IS KEPT ON PURPOSE; PRESUMABLY THE CELL
            # CAN BE A Null-LIKE OBJECT - CONFIRM BEFORE CHANGING
            if record == None:
                record = Data()
                seen[coord] = record
                for g, decoder, c in zip(groupby, decoders, coord):
                    record[g.put.name] = decoder.get_value(c)
                for s in all_selects:
                    record[s.name] = s.default
                yield record

            # THIS IS A TRICK!  WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED
            for s in found:
                union(record, s.name, s.pull(agg), s.aggregate)

        if deferred:
            # SET ANY DEFAULTS (ROWS WERE ALREADY YIELDED, THEY ARE UPDATED IN PLACE)
            for _coord, record in seen:
                for s in deferred:
                    if record[s.name] == None:
                        record[s.name] = s.finish

    return Data(meta={"format": "list"}, data=list(records()))
def format_cube(aggs, es_query, query, decoders, all_selects):
    """
    BUILD A Cube FROM THE AGGREGATION RESULT: ONE Matrix PER SELECT NAME,
    FILLED BY MERGING EVERY PULLED VALUE WITH union()

    :param aggs: THE AGGREGATION RESULT TO WALK
    :param es_query: HANDED TO count_dim() AND aggs_iterator()
    :param query: query.select IS GIVEN TO THE Cube; query IS STORED AS cube.frum
    :param decoders: HANDED TO count_dim() AND aggs_iterator()
    :param all_selects: EVERY SELECT COLUMN; EACH MUST HAVE name, default AND aggregate
    :return: Cube WITH ITS EDGES SORTED BY dim
    """
    new_edges = count_dim(aggs, es_query, decoders)

    shape = []
    for edge in new_edges:
        if is_op(edge.value, TupleOp):
            # TUPLE EDGES ARE FORCED TO HAVE NO NULL PART (THE EDGE IS MUTATED)
            edge.allowNulls = False
        null_slot = 1 if edge.allowNulls is not False else 0
        shape.append(len(edge.domain.partitions) + null_slot)
    shape = tuple(shape)

    has_unusual_default = any(
        s.default != canonical_aggregates[s.aggregate].default
        for s in all_selects
    )

    if not has_unusual_default:
        # EVERY DEFAULT IS THE CANONICAL ONE: START FROM IT AND MERGE DIRECTLY
        matricies = {
            s.name: Matrix(dims=shape, zeros=s.default)
            for s in all_selects
        }
        for _row, coord, agg, selects in aggs_iterator(aggs, es_query, decoders):
            for select in selects:
                union(matricies[select.name], coord, select.pull(agg), select.aggregate)
    else:
        # UNUSUAL DEFAULT VALUES MESS THE union() FUNCTION
        # SO MERGE INTO MATRICES WITH NO ZEROS, AND REMEMBER WHICH CELLS GOT NOTHING
        untouched = Matrix(dims=shape, zeros=True)
        matricies = {s.name: Matrix(dims=shape) for s in all_selects}
        for _row, coord, agg, selects in aggs_iterator(aggs, es_query, decoders):
            for select in selects:
                target = matricies[select.name]
                value = select.pull(agg)
                if value == None:
                    continue
                untouched[coord] = False
                union(target, coord, value, select.aggregate)

        # FILL THE DEFAULT VALUES
        for c, is_untouched in untouched:
            if is_untouched:
                for s in all_selects:
                    matricies[s.name][c] = s.default

    # ENSURE EDGES ARE IN SAME ORDER AS QUERY
    ordered_edges = sort_using_key(new_edges, key=lambda edge: edge.dim)
    cube = Cube(query.select, ordered_edges, matricies)
    cube.frum = query
    return cube
def format_list_from_groupby(aggs, es_query, query, decoders, all_selects):
    """
    RETURN THE AGGREGATION RESULT AS A LIST OF RECORDS, ONE PER COORDINATE
    SEEN, KEYED BY THE GROUPBY OUTPUT NAMES AND THE SELECT NAMES

    :param aggs: THE AGGREGATION RESULT TO WALK
    :param es_query: HANDED TO count_dim() AND aggs_iterator()
    :param query: PROVIDES groupby AND sort; EACH groupby ITEM GETS put.name FILLED IN
    :param decoders: ONE PER GROUPBY COLUMN; get_value(c) TURNS A COORDINATE INTO A VALUE
    :param all_selects: EVERY SELECT COLUMN; THOSE WITH A NON-CANONICAL default
                        ARE MUTATED (default MOVED TO finish, default SET TO None)
    :return: Data WITH meta.format == "list" AND data AS THE LIST OF RECORDS
    """
    new_edges = wrap(count_dim(aggs, es_query, decoders))

    # MAKE SURE EVERY GROUPBY COLUMN HAS AN OUTPUT NAME BEFORE ANY RECORD IS BUILT
    for g in query.groupby:
        g.put.name = coalesce(g.put.name, g.name)

    def records():
        groupby = query.groupby
        shape = tuple(
            len(edge.domain.partitions) + (1 if edge.allowNulls is not False else 0)
            for edge in new_edges
        )
        seen = Matrix(dims=shape)
        want_zeros = query.sort and not query.groupby

        # IRREGULAR DEFAULTS MESS WITH union(), SET THEM AT END, IF ANY
        # NOTE(review): THIS CHANGES THE SELECT OBJECTS IN PLACE, SO THE
        # ORIGINAL default IS ONLY AVAILABLE AS finish AFTERWARDS
        deferred = []
        for s in all_selects:
            if s.default != canonical_aggregates[s.aggregate].default:
                s.finish = s.default
                s.default = None
                deferred.append(s)

        for _row, coord, agg, found in aggs_iterator(aggs, es_query, decoders, give_me_zeros=want_zeros):
            record = seen[coord]
            # `== None` (NOT `is None`) IS KEPT ON PURPOSE; PRESUMABLY THE CELL
            # CAN BE A Null-LIKE OBJECT - CONFIRM BEFORE CHANGING
            if record == None:
                record = Data()
                seen[coord] = record
                for g, decoder, c in zip(groupby, decoders, coord):
                    record[g.put.name] = decoder.get_value(c)
                for s in all_selects:
                    record[s.name] = s.default
                yield record

            # THIS IS A TRICK!  WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED
            for s in found:
                union(record, s.name, s.pull(agg), s.aggregate)

        if deferred:
            # SET ANY DEFAULTS (ROWS WERE ALREADY YIELDED, THEY ARE UPDATED IN PLACE)
            for _coord, record in seen:
                for s in deferred:
                    if record[s.name] == None:
                        record[s.name] = s.finish

    return Data(meta={"format": "list"}, data=list(records()))
def format_table(aggs, es_query, query, decoders, all_selects):
    """
    RETURN THE AGGREGATION RESULT AS A TABLE: A HEADER PLUS ONE LIST PER ROW
    (EDGE VALUES FIRST, THEN ONE VALUE PER SELECT COLUMN)

    :param aggs: THE AGGREGATION RESULT TO WALK
    :param es_query: HANDED TO count_dim() AND aggs_iterator()
    :param query: PROVIDES sort AND groupby, WHICH DECIDE HOW MISSING CELLS ARE EMITTED
    :param decoders: ONE PER EDGE; get_value(c) TURNS A COORDINATE INTO A VALUE
    :param all_selects: EVERY SELECT COLUMN; EACH MUST HAVE name, default AND aggregate
    :return: Data WITH meta.format == "table", header (TUPLE) AND data (LIST OF ROWS)
    """
    new_edges = wrap(count_dim(aggs, es_query, decoders))
    dims = tuple(
        len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
        for e in new_edges
    )
    rank = len(dims)
    header = tuple(new_edges.name + all_selects.name)
    # POSITION OF EACH SELECT COLUMN IN A ROW (THE EDGE VALUES COME FIRST)
    name2index = {s.name: i + rank for i, s in enumerate(all_selects)}

    def data():
        is_sent = Matrix(dims=dims)
        give_me_zeros = query.sort and not query.groupby
        if give_me_zeros:
            # WE REQUIRE THE ZEROS FOR SORTING
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            # BUILTIN next() WORKS ON PYTHON 2 AND 3; THE .next() METHOD
            # DOES NOT EXIST ON PYTHON 3 ITERATORS
            ordered_coord = next(all_coord)[::-1]
            output = None
            for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
                if coord != ordered_coord:
                    # output HAS BEEN YIELDED, BUT SET THE DEFAULT VALUES
                    if output is not None:
                        for s in all_selects:
                            i = name2index[s.name]
                            if output[i] is None:
                                output[i] = s.default
                        # WE CAN GET THE SAME coord MANY TIMES, SO ONLY ADVANCE WHEN NOT
                        # (BEFORE THE FIRST ROW, ordered_coord IS STILL UNSEEN AND
                        # MUST BE LEFT FOR THE LOOP BELOW)
                        ordered_coord = next(all_coord)[::-1]

                while coord != ordered_coord:
                    # HAPPENS WHEN THE coord IS AHEAD OF ordered_coord
                    record = [
                        d.get_value(ordered_coord[i])
                        for i, d in enumerate(decoders)
                    ] + [s.default for s in all_selects]
                    yield record
                    ordered_coord = next(all_coord)[::-1]

                # coord == ordered_coord
                output = [
                    d.get_value(c) for c, d in zip(coord, decoders)
                ] + [None for s in all_selects]
                for select in ss:
                    v = select.pull(agg)
                    if v != None:
                        union(output, name2index[select.name], v, select.aggregate)
                yield output

            # SET DEFAULTS ON LAST ROW; NO LATER coord ARRIVES TO TRIGGER IT ABOVE
            if output is not None:
                for s in all_selects:
                    i = name2index[s.name]
                    if output[i] is None:
                        output[i] = s.default
            # NOTE(review): EXPECTED CELLS AFTER THE LAST RETURNED coord ARE
            # NOT EMITTED IN THIS BRANCH - CONFIRM THAT IS INTENDED
        else:
            last_coord = None  # HANG ONTO THE output FOR A BIT WHILE WE FILL THE ELEMENTS
            output = None
            for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
                if coord != last_coord:
                    if output:
                        # SET DEFAULTS
                        for i, s in enumerate(all_selects):
                            v = output[rank + i]
                            if v == None:
                                output[rank + i] = s.default
                        yield output
                    output = is_sent[coord]
                    if output == None:
                        output = is_sent[coord] = [
                            d.get_value(c) for c, d in zip(coord, decoders)
                        ] + [None for _ in all_selects]
                    last_coord = coord
                # THIS IS A TRICK!  WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED
                for select in ss:
                    v = select.pull(agg)
                    if v != None:
                        union(output, name2index[select.name], v, select.aggregate)

            if output:
                # SET DEFAULTS ON LAST ROW
                for i, s in enumerate(all_selects):
                    v = output[rank + i]
                    if v == None:
                        output[rank + i] = s.default
                yield output

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for coord, output in is_sent:
                    if output == None:
                        record = [
                            d.get_value(c) for c, d in zip(coord, decoders)
                        ] + [s.default for s in all_selects]
                        yield record

    return Data(meta={"format": "table"}, header=header, data=list(data()))
def format_list(decoders, aggs, start, query, select):
    """
    RETURN THE AGGREGATION RESULT AS A LIST OF RECORDS KEYED BY THE
    EDGE NAMES (FROM query.edges) AND THE SELECT NAMES

    :param decoders: ONE PER EDGE; get_value(c) TURNS A COORDINATE INTO A VALUE
    :param aggs: THE AGGREGATION RESULT TO WALK
    :param start: NOT USED BY THIS FUNCTION
    :param query: PROVIDES edges, sort AND groupby
    :param select: THE COLUMNS; EACH MUST HAVE name, aggregate AND pull(agg)
    :return: Data WITH meta.format == "list" AND data AS THE LIST OF RECORDS
    """
    new_edges = count_dim(aggs, decoders)

    def empty_row(coord):
        # RECORD FOR A CELL ES DID NOT RETURN: ONLY THE COUNTS ARE SET (TO 0)
        output = Data()
        for i, d in enumerate(decoders):
            output[query.edges[i].name] = d.get_value(coord[i])
        for s in select:
            if s.aggregate == "count":
                output[s.name] = 0
        return output

    def found_row(coord, agg):
        # RECORD FOR A CELL ES DID RETURN
        output = Data()
        for e, c, d in zip(query.edges, coord, decoders):
            output[e.name] = d.get_value(c)
        for s in select:
            output[s.name] = s.pull(agg)
        return output

    def data():
        dims = tuple(
            len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
            for e in new_edges
        )
        is_sent = Matrix(dims=dims, zeros=0)

        if query.sort and not query.groupby:
            # TODO: USE THE format_table() TO PRODUCE THE NEEDED VALUES INSTEAD OF DUPLICATING LOGIC HERE
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            for _, coord, agg in aggs_iterator(aggs, decoders):
                # BUILTIN next() WORKS ON PYTHON 2 AND 3; THE .next() METHOD
                # DOES NOT EXIST ON PYTHON 3 ITERATORS
                missing_coord = next(all_coord)
                while coord != missing_coord:
                    # INSERT THE MISSING COORDINATE INTO THE GENERATION
                    yield empty_row(missing_coord)
                    missing_coord = next(all_coord)
                yield found_row(coord, agg)
            # NOTE(review): EXPECTED CELLS AFTER THE LAST RETURNED coord ARE
            # NOT EMITTED IN THIS BRANCH - CONFIRM THAT IS INTENDED
        else:
            for _, coord, agg in aggs_iterator(aggs, decoders):
                is_sent[coord] = 1
                yield found_row(coord, agg)

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for c, v in is_sent:
                    if not v:
                        yield empty_row(c)

    output = Data(meta={"format": "list"}, data=list(data()))
    return output
def format_list(decoders, aggs, start, query, select):
    """
    RETURN THE AGGREGATION RESULT AS A LIST OF RECORDS KEYED BY THE
    EDGE NAMES (FROM query.edges) AND THE SELECT NAMES

    :param decoders: ONE PER EDGE; get_value(c) TURNS A COORDINATE INTO A VALUE
    :param aggs: THE AGGREGATION RESULT TO WALK
    :param start: NOT USED BY THIS FUNCTION
    :param query: PROVIDES edges, sort AND groupby
    :param select: THE COLUMNS; EACH MUST HAVE name, aggregate AND pull(agg)
    :return: Data WITH meta.format == "list" AND data AS THE LIST OF RECORDS
    """
    new_edges = count_dim(aggs, decoders)

    def empty_row(coord):
        # RECORD FOR A CELL ES DID NOT RETURN: ONLY THE COUNTS ARE SET (TO 0)
        output = Data()
        for i, d in enumerate(decoders):
            output[query.edges[i].name] = d.get_value(coord[i])
        for s in select:
            if s.aggregate == "count":
                output[s.name] = 0
        return output

    def found_row(coord, agg):
        # RECORD FOR A CELL ES DID RETURN
        output = Data()
        for e, c, d in zip(query.edges, coord, decoders):
            output[e.name] = d.get_value(c)
        for s in select:
            output[s.name] = s.pull(agg)
        return output

    def data():
        dims = tuple(
            len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
            for e in new_edges
        )
        is_sent = Matrix(dims=dims, zeros=0)

        if query.sort and not query.groupby:
            # TODO: USE THE format_table() TO PRODUCE THE NEEDED VALUES INSTEAD OF DUPLICATING LOGIC HERE
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            for _, coord, agg in aggs_iterator(aggs, decoders):
                # BUILTIN next() WORKS ON PYTHON 2 AND 3; THE .next() METHOD
                # DOES NOT EXIST ON PYTHON 3 ITERATORS
                missing_coord = next(all_coord)
                while coord != missing_coord:
                    # INSERT THE MISSING COORDINATE INTO THE GENERATION
                    yield empty_row(missing_coord)
                    missing_coord = next(all_coord)
                yield found_row(coord, agg)
            # NOTE(review): EXPECTED CELLS AFTER THE LAST RETURNED coord ARE
            # NOT EMITTED IN THIS BRANCH - CONFIRM THAT IS INTENDED
        else:
            for _, coord, agg in aggs_iterator(aggs, decoders):
                is_sent[coord] = 1
                yield found_row(coord, agg)

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for c, v in is_sent:
                    if not v:
                        yield empty_row(c)

    output = Data(meta={"format": "list"}, data=list(data()))
    return output
def format_table(aggs, es_query, query, decoders, all_selects):
    """
    RETURN THE AGGREGATION RESULT AS A TABLE: A HEADER PLUS ONE LIST PER ROW
    (EDGE VALUES FIRST, THEN ONE VALUE PER SELECT COLUMN)

    :param aggs: THE AGGREGATION RESULT TO WALK
    :param es_query: HANDED TO count_dim() AND aggs_iterator()
    :param query: PROVIDES sort AND groupby, WHICH DECIDE HOW MISSING CELLS ARE EMITTED
    :param decoders: ONE PER EDGE; get_value(c) TURNS A COORDINATE INTO A VALUE
    :param all_selects: EVERY SELECT COLUMN; EACH MUST HAVE name, default AND aggregate
    :return: Data WITH meta.format == "table", header (TUPLE) AND data (LIST OF ROWS)
    """
    new_edges = wrap(count_dim(aggs, es_query, decoders))
    dims = tuple(
        len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
        for e in new_edges
    )
    rank = len(dims)
    header = tuple(new_edges.name + all_selects.name)
    # POSITION OF EACH SELECT COLUMN IN A ROW (THE EDGE VALUES COME FIRST)
    name2index = {s.name: i + rank for i, s in enumerate(all_selects)}

    def data():
        is_sent = Matrix(dims=dims)
        give_me_zeros = query.sort and not query.groupby
        if give_me_zeros:
            # WE REQUIRE THE ZEROS FOR SORTING
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            # BUILTIN next() WORKS ON PYTHON 2 AND 3; THE .next() METHOD
            # DOES NOT EXIST ON PYTHON 3 ITERATORS
            ordered_coord = next(all_coord)[::-1]
            output = None
            for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
                if coord != ordered_coord:
                    # output HAS BEEN YIELDED, BUT SET THE DEFAULT VALUES
                    if output is not None:
                        for s in all_selects:
                            i = name2index[s.name]
                            if output[i] is None:
                                output[i] = s.default
                        # WE CAN GET THE SAME coord MANY TIMES, SO ONLY ADVANCE WHEN NOT
                        # (BEFORE THE FIRST ROW, ordered_coord IS STILL UNSEEN AND
                        # MUST BE LEFT FOR THE LOOP BELOW)
                        ordered_coord = next(all_coord)[::-1]

                while coord != ordered_coord:
                    # HAPPENS WHEN THE coord IS AHEAD OF ordered_coord
                    record = [
                        d.get_value(ordered_coord[i])
                        for i, d in enumerate(decoders)
                    ] + [s.default for s in all_selects]
                    yield record
                    ordered_coord = next(all_coord)[::-1]

                # coord == ordered_coord
                output = [
                    d.get_value(c) for c, d in zip(coord, decoders)
                ] + [None for s in all_selects]
                for select in ss:
                    v = select.pull(agg)
                    if v != None:
                        union(output, name2index[select.name], v, select.aggregate)
                yield output

            # SET DEFAULTS ON LAST ROW; NO LATER coord ARRIVES TO TRIGGER IT ABOVE
            if output is not None:
                for s in all_selects:
                    i = name2index[s.name]
                    if output[i] is None:
                        output[i] = s.default
            # NOTE(review): EXPECTED CELLS AFTER THE LAST RETURNED coord ARE
            # NOT EMITTED IN THIS BRANCH - CONFIRM THAT IS INTENDED
        else:
            last_coord = None  # HANG ONTO THE output FOR A BIT WHILE WE FILL THE ELEMENTS
            output = None
            for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
                if coord != last_coord:
                    if output:
                        # SET DEFAULTS
                        for i, s in enumerate(all_selects):
                            v = output[rank + i]
                            if v == None:
                                output[rank + i] = s.default
                        yield output
                    output = is_sent[coord]
                    if output == None:
                        output = is_sent[coord] = [
                            d.get_value(c) for c, d in zip(coord, decoders)
                        ] + [None for _ in all_selects]
                    last_coord = coord
                # THIS IS A TRICK!  WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED
                for select in ss:
                    v = select.pull(agg)
                    if v != None:
                        union(output, name2index[select.name], v, select.aggregate)

            if output:
                # SET DEFAULTS ON LAST ROW
                for i, s in enumerate(all_selects):
                    v = output[rank + i]
                    if v == None:
                        output[rank + i] = s.default
                yield output

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for coord, output in is_sent:
                    if output == None:
                        record = [
                            d.get_value(c) for c, d in zip(coord, decoders)
                        ] + [s.default for s in all_selects]
                        yield record

    return Data(meta={"format": "table"}, header=header, data=list(data()))