def ibytes2ilines(generator, encoding="utf8", flexible=False, closer=None):
    """
    CONVERT A GENERATOR OF (ARBITRARY-SIZED) byte BLOCKS
    TO A LINE (LF-DELIMITED) GENERATOR

    LINES ARE SPLIT ON THE LF BYTE ONLY, AND THE LF IS NOT INCLUDED IN THE
    RESULT; ANY OTHER BYTE (INCLUDING A CR BEFORE THE LF) IS LEFT IN THE LINE.
    A LINE MAY SPAN ANY NUMBER OF BLOCKS.  TRAILING BYTES WITHOUT A FINAL LF
    ARE EMITTED AS THE LAST LINE; AN EMPTY TAIL IS NOT EMITTED.

    :param generator: ITERATOR OF byte BLOCKS (MUST SUPPORT next())
    :param encoding: None TO DO NO DECODING (PASSED TO get_decoder)
    :param flexible: PASSED THROUGH TO get_decoder
    :param closer: OPTIONAL FUNCTION TO RUN WHEN DONE ITERATING; CALLED ONCE
                   THE SOURCE IS EXHAUSTED, BEFORE THE LAST PARTIAL LINE
                   (IF ANY) IS YIELDED
    :return: GENERATOR OF DECODED LINES
    """
    decode = get_decoder(encoding=encoding, flexible=flexible)

    try:
        _buffer = next(generator)
    except StopIteration:
        # NO BLOCKS AT ALL.  LETTING StopIteration ESCAPE A GENERATOR BODY IS
        # A RuntimeError (PEP 479), SO FINISH CLEANLY: NO LINES, BUT STILL CLOSE
        del generator
        if closer:
            closer()
        return

    s = 0  # START OF THE CURRENT LINE IN _buffer
    e = _buffer.find(b"\n")  # END OF THE CURRENT LINE, OR -1 IF NOT SEEN YET
    while True:
        while e == -1:
            # NO COMPLETE LINE IN THE BUFFER: DROP WHAT WAS CONSUMED, ADD A BLOCK
            try:
                next_block = next(generator)
                _buffer = _buffer[s:] + next_block
                s = 0
                e = _buffer.find(b"\n")
            except StopIteration:
                _buffer = _buffer[s:]
                del generator
                if closer:
                    closer()
                if _buffer:
                    yield decode(_buffer)
                return

        yield decode(_buffer[s:e])
        s = e + 1
        e = _buffer.find(b"\n", s)
def __getitem__(self, item):
    """
    INDEX-STYLE ACCESS OVER A FORWARD-ONLY ITERATOR

    ONLY TWO POSITIONS CAN BE SERVED:
    * item == self._next     -> ADVANCE self._iter AND RETURN ITS NEXT VALUE
    * item == self._next - 1 -> RETURN self._last (NOT UPDATED HERE)
    ANY OTHER INDEX IS REPORTED THROUGH Log.error

    :param item: THE POSITION REQUESTED
    :return: THE VALUE AT THAT POSITION
    """
    try:
        upcoming = self._next
        if item == upcoming:
            # THE COUNTER MOVES BEFORE THE ITERATOR IS PULLED
            self._next = upcoming + 1
            return next(self._iter)
        if item == upcoming - 1:
            return self._last
        Log.error("can not index out-of-order too much")
    except Exception as cause:
        # INCLUDES THE ERROR REPORTED ABOVE AND AN EXHAUSTED ITERATOR
        Log.error("Problem indexing", cause)
def __iter__(self):
    """
    YIELD THE ELEMENTS OF EVERY ITERABLE IN self.concat, IN ORDER, WITH THE
    ELEMENTS OF self.sep EMITTED BETWEEN CONSECUTIVE ITERABLES
    (NO SEPARATOR BEFORE THE FIRST OR AFTER THE LAST)

    YIELDS NOTHING WHEN self.concat IS FALSY, OR WHEN IT PRODUCES NO ITERABLES
    """
    if not self.concat:
        return

    it = self.concat.__iter__()

    # A GENERATOR/ITERATOR IS TRUTHY EVEN WHEN IT HAS NOTHING TO GIVE.  A BARE
    # next(it) WOULD THEN RAISE StopIteration INSIDE THIS GENERATOR, WHICH IS
    # A RuntimeError (PEP 479); USE A SENTINEL DEFAULT INSTEAD
    nothing = object()
    v = next(it, nothing)
    if v is nothing:
        return

    # FIRST ITERABLE HAS NO LEADING SEPARATOR
    for vv in v:
        yield vv
    # EVERY FOLLOWING ITERABLE IS PRECEDED BY THE SEPARATOR
    for v in it:
        for s in self.sep:
            yield s
        for vv in v:
            yield vv
def aggs_iterator(aggs, es_query, decoders, give_me_zeros=False):
    """
    WALK ES'S NESTED aggs STRUCTURE (DEPTH FIRST, USING AN EXPLICIT STACK
    INSTEAD OF RECURSION) AND YIELD ONE TUPLE PER EFFECTIVE ROW OF THE RESULT

    :param aggs: ES AGGREGATE OBJECT
    :param es_query: THE ABSTRACT ES QUERY TRACKED ALONGSIDE aggs
    :param decoders: TO CONVERT PARTS INTO COORDINATES (ONE COORDINATE SLOT PER DECODER)
    :param give_me_zeros: WHEN NOT SET, AGGREGATES WITH doc_count == 0 ARE SKIPPED
    :return: GENERATOR OF (parts, coord, agg, selects)
    """

    def _known(path):
        # THE PARTS THAT ARE NOT None, IN THE ORDER THEY SIT IN THE DEQUE
        return tuple(p for p in path if p is not None)

    coordinates = [0] * len(decoders)
    path = deque()  # PARTS OF THE BRANCH BEING VISITED, MOST RECENT AT THE LEFT
    suspended = []  # GENERATORS OF THE ENCLOSING LEVELS, WAITING TO BE RESUMED
    level = _children(aggs, es_query.children)

    while True:
        try:
            index, c_agg, c_query, part = next(level)
        except StopIteration:
            # THIS LEVEL IS EXHAUSTED: RESUME THE PARENT, OR STOP AT THE ROOT
            if not suspended:
                return
            level = suspended.pop()
            path.popleft()
            continue

        if c_agg.get('doc_count') == 0 and not give_me_zeros:
            continue

        path.appendleft(part)
        for decoder in c_query.decoders:
            coordinates[decoder.edge.dim] = decoder.get_index(_known(path), c_query, index)

        children = c_query.children
        selects = c_query.selects
        if not selects and children:
            # NOTHING TO EMIT HERE: PARK THIS LEVEL AND DESCEND
            suspended.append(level)
            level = _children(c_agg, children)
            continue

        path.popleft()  # c_agg WAS ON TOP
        yield _known(path), tuple(coordinates), c_agg, selects
def data():
    """
    GENERATE THE ROWS OF THE RESULT, ONE LIST PER ROW: THE VALUE OF EACH
    decoder (VIA d.get_value) FOLLOWED BY ONE SLOT PER ENTRY OF all_selects

    TWO STRATEGIES:
    * query.sort WITHOUT query.groupby: ROWS ARE EMITTED WHILE WALKING THE
      EXPECTED COORDINATES, FILLING THE GAPS WITH DEFAULT-ONLY RECORDS
    * OTHERWISE: ROWS ARE EMITTED AS THE COORDINATE CHANGES, AND (WHEN THERE
      IS NO groupby) THE CELLS NEVER SEEN ARE EMITTED AT THE END

    NOTE(review): query, aggs, es_query, decoders, all_selects, name2index,
    rank AND dims ARE NOT DEFINED HERE; PRESUMABLY THEY COME FROM AN ENCLOSING
    FUNCTION - CONFIRM
    NOTE(review): THE `== None` / `!= None` COMPARISONS (RATHER THAN `is`) MAY
    BE DELIBERATE, TO ALSO MATCH NULL-LIKE OBJECTS - CONFIRM BEFORE CHANGING
    """
    is_sent = Matrix(dims=dims)
    give_me_zeros = query.sort and not query.groupby
    if give_me_zeros:
        # WE REQUIRE THE ZEROS FOR SORTING
        all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
        # EACH COMBINATION IS REVERSED BEFORE BEING COMPARED TO coord
        ordered_coord = next(all_coord)[::-1]
        output = None
        for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
            if coord != ordered_coord:
                # output HAS BEEN YIELDED, BUT SET THE DEFAULT VALUES
                # (THE LIST IS MUTATED AFTER THE CONSUMER RECEIVED IT)
                if output is not None:
                    for s in all_selects:
                        i = name2index[s.name]
                        if output[i] is None:
                            output[i] = s.default
                    # WE CAN GET THE SAME coord MANY TIMES, SO ONLY ADVANCE WHEN NOT
                    ordered_coord = next(all_coord)[::-1]

            while coord != ordered_coord:
                # HAPPENS WHEN THE coord IS AHEAD OF ordered_coord
                # EMIT A DEFAULT-ONLY RECORD FOR EACH EXPECTED COORDINATE SKIPPED
                record = [d.get_value(ordered_coord[i]) for i, d in enumerate(decoders)] + [s.default for s in all_selects]
                yield record
                ordered_coord = next(all_coord)[::-1]
            # AT THIS POINT coord == ordered_coord
            output = [d.get_value(c) for c, d in zip(coord, decoders)] + [None for s in all_selects]
            for select in ss:
                v = select.pull(agg)
                if v != None:
                    union(output, name2index[select.name], v, select.aggregate)
            yield output
    else:
        last_coord = None  # HANG ONTO THE output FOR A BIT WHILE WE FILL THE ELEMENTS
        output = None
        for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
            if coord != last_coord:
                # THE COORDINATE CHANGED: FINISH AND EMIT THE ROW HELD SO FAR
                if output:
                    # SET DEFAULTS
                    for i, s in enumerate(all_selects):
                        v = output[rank+i]
                        if v == None:
                            output[rank+i] = s.default
                    yield output
                # REUSE THE ROW ALREADY STORED FOR THIS coord, OR CREATE AND STORE ONE
                output = is_sent[coord]
                if output == None:
                    output = is_sent[coord] = [d.get_value(c) for c, d in zip(coord, decoders)] + [None for _ in all_selects]
                last_coord = coord
            # THIS IS A TRICK!  WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED
            for select in ss:
                v = select.pull(agg)
                if v != None:
                    union(output, name2index[select.name], v, select.aggregate)

        if output:
            # SET DEFAULTS ON LAST ROW
            for i, s in enumerate(all_selects):
                v = output[rank+i]
                if v == None:
                    output[rank+i] = s.default
            yield output

        # EMIT THE MISSING CELLS IN THE CUBE
        if not query.groupby:
            for coord, output in is_sent:
                if output == None:
                    record = [d.get_value(c) for c, d in zip(coord, decoders)] + [s.default for s in all_selects]
                    yield record