コード例 #1
0
def ibytes2ilines(generator, encoding="utf8", flexible=False, closer=None):
    """
    CONVERT A GENERATOR OF (ARBITRARY-SIZED) byte BLOCKS
    TO A LINE (LF-DELIMITED) GENERATOR

    THE LF DELIMITER IS NOT INCLUDED IN THE EMITTED LINES.  A TRAILING
    FRAGMENT WITH NO DELIMITER IS EMITTED ONLY WHEN IT IS NOT EMPTY.
    AN INPUT WITH NO BLOCKS AT ALL PRODUCES NO LINES.

    :param generator: ITERATOR OF byte BLOCKS (ADVANCED WITH next())
    :param encoding: PASSED TO get_decoder(); None TO DO NO DECODING
    :param flexible: PASSED TO get_decoder()
    :param closer: OPTIONAL FUNCTION TO RUN WHEN DONE ITERATING
    :return: GENERATOR OF DECODED LINES
    """
    decode = get_decoder(encoding=encoding, flexible=flexible)
    try:
        _buffer = next(generator)
    except StopIteration:
        # NO BLOCKS AT ALL.  A StopIteration THAT ESCAPES A GENERATOR BODY IS
        # TURNED INTO RuntimeError (PEP 479), AND closer WOULD NEVER RUN
        del generator
        if closer:
            closer()
        return

    s = 0  # START OF THE CURRENT LINE IN _buffer
    e = _buffer.find(b"\n")  # END OF THE CURRENT LINE, OR -1 IF NOT YET SEEN
    while True:
        while e == -1:
            # NO COMPLETE LINE IN HAND: PULL ANOTHER BLOCK
            try:
                next_block = next(generator)
            except StopIteration:
                # INPUT EXHAUSTED: CLEAN UP, THEN FLUSH WHATEVER IS LEFT
                _buffer = _buffer[s:]
                del generator
                if closer:
                    closer()
                if _buffer:
                    yield decode(_buffer)
                return
            # DROP WHAT WAS ALREADY EMITTED, KEEP THE PARTIAL LINE
            _buffer = _buffer[s:] + next_block
            s = 0
            e = _buffer.find(b"\n")

        yield decode(_buffer[s:e])
        s = e + 1
        e = _buffer.find(b"\n", s)
コード例 #2
0
 def __getitem__(self, item):
     """
     LIST-LIKE ACCESS OVER self._iter, IN ORDER ONLY

     TWO INDEXES ARE ACCEPTED: self._next (READS THE NEXT VALUE FROM
     self._iter) AND self._next - 1 (REPLAYS THE VALUE MOST RECENTLY READ).
     ANY OTHER INDEX, AND ANY FAILURE WHILE READING, IS REPORTED THROUGH
     Log.error("Problem indexing", ...)

     :param item: INDEX REQUESTED
     :return: THE VALUE AT THAT INDEX
     """
     try:
         if item == self._next:
             # REMEMBER THE VALUE SO THE SAME INDEX CAN BE ASKED FOR AGAIN.
             # ADVANCE ONLY AFTER THE READ SUCCEEDS, SO A FAILED READ DOES
             # NOT LEAVE THE CURSOR POINTING PAST A VALUE WE NEVER GOT
             self._last = next(self._iter)
             self._next += 1
             return self._last
         elif item == self._next - 1:
             return self._last
         else:
             # INSIDE THE try, SO THIS IS RE-REPORTED BY THE HANDLER BELOW
             Log.error("can not index out-of-order too much")
     except Exception as e:
         Log.error("Problem indexing", e)
コード例 #3
0
 def __iter__(self):
     """
     YIELD THE ELEMENTS OF EACH MEMBER OF self.concat, IN ORDER, WITH THE
     ELEMENTS OF self.sep EMITTED BETWEEN CONSECUTIVE MEMBERS

     NOTHING IS YIELDED WHEN self.concat IS FALSY, OR PRODUCES NO MEMBERS
     """
     if not self.concat:
         return
     it = iter(self.concat)
     try:
         v = next(it)
     except StopIteration:
         # TRUTHY BUT EMPTY (eg A SPENT ITERATOR).  A StopIteration THAT
         # ESCAPES A GENERATOR BODY BECOMES RuntimeError (PEP 479)
         return
     # FIRST MEMBER HAS NO SEPARATOR IN FRONT OF IT
     for vv in v:
         yield vv
     for v in it:
         for s in self.sep:
             yield s
         for vv in v:
             yield vv
コード例 #4
0
def aggs_iterator(aggs, es_query, decoders, give_me_zeros=False):
    """
    WALK ES'S RECURSIVE aggs DATA-STRUCTURE, DEPTH-FIRST AND WITHOUT
    RECURSION, YIELDING THE EFFECTIVE ROWS OF THE RESULTS

    EACH ROW IS THE TUPLE (parts, coord, agg, selects)

    :param aggs: ES AGGREGATE OBJECT
    :param es_query: THE ABSTRACT ES QUERY WE WILL TRACK ALONGSIDE aggs
    :param decoders: TO CONVERT PARTS INTO COORDINATES
    :param give_me_zeros: WHEN FALSY, AGGREGATES WITH doc_count == 0 ARE SKIPPED
    """
    finished = object()  # RETURNED BY next() WHEN A GENERATOR IS SPENT
    coord = [0] * len(decoders)
    parts = deque()  # MOST RECENT PART IS ON THE LEFT
    suspended = []  # PARENT GENERATORS, WAITING FOR THEIR CHILDREN TO FINISH

    def known_parts():
        # SNAPSHOT OF THE PARTS COLLECTED SO FAR, WITHOUT THE None PLACEHOLDERS
        return tuple(p for p in parts if p is not None)

    current = _children(aggs, es_query.children)
    while True:
        entry = next(current, finished)
        if entry is finished:
            # THIS LEVEL IS DONE: RESUME THE PARENT, OR STOP IF THERE IS NONE
            if not suspended:
                return
            current = suspended.pop()
            parts.popleft()
            continue

        index, c_agg, c_query, part = entry
        if c_agg.get('doc_count') == 0 and not give_me_zeros:
            continue

        parts.appendleft(part)
        for d in c_query.decoders:
            coord[d.edge.dim] = d.get_index(known_parts(), c_query, index)

        children = c_query.children
        selects = c_query.selects
        if not selects and children:
            # NOTHING TO EMIT AT THIS LEVEL: DESCEND INTO THE CHILDREN
            suspended.append(current)
            current = _children(c_agg, children)
            continue

        parts.popleft()  # c_agg WAS ON TOP
        yield (known_parts(), tuple(coord), c_agg, selects)
コード例 #5
0
    def data():
        """
        GENERATE THE RESULT ROWS, ONE list PER ROW

        EACH ROW HOLDS ONE VALUE PER decoder (FROM d.get_value()) FOLLOWED BY
        ONE SLOT PER ENTRY IN all_selects.  THIS IS A CLOSURE: dims, query,
        aggs, es_query, decoders, all_selects, name2index AND rank ARE TAKEN
        FROM THE ENCLOSING SCOPE

        BEWARE: A ROW CAN BE MODIFIED AFTER IT HAS BEEN YIELDED (DEFAULTS ARE
        FILLED IN, AND MORE SELECTS MERGED, ON LATER ITERATIONS)

        NOTE(review): `== None` / `!= None` ARE USED INSTEAD OF `is`;
        PRESUMABLY THIS ALSO MATCHES NULL-LIKE OBJECTS - CONFIRM BEFORE CHANGING
        """
        is_sent = Matrix(dims=dims)
        give_me_zeros = query.sort and not query.groupby
        if give_me_zeros:
            # WE REQUIRE THE ZEROS FOR SORTING
            # NOTE(review): give_me_zeros IS NOT PASSED TO aggs_iterator() BELOW; THE
            # MISSING COMBINATIONS ARE FILLED IN BY THIS LOOP INSTEAD - CONFIRM INTENDED
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            # EACH COMBINATION IS REVERSED ([::-1]) BEFORE IT IS COMPARED TO coord
            # NOTE(review): data() IS A GENERATOR, SO IF all_coord RUNS OUT THE
            # StopIteration FROM next() SURFACES AS RuntimeError ON PYTHON 3.7+ (PEP 479)
            ordered_coord = next(all_coord)[::-1]
            output = None
            for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
                if coord != ordered_coord:
                    # output HAS BEEN YIELDED, BUT SET THE DEFAULT VALUES
                    if output is not None:
                        for s in all_selects:
                            i = name2index[s.name]
                            if output[i] is None:
                                output[i] = s.default
                        # WE CAN GET THE SAME coord MANY TIMES, SO ONLY ADVANCE WHEN NOT
                        ordered_coord = next(all_coord)[::-1]

                while coord != ordered_coord:
                    # HAPPENS WHEN THE coord IS AHEAD OF ordered_coord
                    # EMIT AN ALL-DEFAULTS ROW FOR EACH EXPECTED COMBINATION THAT WAS SKIPPED
                    record = [d.get_value(ordered_coord[i]) for i, d in enumerate(decoders)] + [s.default for s in all_selects]
                    yield record
                    ordered_coord = next(all_coord)[::-1]
                # coord == ordered_coord
                # A NEW ROW IS BUILT FOR EVERY ITEM FROM aggs_iterator, EVEN WHEN coord REPEATS
                output = [d.get_value(c) for c, d in zip(coord, decoders)] + [None for s in all_selects]
                for select in ss:
                    v = select.pull(agg)
                    if v != None:
                        # PRESUMABLY MERGES v INTO THE SLOT USING select.aggregate - SEE union()
                        union(output, name2index[select.name], v, select.aggregate)
                yield output
            # NOTE(review): WHEN THE LOOP ENDS, THE LAST output HAS NOT HAD ITS DEFAULTS
            # SET, AND COMBINATIONS STILL LEFT IN all_coord ARE NOT EMITTED - CONFIRM INTENDED
        else:
            last_coord = None   # HANG ONTO THE output FOR A BIT WHILE WE FILL THE ELEMENTS
            output = None
            for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
                if coord != last_coord:
                    # coord CHANGED: THE PREVIOUS ROW IS FINISHED, SO EMIT IT
                    if output:
                        # SET DEFAULTS
                        # THE SELECT SLOTS START AT INDEX rank (PRESUMABLY THE NUMBER OF
                        # LEADING decoder COLUMNS - CONFIRM AGAINST THE ENCLOSING SCOPE)
                        for i, s in enumerate(all_selects):
                            v = output[rank+i]
                            if v == None:
                                output[rank+i] = s.default
                        yield output
                    # REUSE THE ROW ALREADY STORED FOR THIS coord, IF THERE IS ONE
                    output = is_sent[coord]
                    if output == None:
                        # FIRST TIME THIS coord IS SEEN: BUILD THE ROW AND REMEMBER IT IN is_sent
                        output = is_sent[coord] = [d.get_value(c) for c, d in zip(coord, decoders)] + [None for _ in all_selects]
                    last_coord = coord
                # THIS IS A TRICK!  WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED
                for select in ss:
                    v = select.pull(agg)
                    if v != None:
                        union(output, name2index[select.name], v, select.aggregate)

            if output:
                # SET DEFAULTS ON LAST ROW
                for i, s in enumerate(all_selects):
                    v = output[rank+i]
                    if v == None:
                        output[rank+i] = s.default
                yield output

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                # ITERATING is_sent GIVES (coord, row) PAIRS; A None ROW WAS NEVER FILLED ABOVE
                for coord, output in is_sent:
                    if output == None:
                        record = [d.get_value(c) for c, d in zip(coord, decoders)] + [s.default for s in all_selects]
                        yield record