Esempio n. 1
0
 def __iter__(self):
     """Iterate over the contents of ``self.combined`` and ``self.archives``.

     When ``self.archives`` is empty, the items of ``self.combined`` are
     returned directly via ``six.iteritems`` (no sorting is applied).

     Otherwise the items of ``self.combined`` are sorted by their first
     element (the key), appended to ``self.archives`` as one more stream,
     and all streams are merged:

     * if ``self.rddconf.is_groupby`` and ``self.rddconf.iter_group`` are
       both truthy, the streams are merged through a min-heap keyed on the
       first element of each item and wrapped in ``GroupByNestedIter``;
     * otherwise they are merged by ``heap_merged`` using the function
       returned by ``self._get_merge_function()``.

     NOTE(review): every call on the archive path appends another iterator
     to ``self.archives``; presumably this object is iterated only once --
     confirm against callers.
     """
     if not self.archives:
         return six.iteritems(self.combined)

     # ``sorted`` accepts both a Python 2 list and a Python 3 dict view;
     # calling ``.sort()`` on the result of ``items()`` fails on Python 3
     # because views have no ``sort`` method.
     items = sorted(self.combined.items(), key=itemgetter(0))
     self.archives.append(iter(items))
     iters = [iter(archive) for archive in self.archives]

     if self.rddconf.is_groupby and self.rddconf.iter_group:
         heap = HeapOnKey(key=lambda x: x[0], min_heap=True)
         return GroupByNestedIter(heap.merge(iters), "")
     return heap_merged(iters, self._get_merge_function())
Esempio n. 2
0
    def test_merge(self):
        """Check that ``HeapOnKey.merge`` merges sorted chunks into one sorted run.

        A shuffled range of 100 integers is paired with its original index,
        cut into consecutive chunks of at most 13 pairs, and each chunk is
        sorted by value. Merging the chunks must give the same list as
        sorting all pairs by value.
        """
        total = 100
        chunk_size = 13

        values = list(range(total))
        random.shuffle(values)
        pairs = list(enumerate(values))

        def by_value(pair):
            # Order on the shuffled value, not on the enumerate index.
            return pair[1]

        chunks = [
            sorted(pairs[start:start + chunk_size], key=by_value)
            for start in range(0, len(pairs), chunk_size)
        ]

        merger = HeapOnKey(key=by_value, min_heap=True)
        merged = list(merger.merge(chunks))
        expected = sorted(pairs, key=by_value)

        assert merged == expected
Esempio n. 3
0
 def _merge_sorted(self, iters):
     """Merge ``iters`` by key and wrap the result in ``GroupByNestedIter``.

     The merge uses a min-heap keyed on the first element of each item;
     presumably every iterator in ``iters`` is already sorted by that key
     -- verify against callers. ``self.call_site`` is passed through as
     the second argument of ``GroupByNestedIter``.
     """
     merged = HeapOnKey(key=lambda pair: pair[0], min_heap=True).merge(iters)
     return GroupByNestedIter(merged, self.call_site)
Esempio n. 4
0
 def _merge_sorted(self, iters):
     """Merge ``iters`` by key and aggregate the merged stream.

     The merge uses a min-heap keyed on the first element of each item;
     presumably every iterator in ``iters`` is already sorted by that key
     -- verify against callers. The merged stream is handed to
     ``self.aggregator.aggregate_sorted`` and its result is returned.
     """
     key_heap = HeapOnKey(key=lambda pair: pair[0], min_heap=True)
     return self.aggregator.aggregate_sorted(key_heap.merge(iters))
Esempio n. 5
0
 def merge(self, iters):
     """Merge ``iters`` by key and store the grouped result on ``self.combined``.

     The merge uses a min-heap keyed on the first element of each item;
     presumably every iterator in ``iters`` is already sorted by that key
     -- verify against callers. The merged stream is wrapped in
     ``GroupByNestedIter`` together with ``self.rdd_name``. Returns ``None``.
     """
     merged = HeapOnKey(key=lambda pair: pair[0], min_heap=True).merge(iters)
     self.combined = GroupByNestedIter(merged, self.rdd_name)
Esempio n. 6
0
 def merge(self, iters):
     """Merge ``iters`` by key and store the aggregated result on ``self.combined``.

     The merge uses a min-heap keyed on the first element of each item;
     presumably every iterator in ``iters`` is already sorted by that key
     -- verify against callers. The merged stream is passed to
     ``self.aggregator.aggregate_sorted``. Returns ``None``.
     """
     key_heap = HeapOnKey(key=lambda pair: pair[0], min_heap=True)
     merged = key_heap.merge(iters)
     self.combined = self.aggregator.aggregate_sorted(merged)