def run_partly(self, chunksize):
    """Group incoming values by key and emit the groups in periodic batches.

    Reads ``(key, value)`` pairs from ``self._generate_input()`` and collects
    the values of each key into a list held in a ``BTree``.  Once at least
    ``chunksize * self.num_workers`` pairs are buffered and the incoming key
    differs from the previous one, the buffered groups are emitted through
    ``self._output`` as ``(key, [values])`` tuples and a fresh tree is
    started.  Whatever is still buffered when the input ends is emitted last.

    Args:
        chunksize: Passed as the first argument to ``BTree`` and used,
            multiplied by ``self.num_workers``, as the flush threshold.
            (Presumably the B-tree node size -- confirm against ``BTree``.)

    Returns:
        None.  If ``self._check_stop()`` becomes truthy the method returns
        immediately and anything still buffered is NOT emitted.
    """

    def emit_groups(buffered):
        # Walk every leaf and hand each (key, values) pair to the output.
        for _leaf_key, leaf in buffered.get_leafs():
            for group_key, group_values in zip(leaf.keys, leaf.values):
                self._output((group_key, group_values))

    flush_threshold = chunksize * self.num_workers
    tree = BTree(chunksize, None, None)
    items_since_last_group = 0
    last_key = None

    for key, value in self._generate_input():
        if self._check_stop():
            return

        # Only output elements periodically and once the key changes.
        # The check happens BEFORE the new pair is stored, so the first value
        # of the new key is not flushed together with the previous batch.
        # NOTE(review): this keeps a key's values together only if the input
        # arrives grouped by key -- confirm with the producer.
        if (
            items_since_last_group
            and items_since_last_group >= flush_threshold
            and key != last_key
        ):
            emit_groups(tree)
            tree = BTree(chunksize, None, None)
            items_since_last_group = 0

        if key in tree:
            tree[key].append(value)
        else:
            tree[key] = [value]
        self.processed += 1
        items_since_last_group += 1
        # Distinct loop names in emit_groups guarantee this is the key that
        # was just read from the input, not one left over from a flush loop.
        last_key = key

    emit_groups(tree)
def run(self, chunksize=10):
    """Fold all incoming values into one running result per key, then emit.

    Every ``(key, value)`` pair from ``self._generate_input()`` is merged
    into a ``BTree``: the first value seen for a key is stored as-is, and
    each later value is combined with the stored one through
    ``self.reduce_func(stored, value)``.  ``self.processed`` is incremented
    once per pair.  After the input is exhausted, every ``(key, result)``
    tuple found in the tree's leaves is passed to ``self._output``.

    Args:
        chunksize: Passed as the first argument to ``BTree``.

    Returns:
        None.  If ``self._check_stop()`` becomes truthy the method returns
        immediately without emitting anything.
    """
    reduced = BTree(chunksize, None, None)

    for key, value in self._generate_input():
        if self._check_stop():
            return
        if key not in reduced:
            reduced[key] = value
        else:
            reduced[key] = self.reduce_func(reduced[key], value)
        self.processed += 1

    # Snapshot the leaves first, then emit each stored (key, result) tuple.
    leaves = list(reduced.get_leafs())
    for _leaf_key, leaf in leaves:
        for pair in zip(leaf.keys, leaf.values):
            self._output(pair)
def run(self, chunksize=10):
    """Collect all incoming values into one list per key, then emit.

    When ``self.partly`` is truthy the work is delegated to
    ``self.run_partly(chunksize)`` and its result is returned.  Otherwise
    every ``(key, value)`` pair from ``self._generate_input()`` is appended
    to that key's list inside a ``BTree``, incrementing ``self.processed``
    once per pair.  After the input is exhausted, every ``(key, values)``
    tuple found in the tree's leaves is passed to ``self._output``.

    Args:
        chunksize: Passed as the first argument to ``BTree`` (or forwarded
            to ``run_partly``).

    Returns:
        The result of ``run_partly`` when delegating, otherwise None.  If
        ``self._check_stop()`` becomes truthy the method returns immediately
        without emitting anything.
    """
    if self.partly:
        return self.run_partly(chunksize)

    groups = BTree(chunksize, None, None)

    for key, value in self._generate_input():
        if self._check_stop():
            return
        if key not in groups:
            groups[key] = [value]
        else:
            groups[key].append(value)
        self.processed += 1

    # Snapshot the leaves first, then emit each stored (key, values) tuple.
    leaves = list(groups.get_leafs())
    for _leaf_key, leaf in leaves:
        for pair in zip(leaf.keys, leaf.values):
            self._output(pair)