Esempio n. 1
0
	def run_partly(self, chunksize):
		tree = BTree(chunksize, None, None)

		items_since_last_group = 0
		last_key = None
		for k, v in self._generate_input():
			if self._check_stop():
				return

			if k in tree:
				tree[k].append(v)
			else:
				tree[k] = [v]
			self.processed += 1
			items_since_last_group += 1

			if items_since_last_group >= (chunksize*self.num_workers) and k != last_key: #only output elements periodically and once key changes
				keyleafs = [l for l in tree.get_leafs()]
				for key, leaf in keyleafs:
					for k, v in zip(leaf.keys, leaf.values):
						self._output((k,v))
				tree = BTree(chunksize, None, None)
				items_since_last_group = 0

			last_key = k

		keyleafs = [l for l in tree.get_leafs()]
		for key, leaf in keyleafs:
			for k, v in zip(leaf.keys, leaf.values):
				self._output((k,v))
Esempio n. 2
0
	def run_partly(self, chunksize):
		tree = BTree(chunksize, None, None)

		items_since_last_group = 0
		last_key = None
		for k, v in self._generate_input():
			if self._check_stop():
				return

			if k in tree:
				tree[k].append(v)
			else:
				tree[k] = [v]
			self.processed += 1
			items_since_last_group += 1

			if items_since_last_group >= (chunksize*self.num_workers) and k != last_key: #only output elements periodically and once key changes
				keyleafs = [l for l in tree.get_leafs()]
				for key, leaf in keyleafs:
					for k, v in zip(leaf.keys, leaf.values):
						self._output((k,v))
				tree = BTree(chunksize, None, None)
				items_since_last_group = 0

			last_key = k

		keyleafs = [l for l in tree.get_leafs()]
		for key, leaf in keyleafs:
			for k, v in zip(leaf.keys, leaf.values):
				self._output((k,v))
Esempio n. 3
0
	def run(self, chunksize=10):
		tree = BTree(chunksize, None, None)

		for k, v in self._generate_input():
			if self._check_stop():
				return

			if k in tree:
				tree[k] = self.reduce_func(tree[k], v)
			else:
				tree[k] = v
			self.processed += 1

		keyleafs = [l for l in tree.get_leafs()]
		for key, leaf in keyleafs:
			for k, v in zip(leaf.keys, leaf.values):
				self._output((k,v))
Esempio n. 4
0
	def run(self, chunksize=10):
		tree = BTree(chunksize, None, None)

		for k, v in self._generate_input():
			if self._check_stop():
				return

			if k in tree:
				tree[k] = self.reduce_func(tree[k], v)
			else:
				tree[k] = v
			self.processed += 1

		keyleafs = [l for l in tree.get_leafs()]
		for key, leaf in keyleafs:
			for k, v in zip(leaf.keys, leaf.values):
				self._output((k,v))
Esempio n. 5
0
	def run(self, chunksize=10):
		if self.partly:
			return self.run_partly(chunksize)

		tree = BTree(chunksize, None, None)

		for k, v in self._generate_input():
			if self._check_stop():
				return

			if k in tree:
				tree[k].append(v)
			else:
				tree[k] = [v]
			self.processed += 1

		keyleafs = [l for l in tree.get_leafs()]
		for key, leaf in keyleafs:
			for k, v in zip(leaf.keys, leaf.values):
				self._output((k,v))
Esempio n. 6
0
	def run(self, chunksize=10):
		if self.partly:
			return self.run_partly(chunksize)

		tree = BTree(chunksize, None, None)

		for k, v in self._generate_input():
			if self._check_stop():
				return

			if k in tree:
				tree[k].append(v)
			else:
				tree[k] = [v]
			self.processed += 1

		keyleafs = [l for l in tree.get_leafs()]
		for key, leaf in keyleafs:
			for k, v in zip(leaf.keys, leaf.values):
				self._output((k,v))