Beispiel #1
0
	def run_partly(self, chunksize):
		"""Group values by key and emit the groups in batches while reading.

		Pairs from ``self._generate_input()`` are collected per key in a
		``BTree``. Once at least ``chunksize * self.num_workers`` pairs are
		buffered and the incoming key differs from the previous one, every
		buffered ``(key, values)`` group is passed to ``self._output`` and a
		fresh tree is started. Whatever is still buffered when the input ends
		is emitted as well.

		The batch is emitted *before* the pair that starts the new key is
		stored, so that pair stays with the following pairs of the same key
		instead of being emitted on its own with the previous batch.

		Returns early, without emitting the buffered groups, as soon as
		``self._check_stop()`` is true.

		:param chunksize: passed to ``BTree`` as its first argument; multiplied
			by ``self.num_workers`` it is the minimum number of buffered pairs
			before a batch may be emitted.
		"""
		tree = BTree(chunksize, None, None)

		items_since_last_group = 0
		last_key = None
		for k, v in self._generate_input():
			if self._check_stop():
				return

			# only output elements periodically and once the key changes
			if items_since_last_group >= (chunksize * self.num_workers) and k != last_key:
				for _, leaf in list(tree.get_leafs()):
					# distinct names on purpose: rebinding k/v here would
					# corrupt the last_key bookkeeping below
					for group_key, group_values in zip(leaf.keys, leaf.values):
						self._output((group_key, group_values))
				tree = BTree(chunksize, None, None)
				items_since_last_group = 0

			if k in tree:
				tree[k].append(v)
			else:
				tree[k] = [v]
			self.processed += 1
			items_since_last_group += 1
			last_key = k

		# emit the remainder that never reached the batch threshold
		for _, leaf in list(tree.get_leafs()):
			for group_key, group_values in zip(leaf.keys, leaf.values):
				self._output((group_key, group_values))
Beispiel #2
0
	def run(self, chunksize=10):
		"""Fold all input values that share a key into one value, then emit them.

		Each ``(key, value)`` pair from ``self._generate_input()`` is merged
		into a ``BTree``: the first value seen for a key is stored as is, every
		later one is combined with the stored value via ``self.reduce_func``.
		After the input is exhausted every ``(key, value)`` held by the tree's
		leaves is passed to ``self._output``.

		Returns early, without emitting anything, as soon as
		``self._check_stop()`` is true.

		:param chunksize: passed to ``BTree`` as its first argument.
		"""
		reduced = BTree(chunksize, None, None)

		for key, value in self._generate_input():
			if self._check_stop():
				return

			if key not in reduced:
				reduced[key] = value
			else:
				reduced[key] = self.reduce_func(reduced[key], value)
			self.processed += 1

		# snapshot the leaves first, then hand out their contents pair by pair
		leaves = list(reduced.get_leafs())
		for _, leaf in leaves:
			for pair in zip(leaf.keys, leaf.values):
				self._output(pair)
Beispiel #3
0
	def run(self, chunksize=10):
		"""Reduce the input per key and emit one ``(key, value)`` per key.

		Reads ``(key, value)`` pairs from ``self._generate_input()``. A key
		seen for the first time stores its value unchanged; for a known key
		the stored value is replaced by
		``self.reduce_func(stored_value, value)``. When the input ends, the
		contents of all tree leaves are sent to ``self._output``.

		If ``self._check_stop()`` is true at the start of an iteration the
		method returns immediately and nothing is emitted.

		:param chunksize: first argument handed to the ``BTree`` constructor.
		"""
		accumulator = BTree(chunksize, None, None)

		for in_key, in_value in self._generate_input():
			if self._check_stop():
				return

			if in_key not in accumulator:
				accumulator[in_key] = in_value
			else:
				merged = self.reduce_func(accumulator[in_key], in_value)
				accumulator[in_key] = merged
			self.processed += 1

		for _, leaf in list(accumulator.get_leafs()):
			for out_key, out_value in zip(leaf.keys, leaf.values):
				self._output((out_key, out_value))
Beispiel #4
0
    def test_persistent(self):
        # Round-trips a populated tree through to_JSON()/from_JSON() with a
        # newly created leaf factory, then checks lookups and removals on the
        # restored tree.

        def find_leaf(n):
            # looked up through self.btree on every call, because the test
            # replaces self.btree after deserializing
            return self.btree.root.search(IntClass(n))

        def stored_keys(n):
            return [key.integer for key in find_leaf(n).keys]

        def stored_values(n):
            return [value.integer for value in find_leaf(n).values]

        # load with numbers
        numbers = list(range(1, 1000))
        random.shuffle(numbers)
        for number in numbers:
            self.btree.insert(IntClass(number), IntClass(100 + number))

        # serialize and deserialize again
        serialized = self.btree.to_JSON()
        page_prefix = os.path.join(self.tempdir, "page")
        self.leaffactory = BTreeFileLeafFactory(page_prefix, ".index")
        self.btree = BTree.from_JSON(serialized, IntClass, IntClass,
                                     self.leaffactory)

        # check for numbers
        for number in numbers:
            leaf = find_leaf(number)
            leaf.ensure_load()
            self.assertIn(number, [key.integer for key in leaf.keys])

        self.assertGreater(len(self.leaffactory.allocated_leaves), 2)

        # test delete to check if parent correctly set on all nodes and tree works
        random.shuffle(numbers)
        for number in numbers:
            find_leaf(number).ensure_load()
            self.assertIn(number, stored_keys(number))
            self.assertIn(100 + number, stored_values(number))

            self.btree.remove(IntClass(number))

            find_leaf(number).ensure_load()
            self.assertNotIn(number, stored_keys(number))
            self.assertNotIn(100 + number, stored_values(number))
            self.leaffactory.cleanup()

        # after removing everything only one empty leaf is expected to remain
        root = self.btree.root
        self.assertEqual(0, len(root))
        self.assertEqual([], root.childs[0].keys)
        self.assertEqual([], root.childs[0].values)
        self.assertEqual(1, len(self.leaffactory.allocated_leaves))
Beispiel #5
0
	def run(self, chunksize=10):
		"""Collect all input values per key and emit ``(key, values)`` groups.

		When ``self.partly`` is truthy the work is delegated to
		``self.run_partly(chunksize)`` and its result is returned. Otherwise
		every ``(key, value)`` pair from ``self._generate_input()`` is appended
		to a per-key list kept in a ``BTree``; after the input ends each
		``(key, list_of_values)`` held by the tree's leaves is passed to
		``self._output``.

		Returns early, without emitting anything, as soon as
		``self._check_stop()`` is true.

		:param chunksize: passed to ``BTree`` as its first argument (and on to
			``run_partly`` when delegating).
		"""
		if self.partly:
			return self.run_partly(chunksize)

		groups = BTree(chunksize, None, None)

		for key, value in self._generate_input():
			if self._check_stop():
				return

			if key not in groups:
				groups[key] = [value]
			else:
				groups[key].append(value)
			self.processed += 1

		# snapshot the leaves first, then hand out their contents group by group
		leaves = list(groups.get_leafs())
		for _, leaf in leaves:
			for group in zip(leaf.keys, leaf.values):
				self._output(group)
Beispiel #6
0
	def run(self, chunksize=10):
		"""Group the input values by key and emit one list of values per key.

		Delegates to ``self.run_partly(chunksize)`` (returning its result) when
		``self.partly`` is truthy. Otherwise reads ``(key, value)`` pairs from
		``self._generate_input()``, appending each value to the list stored
		under its key in a ``BTree``, and finally sends every
		``(key, values)`` pair found in the tree's leaves to ``self._output``.

		If ``self._check_stop()`` is true at the start of an iteration the
		method returns immediately and nothing is emitted.

		:param chunksize: first argument handed to the ``BTree`` constructor.
		"""
		if self.partly:
			return self.run_partly(chunksize)

		grouped = BTree(chunksize, None, None)

		for in_key, in_value in self._generate_input():
			if self._check_stop():
				return

			if in_key not in grouped:
				grouped[in_key] = [in_value]
			else:
				grouped[in_key].append(in_value)
			self.processed += 1

		for _, leaf in list(grouped.get_leafs()):
			for out_key, out_values in zip(leaf.keys, leaf.values):
				self._output((out_key, out_values))
Beispiel #7
0
	def run_partly(self, chunksize):
		"""Group values by key, emitting finished batches while still reading.

		``(key, value)`` pairs from ``self._generate_input()`` are buffered as
		per-key lists in a ``BTree``. When at least
		``chunksize * self.num_workers`` pairs are buffered and the incoming
		key is different from the previous one, all buffered
		``(key, values)`` groups are handed to ``self._output`` and buffering
		restarts with an empty tree. The remaining buffered groups are emitted
		once the input is exhausted.

		The buffered batch is emitted *before* the pair that introduces the
		new key is stored; otherwise that single pair would be emitted with
		the old batch and separated from the rest of its group.

		Returns early, without emitting the buffered groups, as soon as
		``self._check_stop()`` is true.

		:param chunksize: passed to ``BTree`` as its first argument; multiplied
			by ``self.num_workers`` it is the minimum number of buffered pairs
			before a batch may be emitted.
		"""
		tree = BTree(chunksize, None, None)

		items_since_last_group = 0
		last_key = None
		for k, v in self._generate_input():
			if self._check_stop():
				return

			# only output elements periodically and once the key changes
			if items_since_last_group >= (chunksize * self.num_workers) and k != last_key:
				for _, leaf in list(tree.get_leafs()):
					# separate names: reusing k/v here would overwrite the
					# current input key that last_key has to remember
					for group_key, group_values in zip(leaf.keys, leaf.values):
						self._output((group_key, group_values))
				tree = BTree(chunksize, None, None)
				items_since_last_group = 0

			if k in tree:
				tree[k].append(v)
			else:
				tree[k] = [v]
			self.processed += 1
			items_since_last_group += 1
			last_key = k

		# emit the remainder that never reached the batch threshold
		for _, leaf in list(tree.get_leafs()):
			for group_key, group_values in zip(leaf.keys, leaf.values):
				self._output((group_key, group_values))
Beispiel #8
0
    def test_persistent(self):
        # Serializes a populated tree with to_JSON(), rebuilds it through
        # BTree.from_JSON() on a new leaf factory, and verifies that lookups
        # and removals work on the rebuilt tree.

        # load with numbers
        numbers = list(range(1, 1000))
        random.shuffle(numbers)
        for number in numbers:
            self.btree.insert(IntClass(number), IntClass(100 + number))

        # serialize and deserialize again
        tree_json = self.btree.to_JSON()
        page_prefix = os.path.join(self.tempdir, "page")
        self.leaffactory = BTreeFileLeafFactory(page_prefix, ".index")
        self.btree = BTree.from_JSON(tree_json, IntClass, IntClass, self.leaffactory)

        # check for numbers
        for number in numbers:
            leaf = self.btree.root.search(IntClass(number))
            leaf.ensure_load()
            found_keys = [key.integer for key in leaf.keys]
            self.assertIn(number, found_keys)

        self.assertGreater(len(self.leaffactory.allocated_leaves), 2)

        # test delete to check if parent correctly set on all nodes and tree works
        random.shuffle(numbers)
        for number in numbers:
            self.btree.root.search(IntClass(number)).ensure_load()
            keys_before = [key.integer for key in self.btree.root.search(IntClass(number)).keys]
            self.assertIn(number, keys_before)
            values_before = [value.integer for value in self.btree.root.search(IntClass(number)).values]
            self.assertIn(100 + number, values_before)

            self.btree.remove(IntClass(number))

            self.btree.root.search(IntClass(number)).ensure_load()
            keys_after = [key.integer for key in self.btree.root.search(IntClass(number)).keys]
            self.assertNotIn(number, keys_after)
            values_after = [value.integer for value in self.btree.root.search(IntClass(number)).values]
            self.assertNotIn(100 + number, values_after)
            self.leaffactory.cleanup()

        # after removing everything only one empty leaf is expected to remain
        root = self.btree.root
        self.assertEqual(0, len(root))
        self.assertEqual([], root.childs[0].keys)
        self.assertEqual([], root.childs[0].values)
        self.assertEqual(1, len(self.leaffactory.allocated_leaves))
Beispiel #9
0
 def setUp(self):
     """Create a temp directory, a BTreeFileLeafFactory on it and a BTree using that factory."""
     # NOTE(review): the directory created by mkdtemp() is not removed here -
     # confirm that a tearDown elsewhere in the class cleans it up.
     self.tempdir = tempfile.mkdtemp()
     page_prefix = os.path.join(self.tempdir, "page")
     self.leaffactory = BTreeFileLeafFactory(page_prefix, ".index")
     self.btree = BTree(50, IntClass, IntClass, self.leaffactory)
Beispiel #10
0
 def setUp(self):
     """Give every test its own tree, built as ``BTree(3, int, int)``."""
     fresh_tree = BTree(3, int, int)
     self.btree = fresh_tree
Beispiel #11
0
class TestBTree(unittest.TestCase):
    """Insert and remove tests for a tree created as ``BTree(3, int, int)``."""

    def setUp(self):
        self.btree = BTree(3, int, int)

    def tearDown(self):
        pass

    def test_insert(self):
        # The first three pairs are expected to land in the root's first
        # child; the fourth is expected to split it into [1, 2] and [3, 4]
        # with 3 as the only key of the root.
        # NOTE: uses assertEqual throughout - the assertEquals alias was
        # deprecated and is no longer available on Python 3.12+.
        self.btree.insert(1, 101)
        self.assertEqual(1, self.btree.root.childs[0].keys[0])
        self.assertEqual(101, self.btree.root.childs[0].values[0])

        self.btree.insert(2, 102)
        self.assertEqual(1, self.btree.root.childs[0].keys[0])
        self.assertEqual(101, self.btree.root.childs[0].values[0])
        self.assertEqual(2, self.btree.root.childs[0].keys[1])
        self.assertEqual(102, self.btree.root.childs[0].values[1])

        self.btree.insert(3, 103)
        self.assertEqual([1, 2, 3], self.btree.root.childs[0].keys)
        self.assertEqual([101, 102, 103], self.btree.root.childs[0].values)

        self.btree.insert(4, 104)
        self.assertEqual([1, 2], self.btree.root.childs[0].keys)
        self.assertEqual([101, 102], self.btree.root.childs[0].values)
        self.assertEqual([3, 4], self.btree.root.childs[1].keys)
        self.assertEqual([103, 104], self.btree.root.childs[1].values)
        self.assertEqual([3], self.btree.root.keys)

        # bulk insert in random order; every pair must be findable afterwards
        numbers = list(range(5, 100))
        random.shuffle(numbers)

        for number in numbers:
            self.btree.insert(number, 100 + number)

        for number in numbers:
            self.assertIn(number, self.btree.root.search(number).keys)
            self.assertIn(100 + number, self.btree.root.search(number).values)

    def test_remove(self):
        # insert
        numbers = list(range(1, 1000))
        random.shuffle(numbers)

        for number in numbers:
            self.btree.insert(number, 100 + number)

        # test delete, in a different random order than the inserts
        random.shuffle(numbers)

        for number in numbers:
            self.assertIn(number, self.btree.root.search(number).keys)
            self.assertIn(100 + number, self.btree.root.search(number).values)
            self.btree.remove(number)
            self.assertNotIn(number, self.btree.root.search(number).keys)
            self.assertNotIn(100 + number, self.btree.root.search(number).values)

        # once everything is removed the root must be empty, with a single
        # empty child left
        self.assertEqual(0, len(self.btree.root))
        self.assertEqual([], self.btree.root.childs[0].keys)
        self.assertEqual([], self.btree.root.childs[0].values)
Beispiel #12
0
 def setUp(self):
     """Build the tree under test as ``BTree(3, int, int)`` before each test."""
     tree_under_test = BTree(3, int, int)
     self.btree = tree_under_test
Beispiel #13
0
class TestBTree(unittest.TestCase):
    """Insert and remove tests for a tree created as ``BTree(3, int, int)``."""

    def setUp(self):
        self.btree = BTree(3, int, int)

    def tearDown(self):
        pass

    def test_insert(self):
        # The first three pairs are expected in the root's first child; the
        # fourth is expected to split it into [1, 2] / [3, 4] with root key 3.
        tree = self.btree

        tree.insert(1, 101)
        first_child = tree.root.childs[0]
        self.assertEqual(1, first_child.keys[0])
        self.assertEqual(101, first_child.values[0])

        tree.insert(2, 102)
        first_child = tree.root.childs[0]
        self.assertEqual(1, first_child.keys[0])
        self.assertEqual(101, first_child.values[0])
        self.assertEqual(2, first_child.keys[1])
        self.assertEqual(102, first_child.values[1])

        tree.insert(3, 103)
        first_child = tree.root.childs[0]
        self.assertEqual([1, 2, 3], first_child.keys)
        self.assertEqual([101, 102, 103], first_child.values)

        tree.insert(4, 104)
        root = tree.root
        self.assertEqual([1, 2], root.childs[0].keys)
        self.assertEqual([101, 102], root.childs[0].values)
        self.assertEqual([3, 4], root.childs[1].keys)
        self.assertEqual([103, 104], root.childs[1].values)
        self.assertEqual([3], root.keys)

        # bulk insert in random order; every pair must be findable afterwards
        numbers = list(range(5, 100))
        random.shuffle(numbers)

        for number in numbers:
            tree.insert(number, 100 + number)

        for number in numbers:
            expected_value = 100 + number
            self.assertIn(number, tree.root.search(number).keys)
            self.assertIn(expected_value, tree.root.search(number).values)

    def test_remove(self):
        tree = self.btree

        # insert
        numbers = list(range(1, 1000))
        random.shuffle(numbers)
        for number in numbers:
            tree.insert(number, 100 + number)

        # test delete, in a different random order than the inserts
        random.shuffle(numbers)
        for number in numbers:
            expected_value = 100 + number
            self.assertIn(number, tree.root.search(number).keys)
            self.assertIn(expected_value, tree.root.search(number).values)
            tree.remove(number)
            self.assertNotIn(number, tree.root.search(number).keys)
            self.assertNotIn(expected_value, tree.root.search(number).values)

        # once everything is removed the root must be empty, with a single
        # empty child left
        root = tree.root
        self.assertEqual(0, len(root))
        self.assertEqual([], root.childs[0].keys)
        self.assertEqual([], root.childs[0].values)
Beispiel #14
0
 def setUp(self):
     """Prepare a temp directory, a BTreeFileLeafFactory on it and the BTree under test."""
     # NOTE(review): nothing here deletes the mkdtemp() directory - verify
     # that the class's tearDown (not visible here) removes it.
     workdir = tempfile.mkdtemp()
     self.tempdir = workdir
     self.leaffactory = BTreeFileLeafFactory(os.path.join(workdir, "page"), ".index")
     self.btree = BTree(50, IntClass, IntClass, self.leaffactory)