Beispiel #1
0
    def testMemUtil(self):
        """ Test memory usage tracks target """
        cc = LruCache(mem_target=5000)
        self.assertEqual(len(cc), 0)
        ids = set()
        for i in range(10):
            id = createObjId("chunks")
            ids.add(id)
            arr = np.empty((16, 16), dtype='i4')  # 1024 bytes
            arr[...] = i
            cc[id] = arr
            self.assertTrue(id in cc)

        cc.consistencyCheck()
        self.assertTrue(
            len(cc) <
            10)  # given mem-target, some items should have been removed
        mem_per = cc.cacheUtilizationPercent
        self.assertTrue(mem_per < 100)
        mem_dirty = cc.memDirty
        self.assertEqual(mem_dirty, 0)

        # add 10 more chunks, but set dirty to true each time
        for i in range(10):
            id = createObjId("chunks")
            ids.add(id)
            arr = np.empty((16, 16), dtype='i4')  # 1024 bytes
            arr[...] = i
            cc[id] = arr
            self.assertTrue(id in cc)
            cc.setDirty(id)
            cc.consistencyCheck()
            mem_dirty = cc.memDirty
            self.assertEqual(mem_dirty, 1024 * (i + 1))

        mem_per = cc.cacheUtilizationPercent
        # chunks are dirty so percent is over 100%
        self.assertTrue(mem_per > 100)

        # clear dirty flags (allowing memory to be released)
        id_list = []
        for id in cc:
            id_list.append(id)

        random.shuffle(id_list)  # randomize the order we clear dirty flag

        id = id_list[0]
        cc.clearDirty(id)
        cc.consistencyCheck()

        for id in id_list:
            self.assertTrue(id in ids)
            mem_dirty = cc.memDirty
            if cc.isDirty(id):
                cc.clearDirty(id)
                self.assertTrue(cc.memDirty < mem_dirty)
        mem_per = cc.cacheUtilizationPercent
        # mem percent should be less than 100 now

        self.assertTrue(mem_per <= 100)
Beispiel #2
0
    def testClearCache(self):
        """ Check LRU clear logic """
        cc = LruCache(mem_target=1024 * 1024 *
                      1024)  # big enough that there shouldn't be any cleanup
        self.assertEqual(len(cc), 0)
        ids = []
        # add chunks to the cache
        for i in range(10):
            id = createObjId("chunks")
            ids.append(id)
            arr = np.empty((16, 16), dtype='i4')  # 1024 bytes
            arr[...] = i
            cc[id] = arr
        for id in cc:
            self.assertTrue(id.startswith("c-"))
            self.assertTrue(id in ids)
        self.assertEqual(len(cc), 10)
        self.assertEqual(cc._lru_head._id, ids[-1])
        self.assertEqual(cc._lru_tail._id, ids[0])
        self.assertEqual(cc.dirtyCount, 0)
        cc.consistencyCheck()

        cc.clearCache()
        self.assertEqual(len(cc), 0)

        cc.consistencyCheck()
Beispiel #3
0
    def testLRU(self):
        """ Check LRU replacement logic """
        cc = LruCache(mem_target=1024 * 1024 *
                      1024)  # big enough that there shouldn't be any cleanup
        self.assertEqual(len(cc), 0)
        ids = []
        # add chunks to the cache
        for i in range(10):
            id = createObjId("chunks")
            ids.append(id)
            arr = np.empty((16, 16), dtype='i4')  # 1024 bytes
            arr[...] = i
            cc[id] = arr
        for id in cc:
            self.assertTrue(id.startswith("c-"))
            self.assertTrue(id in ids)
        self.assertEqual(len(cc), 10)
        self.assertEqual(cc._lru_head._id, ids[-1])
        self.assertEqual(cc._lru_tail._id, ids[0])
        self.assertEqual(cc.dirtyCount, 0)
        cc.consistencyCheck()

        node = cc._lru_head
        for i in range(10):
            self.assertEqual(node._id, ids[9 - i])
            node = node._next
        self.assertTrue(node is None)

        chunk_5 = ids[5]
        cc.consistencyCheck()

        np_arr = cc[chunk_5]
        self.assertEqual(np_arr[0, 0], 5)
        # the get should have moved this guy to the front
        self.assertEqual(cc._lru_head._id, chunk_5)
        for i in range(10):
            self.assertFalse(cc.isDirty(ids[i]))
        # shouldn't have effected the position
        self.assertEqual(cc._lru_head._id, chunk_5)
        # set chunk 7 to dirty
        chunk_7 = ids[7]
        cc.consistencyCheck()
        cc.setDirty(chunk_7)
        cc.consistencyCheck()
        self.assertEqual(cc.dirtyCount, 1)
        # clear dirty
        cc.clearDirty(chunk_7)
        self.assertEqual(cc.dirtyCount, 0)

        random.shuffle(ids)  # randomize the order we remove chunks
        for i in range(10):
            # remove random chunk
            chunk_id = ids[i]
            del cc[chunk_id]
            cc.consistencyCheck()
        self.assertEqual(len(cc), 0)
        self.assertEqual(cc._lru_head, None)
        self.assertEqual(cc._lru_tail, None)
        cc.consistencyCheck()
Beispiel #4
0
    def testCreateObjId(self):
        id_len = 38  # 36 for uuid plus two for prefix ("g-", "d-")
        ids = set()
        for obj_class in ('groups', 'datasets', 'datatypes', 'chunks'):
            for i in range(100):
                id = createObjId(obj_class)
                self.assertEqual(len(id), id_len)
                self.assertTrue(id[0] in ('g', 'd', 't', 'c'))
                self.assertEqual(id[1], '-')
                ids.add(id)

        self.assertEqual(len(ids), 400)
        try:
            createObjId("bad_class")
            self.assertTrue(False)  # should throw exception
        except ValueError:
            pass  # expected
Beispiel #5
0
    def testSchema2Id(self):
        root_id = createObjId("roots")
        group_id = createObjId("groups", rootid=root_id)
        dataset_id = createObjId("datasets", rootid=root_id)
        ctype_id = createObjId("datatypes", rootid=root_id)

        self.assertEqual(getCollectionForId(root_id), "groups")
        self.assertEqual(getCollectionForId(group_id), "groups")
        self.assertEqual(getCollectionForId(dataset_id), "datasets")
        self.assertEqual(getCollectionForId(ctype_id), "datatypes")
        chunk_id = 'c' + dataset_id[1:] + "_1_2"
        print(chunk_id)
        chunk_partition_id = 'c42-' + dataset_id[2:] + "_1_2"

        for id in (chunk_id, chunk_partition_id):
            try:
                getCollectionForId(id)
                self.assertTrue(False)
            except ValueError:
                pass  # expected
        valid_ids = (group_id, dataset_id, ctype_id, chunk_id,
                     chunk_partition_id, root_id)
        s3prefix = getS3Key(root_id)
        self.assertTrue(s3prefix.endswith("/.group.json"))
        s3prefix = s3prefix[:-(len(".group.json"))]
        for oid in valid_ids:
            print("oid:", oid)
            self.assertTrue(len(oid) >= 38)
            parts = oid.split('-')
            self.assertEqual(len(parts), 6)
            self.assertTrue(oid[0] in ('g', 'd', 't', 'c'))
            self.assertTrue(isSchema2Id(oid))
            if oid == root_id:
                self.assertTrue(isRootObjId(oid))
            else:
                self.assertFalse(isRootObjId(oid))
            self.assertEqual(getRootObjId(oid), root_id)

            s3key = getS3Key(oid)
            print(s3key)
            self.assertTrue(s3key.startswith(s3prefix))
            self.assertEqual(getObjId(s3key), oid)
            self.assertTrue(isS3ObjKey(s3key))
Beispiel #6
0
 def testGetObjPartition(self):
     node_count = 12
     for obj_class in ('groups', 'datasets', 'datatypes', 'chunks'):
         for i in range(100):
             id = createObjId(obj_class)
             node_number = getObjPartition(id, node_count)
             self.assertTrue(node_number >= 0)
             self.assertTrue(node_number < node_count)
     # try a domain partition
     node_number = getObjPartition("/home/test_user1", node_count)
     self.assertTrue(node_number >= 0)
     self.assertTrue(node_number < node_count)
Beispiel #7
0
    def testMetaDataCache(self):
        """ check metadata cache functionality """
        cc = LruCache(mem_target=1024 * 10, name="ChunkCache")
        cc.consistencyCheck()

        self.assertEqual(len(cc), 0)
        self.assertEqual(cc.dump_lru(), "->\n<-\n")

        data = {"x": 123, "y": 456}

        rand_id = createObjId("groups")
        data = {"foo": "bar"}
        cc[rand_id] = data  # add to cache
        cc.consistencyCheck()
        self.assertEqual(len(cc), 1)
        self.assertTrue(rand_id in cc)
        lru_str = "->" + rand_id + "\n<-" + rand_id + "\n"
        mem_tgt = cc.memTarget
        self.assertEqual(mem_tgt, 1024 * 10)
        mem_used = cc.memUsed
        self.assertEqual(mem_used, 1024)  # not based on actual size
        mem_per = cc.cacheUtilizationPercent
        self.assertEqual(mem_per, 10)  # have used 10% of target memory
        # try out the dirty flags
        self.assertFalse(cc.isDirty(rand_id))
        self.assertEqual(cc.dirtyCount, 0)
        cc.setDirty(rand_id)
        cc.consistencyCheck()
        self.assertTrue(cc.isDirty(rand_id))
        self.assertEqual(cc.dirtyCount, 1)
        self.assertEqual(cc.dump_lru(), lru_str)
        cc.clearDirty(rand_id)
        cc.consistencyCheck()
        self.assertFalse(cc.isDirty(rand_id))
        self.assertEqual(cc.dirtyCount, 0)
        # chunk should not have been evicted from cache
        self.assertEqual(len(cc), 1)
        self.assertTrue(rand_id in cc)
        # delete from cache
        del cc[rand_id]
        cc.consistencyCheck()
        # check cache is empty
        self.assertEqual(len(cc), 0)
        self.assertFalse(rand_id in cc)
        mem_tgt = cc.memTarget
        self.assertEqual(mem_tgt, 1024 * 10)
        mem_used = cc.memUsed
        self.assertEqual(mem_used, 0)
        mem_per = cc.cacheUtilizationPercent
        self.assertEqual(mem_per, 0)  # no memory used
Beispiel #8
0
    def testSimple(self):
        """ check basic functions by adding one chunk to cache """
        cc = LruCache(mem_target=1000 * 1000 * 10)
        cc.consistencyCheck()

        self.assertEqual(len(cc), 0)
        self.assertEqual(cc.dump_lru(), "->\n<-\n")

        self.assertFalse("xyz" in cc)

        id = createObjId("chunks")
        try:
            # only dict objects can be added
            cc[id] = list(range(20))
            self.assertTrue(False)
        except TypeError:
            pass  # expected
        arr = np.empty((16, 16), dtype='i4')
        id = createObjId("datasets")
        try:
            cc[id] = arr
            self.assertTrue(False)
        except ValueError:
            pass  # expected - not a chunk id

        rand_id = createObjId("chunks")
        np_arr = np.random.random(
            (500, 500))  # smaller than our chunk cache size
        cc[rand_id] = np_arr  # add to cache
        cc.consistencyCheck()
        self.assertEqual(len(cc), 1)
        self.assertTrue(rand_id in cc)
        lru_str = "->" + rand_id + "\n<-" + rand_id + "\n"
        mem_tgt = cc.memTarget
        self.assertEqual(mem_tgt, 1000 * 1000 * 10)
        mem_used = cc.memUsed
        self.assertEqual(mem_used, 500 * 500 * 8)
        mem_dirty = cc.memDirty
        self.assertEqual(mem_dirty, 0)
        mem_per = cc.cacheUtilizationPercent
        self.assertEqual(mem_per, 20)  # have used 20% of target memory

        # try adding the same id to the cache again
        cc[rand_id] = np_arr
        cc.consistencyCheck()
        self.assertEqual(len(cc), 1)
        self.assertTrue(rand_id in cc)

        # try out the dirty flags
        self.assertFalse(cc.isDirty(rand_id))
        self.assertEqual(cc.dirtyCount, 0)
        cc.setDirty(rand_id)
        cc.consistencyCheck()
        self.assertTrue(cc.isDirty(rand_id))
        self.assertEqual(cc.dirtyCount, 1)
        self.assertEqual(cc.dump_lru(), lru_str)
        cc.consistencyCheck()
        cc.clearDirty(rand_id)
        cc.consistencyCheck()
        self.assertFalse(cc.isDirty(rand_id))
        self.assertEqual(cc.dirtyCount, 0)
        # chunk should not have been evicted from cache
        self.assertEqual(len(cc), 1)
        self.assertTrue(rand_id in cc)
        # delete from cache
        del cc[rand_id]
        cc.consistencyCheck()
        # check cache is empty
        self.assertEqual(len(cc), 0)
        self.assertFalse(rand_id in cc)
        mem_tgt = cc.memTarget
        self.assertEqual(mem_tgt, 1000 * 1000 * 10)
        mem_used = cc.memUsed
        self.assertEqual(mem_used, 0)
        mem_dirty = cc.memDirty
        self.assertEqual(mem_dirty, 0)
        mem_per = cc.cacheUtilizationPercent
        self.assertEqual(mem_per, 0)  # no memory used
Beispiel #9
0
    def testMetaDataCache(self):
        """ check metadata cache functionality """
        cc = LruCache(mem_target=1024 * 10, chunk_cache=False)
        cc.consistencyCheck()

        self.assertEqual(len(cc), 0)
        self.assertEqual(cc.dump_lru(), "->\n<-\n")

        id = createObjId("datasets")
        try:
            # only numpy arrays an be added
            cc[id] = np.zeros((3, 4))
            self.assertTrue(False)
        except TypeError:
            pass  # expected
        data = {"x": 123, "y": 456}
        arr = np.zeros((10, ))
        id = createObjId("chunks")
        try:
            cc[id] = arr
            self.assertTrue(False)
        except TypeError:
            pass  # expected - not a dict

        rand_id = createObjId("groups")
        data = {"foo": "bar"}
        cc[rand_id] = data  # add to cache
        cc.consistencyCheck()
        self.assertEqual(len(cc), 1)
        self.assertTrue(rand_id in cc)
        lru_str = "->" + rand_id + "\n<-" + rand_id + "\n"
        mem_tgt = cc.memTarget
        self.assertEqual(mem_tgt, 1024 * 10)
        mem_used = cc.memUsed
        self.assertEqual(mem_used, 1024)  # not based on actual size
        mem_per = cc.cacheUtilizationPercent
        self.assertEqual(mem_per, 10)  # have used 10% of target memory
        # try out the dirty flags
        self.assertFalse(cc.isDirty(rand_id))
        self.assertEqual(cc.dirtyCount, 0)
        cc.setDirty(rand_id)
        cc.consistencyCheck()
        self.assertTrue(cc.isDirty(rand_id))
        self.assertEqual(cc.dirtyCount, 1)
        self.assertEqual(cc.dump_lru(), lru_str)
        cc.clearDirty(rand_id)
        cc.consistencyCheck()
        self.assertFalse(cc.isDirty(rand_id))
        self.assertEqual(cc.dirtyCount, 0)
        # chunk should not have been evicted from cache
        self.assertEqual(len(cc), 1)
        self.assertTrue(rand_id in cc)
        # delete from cache
        del cc[rand_id]
        cc.consistencyCheck()
        # check cache is empty
        self.assertEqual(len(cc), 0)
        self.assertFalse(rand_id in cc)
        mem_tgt = cc.memTarget
        self.assertEqual(mem_tgt, 1024 * 10)
        mem_used = cc.memUsed
        self.assertEqual(mem_used, 0)
        mem_per = cc.cacheUtilizationPercent
        self.assertEqual(mem_per, 0)  # no memory used