Beispiel #1
0
    def test_split_already_persistent(self):
        """Split a persistent StorageNumpy with np_split, before and after reloading it by name.

        Each chunk yielded by np_split is synced, dropped and fetched again
        through getByID before being compared with the reference blocks.
        """
        bn, bm = 2, 1
        x = np.arange(100).reshape(10, -1)
        # Reference data: for every band of bn rows, the list of its
        # (bn x bm) sub-arrays taken left to right.
        blocks = [
            [x[r: r + bn, c: c + bm] for c in range(0, x.shape[1], bm)]
            for r in range(0, x.shape[0], bn)
        ]

        table = "test_split_already_persistent"
        data = StorageNumpy(input_array=x, name=table)
        data.sync()  # Flush values to cassandra

        for i, chunk in enumerate(data.np_split(block_size=(bn, bm))):
            chunk_id = chunk.storage_id
            chunk.sync()  # Flush data
            del chunk
            chunk = getByID(chunk_id)
            self.assertTrue(np.array_equal(list(chunk), blocks[i]))

        # Drop the in-memory object so the next instance comes from storage.
        del data
        gc.collect()

        data = StorageNumpy(name=table)
        self.assertTrue(np.array_equal(list(data), x))

        for i, chunk in enumerate(data.np_split(block_size=(bn, bm))):
            chunk_id = chunk.storage_id
            chunk.sync()  # Flush data
            del chunk
            chunk = getByID(chunk_id)
            self.assertTrue(np.array_equal(list(chunk), blocks[i]))

        # The number of chunks visited must match the number of reference rows.
        self.assertEqual(i + 1, len(blocks))
Beispiel #2
0
 def test_load_2_dif_clusters_same_instance(self):
     """Read two distant regions of a reloaded 50x50 array through the same instance."""
     reference = np.arange(50 * 50).reshape((50, 50))
     table = 'load_2_clustrs_same_inst'
     stored = StorageNumpy(input_array=reference, name=table)
     stored.sync()  # Flush values to cassandra
     loaded = StorageNumpy(name=table)
     # First touch the top-left corner; the result itself is not checked.
     loaded[0:1, 0:1]
     # Then read the bottom-right 10x10 corner and compare it with the source.
     corner = slice(40, 50)
     self.assertTrue(np.array_equal(loaded[corner, corner], reference[corner, corner]))
Beispiel #3
0
 def test_split_content(self):
     """Check the number, shape and content of the pieces produced by split().

     An 88x66 array is stored, synced and reloaded by name. The test expects
     4 row pieces of (22, 66), 3 column pieces of (88, 22) and 12 blocks of
     (22, 22), and checks that row/column pieces match the source array.
     """
     n = np.arange(88*66).reshape(88,66)
     s = StorageNumpy(n,"test_split_content")
     s.sync() # Flush values to cassandra
     del s
     s = StorageNumpy(None,"test_split_content")
     rows = list(s.split(cols=False))
     # assertEqual (instead of assertTrue(a == b)) reports both values on failure.
     self.assertEqual(len(rows), 4)
     columns = list(s.split(cols=True))
     self.assertEqual(len(columns), 3)
     blocks = list(s.split())
     self.assertEqual(len(blocks), 12)
     for piece in rows:
         self.assertEqual(piece.shape, (22,66))
     for piece in columns:
         self.assertEqual(piece.shape, (88,22))
     for piece in blocks:
         self.assertEqual(piece.shape, (22,22))
     # Piece k must hold rows (resp. columns) [22*k, 22*(k+1)) of the source.
     for k, piece in enumerate(rows):
         self.assertTrue(np.array_equal(piece, n[22*k:22*(k+1),:]))
     for k, piece in enumerate(columns):
         self.assertTrue(np.array_equal(piece, n[:,22*k:22*(k+1)]))
Beispiel #4
0
 def test_load_StorageNumpy(self):
     """Reload a synced StorageNumpy by name; it must be persistent and share the storage_id."""
     source = np.arange(2 * 128).reshape(2, 128)  # A matrix with "some" columns
     table = "test_load_StorageNumpy"
     original = StorageNumpy(source, table)
     original.sync()  # Flush values to cassandra
     reloaded = StorageNumpy(None, table)
     self.assertTrue(reloaded._is_persistent)
     self.assertEqual(original.storage_id, reloaded.storage_id)
Beispiel #5
0
    def test_types_persistence(self):
        """Round-trip a 256-element array through StorageNumpy for each integer typecode.

        For every signed and unsigned integer typecode the array is stored
        under the same table name, compared in memory, synced, reloaded by
        name, compared again and finally deleted from storage.
        """
        base_array = np.arange(256)
        tablename = self.ksp + '.' + "test_types_persistence"

        for typecode in np.typecodes['Integer']:
            if typecode == 'p':
                # TODO For now skip arrays made of pointers
                # `continue` (the original had a no-op `pass`) so the
                # typecode is really skipped, as the TODO states.
                continue
            typed_array = StorageNumpy(base_array.astype(typecode), tablename)
            self.assertTrue(np.array_equal(typed_array, base_array.astype(typecode)))

            typed_array.sync() # Flush values to cassandra

            typed_array = StorageNumpy(None, tablename)
            self.assertTrue(np.allclose(typed_array, base_array.astype(typecode)))
            typed_array.delete_persistent()

        for typecode in np.typecodes['UnsignedInteger']:
            if typecode == 'P':
                # TODO For now skip arrays made of pointers
                continue
            typed_array = StorageNumpy(base_array.astype(typecode), tablename)
            self.assertTrue(np.allclose(typed_array, base_array.astype(typecode)))

            typed_array.sync() # Flush values to cassandra

            typed_array = StorageNumpy(None, tablename)
            self.assertTrue(np.allclose(typed_array, base_array.astype(typecode)))
            typed_array.delete_persistent()
Beispiel #6
0
 def test_slice_after_load(self):
     """Slice a freshly reloaded (not yet accessed) 2x128 array and compare with the source."""
     matrix = np.arange(2 * 128).reshape(2, 128)  # A matrix with "some" columns
     table = "test_slice_after_load"
     stored = StorageNumpy(matrix, table)
     stored.sync()  # Flush values to cassandra
     del stored
     loaded = StorageNumpy(None, table)
     # Slice an array that has not been read yet; the stop (150) is past
     # the last column (128), exactly as in the reference expression.
     cols = slice(110, 150)
     fetched = loaded[0, cols]
     self.assertTrue(np.array_equal(fetched, matrix[0, cols]))
Beispiel #7
0
 def test_pv_one_dim(self):
     """Read a single element of a reloaded one-dimensional array."""
     flat = np.arange(66 * 66)
     table = "test_pv_one_dim"
     stored = StorageNumpy(flat, table)
     stored.sync()
     del stored
     loaded = StorageNumpy(None, table)
     position = 30
     self.assertTrue(np.array_equal(loaded[position], flat[position]))
Beispiel #8
0
 def test_pv_big_np(self):
     """Read one element of a reloaded 1000x1000 array."""
     matrix = np.arange(1000 * 1000).reshape(1000, 1000)
     table = "test_pv_big_np"
     stored = StorageNumpy(matrix, table)
     stored.sync()
     del stored
     loaded = StorageNumpy(None, table)
     cell = (22, 22)
     self.assertTrue(np.array_equal(loaded[cell], matrix[cell]))
Beispiel #9
0
 def test_row_access(self):
     """Read every row of a reloaded 64x128 array, one at a time, and compare with the source."""
     matrix = np.arange(64 * 128).reshape(64, 128)  # A matrix with "some" columns
     table = "test_row_access"
     stored = StorageNumpy(matrix, table)
     stored.sync()  # Flush values to cassandra
     del stored
     loaded = StorageNumpy(None, table)
     for row_idx in range(64):
         fetched = loaded[row_idx, :]  # Access a whole row
         self.assertTrue(np.array_equal(fetched, matrix[row_idx, :]))
Beispiel #10
0
 def test_pv_load_correct_blocks(self):
     """Reading row 0 of a reloaded 66x66 array must leave 3 entries in _loaded_coordinates."""
     n = np.arange(66*66).reshape(66,66)
     sn = StorageNumpy(n,"test_pv_load_correct_blocks")
     sn.sync()
     del sn
     sn = StorageNumpy(None,"test_pv_load_correct_blocks")
     s1 = (0, slice(None, None, None))
     # Only the effect of the access on sn._loaded_coordinates is checked;
     # the returned view is kept referenced but not inspected.
     _row = sn[s1]
     # assertEqual reports the actual count on failure, unlike assertTrue(a == b).
     self.assertEqual(len(sn._loaded_coordinates), 3)
Beispiel #11
0
    def test_loaded(self):
        """Track the _numpy_full_loaded flag through creation, reload, access and split."""
        table = "test_loaded"

        def reload():
            # Build a new instance from storage, identified only by its name.
            return StorageNumpy(None, table)

        source = np.arange(88 * 66).reshape(88, 66)
        s = StorageNumpy(source, table)
        self.assertTrue(s._numpy_full_loaded is True)
        s.sync()  # Flush values to cassandra
        del s
        s = reload()
        self.assertTrue(s._numpy_full_loaded is False)

        # The accessed element must be FULL loaded
        row = s[0, :]
        self.assertTrue(s._numpy_full_loaded is False)
        self.assertTrue(row._numpy_full_loaded is True)

        del s
        s = reload()
        col = s[:, 0]
        self.assertTrue(s._numpy_full_loaded is False)
        self.assertTrue(col._numpy_full_loaded is True)

        del s
        s = reload()
        block = s[22:44, 22:44]
        self.assertTrue(s._numpy_full_loaded is False)
        self.assertTrue(block._numpy_full_loaded is True)

        # Loading ALL elements must make the object full loaded
        del s
        s = reload()
        for idx in range(s.shape[0]):
            touched = s[idx, :]
        self.assertTrue(s._numpy_full_loaded is True)

        del s
        s = reload()
        for idx in range(s.shape[1]):
            touched = s[:, idx]
        self.assertTrue(s._numpy_full_loaded is True)

        # Split MUST NOT load the object
        del s
        s = reload()
        rows = list(s.split(cols=False))
        for part in rows:
            self.assertTrue(part._numpy_full_loaded is False)

        del s
        s = reload()
        columns = list(s.split(cols=True))
        for part in columns:
            self.assertTrue(part._numpy_full_loaded is False)

        del s
        s = reload()
        blocks = list(s.split())
        for part in blocks:
            self.assertTrue(part._numpy_full_loaded is False)
0
 def test_pv_only_int(self):
     """Index a reloaded 66x66 array with a single integer and compare with the source row."""
     matrix = np.arange(66 * 66).reshape(66, 66)
     table = "test_pv_only_int"
     stored = StorageNumpy(matrix, table)
     stored.sync()
     del stored
     loaded = StorageNumpy(None, table)
     row_idx = 1
     fetched = loaded[row_idx]
     self.assertTrue(np.array_equal(matrix[1], fetched))
Beispiel #13
0
 def test_pv_three_dimensions(self):
     """Take a 1-D line out of a reloaded 3x66x66 array, then slice that line again."""
     cube = np.arange(3 * 66 * 66).reshape(3, 66, 66)
     table = "test_pv_three_dimensions"
     stored = StorageNumpy(cube, table)
     stored.sync()
     del stored
     loaded = StorageNumpy(None, table)
     line = (0, 1, slice(None))
     self.assertTrue(np.array_equal(loaded[line], cube[line]))
     segment = slice(1, 10, 1)
     self.assertTrue(np.array_equal(loaded[line][segment], cube[line][segment]))
Beispiel #14
0
 def test_pv_special_case(self):
     """Slice rows and columns 1..64 of a reloaded 66x66 array and compare row 1 of the result."""
     n = np.arange(66*66).reshape(66,66)
     sn = StorageNumpy(n,"test_pv_special_case")
     sn.sync()
     del sn
     sn = StorageNumpy(None,"test_pv_special_case")
     s1 = slice(1,65)
     # The same slice is deliberately taken twice (here and in the assertion),
     # as in the original test; this first result is not inspected.
     # (The unused local `ssf` of the original has been removed.)
     _first_view = sn[s1,s1]
     self.assertTrue(np.array_equal(sn[s1,s1][1], n[s1,s1][1]))
Beispiel #15
0
 def test_pv_slice_from_from_slice_step(self):
     """Apply a stepped slice to the result of another stepped slice on a reloaded array."""
     matrix = np.arange(66 * 66).reshape(66, 66)
     table = "test_pv_slice_from_slice_step"
     stored = StorageNumpy(matrix, table)
     stored.sync()
     del stored
     loaded = StorageNumpy(None, table)
     outer = slice(1, 65, 2)
     inner = slice(1, 20, 2)
     fetched = loaded[outer][inner]
     self.assertTrue(np.array_equal(fetched, matrix[outer][inner]))
Beispiel #16
0
 def test_pv_slice_step(self):
     """Slice both axes of a reloaded array with step 2 and compare one element."""
     matrix = np.arange(66 * 66).reshape(66, 66)
     table = "test_pv_slice_step"
     stored = StorageNumpy(matrix, table)
     stored.sync()
     del stored
     loaded = StorageNumpy(None, table)
     stepped = slice(1, 65, 2)
     fetched = loaded[stepped, stepped]
     row, col = 2, 30
     self.assertTrue(np.array_equal(matrix[stepped, stepped][row, col], fetched[row, col]))
Beispiel #17
0
 def test_get_subarray(self):
     """Take the same sub-array of a reloaded 8x8x4 array twice; its sum and mean must be positive."""
     base = np.arange(8 * 8 * 4).reshape((8, 8, 4))
     table = 'test_get_subarray'
     stored = StorageNumpy(input_array=base, name=table)
     stored.sync()  # Flush values to cassandra
     loaded = StorageNumpy(name=table)
     res = loaded[:3, :2]
     total = res.sum()
     # The sub-array is fetched a second time before computing the mean.
     res = loaded[:3, :2]
     average = res.mean()
     self.assertGreater(total, 0)
     self.assertGreater(average, 0)
Beispiel #18
0
 def test_pv_slice_single_row(self):
     """Slice rows of a reloaded array, then re-slice the result once and twice."""
     matrix = np.arange(66 * 66).reshape(66, 66)
     table = "test_pv_slice_single_row"
     stored = StorageNumpy(matrix, table)
     stored.sync()
     del stored
     loaded = StorageNumpy(None, table)
     band = slice(1, 65)
     tail = slice(1, None)
     self.assertTrue(np.array_equal(loaded[band], matrix[band]))
     self.assertTrue(np.array_equal(loaded[band][tail], matrix[band][tail]))
     self.assertTrue(np.array_equal(loaded[band][tail][tail], matrix[band][tail][tail]))
Beispiel #19
0
 def test_pv_int_slice(self):
     """Index a reloaded array with (int, slice) and compare one element of the result."""
     matrix = np.arange(66 * 66).reshape(66, 66)
     table = "test_pv_int_slice"
     stored = StorageNumpy(matrix, table)
     stored.sync()
     del stored
     loaded = StorageNumpy(None, table)
     # Case: int, slice
     row = 30
     cols = slice(1, 65)
     pos = 1
     fetched = loaded[row, cols]
     self.assertTrue(np.array_equal(fetched[pos], matrix[row, cols][pos]))
Beispiel #20
0
 def test_pv_slice_int(self):
     """Index a reloaded array with (slice, int) and compare one element of the result."""
     matrix = np.arange(66 * 66).reshape(66, 66)
     table = "test_pv_slice_int"
     stored = StorageNumpy(matrix, table)
     stored.sync()
     del stored
     loaded = StorageNumpy(None, table)
     # Case: slice, int
     rows = slice(1, 65)
     col = 30
     pos = 1
     fetched = loaded[rows, col]
     self.assertTrue(np.array_equal(fetched[pos], matrix[rows, col][pos]))
Beispiel #21
0
 def test_pv_slice_slice(self):
     """Index a reloaded array with (slice, slice) and compare one element of the result."""
     matrix = np.arange(66 * 66).reshape(66, 66)
     table = "test_pv_slice_slice"
     stored = StorageNumpy(matrix, table)
     stored.sync()  # Flush values to cassandra
     del stored
     loaded = StorageNumpy(None, table)
     # Case: slice, slice
     inner = slice(1, 65)
     fetched = loaded[inner, inner]
     row, col = 1, 1
     self.assertTrue(np.array_equal(fetched[row, col], matrix[inner, inner][row, col]))
Beispiel #22
0
 def test_pv_slice_single_column(self):
     """Extract column 30 of a reloaded array, then re-slice the result once and twice."""
     matrix = np.arange(66 * 66).reshape(66, 66)
     table = "test_pv_slice_single_column"
     stored = StorageNumpy(matrix, table)
     stored.sync()
     del stored
     loaded = StorageNumpy(None, table)
     column = (slice(None), 30)
     tail = slice(1, None)
     self.assertTrue(loaded[column].shape == matrix[column].shape)
     self.assertTrue(np.array_equal(loaded[column], matrix[column]))
     self.assertTrue(np.array_equal(loaded[column][tail], matrix[column][tail]))
     self.assertTrue(np.array_equal(loaded[column][tail][tail], matrix[column][tail][tail]))
Beispiel #23
0
 def test_pv_three_dimensions_easy(self):
     """Take a 1-D line out of a reloaded 4x4x4 array, then re-slice it once and twice."""
     n = np.arange(4*4*4).reshape(4,4,4)
     sn = StorageNumpy(n,"test_pv_three_dimensions_easy")
     sn.sync()
     del sn
     sn = StorageNumpy(None,"test_pv_three_dimensions_easy")
     # (The original also built an unused full-range index `orig3`; removed.)
     s1 = (0, 1, slice(None, None, None))
     self.assertTrue(np.array_equal(sn[s1], n[s1]))
     s2 = slice(1, None, None)
     self.assertTrue(np.array_equal(sn[s1][s2], n[s1][s2]))
     self.assertTrue(np.array_equal(sn[s1][s2][s2], n[s1][s2][s2]))
Beispiel #24
0
 def test_arrow_access(self):
     """Read columns 20, 30 and 49 of a reloaded 50x50 array and compare with the source."""
     matrix = np.arange(50 * 50).reshape(50, 50)
     table = "test_arrow_access"
     stored = StorageNumpy(matrix, table)
     stored.sync()
     del stored
     loaded = StorageNumpy(None, table)
     col_20 = loaded[:, 20]
     self.assertTrue(np.array_equal(col_20, matrix[:, 20]))
     col_30 = loaded[:, 30]
     self.assertTrue(np.array_equal(col_30, matrix[:, 30]))
     col_49 = loaded[:, 49]  # last column
     self.assertTrue(np.array_equal(col_49, matrix[:, 49]))
Beispiel #25
0
    def test_slicing_3d(self):
        """Slice an 8x8x4 StorageNumpy before syncing and again after reloading it by name."""
        base = np.arange(8 * 8 * 4).reshape((8, 8, 4))
        table = 'test_slicing_3d'
        window = (slice(6, 7), slice(4, None))

        persisted = StorageNumpy(input_array=base, name=table)
        actual = persisted[window]
        expected = base[window]
        self.assertTrue(np.array_equal(expected, actual))

        persisted.sync()  # Flush values to cassandra
        persisted = StorageNumpy(name=table)
        actual = persisted[window]
        self.assertTrue(np.array_equal(expected, actual))

        # Remove the stored data once both comparisons have been made.
        persisted.delete_persistent()
Beispiel #26
0
 def test_pv_negative_indexes(self):
     """Index reloaded 2-D and 1-D arrays with -1 and compare with the source arrays."""
     n = np.arange(66*66).reshape(66,66)
     sn = StorageNumpy(n,"test_pv_negative_indexes")
     sn.sync()
     del sn
     sn = StorageNumpy(None,"test_pv_negative_indexes")
     s1 = -1
     self.assertTrue(np.array_equal(sn[s1], n[s1]))
     nn = np.arange(66*66)
     snn = StorageNumpy(nn,"test_pv_negative_indexes_small")
     # Flush before dropping the object, as done for the 2-D array above;
     # the original skipped this sync for the 1-D case.
     snn.sync()
     del snn
     snn = StorageNumpy(None,"test_pv_negative_indexes_small")
     self.assertTrue(np.array_equal(snn[s1], nn[s1]))
Beispiel #27
0
 def test_pv_three_dimensions_all_coords(self):
     """calculate_block_coords over the full range of a reloaded 8x8x8 array must yield the 8 expected coordinates."""
     n = np.arange(8*8*8).reshape(8,8,8)
     sn = StorageNumpy(n,"test_pv_three_dimensions_all_coords")
     sn.sync()
     del sn
     sn = StorageNumpy(None,"test_pv_three_dimensions_all_coords")
     orig3 = (slice(None, None, None), slice(None, None, None), slice(None, None, None))
     coords = list(sn.calculate_block_coords(orig3))
     expected = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 0, 0), (1, 0, 1), (1, 1, 0), (1, 1, 1)]
     # Compare the whole lists: the previous all(map(...)) stopped at the
     # shorter sequence, so an empty or truncated result passed silently.
     self.assertEqual(coords, expected)
Beispiel #28
0
 def test_pv_three_dimensions_slice_twodim(self):
     """calculate_block_coords on a 2-D view ([:, :, 0]) of a reloaded 8x8x8 array must yield the 4 expected coordinates."""
     n = np.arange(8*8*8).reshape(8,8,8)
     sn = StorageNumpy(n,"test_pv_three_dimensions_slice_twodim")
     sn.sync()
     del sn
     sn = StorageNumpy(None,"test_pv_three_dimensions_slice_twodim")
     ss = sn[(slice(None,None,None), slice(None,None,None),0)]
     coords = list(ss.calculate_block_coords(ss._build_args.view_serialization))
     expected = [(0, 0, 0), (0, 1, 0), (1, 0, 0), (1, 1, 0)]
     # Compare the whole lists: the previous all(map(...)) stopped at the
     # shorter sequence, so an empty or truncated result passed silently.
     self.assertEqual(coords, expected)
Beispiel #29
0
    def test_read_all(self):
        """Store a 128x128x128 array, reload it by name and read it whole with a full slice."""
        side = 2 ** 7
        total = 2 ** 21
        shape = (side, side, side)

        source = np.arange(total).reshape(shape)
        casted = StorageNumpy(input_array=source, name="test_read_all")
        casted.sync()  # Flush values to cassandra

        # Independent copy of the expected content, built the same way.
        expected = np.arange(total).reshape(shape)
        casted = StorageNumpy(name="test_read_all")
        everything = casted[slice(None)]
        self.assertTrue(np.allclose(everything.view(np.ndarray), expected))
        casted.delete_persistent()
Beispiel #30
0
    def test_performance_storage_numpy_arrow(self):
        """Time the retrieval of one random column from reloaded matrices of growing size.

        For each size a (1000*s x 3) matrix is stored and synced; it is then
        reloaded TIMES times and the time to read one column is recorded.
        The collected timings are printed at the end; no timing is asserted.
        """
        # Times to repeat the test
        TIMES = 10

        # Matrix sizes to test
        matrix_size = (100, 200, 300, 400, 500, 600, 700, 800, 900, 1000)
        n_cols = 3

        times = {}
        # Test 1 column
        for s in matrix_size:
            times[s] = []  # empty list for size 's'

            # Create a numpy
            n = np.arange(1000*s * n_cols).reshape(1000*s, n_cols)
            matrix_name = "matrix{}x{}".format(1000*s, n_cols)

            # Make it persistent
            o = StorageNumpy(n, matrix_name)

            o.sync() # Flush values to cassandra
            # Clean memory
            del o

            for _ in range(TIMES):
                # Retrieve numpy from cassandra (NO data in memory)
                o = StorageNumpy(None, matrix_name)

                # LOAD_ON_DEMAND must be DISABLED!
                # The first 20 bytes of the buffer must still be zero.
                # The original used assertTrue(value, msg), which only checked
                # that the hex string was non-empty.
                # NOTE(review): presumably a zeroed buffer means "nothing
                # loaded yet" -- confirm against the comment above.
                self.assertEqual(o.data.hex()[:40], '0' * 40)

                # Pick the column before starting the clock so that only the
                # access itself is timed.
                column = random.randint(0, (n_cols-1))

                start = timer()

                # Load column
                o[:, column]

                end = timer()

                # Store time
                times[s].append(end - start)
                del o

        # All tests done, print results
        print("\nRESULTS:")
        for s in matrix_size:
            print("Matrix size{}x{} = ".format(1000*s, n_cols), times[s])
        print("\n")