Example #1
0
    def test_dataframe_evictions(self):
        row1 = "row/path1/"
        row2 = "row/path2/"
        col1 = "col/path1/"
        all_rows = "row/"

        matrix1 = np.arange(8).reshape(2,4)
        matrix2 = np.arange(8).reshape(2,4)+2
        df = DataFrame()
        df[row1,col1] = DataFrame.from_matrix(matrix1)
        df[row2,col1] = DataFrame.from_matrix(matrix2)

        # load into cache
        df[row1,col1].get_matrix()
        assert(df[row1,col1]._is_cached())

        df[row2,col1].get_matrix()
        assert(df[row1,col1]._is_cached())
        assert(df[row2,col1]._is_cached())

        # assert that the combined value is correct
        assert (df[all_rows,col1].get_matrix() ==
                np.vstack([matrix1,matrix2])).all()
        
        #assert cache membership
        assert(df[all_rows,col1]._is_cached())
        assert(not df[row1,col1]._is_cached())
        assert(not df[row2,col1]._is_cached())

        # attempt to modified a cached element
        M = df[all_rows,col1].get_matrix()
        M[0,:] = 0
        df[all_rows,col1] = DataFrame.from_matrix(M)

        # Assert the matrix is still in the cache
        assert df[all_rows,col1]._is_cached()
        # Assert that the new value is saved to cache
        assert (df[all_rows,col1].get_matrix()==M).all()

        # Pull an entry and evict all_rows
        M1 = df[row1,col1].get_matrix()
        M2 = df[row2,col1].get_matrix()

        assert df[row1,col1]._is_cached()
        assert df[row2,col1]._is_cached()
        assert not df[all_rows,col1]._is_cached()
        # assert that the pulled values match what they should
        assert (np.vstack([M1,M2])==M).all()

        assert (df[all_rows,col1].get_matrix()==M).all()
        df[all_rows,col1][0:1,:].get_matrix()
        assert (df[all_rows,col1][0:1,:]._is_cached())

        # evict an entry that will change
        row3 = "row/path3/"
        matrix3 = np.arange(8).reshape(2,4)+4
        df[row3,col1].set_matrix(matrix3)
        assert (df[row3,col1].get_matrix()==matrix3).all()
Example #2
0
 def test_dataframe_del(self):
     rows = "row/"
     cols = "col/"
     row1 = "row/path1/"
     row2 = "row/path2/"
     col1 = "col/path1/"
     col2 = "col/path2/"
     matrix1 = np.arange(6).reshape(2,3)
     matrix2 = np.arange(6).reshape(2,3)+10
     matrix3 = np.arange(6).reshape(2,3)+20
     matrix4 = np.arange(6).reshape(2,3)+30
     matrix_row1 = np.hstack([matrix1,matrix2])
     matrix_row2 = np.hstack([matrix3,matrix4])
     matrix_all = np.vstack([matrix_row1,matrix_row2])
     df = DataFrame()
     M_df = DataFrame.from_matrix(matrix1)
     df[row1,col1].set_matrix(matrix1)
     df[row1,col2].set_matrix(matrix2)
     df[row2,col1].set_matrix(matrix3)
     df[row2,col2].set_matrix(matrix4)
     assert (df[rows,cols].get_matrix()==matrix_all).all()
     del df[row1,col1]
     try: 
         df[row1,col1].get_matrix()
         raise
     except KeyError: 
         pass
     print df._row_index
     assert (df[rows,cols].get_matrix()==matrix4).all()
Example #3
0
    def test_from_matrix(self):
        matrix1 = np.ones((2,3))
        df1 = DataFrame.from_matrix(matrix1, row_labels = ["a","b"])
        df2 = DataFrame.from_matrix(matrix1, col_labels = ["a","b","c"])
        df3 = DataFrame.from_matrix(matrix1,
                                    col_labels = ["a","b", "c"],
                                    row_labels = ["a","b"])

        assert(df1.shape==matrix1.shape)
        assert(df2.shape==matrix1.shape)
        assert(df3.shape==matrix1.shape)
        assert(df1.get_matrix().shape==matrix1.shape)
        assert(df2.get_matrix().shape==matrix1.shape)
        assert(df3.get_matrix().shape==matrix1.shape)
        assert(df1.get_matrix()==matrix1).all()
        assert(df2.get_matrix()==matrix1).all()
        assert(df3.get_matrix()==matrix1).all()
Example #4
0
 def test_matrix_set(self):
     row1 = "row/path1/"
     row2 = "row/path2/"
     col1 = "col/path1/"
     col2 = "col/path2/"
     matrix1 = np.arange(6).reshape(2,3)
     df = DataFrame()
     M_df = DataFrame.from_matrix(matrix1)
     df[row1,col1].set_matrix(matrix1)
     assert (df[row1,col1].get_matrix()==matrix1).all()
Example #5
0
    def test_dataframe_update_propogation(self):
        row1 = "row/path1/"
        row11 = "row/path1/sub1/"
        row12 = "row/path1/sub2/"
        row2 = "row/path2/"
        cols = "col/"
        df = DataFrame()

        matrix1 = np.arange(8).reshape(2,4)

        df[row11,cols] = DataFrame.from_matrix(matrix1)
        df[row2,cols] = dreaml.transformations.Identity(df[row1,cols])
        # Check a few equalities from directory indexing 
        assert (df[row11,cols].get_matrix()==df[row1,cols].get_matrix()).all()
        assert (df[row1,cols].get_matrix()==matrix1).all()
        assert (df[row2,cols].get_matrix()==matrix1).all()

        # validate some simple graph properties
        h1 = df[row1,cols].hash()
        h2 = df[row2,cols].hash()
        assert df._graph.node[h1]["status"] == df.STATUS_GREEN
        assert df._graph.node[h2]["status"] == df.STATUS_GREEN

        matrix2 = np.arange(8).reshape(2,4) + 5
        df[row12,cols] = DataFrame.from_matrix(matrix2)
        # Check that cache entries have been properly invalidated
        assert not df[row1,cols]._is_cached()

        # Check that set matrices are correct
        assert (df[row11,cols].get_matrix() == matrix1).all()
        assert df[row11,cols]._is_cached()
        assert (df[row12,cols].get_matrix() == matrix2).all()
        assert df[row12,cols]._is_cached()

        # check that total matrix is correct
        matrix3 = np.vstack([matrix1,matrix2])
        assert (df[row1,cols].get_matrix()==matrix3).all()

        # assert that the two matrices are equal
        assert (df[row1,cols].shape==df[row2,cols].shape)
        assert (df[row1,cols].get_matrix()==df[row2,cols].get_matrix()).all()
Example #6
0
    def test_dataframe_hierarchy(self):
        # Add a new row entry within row directory
        rows = "rows/"
        row1 = "rows/path1/"
        row2 = "rows/path2/"
        cols = "cols/"
        col1 = "cols/p1/"
        col2 = "cols/p2/"

        matrix1 = np.arange(6).reshape(2,3)
        matrix2 = np.arange(6).reshape(2,3)+8

        df = DataFrame()
        df[row1,col1] = DataFrame.from_matrix(matrix1)
        assert (df[row1,col1].get_matrix() == matrix1).all()
        assert (df[rows,col1].get_matrix() == matrix1).all()
        assert df[rows,col1]._is_cached()

        df_both_rows = df[rows,col1]

        df[row2,col1] = DataFrame.from_matrix(matrix2)
        # Previous query should be evicted at this point
        assert df[row1,col1]._is_df_cached()
        assert df[row2,col1]._is_df_cached()

        assert not df_both_rows._is_df_cached()
        print df._cache
        assert not df_both_rows._is_cached()
        assert (df[row2,col1].get_matrix() == matrix2).all()

        # Check fetching everything
        matrix3 = np.vstack([matrix1,matrix2])
        print df[rows,col1].get_matrix()
        print matrix3
        print df[rows,col1]._row_index
        assert (df[rows,col1].shape == matrix3.shape)
        assert (df[rows,col1].get_matrix() == matrix3).all()
Example #7
0
    def test_cache_rows_then_evict_all(self):
        row = "row/"
        col = "col/"

        nrows = 4
        matrix1 = np.arange(8).reshape(nrows,2)
        df = DataFrame()
        df[row,col] = DataFrame.from_matrix(matrix1)

        for i in range(nrows):
            assert (df[i,:].get_matrix()==matrix1[i,:]).all()

        assert (df[:,:].get_matrix() == matrix1).all()

        for i in range(0,nrows,2):
            assert (df[i:i+2,:].get_matrix()==matrix1[i:i+2,:]).all()

        assert (df[:,:].get_matrix() == matrix1).all()
Example #8
0
    def test_nonexistant_query(self):
        row1 = "row/path1/"
        row2 = "row/path2/"
        row3 = "row/path3/"
        col1 = "col/path1/"
        col2 = "col/path2/"
        col3 = "col/path3/"
        matrix1 = np.arange(6).reshape(2,3)
        df = DataFrame()
        df[row1,col1] = DataFrame.from_matrix(matrix1)
        df[row2,col2]
        df[row3,col3] = dreaml.transformations.Identity(df[row1,col1])

        assert df[row2,col2].empty()
        assert df[row2,col3].empty()
        assert df[row3,col2].empty()

        assert not df[row1,col1].empty()
        assert not df[row3,col3].empty()
Example #9
0
    def test_dataframe_basic(self):
        print "kdfd"
        row1 = "row/path1/"
        row2 = "row/path2/"
        col1 = "col/path1/"
        col2 = "col/path2/"
        matrix1 = np.arange(6).reshape(2,3)
        df = DataFrame()

        # After inserting one block, other entries should not work
        df[row1,col1] = DataFrame.from_matrix(matrix1)
        assert (df[row1,col1].get_matrix() == matrix1).all()
        try:
            # missing column
            df[row1,col2].get_matrix()
            raise
        except KeyError:
            pass
        try:
            # missing row
            df[row2,col1].get_matrix()
            raise
        except KeyError:
            pass
        try:
            # missing both
            df[row1,col2].get_matrix()
            raise
        except KeyError:
            pass

        # Try overwriting the written matrix: 
        matrix2 = np.arange(6).reshape(2,3)+4
        df[row1,col1] = DataFrame.from_matrix(matrix2)
        assert (df[row1,col1].get_matrix() == matrix2).all()

        # try indexing with slices
        assert(df[row1,col1][0,:].get_matrix()==matrix2[0,:]).all()

        # try subtracting 1 from a row of dataframe partition
        matrix3 = matrix2
        matrix3[0,:] -= 1

        tmp = df[row1,col1][0,:].get_matrix()
        tmp -= 1
        df[row1,col1][0,:] = DataFrame.from_matrix(tmp)
        assert (df[row1,col1].get_matrix()==matrix3).all()

        # Test setting a single element
        df[1,1] = 5
        assert(df[1,1].get_matrix()[0,0] == 5)
        assert(df[1,1]._is_cached())

        # Test use of set_matrix on a single query
        matrix3 = np.arange(6).reshape(2,3)
        df[row1,col1].set_matrix(matrix3+5)
        assert not df[1,1]._is_cached()
        assert(df[row1,col1].get_matrix()==(matrix3+5)).all()
        assert df[row1,col1]._is_cached()

        # Test set_matrix on a nested query
        matrix4 = np.array([[42]])
        df[row1,col1][0,1].set_matrix(matrix4)
        assert (df[row1,col1][0,1].get_matrix()==matrix4).all()