Example #1
0
    def central_diff(self, f, epsilon, theta):
        """Numerically estimate the gradient of ``f`` at ``theta``.

        Each entry of the matrix held by ``theta`` is perturbed by
        ``+epsilon`` and ``-epsilon`` in turn, and the symmetric difference
        quotient ``(f(upper) - f(lower)) / (2 * epsilon)`` is stored at the
        same position of the result.

        Args:
            f: callable that takes a DataFrame built with
                ``DataFrame.from_matrix`` and returns a scalar.
            epsilon: perturbation applied to each entry.
            theta: object whose ``get_matrix()`` returns the 1-D or 2-D
                array at which to differentiate.

        Returns:
            Array with the same shape as the matrix, holding the estimated
            partial derivatives.

        Raises:
            ValueError: if the matrix is neither 1-D nor 2-D.
        """
        # Parenthesised form prints identically on Python 2 and Python 3.
        print(epsilon)
        x = theta.get_matrix()
        if x.ndim not in (1, 2):
            raise ValueError(
                "central_diff expects a 1-D or 2-D matrix, got ndim=%d"
                % x.ndim)

        g = np.zeros(x.shape)
        # np.ndindex walks every index tuple of the shape in row-major order,
        # so one loop body serves both the vector and the matrix case.
        for idx in np.ndindex(*x.shape):
            upper = x.copy()
            upper[idx] += epsilon
            lower = x.copy()
            lower[idx] -= epsilon
            g[idx] = ((f(DataFrame.from_matrix(upper)) -
                       f(DataFrame.from_matrix(lower))) / (2 * epsilon))
        return g
Example #2
0
    def test_softmax_reg_loss(self):
        """Check SoftmaxRegression.g() against a central-difference estimate.

        Random X and theta matrices and random one-hot boolean labels are
        stored in a DataFrame; the gradient returned by ``g()`` must be close
        to the numerical gradient of ``f()`` with respect to theta.
        """
        num_classes = 10
        num_samples, num_features = 5, 8
        step = 1e-4
        reg = 0.0001

        X_path = ("X/", "X/")
        theta_path = ("theta/", "theta/")
        y_path = ("y/", "y/")

        df = DataFrame()
        df[X_path] = DataFrame.from_matrix(
            nprand.rand(num_samples, num_features))
        df[theta_path] = DataFrame.from_matrix(
            nprand.rand(num_classes, num_features))

        # One-hot boolean labels: a single random class per sample.
        labels = np.zeros((num_samples, num_classes), dtype=bool)
        for row in range(num_samples):
            labels[row, nprand.randint(num_classes)] = True
        df[y_path] = DataFrame.from_matrix(labels)

        def loss_at(theta_df):
            # Loss as a function of theta only; X, y and reg stay fixed.
            return SoftmaxRegression(theta_df, df[X_path], df[y_path],
                                     reg).f()

        numeric_grad = self.central_diff(loss_at, step, df[theta_path])
        analytic_grad = SoftmaxRegression(df[theta_path], df[X_path],
                                          df[y_path], reg).g()

        assert np.allclose(numeric_grad, analytic_grad)
Example #3
0
    def central_diff(self, f, epsilon, theta):
        """Estimate the gradient of ``f`` at ``theta`` by central differences.

        For every entry of the matrix returned by ``theta.get_matrix()`` two
        copies are made, one with the entry raised by ``epsilon`` and one with
        it lowered by ``epsilon``; the result entry is
        ``(f(upper) - f(lower)) / (2 * epsilon)``.

        Args:
            f: callable that takes a DataFrame built with
                ``DataFrame.from_matrix`` and returns a scalar.
            epsilon: perturbation applied to each entry.
            theta: object whose ``get_matrix()`` returns a 1-D or 2-D array.

        Returns:
            Array of the same shape as the matrix with the estimated partial
            derivatives.

        Raises:
            ValueError: if the matrix is neither 1-D nor 2-D.
        """
        # Parenthesised form prints identically on Python 2 and Python 3.
        print(epsilon)
        x = theta.get_matrix()
        if x.ndim not in (1, 2):
            raise ValueError(
                "central_diff expects a 1-D or 2-D matrix, got ndim=%d"
                % x.ndim)

        g = np.zeros(x.shape)
        # A single loop over all index tuples (row-major) replaces the two
        # near-identical 1-D and 2-D loops.
        for idx in np.ndindex(*x.shape):
            upper = x.copy()
            upper[idx] += epsilon
            lower = x.copy()
            lower[idx] -= epsilon
            g[idx] = ((f(DataFrame.from_matrix(upper))
                       - f(DataFrame.from_matrix(lower)))
                      / (2 * epsilon))
        return g
Example #4
0
    def test_softmax_reg_loss(self):
        """The gradient from SoftmaxRegression.g() must match central differences.

        Builds random X (n x m) and theta (k x m) matrices plus random one-hot
        boolean labels (n x k), then compares ``g()`` with the numerical
        gradient of ``f()`` taken with respect to theta.
        """
        epsilon = 1e-4
        reg = 0.0001
        k = 10
        n, m = 5, 8

        X_path = ("X/", "X/")
        theta_path = ("theta/", "theta/")
        y_path = ("y/", "y/")

        df = DataFrame()
        df[X_path] = DataFrame.from_matrix(nprand.rand(n, m))
        df[theta_path] = DataFrame.from_matrix(nprand.rand(k, m))

        # Mark exactly one randomly drawn class per row.
        y = np.zeros((n, k), dtype=bool)
        for row in y:
            row[nprand.randint(k)] = True
        df[y_path] = DataFrame.from_matrix(y)

        def softmax(theta_df):
            # Objective value for a candidate theta; the data stays fixed.
            model = SoftmaxRegression(theta_df, df[X_path], df[y_path], reg)
            return model.f()

        g_central = self.central_diff(softmax, epsilon, df[theta_path])
        model = SoftmaxRegression(df[theta_path], df[X_path], df[y_path], reg)
        g1 = model.g()

        # print g_central
        assert np.allclose(g_central, g1)

        # Disabled batch check, kept for reference (average gradient):
        # g2 = np.zeros((k,m))
        # for i in range(n):
        #     g2 += Softmax.g(df[theta_path], df[X_path], df[y_path], reg)
        # g2 /= n
        # assert(np.allclose(g_central,g2))
Example #5
0
 def test_permutation(self):
     """Permute must reorder rows by the permutation stored under auto/."""
     source_path = ("row1/", "col1/")
     target_path = ("row2/", "col1/")
     original = nprand.rand(3, 5)

     frame = DataFrame()
     frame[source_path] = DataFrame.from_matrix(original)
     frame[target_path] = Permute(frame[source_path])

     # Read the stored permutation back as a flat index vector.
     stored = frame["auto/row1/", "auto/permutation/"]
     order = stored.get_matrix().ravel()
     permuted = frame[target_path].get_matrix()
     assert (permuted == original[order, :]).all()
Example #6
0
 def test_dot(self):
     """Dot of a 3x5 and a 5x8 frame must equal the NumPy matrix product."""
     left_path = ("row1/", "col1/")
     right_path = ("row2/", "col2/")
     product_path = ("row1/", "col2/")
     left = nprand.rand(3, 5)
     right = nprand.rand(5, 8)

     frame = DataFrame()
     frame[left_path] = DataFrame.from_matrix(left)
     frame[right_path].set_matrix(right)
     frame[product_path] = Dot(frame[left_path], frame[right_path])

     result = frame[product_path].get_matrix()
     assert (result == left.dot(right)).all()
Example #7
0
    def func(self, target_df, X_df):
        """Return the rows of ``X_df`` in a random order.

        The permutation that was drawn is also written, as a single-column
        matrix, into the top-level frame under the auto directory (row query
        of ``X_df``, column "permutation/").
        """
        matrix = X_df.get_matrix()
        order = np.random.permutation(matrix.shape[0])
        # Labels are captured before the top-level frame is written to.
        rows = X_df._row_index.keys()
        cols = X_df._col_index.keys()

        row_query, _ = X_df.pwd()
        perm_rows = _auto_dir + row_query
        perm_cols = _auto_dir + "permutation/"
        # order[:, None] turns the 1-D permutation into an (n, 1) column.
        X_df._top_df[perm_rows, perm_cols] = DataFrame.from_matrix(
            order[:, None])
        return DataFrame.from_matrix(matrix[order, :], rows, cols)
Example #8
0
    def func(self, target_df, X_df):
        """Shuffle the rows of ``X_df`` and record the permutation used.

        A random permutation of the row indices is drawn, stored as a column
        matrix in ``X_df._top_df`` under the auto directory, and applied to
        the matrix. Row and column labels are taken from ``X_df``.
        """
        data = X_df.get_matrix()
        perm = np.random.permutation(data.shape[0])
        row_labels = X_df._row_index.keys()
        col_labels = X_df._col_index.keys()

        query = X_df.pwd()
        (row_query, col_query) = query  # only the row part is used below

        perm_key = (_auto_dir + row_query, _auto_dir + "permutation/")
        perm_column = perm[:, None]  # (n,) -> (n, 1)
        X_df._top_df[perm_key] = DataFrame.from_matrix(perm_column)

        shuffled = data[perm, :]
        return DataFrame.from_matrix(shuffled, row_labels, col_labels)
Example #9
0
 def test_linear(self):
     """Linear(a, M1, b, M2) must equal a*M1 + b*M2 element for element."""
     a, b = 2, -3
     first_path = ("row1/", "col1/")
     second_path = ("row2/", "col2/")
     result_path = ("row1/", "col2/")
     first = nprand.rand(3, 5)
     second = nprand.rand(3, 5)

     frame = DataFrame()
     frame[first_path] = DataFrame.from_matrix(first)
     frame[second_path].set_matrix(second)
     frame[result_path] = Linear(a, frame[first_path], b,
                                 frame[second_path])

     result = frame[result_path].get_matrix()
     assert (result == a * first + b * second).all()
Example #10
0
    def test_gd(self):
        """Check that a GD-backed frame tracks its input, even after it grows.

        A random 3x5 matrix is stored under a "batch1/" column directory, GD is
        started from a zero matrix against the parent directory, and after a
        one-second wait the GD output must be close to the input. A second
        3x4 batch is then added and the output is expected to grow to 3x9 and
        match the enlarged input.
        """
        df = DataFrame()
        M1_path = ("row1/", "col1/")
        M2_path = ("row2/", "col2/")
        # Both batches live below the column directory of M1_path.
        batch1_path = ("row1/", "col1/batch1/")
        batch2_path = ("row1/", "col1/batch2/")
        x0_path = ("x0/", "y0/")  # not used in this test
        M1 = nprand.rand(3, 5)
        df[batch1_path].set_matrix(M1)

        # Starting point for the optimiser.
        M2 = np.zeros((3, 5))

        df[M2_path] = GD(SquareTest, M2, df[M1_path], step_size=1)
        # Wait before inspecting the result; presumably GD runs in the
        # background (it is stopped explicitly at the end) -- TODO confirm.
        sleep(1)
        # df[M2_path].stop()
        assert np.allclose(df[M2_path].get_matrix(), df[M1_path].get_matrix())

        # Assert that the input structure has been replicated
        assert (
            df["row2/",
               "col2/batch1/"].get_matrix() == df[M2_path].get_matrix()).all()

        # Now attempt to extend the parameter matrix
        M3 = nprand.rand(3, 4)
        df[batch2_path].set_matrix(M3)

        sleep(1)
        assert df[M2_path].shape == df[M1_path].shape
        # 5 columns from batch1 plus 4 from batch2.
        assert df[M2_path].shape == (3, 9)
        assert np.allclose(df[M2_path].get_matrix(), df[M1_path].get_matrix())

        df[M2_path].stop()
Example #11
0
 def test_zero_mean(self):
     """ZeroMean must subtract each column's mean from the source matrix."""
     src_path = ("row1/", "col1/")
     dst_path = ("row2/", "col2/")
     data = nprand.rand(3, 5)
     expected = data - np.mean(data, axis=0)

     frame = DataFrame()
     frame[src_path].set_matrix(data)
     frame[dst_path] = ZeroMean(frame[src_path])

     result = frame[dst_path].get_matrix()
     assert (result == expected).all()
Example #12
0
    def func(self, target_df, X_df, num_bases=50):
        """Compute a PCA basis for the matrix held by ``X_df``.

        The per-column mean (axis 0) is subtracted and the right singular
        vectors of the centred matrix, in the order returned by ``la.svd``,
        are used as basis vectors.

        Args:
            target_df: not used by this method.
            X_df: frame whose ``get_matrix()`` returns the data matrix.
            num_bases: maximum number of basis vectors to keep.

        Returns:
            DataFrame whose columns are the leading basis vectors. Rows are
            labelled "0", "1", ... per input column; columns are labelled
            "0", "1", ... per vector actually kept.
        """
        X = X_df.get_matrix()
        X_zm = X - np.mean(X, axis=0)  # X with 0 mean
        v_T = la.svd(X_zm)[2]  # only the right singular vectors are needed

        # The slice yields fewer than num_bases columns when the data has
        # fewer columns than that, so labels are derived from the sliced
        # matrix to keep labels and columns the same length.
        basis = np.real(v_T.T[:, :num_bases])
        row_labels = [str(i) for i in range(basis.shape[0])]
        col_labels = [str(i) for i in range(basis.shape[1])]
        return DataFrame.from_matrix(basis, row_labels, col_labels)
Example #13
0
    def func(self, target_df, X_df, num_bases=50):
        """Return the leading right singular vectors of the centred data.

        The matrix from ``X_df.get_matrix()`` has its column means (axis 0)
        removed, ``la.svd`` is applied, and the first ``num_bases`` right
        singular vectors become the columns of the result.

        Args:
            target_df: not used by this method.
            X_df: frame whose ``get_matrix()`` returns the data matrix.
            num_bases: maximum number of basis vectors to keep.

        Returns:
            DataFrame of basis vectors (one per column), with string index
            labels "0", "1", ... on both axes sized to the returned matrix.
        """
        X = X_df.get_matrix()
        X_m = np.mean(X, axis=0)  # mean
        X_zm = X - X_m  # X with 0 mean
        _, _, v_T = la.svd(X_zm)

        basis = np.real(v_T.T[:, :num_bases])
        # Size the labels from the matrix actually returned: when X has fewer
        # columns than num_bases the slice is narrower than num_bases, and
        # labels built from num_bases would not match it.
        n_rows, n_cols = basis.shape
        row_labels = [str(i) for i in range(n_rows)]
        col_labels = [str(i) for i in range(n_cols)]
        return DataFrame.from_matrix(basis, row_labels, col_labels)
Example #14
0
 def test_permutation(self):
     """The permuted frame must equal M1 indexed by the stored permutation."""
     df = DataFrame()
     matrix = nprand.rand(3, 5)
     src = ("row1/", "col1/")
     dst = ("row2/", "col1/")

     df[src] = DataFrame.from_matrix(matrix)
     df[dst] = Permute(df[src])

     # The permutation is read from the auto directory and flattened.
     perm = df["auto/row1/", "auto/permutation/"].get_matrix().ravel()
     expected = matrix[perm, :]
     assert (df[dst].get_matrix() == expected).all()
Example #15
0
 def test_dot(self):
     """The frame produced by Dot must match ``M1.dot(M2)`` exactly."""
     df = DataFrame()
     lhs_path = ("row1/", "col1/")
     rhs_path = ("row2/", "col2/")
     out_path = ("row1/", "col2/")

     lhs = nprand.rand(3, 5)
     rhs = nprand.rand(5, 8)
     df[lhs_path] = DataFrame.from_matrix(lhs)
     df[rhs_path].set_matrix(rhs)

     df[out_path] = Dot(df[lhs_path], df[rhs_path])
     matches = df[out_path].get_matrix() == lhs.dot(rhs)
     assert matches.all()
Example #16
0
    def test_sgd(self):
        """After running SGD for a second, its frame must be close to y."""
        # Fixed 8x2 starting matrix handed to SGD.
        start = np.array([
            [-44.25076083, 38.62854577],
            [-38.41473092, 36.29945225],
            [-31.43300105, 30.79620632],
            [-21.27706071, 24.08638079],
            [-14.00259076, 6.54438641],
            [11.52354442, -6.07783327],
            [48.69374796, -38.64696136],
            [95.49682071, -84.38906967],
        ])

        x_key = ("xrow/", "xcol/")
        y_key = ("yrow/", "ycol/")
        out_key = ("row/", "col/")

        df = DataFrame()
        df[x_key] = DataFrame.from_matrix(np.arange(16).reshape(8, 2))
        df[y_key] = DataFrame.from_matrix(np.arange(8).reshape(8, 1))
        X_df = df[x_key]  # looked up as in the original; not used below
        y_df = df[y_key]

        df[out_key] = SGD(SquareTest, start, y_df, batch_size=8,
                          step_size=0.5)
        sleep(1)
        df[out_key].stop()

        result = df[out_key].get_matrix()
        assert np.allclose(result, y_df.get_matrix())
    def test_tuple_to_query(self):
        """Queries and their hashable tuple encodings must convert both ways."""
        df = DataFrame()
        # (hashable encoding, actual query) pairs; a plain string is expected
        # to be its own encoding.
        cases = [
            ("randomstring", "randomstring"),
            ((slice, (2, 4, 1)), slice(2, 4, 1)),
            ((list, (1, 2, 3, 4, 5, 6)), [1, 2, 3, 4, 5, 6]),
        ]

        for encoded, actual in cases:
            assert df._tuple_element_to_query(encoded) == actual

        for encoded, actual in cases:
            assert df._query_to_tuple_element(actual) == encoded
Example #18
0
 def test_linear(self):
     """The Linear frame must hold ``a*M1 + b*M2`` for a=2, b=-3."""
     df = DataFrame()
     x_path = ("row1/", "col1/")
     y_path = ("row2/", "col2/")
     out_path = ("row1/", "col2/")

     x = nprand.rand(3, 5)
     y = nprand.rand(3, 5)
     df[x_path] = DataFrame.from_matrix(x)
     df[y_path].set_matrix(y)

     a, b = 2, -3
     df[out_path] = Linear(a, df[x_path], b, df[y_path])
     matches = df[out_path].get_matrix() == a * x + b * y
     assert matches.all()
Example #19
0
    def func(self, target_df, a, X_df, b, Y_df, row_labels=None,
             col_labels=None):
        """Return the linear combination ``a * X + b * Y`` as a DataFrame.

        Args:
            target_df: not used by this method.
            a: factor multiplied with the matrix of ``X_df``.
            X_df: frame providing the first matrix via ``get_matrix()``.
            b: factor multiplied with the matrix of ``Y_df``.
            Y_df: frame providing the second matrix via ``get_matrix()``.
            row_labels: labels for the result rows; defaults to the keys of
                ``X_df._row_index``.
            col_labels: labels for the result columns; defaults to the keys
                of ``X_df._col_index``.

        Returns:
            DataFrame built from ``a * x + b * y`` with the chosen labels.

        Raises:
            ValueError: if the two matrices differ in shape.
        """
        x = X_df.get_matrix()
        y = Y_df.get_matrix()

        # Validate first so a mismatch is reported before any other work.
        if x.shape != y.shape:
            raise ValueError(
                "Linear operands must have the same shape, got %s and %s"
                % (x.shape, y.shape))

        # Identity test: `== None` is evaluated element-wise for array-like
        # labels and would make the `if` ambiguous.
        if row_labels is None:
            row_labels = X_df._row_index.keys()
        if col_labels is None:
            col_labels = X_df._col_index.keys()

        return DataFrame.from_matrix(a * x + b * y, row_labels, col_labels)
Example #20
0
    def test_sgd(self):
        """Run SGD for one second and compare its frame with the y frame."""
        # Also test sgd
        rows = [
            (-44.25076083, 38.62854577),
            (-38.41473092, 36.29945225),
            (-31.43300105, 30.79620632),
            (-21.27706071, 24.08638079),
            (-14.00259076, 6.54438641),
            (11.52354442, -6.07783327),
            (48.69374796, -38.64696136),
            (95.49682071, -84.38906967),
        ]
        close = np.array([list(row) for row in rows])

        df = DataFrame()
        path = "row/", "col/"
        x_values = np.arange(16).reshape(8, 2)
        y_values = np.arange(8).reshape(8, 1)
        df["xrow/", "xcol/"] = DataFrame.from_matrix(x_values)
        df["yrow/", "ycol/"] = DataFrame.from_matrix(y_values)
        X_df = df["xrow/", "xcol/"]  # fetched but not used afterwards
        y_df = df["yrow/", "ycol/"]

        optimiser = SGD(SquareTest, close, y_df, batch_size=8, step_size=0.5)
        df[path] = optimiser
        sleep(1)
        df[path].stop()

        assert np.allclose(df[path].get_matrix(), y_df.get_matrix())
Example #21
0
    def test_PCA_basis(self):
        """Check PCABasis and PCA against an eigendecomposition of the covariance.

        A centred random n x m matrix is stored, PCABasis is asked for d
        vectors, and each returned column must equal (up to sign) the
        corresponding eigenvector of the sample covariance, ordered by
        decreasing eigenvalue. PCA of a second matrix must likewise equal its
        projection onto those eigenvectors, up to sign per column.
        """
        df = DataFrame()
        M1_path = ("row1/", "col1/")
        M2_path = ("row2/", "col2/")
        n = 10
        m = 5
        d = 3
        M1 = nprand.rand(n, m)
        M1 = M1 - np.mean(M1, axis=0)
        df[M1_path].set_matrix(M1)

        df[M2_path] = PCABasis(df[M1_path], d)

        # Reference basis: eigenvectors of the sample covariance, sorted by
        # decreasing eigenvalue and truncated to the first d.
        covmat = (1. / (n - 1)) * M1.T.dot(M1)
        evs, evmat = scipy.linalg.eig(covmat)
        p = np.argsort(evs)[::-1]
        evmat_sorted = evmat[:, p][:, :d]

        basis = df[M2_path].get_matrix()
        for i in range(evmat_sorted.shape[1]):
            # Either sign of a basis vector is accepted.
            assert (np.isclose(basis[:, i], evmat_sorted[:, i]).all() or
                    np.isclose(basis[:, i], -evmat_sorted[:, i]).all())

        M3_path = ("row3/", "col3/")
        M3 = nprand.rand(2 * n, m)
        # NOTE(review): this subtracts the column means of M1 (already centred
        # above), not those of M3 -- confirm this is the intended centring.
        M3 = M3 - np.mean(M1, axis=0)
        df[M3_path].set_matrix(M3)
        pca_path = ("pca/", "pca/")
        df[pca_path] = PCA(df[M1_path], df[M3_path], d)
        pca = df[pca_path].get_matrix()
        proj = M3.dot(evmat_sorted)

        for i in range(pca.shape[1]):
            # Same per-column sign tolerance as for the basis check.
            assert (np.isclose(pca[:, i], proj[:, i]).all() or
                    np.isclose(pca[:, i], -proj[:, i]).all())
    def test_tuple_to_query(self):
        """Check both directions of the query <-> hashable element mapping."""
        df = DataFrame()
        to_query = df._tuple_element_to_query
        to_element = df._query_to_tuple_element

        # Each entry pairs a hashable element with the query it stands for.
        text = "randomstring"
        pairs = (
            (text, text),
            ((slice, (2, 4, 1)), slice(2, 4, 1)),
            ((list, (1, 2, 3, 4, 5, 6)), [1, 2, 3, 4, 5, 6]),
        )

        # Hashable element -> query.
        for hashed, query in pairs:
            assert to_query(hashed) == query

        # Query -> hashable element.
        for hashed, query in pairs:
            assert to_element(query) == hashed
Example #23
0
    def test_one_hot_encoding(self):
        """OneHotEncoding of a code column must give the matching 0/1 matrix."""
        n, m = 10, 5
        source_path = ("row1/", "col1/")
        encoded_path = ("row2/", "col2/")

        # n random codes in [0, m) followed by each code 0..m-1 once.
        random_codes = nprand.randint(0, m, (n, 1))
        all_codes = np.arange(m).reshape(m, 1)
        codes = np.vstack([random_codes, all_codes])

        # Expected encoding: a single 1 per row, in the column of its code.
        expected = np.zeros((n + m, m))
        expected[np.arange(n + m), codes[:, 0]] = 1

        df = DataFrame()
        df[source_path].set_matrix(codes)
        df[encoded_path] = OneHotEncoding(df[source_path])

        assert (df[encoded_path].get_matrix() == expected).all()
Example #24
0
    def func(self,
             target_df,
             a,
             X_df,
             b,
             Y_df,
             row_labels=None,
             col_labels=None):
        """Fetch matrices from dataframes, and return the resulting linear
        combination in a dataframe.

        Args:
            target_df: not used by this method.
            a: factor multiplied with the matrix of ``X_df``.
            X_df: frame providing the first matrix via ``get_matrix()``.
            b: factor multiplied with the matrix of ``Y_df``.
            Y_df: frame providing the second matrix via ``get_matrix()``.
            row_labels: labels for the result rows; defaults to the keys of
                ``X_df._row_index``.
            col_labels: labels for the result columns; defaults to the keys
                of ``X_df._col_index``.

        Returns:
            DataFrame built from ``a * x + b * y`` with the chosen labels.

        Raises:
            ValueError: if the two matrices differ in shape.
        """
        x = X_df.get_matrix()
        y = Y_df.get_matrix()

        # Fail early, and say which shapes disagreed.
        if x.shape != y.shape:
            raise ValueError(
                "Linear operands must have the same shape, got %s and %s"
                % (x.shape, y.shape))

        # `is None` rather than `== None`: equality against None is computed
        # element-wise for array-like labels and cannot be used in an `if`.
        if row_labels is None:
            row_labels = X_df._row_index.keys()
        if col_labels is None:
            col_labels = X_df._col_index.keys()

        return DataFrame.from_matrix(a * x + b * y, row_labels, col_labels)
Example #25
0
 def func(self, target_df, X_df):
     """Return a new DataFrame built from the matrix held by ``X_df``."""
     matrix = X_df.get_matrix()
     return DataFrame.from_matrix(matrix)
 def test_simple_query(self):
     """The root frame, a directory view and slices of it are simple queries."""
     root = DataFrame.from_matrix(np.arange(6).reshape(2, 3))
     assert root._is_simple_query()

     directory = root["row/", "col/"]
     assert directory._is_simple_query()

     # Each sliced view is built from a fresh directory lookup.
     everything = root["row/", "col/"][:, :]
     assert everything._is_simple_query()

     window = root["row/", "col/"][0:1, 2:3]
     assert window._is_simple_query()
Example #27
0
 def func(self, target_df, X_df, Y_df):
     """Return the matrix product of ``X_df`` and ``Y_df`` as a DataFrame.

     Rows are labelled from ``X_df``'s row index and columns from ``Y_df``'s
     column index.
     """
     left, right = X_df.get_matrix(), Y_df.get_matrix()
     labels = (X_df._row_index.keys(), Y_df._col_index.keys())
     return DataFrame.from_matrix(left.dot(right), *labels)
Example #28
0
 def func(self, target_df, X_df):
     """Return ``X_df``'s matrix with each column's mean subtracted."""
     data = X_df.get_matrix()
     centred = data - np.mean(data, axis=0)  # mean taken per column
     return DataFrame.from_matrix(centred)
 def test_simple_query(self):
     """Every view taken here must report itself as a simple query."""
     values = np.arange(6).reshape(2, 3)
     df = DataFrame.from_matrix(values)

     # The frame itself.
     assert df._is_simple_query()
     # A directory lookup.
     view = df["row/", "col/"]
     assert view._is_simple_query()
     # The directory sliced in full, then sliced to a sub-block.
     view = df["row/", "col/"][:, :]
     assert view._is_simple_query()
     view = df["row/", "col/"][0:1, 2:3]
     assert view._is_simple_query()
    def test_setitem(self):
        """Exercise ``__setitem__`` with full slices, labels and a scalar."""
        # Key selecting every row and every column.
        everything = (slice(None), slice(None))
        values = np.arange(6).reshape(2, 3)

        # Plain assignment into an empty frame.
        df = DataFrame()
        df.__setitem__(everything, values)
        assert (df.get_matrix() == values).all()

        # Assignment with explicit row and column labels.
        df = DataFrame()
        df.__setitem__(everything, values,
                       rows=["a", "b"], cols=["c", "d", "e"])
        assert (df.get_matrix() == values).all()

        # Labelled assignment through a directory view.
        df = DataFrame()
        view = df["x/", "y/"]
        view.__setitem__(everything, values,
                         rows=["a", "b"], cols=["c", "d", "e"])
        assert (df.get_matrix() == values).all()
        assert (df["x/", "y/"].get_matrix() == values).all()

        # Assigning a scalar through the view fills the whole frame.
        view = df["x/", "y/"]
        view.__setitem__(everything, 2)
        assert (df.get_matrix() == 2).all()
Example #31
0
 def func(self, target_df, X_df, Y_df):
     """Multiply the matrices of ``X_df`` and ``Y_df``.

     The result takes its row labels from ``X_df`` and its column labels
     from ``Y_df``.
     """
     lhs = X_df.get_matrix()
     rhs = Y_df.get_matrix()
     rows = X_df._row_index.keys()
     cols = Y_df._col_index.keys()
     product = lhs.dot(rhs)
     return DataFrame.from_matrix(product, rows, cols)
Example #32
0
 def func(self, target_df, X_df):
     """Subtract the per-column mean from the matrix held by ``X_df``."""
     matrix = X_df.get_matrix()
     column_means = np.mean(matrix, axis=0)
     return DataFrame.from_matrix(matrix - column_means)
    def test_setitem(self):
        """Assign through ``__setitem__`` in four ways and read the data back."""
        M = np.arange(6).reshape(2, 3)
        whole = slice(None)  # same as slice(None, None, None)
        key = (whole, whole)

        # 1. Matrix only.
        plain = DataFrame()
        plain.__setitem__(key, M)
        assert (plain.get_matrix() == M).all()

        # 2. Matrix plus row/column labels.
        labelled = DataFrame()
        labelled.__setitem__(key, M, rows=["a", "b"], cols=["c", "d", "e"])
        assert (labelled.get_matrix() == M).all()

        # 3. Same, but through a directory of an empty frame.
        nested = DataFrame()
        nested["x/", "y/"].__setitem__(key, M, rows=["a", "b"],
                                       cols=["c", "d", "e"])
        assert (nested.get_matrix() == M).all()
        assert (nested["x/", "y/"].get_matrix() == M).all()

        # 4. Scalar assignment through the same directory.
        nested["x/", "y/"].__setitem__(key, 2)
        assert (nested.get_matrix() == 2).all()
Example #34
0
 def func(self, target_df, X_df):
     """Wrap the matrix of ``X_df`` in a freshly built DataFrame."""
     source = X_df.get_matrix()
     result = DataFrame.from_matrix(source)
     return result