def test_softmax_reg_loss(self):
    """Compare SoftmaxRegression's gradient with a central-difference one.

    A random X (n x m), a random theta (k x m) and a random boolean label
    matrix with exactly one True per row are stored in a fresh DataFrame.
    The value returned by ``g()`` must be close to the numerical gradient
    that ``self.central_diff`` derives from ``f()``.
    """
    step = 1e-4
    reg = 0.0001
    num_classes = 10
    num_rows, num_cols = 5, 8

    X_path = ("X/", "X/")
    theta_path = ("theta/", "theta/")
    y_path = ("y/", "y/")

    df = DataFrame()
    df[X_path] = DataFrame.from_matrix(nprand.rand(num_rows, num_cols))
    df[theta_path] = DataFrame.from_matrix(
        nprand.rand(num_classes, num_cols))

    # Mark one randomly drawn column per row as the true label.
    labels = np.zeros((num_rows, num_classes), dtype=bool)
    for row in range(num_rows):
        labels[row, nprand.randint(num_classes)] = True
    df[y_path] = DataFrame.from_matrix(labels)

    def loss(theta_df):
        # Objective as a function of theta only; X, y and reg stay fixed.
        return SoftmaxRegression(
            theta_df, df[X_path], df[y_path], reg).f()

    numeric_grad = self.central_diff(loss, step, df[theta_path])
    analytic_grad = SoftmaxRegression(
        df[theta_path], df[X_path], df[y_path], reg).g()
    assert np.allclose(numeric_grad, analytic_grad)
def test_softmax_reg_loss(self):
    """Compare SoftmaxRegression's gradient with a central-difference one.

    A random X (n x m), a random theta (k x m) and a random boolean label
    matrix with exactly one True per row are stored in a fresh DataFrame.
    The value returned by ``g()`` must be close to the numerical gradient
    that ``self.central_diff`` derives from ``f()``.
    """
    step = 1e-4
    reg = 0.0001
    num_classes = 10
    num_rows, num_cols = 5, 8

    X_path = ("X/", "X/")
    theta_path = ("theta/", "theta/")
    y_path = ("y/", "y/")

    df = DataFrame()
    df[X_path] = DataFrame.from_matrix(nprand.rand(num_rows, num_cols))
    df[theta_path] = DataFrame.from_matrix(
        nprand.rand(num_classes, num_cols))

    # Mark one randomly drawn column per row as the true label.
    labels = np.zeros((num_rows, num_classes), dtype=bool)
    for row in range(num_rows):
        labels[row, nprand.randint(num_classes)] = True
    df[y_path] = DataFrame.from_matrix(labels)

    def loss(theta_df):
        # Objective as a function of theta only; X, y and reg stay fixed.
        return SoftmaxRegression(
            theta_df, df[X_path], df[y_path], reg).f()

    numeric_grad = self.central_diff(loss, step, df[theta_path])
    analytic_grad = SoftmaxRegression(
        df[theta_path], df[X_path], df[y_path], reg).g()
    assert np.allclose(numeric_grad, analytic_grad)

    # NOTE: a batch variant of this check (summing a gradient over the n
    # rows, dividing by n and comparing with the central-difference
    # gradient) was sketched here but is currently disabled.
def central_diff(self, f, epsilon, theta):
    """Estimate the gradient of ``f`` at ``theta`` by central differences.

    Every entry of the matrix held by ``theta`` is shifted by ``+epsilon``
    and by ``-epsilon`` in turn; the matching gradient entry is
    ``(f(upper) - f(lower)) / (2 * epsilon)``.

    Args:
        f: callable invoked with a frame built by
            ``DataFrame.from_matrix``. Its result is stored in a single
            entry of a float array, so it is presumably a scalar.
        epsilon: size of the perturbation applied to each entry.
        theta: object exposing ``get_matrix()``. The matrix is only
            copied, never modified in place.

    Returns:
        A float ``numpy.ndarray`` with the shape of ``theta.get_matrix()``.

    Raises:
        ValueError: if the matrix is neither 1-D nor 2-D.
    """
    x = theta.get_matrix()
    if x.ndim not in (1, 2):
        raise ValueError(
            "central_diff expects a 1-D or 2-D matrix, got %d dimension(s)"
            % x.ndim)

    g = np.zeros(x.shape)
    # np.ndindex yields every index tuple of x in row-major order, which
    # covers the 1-D and the 2-D case with a single loop.
    for idx in np.ndindex(*x.shape):
        upper = x.copy()
        upper[idx] += epsilon
        lower = x.copy()
        lower[idx] -= epsilon
        g[idx] = ((f(DataFrame.from_matrix(upper))
                   - f(DataFrame.from_matrix(lower)))
                  / (2 * epsilon))
    return g
def central_diff(self, f, epsilon, theta):
    """Estimate the gradient of ``f`` at ``theta`` by central differences.

    Every entry of the matrix held by ``theta`` is shifted by ``+epsilon``
    and by ``-epsilon`` in turn; the matching gradient entry is
    ``(f(upper) - f(lower)) / (2 * epsilon)``.

    Args:
        f: callable invoked with a frame built by
            ``DataFrame.from_matrix``. Its result is stored in a single
            entry of a float array, so it is presumably a scalar.
        epsilon: size of the perturbation applied to each entry.
        theta: object exposing ``get_matrix()``. The matrix is only
            copied, never modified in place.

    Returns:
        A float ``numpy.ndarray`` with the shape of ``theta.get_matrix()``.

    Raises:
        ValueError: if the matrix is neither 1-D nor 2-D.
    """
    x = theta.get_matrix()
    if x.ndim not in (1, 2):
        raise ValueError(
            "central_diff expects a 1-D or 2-D matrix, got %d dimension(s)"
            % x.ndim)

    g = np.zeros(x.shape)
    # np.ndindex yields every index tuple of x in row-major order, which
    # covers the 1-D and the 2-D case with a single loop.
    for idx in np.ndindex(*x.shape):
        upper = x.copy()
        upper[idx] += epsilon
        lower = x.copy()
        lower[idx] -= epsilon
        g[idx] = ((f(DataFrame.from_matrix(upper))
                   - f(DataFrame.from_matrix(lower)))
                  / (2 * epsilon))
    return g
def func(self, target_df, X_df):
    """Return the rows of ``X_df``'s matrix in a random order.

    The permutation drawn here is also stored, as a single-column frame,
    in ``X_df._top_df`` under the key
    ``(_auto_dir + <row query>, _auto_dir + "permutation/")``.
    ``target_df`` is not used.
    """
    matrix = X_df.get_matrix()
    order = np.random.permutation(matrix.shape[0])

    # NOTE(review): the labels are passed on in their original order even
    # though the rows are shuffled - confirm this is intended.
    row_labels = X_df._row_index.keys()
    col_labels = X_df._col_index.keys()

    # Only the row part of the query is needed for the bookkeeping key.
    row_query, _col_query = X_df.pwd()
    record_key = (_auto_dir + row_query, _auto_dir + "permutation/")
    X_df._top_df[record_key] = DataFrame.from_matrix(order[:, None])

    shuffled = matrix[order, :]
    return DataFrame.from_matrix(shuffled, row_labels, col_labels)
def func(self, target_df, X_df):
    """Return the rows of ``X_df``'s matrix in a random order.

    The permutation drawn here is also stored, as a single-column frame,
    in ``X_df._top_df`` under the key
    ``(_auto_dir + <row query>, _auto_dir + "permutation/")``.
    ``target_df`` is not used.
    """
    matrix = X_df.get_matrix()
    order = np.random.permutation(matrix.shape[0])

    # NOTE(review): the labels are passed on in their original order even
    # though the rows are shuffled - confirm this is intended.
    row_labels = X_df._row_index.keys()
    col_labels = X_df._col_index.keys()

    # Only the row part of the query is needed for the bookkeeping key.
    row_query, _col_query = X_df.pwd()
    record_key = (_auto_dir + row_query, _auto_dir + "permutation/")
    X_df._top_df[record_key] = DataFrame.from_matrix(order[:, None])

    shuffled = matrix[order, :]
    return DataFrame.from_matrix(shuffled, row_labels, col_labels)
def func(self, target_df, X_df, num_bases=50):
    """Return the first ``num_bases`` columns of V from an SVD of the data.

    The matrix of ``X_df`` is centred by subtracting its mean over axis 0,
    decomposed with ``la.svd`` and the transposed third factor is cut to
    ``num_bases`` columns. Rows and columns of the result are labelled
    with consecutive integers rendered as strings. ``target_df`` is not
    used.
    """
    data = X_df.get_matrix()
    centred = data - np.mean(data, axis=0)

    # Only the third factor is needed (assumes la.svd follows the
    # NumPy/SciPy ``u, s, vh`` convention - TODO confirm which ``la``).
    _, _, v_T = la.svd(centred)
    basis = np.real(v_T.T[:, :num_bases])

    # NOTE(review): col_labels always has num_bases entries, even if the
    # slice above yields fewer columns - confirm callers never hit that.
    row_labels = list(map(str, range(data.shape[1])))
    col_labels = list(map(str, range(num_bases)))
    return DataFrame.from_matrix(basis, row_labels, col_labels)
def func(self, target_df, X_df, num_bases=50):
    """Return the first ``num_bases`` columns of V from an SVD of the data.

    The matrix of ``X_df`` is centred by subtracting its mean over axis 0,
    decomposed with ``la.svd`` and the transposed third factor is cut to
    ``num_bases`` columns. Rows and columns of the result are labelled
    with consecutive integers rendered as strings. ``target_df`` is not
    used.
    """
    data = X_df.get_matrix()
    centred = data - np.mean(data, axis=0)

    # Only the third factor is needed (assumes la.svd follows the
    # NumPy/SciPy ``u, s, vh`` convention - TODO confirm which ``la``).
    _, _, v_T = la.svd(centred)
    basis = np.real(v_T.T[:, :num_bases])

    # NOTE(review): col_labels always has num_bases entries, even if the
    # slice above yields fewer columns - confirm callers never hit that.
    row_labels = list(map(str, range(data.shape[1])))
    col_labels = list(map(str, range(num_bases)))
    return DataFrame.from_matrix(basis, row_labels, col_labels)
def test_permutation(self):
    """Permute's output must match the permutation it records.

    The permutation is read back from ("auto/row1/", "auto/permutation/")
    and applied to the source matrix; the result has to equal the matrix
    stored at the permuted path.
    """
    source_path = ("row1/", "col1/")
    permuted_path = ("row2/", "col1/")

    df = DataFrame()
    source = nprand.rand(3, 5)
    df[source_path] = DataFrame.from_matrix(source)
    df[permuted_path] = Permute(df[source_path])

    # The recorded permutation is a column; flatten it into an index.
    recorded = df["auto/row1/", "auto/permutation/"]
    order = recorded.get_matrix().ravel()

    actual = df[permuted_path].get_matrix()
    expected = source[order, :]
    assert (actual == expected).all()
def test_permutation(self):
    """Permute's output must match the permutation it records.

    The permutation is read back from ("auto/row1/", "auto/permutation/")
    and applied to the source matrix; the result has to equal the matrix
    stored at the permuted path.
    """
    source_path = ("row1/", "col1/")
    permuted_path = ("row2/", "col1/")

    df = DataFrame()
    source = nprand.rand(3, 5)
    df[source_path] = DataFrame.from_matrix(source)
    df[permuted_path] = Permute(df[source_path])

    # The recorded permutation is a column; flatten it into an index.
    recorded = df["auto/row1/", "auto/permutation/"]
    order = recorded.get_matrix().ravel()

    actual = df[permuted_path].get_matrix()
    expected = source[order, :]
    assert (actual == expected).all()
def test_dot(self):
    """Dot of two stored frames must equal the NumPy product of their data."""
    left_path = ("row1/", "col1/")
    right_path = ("row2/", "col2/")
    product_path = ("row1/", "col2/")

    df = DataFrame()
    left = nprand.rand(3, 5)
    right = nprand.rand(5, 8)

    # The two operands are deliberately stored through different APIs.
    df[left_path] = DataFrame.from_matrix(left)
    df[right_path].set_matrix(right)

    df[product_path] = Dot(df[left_path], df[right_path])

    actual = df[product_path].get_matrix()
    expected = left.dot(right)
    assert (actual == expected).all()
def test_dot(self):
    """Dot of two stored frames must equal the NumPy product of their data."""
    left_path = ("row1/", "col1/")
    right_path = ("row2/", "col2/")
    product_path = ("row1/", "col2/")

    df = DataFrame()
    left = nprand.rand(3, 5)
    right = nprand.rand(5, 8)

    # The two operands are deliberately stored through different APIs.
    df[left_path] = DataFrame.from_matrix(left)
    df[right_path].set_matrix(right)

    df[product_path] = Dot(df[left_path], df[right_path])

    actual = df[product_path].get_matrix()
    expected = left.dot(right)
    assert (actual == expected).all()
def test_sgd(self):
    """Run SGD on SquareTest for one second and compare the result with y.

    The frame stored at ("row/", "col/") is built by
    ``SGD(SquareTest, close, y_df, batch_size=8, step_size=0.5)``; after a
    one-second sleep it is stopped and its matrix must be close to the
    matrix of ``y_df``.
    """
    # Also test sgd
    close = np.array([
        [-44.25076083, 38.62854577],
        [-38.41473092, 36.29945225],
        [-31.43300105, 30.79620632],
        [-21.27706071, 24.08638079],
        [-14.00259076, 6.54438641],
        [11.52354442, -6.07783327],
        [48.69374796, -38.64696136],
        [95.49682071, -84.38906967],
    ])

    x_path = ("xrow/", "xcol/")
    y_path = ("yrow/", "ycol/")
    path = ("row/", "col/")

    df = DataFrame()
    df[x_path] = DataFrame.from_matrix(np.arange(16).reshape(8, 2))
    df[y_path] = DataFrame.from_matrix(np.arange(8).reshape(8, 1))

    # X_df is looked up but not used afterwards; the lookup is retained
    # because indexing the frame may have side effects.
    X_df = df[x_path]
    y_df = df[y_path]

    df[path] = SGD(SquareTest, close, y_df, batch_size=8, step_size=0.5)
    sleep(1)
    df[path].stop()

    result = df[path].get_matrix()
    assert np.allclose(result, y_df.get_matrix())
def test_linear(self):
    """Linear(a, X, b, Y) must equal ``a * X + b * Y`` element by element."""
    first_path = ("row1/", "col1/")
    second_path = ("row2/", "col2/")
    result_path = ("row1/", "col2/")

    df = DataFrame()
    first = nprand.rand(3, 5)
    second = nprand.rand(3, 5)

    # The two operands are deliberately stored through different APIs.
    df[first_path] = DataFrame.from_matrix(first)
    df[second_path].set_matrix(second)

    a, b = 2, -3
    df[result_path] = Linear(a, df[first_path], b, df[second_path])

    actual = df[result_path].get_matrix()
    expected = a * first + b * second
    assert (actual == expected).all()
def test_linear(self):
    """Linear(a, X, b, Y) must equal ``a * X + b * Y`` element by element."""
    first_path = ("row1/", "col1/")
    second_path = ("row2/", "col2/")
    result_path = ("row1/", "col2/")

    df = DataFrame()
    first = nprand.rand(3, 5)
    second = nprand.rand(3, 5)

    # The two operands are deliberately stored through different APIs.
    df[first_path] = DataFrame.from_matrix(first)
    df[second_path].set_matrix(second)

    a, b = 2, -3
    df[result_path] = Linear(a, df[first_path], b, df[second_path])

    actual = df[result_path].get_matrix()
    expected = a * first + b * second
    assert (actual == expected).all()
def test_sgd(self):
    """Run SGD on SquareTest for one second and compare the result with y.

    The frame stored at ("row/", "col/") is built by
    ``SGD(SquareTest, close, y_df, batch_size=8, step_size=0.5)``; after a
    one-second sleep it is stopped and its matrix must be close to the
    matrix of ``y_df``.
    """
    # Also test sgd
    close = np.array([
        [-44.25076083, 38.62854577],
        [-38.41473092, 36.29945225],
        [-31.43300105, 30.79620632],
        [-21.27706071, 24.08638079],
        [-14.00259076, 6.54438641],
        [11.52354442, -6.07783327],
        [48.69374796, -38.64696136],
        [95.49682071, -84.38906967],
    ])

    x_path = ("xrow/", "xcol/")
    y_path = ("yrow/", "ycol/")
    path = ("row/", "col/")

    df = DataFrame()
    df[x_path] = DataFrame.from_matrix(np.arange(16).reshape(8, 2))
    df[y_path] = DataFrame.from_matrix(np.arange(8).reshape(8, 1))

    # X_df is looked up but not used afterwards; the lookup is retained
    # because indexing the frame may have side effects.
    X_df = df[x_path]
    y_df = df[y_path]

    df[path] = SGD(SquareTest, close, y_df, batch_size=8, step_size=0.5)
    sleep(1)
    df[path].stop()

    result = df[path].get_matrix()
    assert np.allclose(result, y_df.get_matrix())
def func(self, target_df, a, X_df, b, Y_df, row_labels=None,
         col_labels=None):
    """Return the linear combination ``a * X + b * Y`` in a dataframe.

    Args:
        target_df: not used by this method.
        a: factor applied to the matrix of ``X_df``.
        X_df: frame providing the first matrix via ``get_matrix()`` and,
            when labels are not given, the default row/column labels.
        b: factor applied to the matrix of ``Y_df``.
        Y_df: frame providing the second matrix via ``get_matrix()``.
        row_labels: labels for the result rows; defaults to the keys of
            ``X_df._row_index``.
        col_labels: labels for the result columns; defaults to the keys
            of ``X_df._col_index``.

    Returns:
        The frame built by ``DataFrame.from_matrix`` from the combined
        matrix and the labels.

    Raises:
        ValueError: if the two matrices do not have the same shape.
    """
    x = X_df.get_matrix()
    y = Y_df.get_matrix()

    # Validate first so a mismatch is reported before any other work.
    if x.shape != y.shape:
        raise ValueError(
            "operands must have the same shape, got %s and %s"
            % (x.shape, y.shape))

    # Identity comparison: ``== None`` is evaluated element-wise when the
    # labels are a numpy array, which makes the condition ambiguous.
    if row_labels is None:
        row_labels = X_df._row_index.keys()
    if col_labels is None:
        col_labels = X_df._col_index.keys()

    return DataFrame.from_matrix(a * x + b * y, row_labels, col_labels)
def func(self, target_df, a, X_df, b, Y_df, row_labels=None,
         col_labels=None):
    """Return the linear combination ``a * X + b * Y`` in a dataframe.

    Args:
        target_df: not used by this method.
        a: factor applied to the matrix of ``X_df``.
        X_df: frame providing the first matrix via ``get_matrix()`` and,
            when labels are not given, the default row/column labels.
        b: factor applied to the matrix of ``Y_df``.
        Y_df: frame providing the second matrix via ``get_matrix()``.
        row_labels: labels for the result rows; defaults to the keys of
            ``X_df._row_index``.
        col_labels: labels for the result columns; defaults to the keys
            of ``X_df._col_index``.

    Returns:
        The frame built by ``DataFrame.from_matrix`` from the combined
        matrix and the labels.

    Raises:
        ValueError: if the two matrices do not have the same shape.
    """
    x = X_df.get_matrix()
    y = Y_df.get_matrix()

    # Validate first so a mismatch is reported before any other work.
    if x.shape != y.shape:
        raise ValueError(
            "operands must have the same shape, got %s and %s"
            % (x.shape, y.shape))

    # Identity comparison: ``== None`` is evaluated element-wise when the
    # labels are a numpy array, which makes the condition ambiguous.
    if row_labels is None:
        row_labels = X_df._row_index.keys()
    if col_labels is None:
        col_labels = X_df._col_index.keys()

    return DataFrame.from_matrix(a * x + b * y, row_labels, col_labels)
def func(self, target_df, X_df):
    """Return the matrix of ``X_df`` with its axis-0 mean subtracted.

    The result is wrapped in a new frame via ``DataFrame.from_matrix``
    without explicit labels. ``target_df`` is not used.
    """
    data = X_df.get_matrix()
    # The mean over axis 0 is broadcast against every row of the data.
    centred = data - np.mean(data, axis=0)
    return DataFrame.from_matrix(centred)
def test_simple_query(self):
    """Plain, named and sliced views of a frame are all simple queries."""
    df = DataFrame.from_matrix(np.arange(6).reshape(2, 3))

    # The frame itself.
    assert df._is_simple_query()

    # Each view below is looked up afresh, one at a time.
    named = df["row/", "col/"]
    assert named._is_simple_query()

    everything = df["row/", "col/"][:, :]
    assert everything._is_simple_query()

    window = df["row/", "col/"][0:1, 2:3]
    assert window._is_simple_query()
def func(self, target_df, X_df, Y_df):
    """Return the product of the matrices of ``X_df`` and ``Y_df``.

    The result takes its row labels from ``X_df`` and its column labels
    from ``Y_df``. ``target_df`` is not used.
    """
    left = X_df.get_matrix()
    right = Y_df.get_matrix()
    row_labels = X_df._row_index.keys()
    col_labels = Y_df._col_index.keys()
    product = left.dot(right)
    return DataFrame.from_matrix(product, row_labels, col_labels)
def func(self, target_df, X_df):
    """Return the matrix of ``X_df`` with its axis-0 mean subtracted.

    The result is wrapped in a new frame via ``DataFrame.from_matrix``
    without explicit labels. ``target_df`` is not used.
    """
    data = X_df.get_matrix()
    # The mean over axis 0 is broadcast against every row of the data.
    centred = data - np.mean(data, axis=0)
    return DataFrame.from_matrix(centred)
def func(self, target_df, X_df, Y_df):
    """Return the product of the matrices of ``X_df`` and ``Y_df``.

    The result takes its row labels from ``X_df`` and its column labels
    from ``Y_df``. ``target_df`` is not used.
    """
    left = X_df.get_matrix()
    right = Y_df.get_matrix()
    row_labels = X_df._row_index.keys()
    col_labels = Y_df._col_index.keys()
    product = left.dot(right)
    return DataFrame.from_matrix(product, row_labels, col_labels)
def func(self, target_df, X_df):
    """Return a new frame built from the matrix of ``X_df``.

    No labels are passed on and ``target_df`` is not used.
    """
    matrix = X_df.get_matrix()
    return DataFrame.from_matrix(matrix)
def func(self, target_df, X_df):
    """Return a new frame built from the matrix of ``X_df``.

    No labels are passed on and ``target_df`` is not used.
    """
    matrix = X_df.get_matrix()
    return DataFrame.from_matrix(matrix)
def test_simple_query(self):
    """Plain, named and sliced views of a frame are all simple queries."""
    df = DataFrame.from_matrix(np.arange(6).reshape(2, 3))

    # The frame itself.
    assert df._is_simple_query()

    # Each view below is looked up afresh, one at a time.
    named = df["row/", "col/"]
    assert named._is_simple_query()

    everything = df["row/", "col/"][:, :]
    assert everything._is_simple_query()

    window = df["row/", "col/"][0:1, 2:3]
    assert window._is_simple_query()