def test_setitem(self):
    """Exercise full-slice assignment on a frame and on a sub-frame.

    Four cases are checked, each by reading the data back with
    ``get_matrix()``:

    1. plain ``[:, :]`` assignment of a matrix into an empty frame;
    2. the same assignment with ``rows=`` / ``cols=`` keyword arguments;
    3. the keyword form applied to the sub-frame ``df["x/", "y/"]``;
    4. assigning the scalar ``2`` over the whole sub-frame.
    """
    # Key equivalent to ``[:, :]``; needed explicitly whenever keyword
    # arguments are passed, since subscript syntax cannot carry them.
    everything = (slice(None), slice(None))
    M = np.arange(6).reshape(2, 3)

    # 1. Plain assignment into an empty frame.
    df = DataFrame()
    df[:, :] = M
    assert (df.get_matrix() == M).all()

    # 2. Assignment with rows=/cols= (presumably row and column labels).
    df = DataFrame()
    df.__setitem__(everything, M, rows=["a", "b"], cols=["c", "d", "e"])
    assert (df.get_matrix() == M).all()

    # 3. Same, but through a sub-frame of an empty frame; the data must be
    #    visible both from the root and from the sub-frame.
    df = DataFrame()
    df["x/", "y/"].__setitem__(
        everything, M, rows=["a", "b"], cols=["c", "d", "e"])
    assert (df.get_matrix() == M).all()
    assert (df["x/", "y/"].get_matrix() == M).all()

    # 4. Scalar assignment over the whole sub-frame.
    df["x/", "y/"][:, :] = 2
    assert (df.get_matrix() == 2).all()
def test_gd(self):
    """Check that a GD-driven frame converges to, and grows with, its input.

    A ``GD`` node built from ``SquareTest`` is stored at ``M2_path`` with
    ``df[M1_path]`` as input. After waiting, the parameter matrix is
    expected to match the input matrix. A second batch is then added under
    the input's columns and the parameter frame is expected to follow the
    new shape ``(3, 9)`` and values.

    The optimizer is stopped in a ``finally`` clause so that a failing
    assertion does not leave it running.
    """
    df = DataFrame()
    M1_path = ("row1/", "col1/")
    M2_path = ("row2/", "col2/")
    batch1_path = ("row1/", "col1/batch1/")
    batch2_path = ("row1/", "col1/batch2/")

    M1 = nprand.rand(3, 5)
    df[batch1_path].set_matrix(M1)
    M2 = np.zeros((3, 5))
    df[M2_path] = GD(SquareTest, M2, df[M1_path], step_size=1)
    try:
        # Give the optimizer time to work (presumably it runs in the
        # background; the test relies on a fixed one-second wait).
        sleep(1)
        assert np.allclose(df[M2_path].get_matrix(),
                           df[M1_path].get_matrix())

        # Assert that the input structure has been replicated
        assert (df["row2/", "col2/batch1/"].get_matrix()
                == df[M2_path].get_matrix()).all()

        # Now attempt to extend the parameter matrix
        M3 = nprand.rand(3, 4)
        df[batch2_path].set_matrix(M3)
        sleep(1)
        assert df[M2_path].shape == df[M1_path].shape
        assert df[M2_path].shape == (3, 9)
        assert np.allclose(df[M2_path].get_matrix(),
                           df[M1_path].get_matrix())
    finally:
        # Always stop the optimizer, even when an assertion above fails.
        df[M2_path].stop()
def test_softmax_reg_loss(self):
    """Compare the SoftmaxRegression gradient with a central-difference estimate.

    Random ``X`` (n x m) and ``theta`` (k x m) frames are created together
    with a boolean ``y`` (n x k) that has exactly one ``True`` per row. The
    gradient returned by ``SoftmaxRegression(...).g()`` must be close to
    the estimate produced by ``self.central_diff`` on the loss ``f()``.
    """
    df = DataFrame()
    step = 1e-4
    labels_path = ("y/", "y/")
    params_path = ("theta/", "theta/")
    inputs_path = ("X/", "X/")
    num_classes = 10
    num_samples, num_features = 5, 8

    df[inputs_path] = DataFrame.from_matrix(
        nprand.rand(num_samples, num_features))
    df[params_path] = DataFrame.from_matrix(
        nprand.rand(num_classes, num_features))

    # One randomly chosen class per sample.
    labels = np.zeros((num_samples, num_classes), dtype=bool)
    for sample in range(num_samples):
        labels[sample, nprand.randint(num_classes)] = True
    df[labels_path] = DataFrame.from_matrix(labels)

    reg = 0.0001

    def loss_at(theta_df):
        # Loss as a function of the parameter frame only.
        return SoftmaxRegression(
            theta_df, df[inputs_path], df[labels_path], reg).f()

    numeric_grad = self.central_diff(loss_at, step, df[params_path])
    analytic_grad = SoftmaxRegression(
        df[params_path], df[inputs_path], df[labels_path], reg).g()
    assert np.allclose(numeric_grad, analytic_grad)
def test_zero_mean(self):
    """ZeroMean of a frame must equal the matrix minus its column means."""
    df = DataFrame()
    source_path = ("row1/", "col1/")
    centered_path = ("row2/", "col2/")

    data = nprand.rand(3, 5)
    expected = data - data.mean(axis=0)

    df[source_path].set_matrix(data)
    df[centered_path] = ZeroMean(df[source_path])

    actual = df[centered_path].get_matrix()
    assert (actual == expected).all()
def test_permutation(self):
    """Permute must reorder rows by the permutation stored in the frame.

    The permutation is read back from
    ``df["auto/row1/", "auto/permutation/"]`` and applied to the original
    matrix with NumPy row indexing; the result must equal the permuted
    frame.
    """
    df = DataFrame()
    source_path = ("row1/", "col1/")
    permuted_path = ("row2/", "col1/")

    data = nprand.rand(3, 5)
    df[source_path] = DataFrame.from_matrix(data)
    df[permuted_path] = Permute(df[source_path])

    # Flatten the stored permutation so it can index rows directly.
    order = df["auto/row1/", "auto/permutation/"].get_matrix().ravel()
    assert (df[permuted_path].get_matrix() == data[order, :]).all()
def test_dot(self):
    """Dot of two frames must equal the NumPy product of their matrices."""
    df = DataFrame()
    left_path = ("row1/", "col1/")
    right_path = ("row2/", "col2/")
    product_path = ("row1/", "col2/")

    left = nprand.rand(3, 5)
    right = nprand.rand(5, 8)

    # The two operands are stored through two different APIs on purpose,
    # exactly as in the original test.
    df[left_path] = DataFrame.from_matrix(left)
    df[right_path].set_matrix(right)

    df[product_path] = Dot(df[left_path], df[right_path])
    result = df[product_path].get_matrix()
    assert (result == left.dot(right)).all()
def test_linear(self):
    """Linear(a, A, b, B) must equal ``a * A + b * B`` element-wise."""
    df = DataFrame()
    first_path = ("row1/", "col1/")
    second_path = ("row2/", "col2/")
    combo_path = ("row1/", "col2/")

    first = nprand.rand(3, 5)
    second = nprand.rand(3, 5)
    df[first_path] = DataFrame.from_matrix(first)
    df[second_path].set_matrix(second)

    a, b = 2, -3
    df[combo_path] = Linear(a, df[first_path], b, df[second_path])

    result = df[combo_path].get_matrix()
    assert (result == a * first + b * second).all()
def test_tuple_to_query(self):
    """Round-trip queries through their hashable tuple representation.

    Strings map to themselves; a slice is represented as
    ``(slice, (start, stop, step))`` and a list as ``(list, (items...))``.
    """
    df = DataFrame()

    # (hashable form, actual query) pairs, checked in both directions.
    text = "randomstring"
    pairs = [
        (text, text),
        ((slice, (2, 4, 1)), slice(2, 4, 1)),
        ((list, (1, 2, 3, 4, 5, 6)), [1, 2, 3, 4, 5, 6]),
    ]

    for hashed, query in pairs:
        assert df._tuple_element_to_query(hashed) == query

    for hashed, query in pairs:
        assert df._query_to_tuple_element(query) == hashed
def test_PCA_basis(self):
    """Check PCABasis and PCA against an eigen-decomposition of the covariance.

    The reference basis is the ``d`` eigenvectors of the sample covariance
    of the centred matrix ``M1`` with the largest eigenvalues. Each column
    of the ``PCABasis`` result must match the corresponding reference
    column up to sign, and each column of the ``PCA`` projection of a
    second matrix must match the reference projection up to sign.
    """

    def same_up_to_sign(actual, expected):
        # Eigenvectors are only defined up to sign, so accept either.
        return (np.isclose(actual, expected).all()
                or np.isclose(actual, -expected).all())

    df = DataFrame()
    M1_path = ("row1/", "col1/")
    M2_path = ("row2/", "col2/")
    n = 10
    m = 5
    d = 3

    M1 = nprand.rand(n, m)
    M1 = M1 - np.mean(M1, axis=0)
    df[M1_path].set_matrix(M1)
    df[M2_path] = PCABasis(df[M1_path], d)

    # Reference basis: top-d eigenvectors of the sample covariance matrix.
    covmat = (1. / (n - 1)) * M1.T.dot(M1)
    evs, evmat = scipy.linalg.eig(covmat)
    p = np.argsort(evs)[::-1]
    evmat_sorted = evmat[:, p][:, :d]

    basis = df[M2_path].get_matrix()
    for i in range(evmat_sorted.shape[1]):
        assert same_up_to_sign(basis[:, i], evmat_sorted[:, i])

    M3_path = ("row3/", "col3/")
    M3 = nprand.rand(2 * n, m)
    # NOTE(review): M1 was already centred above, so this subtracts a mean
    # that is ~0 rather than the original training mean -- confirm intent.
    M3 = M3 - np.mean(M1, axis=0)
    df[M3_path].set_matrix(M3)

    pca_path = ("pca/", "pca/")
    df[pca_path] = PCA(df[M1_path], df[M3_path], d)
    pca = df[pca_path].get_matrix()
    proj = M3.dot(evmat_sorted)
    for i in range(pca.shape[1]):
        assert same_up_to_sign(pca[:, i], proj[:, i])
def test_one_hot_encoding(self):
    """OneHotEncoding of a label column must match a hand-built indicator matrix.

    The label column is ``n`` random labels in ``[0, m)`` followed by every
    label ``0 .. m-1`` once, so each label occurs at least once.
    """
    df = DataFrame()
    labels_path = ("row1/", "col1/")
    encoded_path = ("row2/", "col2/")
    n = 10
    m = 5

    random_labels = nprand.randint(0, m, (n, 1))
    every_label = np.arange(m).reshape(m, 1)
    labels = np.vstack([random_labels, every_label])

    # Expected encoding: a single 1 per row, in the column of that label.
    expected = np.zeros((n + m, m))
    for row, label in enumerate(labels):
        expected[row, label] = 1

    df[labels_path].set_matrix(labels)
    df[encoded_path] = OneHotEncoding(df[labels_path])
    assert (df[encoded_path].get_matrix() == expected).all()
def test_sgd(self):
    """Run SGD with SquareTest from a fixed start and expect it to reach y.

    The optimizer is given one second, then stopped, after which the
    parameter matrix must be close to the ``y`` frame's matrix.
    """
    df = DataFrame()
    target_path = "row/", "col/"

    df["xrow/", "xcol/"] = DataFrame.from_matrix(
        np.arange(16).reshape(8, 2))
    df["yrow/", "ycol/"] = DataFrame.from_matrix(
        np.arange(8).reshape(8, 1))
    # x_df is not used below; the lookup is kept so the same frame
    # accesses happen as before.
    x_df = df["xrow/", "xcol/"]
    y_df = df["yrow/", "ycol/"]

    # Fixed initial parameter matrix handed to the optimizer.
    start = np.array([
        [-44.25076083, 38.62854577],
        [-38.41473092, 36.29945225],
        [-31.43300105, 30.79620632],
        [-21.27706071, 24.08638079],
        [-14.00259076, 6.54438641],
        [11.52354442, -6.07783327],
        [48.69374796, -38.64696136],
        [95.49682071, -84.38906967],
    ])

    df[target_path] = SGD(
        SquareTest, start, y_df, batch_size=8, step_size=0.5)
    sleep(1)
    df[target_path].stop()

    assert np.allclose(df[target_path].get_matrix(), y_df.get_matrix())