# BookCrossing regression benchmark (continuation of the setup above):
# loads the second index column, the second attribute table and the target,
# builds the materialized matrix T and the NormalizedMatrix, then times
# NormalizedLinearRegression.fit on T.
import time

# skip_header=True is consumed as a line count (bool is an int), so one
# header line is skipped in each CSV.
join_set2 = np.genfromtxt(
    './data/BookCrossing/MLFK2.csv', skip_header=True, dtype=int)
r2 = mmread('./data/BookCrossing/MLR2Sparse.txt')
# Wrapped in np.matrix and transposed; presumably one target value per row
# of the CSV, giving a column vector -- TODO confirm against the data file.
Y = np.matrix(
    np.genfromtxt('./data/BookCrossing/MLY.csv', skip_header=True)).T

# Row-index vectors into r1 and r2. The "- 1" presumably converts 1-based
# keys stored in the files into 0-based positions -- TODO confirm.
k = [join_set1 - 1, join_set2 - 1]

# Materialized matrix: rows of each attribute table gathered through its
# index vector, then stacked side by side.
# NOTE(review): s is handed to NormalizedMatrix below but is not part of T;
# presumably s carries no feature columns for this dataset -- confirm.
T = hstack((r1.tocsr()[k[0]], r2.tocsr()[k[1]]))

# Random starting weights, one per column of T.
w_init = np.matrix(np.random.randn(T.shape[1], 1))

# NOTE(review): these three settings are not passed to fit() in the part of
# the script visible here; verify whether later code consumes them.
gamma = 1e-6
iterations = 20
result_eps = 1e-6

print("start factorized matrix")
normalized_matrix = nm.NormalizedMatrix(s, [r1, r2], k)
print("end factorized matrix")

# Wall-clock timing of the regression on the materialized matrix.
m_regressor = NormalizedLinearRegression()
print("start materialized regression")
start = time.time()
m_regressor.fit(T, Y, w_init=w_init)
end = time.time()
print("end materialized regression")
m_time = end - start

# Draw a fresh set of starting weights before the factorized run.
w_init = np.matrix(np.random.randn(T.shape[1], 1))
print("start factorized regression")
n_regressor = NormalizedLinearRegression()
# Scalar
# For every (feature ratio f, tuple ratio t) pair, build random matrices,
# then time an element-wise square on the materialized matrix T and on the
# equivalent NormalizedMatrix. Each trial appends the ratio
# materialized_time / normalized_time to `avg`.
#
# timeit.default_timer is used instead of time.time(): it selects the
# highest-resolution clock available on the platform, so the two readings
# around a fast operation are far less likely to be equal (which would make
# the ratio below raise ZeroDivisionError).
from timeit import default_timer

print("start tesing scalar")
total = []
for f in range(1, 5):
    result = []
    for t in range(1, 21):
        # Single formatted string so the output is the same under the
        # Python 2 print statement and the Python 3 print function.
        print("scalar, feature ratio: %s tuple ratio %s" % (f, t))
        dr = ds * f  # columns of the attribute table: ds scaled by f
        ns = nr * t  # rows of s: nr scaled by t
        s = np.random.rand(ns, ds)
        r = [np.random.rand(nr, dr)]
        # For each of the ns rows of s, a random row index into r[0].
        num = np.random.randint(nr, size=ns)
        k = [num]
        # Materialized counterpart: s next to the gathered rows of r[0].
        T = np.hstack((s, r[0][k[0]]))
        normalized_matrix = nm.NormalizedMatrix(s, r, k)
        avg = []
        for _ in range(trails):
            m_start = default_timer()
            # np.add(T, 5)
            np.power(T, 2)
            m_end = default_timer()
            n_start = default_timer()
            # normalized_matrix + 5
            np.power(normalized_matrix, 2)
            n_end = default_timer()
            avg.append((m_end - m_start) / (n_end - n_start))