# Data source selection. The alternative generators are left commented out so
# the experiment can be pointed at a different dataset by swapping one line.
#generator = SyntheticDataset1(startM=5000, endM=10000, startN=1000, endN=1500, pnz=0.10, noise=0.01)
#generator = FlixsterDataset()
generator = MovieLensDataset()
iterator = CenterMatrixIterator(generator.getTrainIteratorFunc())

# Second argument handed to SparseUtils.svdPropack below
# (presumably the number of singular values/vectors requested -- TODO confirm).
k = 50

# Only one matrix is pulled from the iterator here, so i is always 0.
for i in range(1):
    X = iterator.next()

    if i == 0:
        # Empty sparse matrix with the same shape as X. Within this section
        # lastX is only read by the disabled delta experiment further down.
        lastX = scipy.sparse.csc_matrix(X.shape)

    print("About to compute SVD")
    U, s, V = SparseUtils.svdPropack(X, k)
    print("Computed SVD")

    # Figure 0: the values in s plotted against their index.
    plt.figure(0)
    plt.plot(numpy.arange(s.shape[0]), s)

    # The string literal below is disabled code (a no-op expression): it would
    # decompose the difference between X and lastX and plot it on figure 1.
    """
    deltaX = X - lastX
    deltaX.eliminate_zeros()
    deltaX.prune()
    print(X.nnz-lastX.nnz)
    U, s, V = SparseUtils.svdPropack(deltaX, k)

    plt.figure(1)
    plt.plot(numpy.arange(s.shape[0]), s)
    lastX = X
    """
lastX = X else: E = X - lastX E.eliminate_zeros() print(X.nnz, E.nnz) startTime = time.time() U3, s3, V3 = RandomisedSVD.updateSvd(X, U3, s3, V3, E, k, p) times[i, 1] = time.time() - startTime lastX = X errors[i, 1] = numpy.linalg.norm(X - (U3*s3).dot(V3.T)) #Accurate method startTime = time.time() U4, s4, V4 = SparseUtils.svdPropack(X, k) times[i, 2] = time.time() - startTime errors[i, 2] = numpy.linalg.norm(X - (U4*s4).dot(V4.T)) #Final method - just use the same SVD if i == 0: startTime = time.time() U5, s5, V5 = SparseUtils.svdPropack(X, k) times[i, 3] = time.time() - startTime errors[i, 3] = numpy.linalg.norm(X - (U5*s5).dot(V5.T)) cumtimes = numpy.cumsum(times, 0) print(cumtimes)