def distance2(self, graph1, graph2, permutation):
    """
    Compute a graph distance metric between two graphs given a permutation
    vector. This is given by

    F(P) = (1-alpha)/(||W1||^2_F + ||W2||^2_F) ||W1 - P W2 P.T||^2_F
           + alpha/(||V1||^2_F + ||V2||^2_F) ||V1 - P.T V2||^2_F

    and is bounded between 0 and 1.

    :param graph1: A graph object
    :param graph2: The second graph object to match
    :param permutation: An array of permutation indices matching the first to second graph
    :type permutation: `numpy.ndarray`
    """
    if self.useWeightM:
        W1 = graph1.getWeightMatrix()
        W2 = graph2.getWeightMatrix()
    else:
        W1 = graph1.adjacencyMatrix()
        W2 = graph2.adjacencyMatrix()

    if W1.shape[0] < W2.shape[0]:
        W1 = Util.extendArray(W1, W2.shape)
    elif W2.shape[0] < W1.shape[0]:
        W2 = Util.extendArray(W2, W1.shape)

    n = W1.shape[0]
    P = numpy.zeros((n, n))
    P[(numpy.arange(n), permutation)] = 1
    dist1 = numpy.linalg.norm(W1 - P.dot(W2).dot(P.T))**2

    # Now compute the vertex similarities distance
    V1 = graph1.getVertexList().getVertices()
    V2 = graph2.getVertexList().getVertices()

    if V1.shape[0] < V2.shape[0]:
        V1 = Util.extendArray(V1, V2.shape)
    elif V2.shape[0] < V1.shape[0]:
        V2 = Util.extendArray(V2, V1.shape)

    dist2 = numpy.sum((V1 - P.T.dot(V2))**2)

    norm1 = (W1**2).sum() + (W2**2).sum()
    norm2 = (V1**2).sum() + (V2**2).sum()

    if norm1 != 0:
        dist1 = dist1/norm1
    if norm2 != 0:
        dist2 = dist2/norm2

    dist = (1 - self.alpha)*dist1 + self.alpha*dist2

    return dist
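# Illustration (not part of the original class): a minimal numpy sketch of the
# quantity distance2 computes. The permutation index vector becomes a
# permutation matrix P, the weight-matrix and vertex-label residuals are
# normalised by squared Frobenius norms, and the two terms are blended by
# alpha. All names and values below are hypothetical.
import numpy

def distance2_sketch(W1, W2, V1, V2, permutation, alpha=0.5):
    n = W1.shape[0]
    P = numpy.zeros((n, n))
    P[(numpy.arange(n), permutation)] = 1

    dist1 = numpy.linalg.norm(W1 - P.dot(W2).dot(P.T))**2
    dist2 = numpy.sum((V1 - P.T.dot(V2))**2)

    norm1 = (W1**2).sum() + (W2**2).sum()
    norm2 = (V1**2).sum() + (V2**2).sum()

    if norm1 != 0:
        dist1 = dist1/norm1
    if norm2 != 0:
        dist2 = dist2/norm2

    return (1 - alpha)*dist1 + alpha*dist2

# Two 3-vertex graphs that match exactly under the permutation [2, 1, 0], so
# the distance is 0: graph1 has the edge (0, 1), graph2 the edge (1, 2), and
# the vertex labels of graph2 are those of graph1 in reverse order.
W1 = numpy.array([[0., 1., 0.], [1., 0., 0.], [0., 0., 0.]])
W2 = numpy.array([[0., 0., 0.], [0., 0., 1.], [0., 1., 0.]])
V1 = numpy.array([[0.], [1.], [2.]])
V2 = numpy.array([[2.], [1.], [0.]])
print(distance2_sketch(W1, W2, V1, V2, numpy.array([2, 1, 0])))  # prints 0.0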
def testExtendArray(self):
    X = numpy.random.rand(5, 5)

    X2 = Util.extendArray(X, (10, 5))
    nptst.assert_array_equal(X, X2[0:5, :])
    nptst.assert_array_equal(0, X2[5:, :])

    X2 = Util.extendArray(X, (10, 5), 1.23)
    nptst.assert_array_equal(X, X2[0:5, :])
    nptst.assert_array_equal(1.23, X2[5:, :])

    # Now try extending using an array
    X2 = Util.extendArray(X, (10, 5), numpy.array([1, 2, 3, 4, 5]))
    nptst.assert_array_equal(X, X2[0:5, :])

    for i in range(5, 10):
        nptst.assert_array_equal(numpy.array([1, 2, 3, 4, 5]), X2[i, :])
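# Standalone sketch of the padding behaviour the test above exercises. This is
# inferred from the assertions rather than taken from the library: the input
# array is copied into the top-left corner of a larger array whose remaining
# entries hold the fill value, which may be 0, a scalar, or a row vector.
import numpy

def extend_array_sketch(X, newShape, fill=0):
    X2 = numpy.zeros(newShape)
    X2[:] = fill                            # scalar or broadcastable row vector
    X2[0:X.shape[0], 0:X.shape[1]] = X      # original block in the top-left
    return X2

X = numpy.random.rand(5, 5)
X2 = extend_array_sketch(X, (10, 5), numpy.array([1, 2, 3, 4, 5]))
assert (X2[0:5, :] == X).all()
assert (X2[5:, :] == numpy.array([1, 2, 3, 4, 5])).all()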
def distance(self, graph1, graph2, permutation, normalised=False, nonNeg=False, verbose=False):
    """
    Compute the graph distance metric between two graphs given a permutation
    vector. This is given by

    F(P) = (1-alpha)/(||W1||^2_F + ||W2||^2_F) ||W1 - P W2 P.T||^2_F - alpha/||C||_F tr(C.T P)

    in the normalised case. If we want an unnormalised solution it is computed as

    F(P) = (1-alpha) ||W1 - P W2 P.T||^2_F - alpha tr(C.T P)

    and finally there is a standardised case in which the distance is between
    0 and 1, where ||C||_F is used to normalise the vertex similarities and we
    assume 0 <= C_ij <= 1.

    :param graph1: A graph object
    :param graph2: The second graph object to match
    :param permutation: An array of permutation indices matching the first to second graph
    :type permutation: `numpy.ndarray`
    :param normalised: Specify whether to normalise the objective function
    :type normalised: `bool`
    :param nonNeg: Specify whether we want a non-negative solution
    :type nonNeg: `bool`
    :param verbose: Specify whether to return the graph and label distances
    :type verbose: `bool`
    """
    if graph1.size == 0 and graph2.size == 0:
        if not verbose:
            return 0.0
        else:
            return 0.0, 0.0, 0.0
    elif graph1.size == 0 or graph2.size == 0:
        if normalised:
            if not verbose:
                return 1 - self.alpha
            else:
                return 1 - self.alpha, 1 - self.alpha, 0.0
        else:
            raise ValueError("Unsupported case")

    if self.useWeightM:
        W1 = graph1.getWeightMatrix()
        W2 = graph2.getWeightMatrix()
    else:
        W1 = graph1.adjacencyMatrix()
        W2 = graph2.adjacencyMatrix()

    if W1.shape[0] < W2.shape[0]:
        W1 = Util.extendArray(W1, W2.shape, self.rho)
    elif W2.shape[0] < W1.shape[0]:
        W2 = Util.extendArray(W2, W1.shape, self.rho)

    n = W1.shape[0]
    P = numpy.zeros((n, n))
    P[(numpy.arange(n), permutation)] = 1
    dist1 = numpy.linalg.norm(W1 - P.dot(W2).dot(P.T))**2

    # Now compute the vertex similarities trace
    C = self.vertexSimilarities(graph1, graph2)
    minC = numpy.min(C)
    maxC = numpy.max(C)
    C = Util.extendArray(C, (n, n), minC + self.gamma*(maxC - minC))
    dist2 = numpy.trace(C.T.dot(P))

    if normalised:
        norm1 = (W1**2).sum() + (W2**2).sum()
        norm2 = numpy.linalg.norm(C)

        if norm1 != 0:
            dist1 = dist1/norm1
        if norm2 != 0:
            dist2 = dist2/norm2

    dist = (1 - self.alpha)*dist1 - self.alpha*dist2

    # If nonNeg is True we add a term to the distance to ensure it is always
    # non-negative. The numerator is an upper bound on tr(C.T P).
    if nonNeg and normalised:
        normC = norm2

        logging.debug("Graph distance: " + str(dist1) + " label distance: " + str(dist2) + " distance offset: " + str(self.alpha*n/normC) + " graph sizes: " + str((graph1.size, graph2.size)))

        if normC != 0:
            dist = dist + self.alpha*n/normC
    else:
        logging.debug("Graph objective: " + str(dist1) + " label objective: " + str(dist2) + " weighted objective: " + str(dist) + " graph sizes: " + str((graph1.size, graph2.size)))

    if verbose:
        return dist, dist1, dist2
    else:
        return dist
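# Illustration with hypothetical values: how the permutation index vector used
# by distance and distance2 becomes a permutation matrix. Setting
# P[i, permutation[i]] = 1 gives (P W2 P.T)[i, j] = W2[permutation[i], permutation[j]],
# i.e. vertex i of graph1 is matched to vertex permutation[i] of graph2.
import numpy

permutation = numpy.array([2, 0, 1])
n = permutation.shape[0]
P = numpy.zeros((n, n))
P[(numpy.arange(n), permutation)] = 1

W2 = numpy.arange(9).reshape(3, 3)
assert (P.dot(W2).dot(P.T) == W2[permutation, :][:, permutation]).all()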
def next(self):
    X = self.XIterator.next()
    logging.debug("Learning on matrix with shape: " + str(X.shape) + " and " + str(X.nnz) + " non-zeros")

    if self.iterativeSoftImpute.weighted:
        # Compute row and col probabilities
        up, vp = SparseUtils.nonzeroRowColsProbs(X)
        nzuInds = up == 0
        nzvInds = vp == 0

        u = numpy.sqrt(1/(up + numpy.array(nzuInds, numpy.int)))
        v = numpy.sqrt(1/(vp + numpy.array(nzvInds, numpy.int)))
        u[nzuInds] = 0
        v[nzvInds] = 0

    if self.rhos is not None:
        self.iterativeSoftImpute.setRho(self.rhos.next())

    if not scipy.sparse.isspmatrix_csc(X):
        raise ValueError("X must be a csc_matrix not " + str(type(X)))

    # Figure out what lambda should be
    # PROPACK has problems with convergence
    Y = scipy.sparse.csc_matrix(X, dtype=numpy.float)
    U, s, V = ExpSU.SparseUtils.svdArpack(Y, 1, kmax=20)
    del Y
    #U, s, V = SparseUtils.svdPropack(X, 1, kmax=20)
    maxS = s[0]
    logging.debug("Largest singular value : " + str(maxS))

    (n, m) = X.shape

    if self.j == 0:
        self.oldU = numpy.zeros((n, 1))
        self.oldS = numpy.zeros(1)
        self.oldV = numpy.zeros((m, 1))
    else:
        oldN = self.oldU.shape[0]
        oldM = self.oldV.shape[0]

        if self.iterativeSoftImpute.updateAlg == "initial":
            if n > oldN:
                self.oldU = Util.extendArray(self.oldU, (n, self.oldU.shape[1]))
            elif n < oldN:
                self.oldU = self.oldU[0:n, :]

            if m > oldM:
                self.oldV = Util.extendArray(self.oldV, (m, self.oldV.shape[1]))
            elif m < oldM:
                self.oldV = self.oldV[0:m, :]
        elif self.iterativeSoftImpute.updateAlg == "zero":
            self.oldU = numpy.zeros((n, 1))
            self.oldS = numpy.zeros(1)
            self.oldV = numpy.zeros((m, 1))
        else:
            raise ValueError("Unknown SVD update algorithm: " + self.iterativeSoftImpute.updateAlg)

    rowInds, colInds = X.nonzero()

    gamma = self.iterativeSoftImpute.eps + 1
    i = 0

    self.iterativeSoftImpute.measures = numpy.zeros((self.iterativeSoftImpute.maxIterations, 4))

    while gamma > self.iterativeSoftImpute.eps:
        if i == self.iterativeSoftImpute.maxIterations:
            logging.debug("Maximum number of iterations reached")
            break

        ZOmega = SparseUtilsCython.partialReconstructPQ((rowInds, colInds), self.oldU*self.oldS, self.oldV)
        Y = X - ZOmega
        #Y = Y.tocsc()
        #del ZOmega
        Y = csarray(Y, storagetype="row")
        gc.collect()

        #os.system('taskset -p 0xffffffff %d' % os.getpid())

        if self.iterativeSoftImpute.svdAlg == "propack":
            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)
            newU, newS, newV = SparseUtils.svdPropack(L, k=self.iterativeSoftImpute.k, kmax=self.iterativeSoftImpute.kmax)
        elif self.iterativeSoftImpute.svdAlg == "arpack":
            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=False)
            newU, newS, newV = SparseUtils.svdArpack(L, k=self.iterativeSoftImpute.k, kmax=self.iterativeSoftImpute.kmax)
        elif self.iterativeSoftImpute.svdAlg == "svdUpdate":
            newU, newS, newV = SVDUpdate.addSparseProjected(self.oldU, self.oldS, self.oldV, Y, self.iterativeSoftImpute.k)
        elif self.iterativeSoftImpute.svdAlg == "rsvd":
            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
            newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
        elif self.iterativeSoftImpute.svdAlg == "rsvdUpdate":
            L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
            if self.j == 0:
                newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
            else:
                newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.qu, omega=self.oldV)
        elif self.iterativeSoftImpute.svdAlg == "rsvdUpdate2":
            if self.j == 0:
                L = LinOperatorUtils.sparseLowRankOp(Y, self.oldU, self.oldS, self.oldV, parallel=True)
                newU, newS, newV = RandomisedSVD.svd(L, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p, q=self.iterativeSoftImpute.q)
            else:
                # Need a linear operator which is U s V
                L = LinOperatorUtils.lowRankOp(self.oldU, self.oldS, self.oldV)
                Y = GeneralLinearOperator.asLinearOperator(Y, parallel=True)
                newU, newS, newV = RandomisedSVD.updateSvd(L, self.oldU, self.oldS, self.oldV, Y, self.iterativeSoftImpute.k, p=self.iterativeSoftImpute.p)
        else:
            raise ValueError("Unknown SVD algorithm: " + self.iterativeSoftImpute.svdAlg)

        if self.iterativeSoftImpute.weighted and i == 0:
            delta = numpy.diag((u*newU.T).dot(newU))
            pi = numpy.diag((v*newV.T).dot(newV))
            lmbda = (maxS/numpy.max(delta*pi))*self.iterativeSoftImpute.rho
            lmbdav = lmbda*delta*pi
        elif not self.iterativeSoftImpute.weighted:
            lmbda = maxS*self.iterativeSoftImpute.rho
            if i == 0:
                logging.debug("lambda: " + str(lmbda))
            lmbdav = lmbda

        newS = newS - lmbdav
        # Soft threshold
        newS = numpy.clip(newS, 0, numpy.max(newS))

        normOldZ = (self.oldS**2).sum()
        normNewZmOldZ = (self.oldS**2).sum() + (newS**2).sum() - 2*numpy.trace((self.oldV.T.dot(newV*newS)).dot(newU.T.dot(self.oldU*self.oldS)))

        # We can get newZ == oldZ in which case we break
        if normNewZmOldZ < self.tol:
            gamma = 0
        elif abs(normOldZ) < self.tol:
            gamma = self.iterativeSoftImpute.eps + 1
        else:
            gamma = normNewZmOldZ/normOldZ

        if self.iterativeSoftImpute.verbose:
            theta1 = (self.iterativeSoftImpute.k - numpy.linalg.norm(self.oldU.T.dot(newU), 'fro')**2)/self.iterativeSoftImpute.k
            theta2 = (self.iterativeSoftImpute.k - numpy.linalg.norm(self.oldV.T.dot(newV), 'fro')**2)/self.iterativeSoftImpute.k
            thetaS = numpy.linalg.norm(newS - self.oldS)**2/numpy.linalg.norm(newS)**2
            self.iterativeSoftImpute.measures[i, :] = numpy.array([gamma, theta1, theta2, thetaS])

        self.oldU = newU.copy()
        self.oldS = newS.copy()
        self.oldV = newV.copy()

        logging.debug("Iteration " + str(i) + " gamma=" + str(gamma))
        i += 1

    if self.iterativeSoftImpute.postProcess:
        # Add the mean vectors
        previousS = newS
        newU = numpy.c_[newU, numpy.array(X.mean(1)).ravel()]
        newV = numpy.c_[newV, numpy.array(X.mean(0)).ravel()]
        newS = self.iterativeSoftImpute.unshrink(X, newU, newV)

        # Note that this increases the rank of U and V by 1
        logging.debug("Difference in s after postprocessing: " + str(numpy.linalg.norm(previousS - newS[0:-1])))

    logging.debug("Number of iterations for rho=" + str(self.iterativeSoftImpute.rho) + ": " + str(i))

    self.j += 1

    return (newU, newS, newV)
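# A minimal dense sketch (illustrative only, not the iterator above) of the
# core SoftImpute step performed in each pass of the while loop: keep the
# observed entries of X, impute the rest from the current low-rank estimate,
# take an SVD of the filled matrix and soft-threshold its singular values by
# lambda = rho * (largest singular value of X). The helper name and the dense
# mask representation are assumptions made for the sake of a small example.
import numpy

def soft_impute_step(X, mask, U, s, V, rho):
    # mask is 1 on observed entries of X and 0 elsewhere
    Z = (U*s).dot(V.T)                          # current estimate U diag(s) V.T
    filled = X*mask + Z*(1 - mask)              # observed values kept, rest imputed
    newU, newS, newVt = numpy.linalg.svd(filled, full_matrices=False)
    lmbda = rho*numpy.linalg.svd(X*mask, compute_uv=False)[0]
    newS = newS - lmbda
    newS = numpy.clip(newS, 0, numpy.max(newS)) # soft threshold at zero
    return newU, newS, newVt.T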