def fit_mtl_proj_WFA(q_wfa, R, R_task_vec, H_P, task_id_vec, version, sparse):
    '''
    Fit task-specific WFAs (multi-tasking with projection).

    For every task id, the first slice ``H[0]`` of that task's Hankel data
    is decomposed by SVD; the leading ``R`` left singular vectors, scaled by
    their singular values, are collected. The collected matrices are then
    passed to ``q_wfa.fit_WFA`` together with ``H_P`` converted to an array.

    :param q_wfa: a Q_WFA instance (must expose ``fit_WFA``)
    :param R: rank kept from each per-task SVD, also forwarded to ``fit_WFA``
    :param R_task_vec: desired rank for each task, forwarded as ``R_tasks_vec``
    :param H_P: Hankel data, indexable by task id
    :param task_id_vec: iterable of task ids
    :param version: version of the Hankel matrix, forwarded unchanged
    :param sparse: if true, ``H[0]`` is decomposed with ``sparse_svd``;
        otherwise it is flattened with ``flatten_k`` and decomposed with
        ``np.linalg.svd``
    :return: tuple ``(result of q_wfa.fit_WFA, q_wfa)``
    '''
    projections = []
    for tid in task_id_vec:
        hankel = H_P[tid]
        if not sparse:
            left, sing, _ = np.linalg.svd(flatten_k(hankel[0], 0))
        else:
            left, sing, _ = sparse_svd(hankel[0], k=R)
        # Left singular vectors scaled by the matching singular values.
        projections.append(left[:, :R].dot(np.diag(sing[:R])))

    stacked = np.asarray(H_P)
    status = q_wfa.fit_WFA(
        stacked,
        R,
        task_id_vec=task_id_vec,
        R_tasks_vec=R_task_vec,
        P_vec=projections,
        version=version,
        sparse=sparse,
    )
    return status, q_wfa
def corona(self):
    """
    Iterate hub/authority vectors for 100 rounds and pair them with nodes.

    Each round:
      * sums ``hub`` and ``authority`` and divides by the maximum entry;
      * derives ``conj_factor`` (capped at 1.0) from the conjunction matrix
        returned by ``self.get_conjunctions()``;
      * builds ``combined`` from the up/down matrices scaled by
        ``make_diag(conj_factor)`` plus ``make_diag`` of a ones vector;
      * takes a k=4 ``sparse_svd`` of ``combined`` and uses
        ``np.dot(u, u[0])`` as the activation;
      * prints the activation, singular values and factor (debug output);
      * adds the activation, pushed through ``self._fast_matrix``, to
        ``hub`` and ``authority`` and rescales both by their maximum.

    NOTE(review): the source had lost its indentation; the rescaling of
    ``hub``/``authority`` is assumed to be inside the loop -- confirm.

    Returns ``zip(self.nodes, hub, authority)``.
    """
    conj_matrix = self.get_conjunctions()
    mat_up, mat_down = self.get_matrices()
    n_rows = mat_up.shape[0]
    hub = np.ones((n_rows,))
    authority = np.ones((n_rows,))

    for _round in xrange(100):
        vec = authority + hub
        vec /= np.max(vec)

        conj_sums = conj_matrix * vec
        inv_vec = 1.0 / np.maximum(EPS, vec)
        conj_par = 1.0 / (np.maximum(EPS, conj_matrix * inv_vec))
        conj_factor = np.minimum(1.0, conj_par / (conj_sums + EPS))
        conj_diag = make_diag(conj_factor)

        scaled = conj_diag * (mat_up + mat_down)
        combined = scaled + make_diag(np.ones(len(vec)))
        u, sigma, _ = sparse_svd(combined, k=4)
        activation = np.dot(u, u[0])

        # Debug output; a parenthesised single argument prints exactly like
        # the statement form, and print("") emits the same blank line.
        print(activation)
        print(sigma)
        print(conj_factor)
        print("")

        hub += self._fast_matrix.T * conj_diag * activation
        authority += conj_diag * self._fast_matrix * activation
        hub /= np.max(hub)
        authority /= np.max(authority)

    return zip(self.nodes, hub, authority)
def corona(self):
    """
    Run 100 hub/authority update rounds and return them zipped with nodes.

    Per round, the normalised sum of ``hub`` and ``authority`` is used to
    compute a per-row ``conj_factor`` (never above 1.0) from the matrix
    given by ``self.get_conjunctions()``. That factor, as a ``make_diag``
    matrix, scales the sum of the two matrices from ``self.get_matrices()``;
    ``make_diag`` of a ones vector is added, and a k=4 ``sparse_svd`` of the
    result yields ``activation = np.dot(u, u[0])``. The activation is
    propagated through ``self._fast_matrix`` (and its transpose) into
    ``authority`` and ``hub``, which are then divided by their maxima.
    Intermediate values are printed for debugging.

    NOTE(review): loop membership of the final rescaling lines was
    reconstructed from a source without indentation -- confirm.

    Returns ``zip(self.nodes, hub, authority)``.
    """
    conjunctions = self.get_conjunctions()
    mat_up, mat_down = self.get_matrices()
    size = mat_up.shape[0]
    hub = np.ones((size, ))
    authority = np.ones((size, ))

    for _pass in xrange(100):
        vec = authority + hub
        vec /= np.max(vec)

        conj_sums = conjunctions * vec
        reciprocal = 1.0 / np.maximum(EPS, vec)
        conj_par = 1.0 / (np.maximum(EPS, conjunctions * reciprocal))
        conj_factor = np.minimum(1.0, conj_par / (conj_sums + EPS))
        conj_diag = make_diag(conj_factor)

        weighted = conj_diag * (mat_up + mat_down)
        combined = weighted + make_diag(np.ones(len(vec)))
        u, sigma, _ = sparse_svd(combined, k=4)
        activation = np.dot(u, u[0])

        # Debug output (same text as the original print statements).
        print(activation)
        print(sigma)
        print(conj_factor)
        print("")

        hub += self._fast_matrix.T * conj_diag * activation
        authority += conj_diag * self._fast_matrix * activation
        hub /= np.max(hub)
        authority /= np.max(authority)

    return zip(self.nodes, hub, authority)
def corona(self):
    """
    Iterate an activation vector until it stabilises, then derive hub and
    authority vectors from it and pair them with ``self.nodes``.

    At most 100 rounds are run. Each round builds ``combined`` from the
    up/down matrices (scaled by the conjunction factor and 0.25) plus
    ``make_diag`` of a 0.5 vector, takes the k=1 ``sparse_svd`` and uses
    the first column of ``u`` -- sign-flipped so its sum is non-negative
    and divided by its absolute sum -- as the activation. The loop stops
    early once, from round 3 on, the relative change of the activation in
    this round plus that of the previous round drops below 1e-9.

    After the loop ``hub`` and ``authority`` are recomputed from the last
    activation only.

    NOTE(review): the source had lost its indentation. The trailing debug
    prints are assumed to be inside the loop and the final hub/authority
    assignment after it -- confirm.

    Returns ``zip(self.nodes, hub, authority)``.
    """
    conj_matrix = self.get_conjunctions()
    mat_up, mat_down = self.get_matrices()
    n_rows = mat_up.shape[0]
    hub = np.ones((n_rows, )) / n_rows / 100
    authority = np.ones((n_rows, )) / n_rows / 100
    prev_activation = np.zeros((n_rows, ))
    prev_err = 1.0

    for step in xrange(100):
        vec = authority + hub
        vec /= np.max(vec)

        conj_sums = conj_matrix * vec
        inv_vec = 1.0 / np.maximum(EPS, vec)
        conj_par = 1.0 / (np.maximum(EPS, conj_matrix * inv_vec))
        conj_factor = np.minimum(1.0, conj_par / (conj_sums + EPS))
        conj_diag = make_diag(conj_factor)

        scaled = conj_diag * (mat_up + mat_down) * 0.25
        combined = scaled + make_diag(np.ones(len(vec)) * 0.5)
        u, sigma, _ = sparse_svd(combined, k=1)

        # In-place on purpose: ``activation`` is a view of the first column.
        activation = u[:, 0]
        activation *= np.sign(np.sum(activation))
        activation /= (np.sum(np.abs(activation)) + EPS)

        hub += (hub + self._final_matrix_T * conj_diag * activation) / 2
        authority += (authority + conj_diag * self._final_matrix * activation) / 2
        print(activation)

        delta = np.max(np.abs(activation - prev_activation))
        err = delta / np.max(np.abs(activation))
        print(err)
        if step >= 3 and err + prev_err < 1e-9:
            print("converged on iteration %d" % step)
            break
        prev_err = err
        prev_activation = activation.copy()

        print(sigma)
        print(conj_factor)
        print("")

    hub = self._final_matrix_T * conj_diag * activation
    authority = conj_diag * self._final_matrix * activation
    return zip(self.nodes, hub, authority)
def corona(self):
    """
    Compute hub and authority vectors from a converged activation vector.

    Up to 100 rounds: the max-normalised sum of ``hub`` and ``authority``
    gives a conjunction factor (capped at 1.0); ``combined`` is the up/down
    matrix sum scaled by that factor and 0.25, plus ``make_diag`` of a 0.5
    vector; the first left vector of its k=1 ``sparse_svd`` is sign-fixed
    and divided by its absolute sum to form the activation. Iteration ends
    early when, from round 3 on, the current and previous relative changes
    together fall below 1e-9. Debug values are printed along the way.

    The returned ``hub``/``authority`` are computed from the final
    activation via ``self._final_matrix_T`` / ``self._final_matrix``.

    NOTE(review): block structure was reconstructed from a source without
    indentation; the trailing debug prints are assumed to be in the loop
    body and the final assignments after the loop -- confirm.

    Returns ``zip(self.nodes, hub, authority)``.
    """
    conjunctions = self.get_conjunctions()
    mat_up, mat_down = self.get_matrices()
    size = mat_up.shape[0]
    hub = np.ones((size,)) / size / 100
    authority = np.ones((size,)) / size / 100
    prev_activation = np.zeros((size,))
    prev_err = 1.0

    for round_no in xrange(100):
        vec = authority + hub
        vec /= np.max(vec)

        conj_sums = conjunctions * vec
        reciprocal = 1.0 / np.maximum(EPS, vec)
        conj_par = 1.0 / (np.maximum(EPS, conjunctions * reciprocal))
        conj_factor = np.minimum(1.0, conj_par / (conj_sums + EPS))
        conj_diag = make_diag(conj_factor)

        weighted = conj_diag * (mat_up + mat_down) * 0.25
        combined = weighted + make_diag(np.ones(len(vec)) * 0.5)
        u, sigma, _ = sparse_svd(combined, k=1)

        # Kept in-place: ``activation`` aliases the first column of ``u``.
        activation = u[:, 0]
        activation *= np.sign(np.sum(activation))
        activation /= (np.sum(np.abs(activation)) + EPS)

        hub += (hub + self._final_matrix_T * conj_diag * activation) / 2
        authority += (authority + conj_diag * self._final_matrix * activation) / 2
        print(activation)

        change = np.max(np.abs(activation - prev_activation))
        err = change / np.max(np.abs(activation))
        print(err)
        if round_no >= 3 and err + prev_err < 1e-9:
            print("converged on iteration %d" % round_no)
            break
        prev_err = err
        prev_activation = activation.copy()

        print(sigma)
        print(conj_factor)
        print("")

    hub = self._final_matrix_T * conj_diag * activation
    authority = conj_diag * self._final_matrix * activation
    return zip(self.nodes, hub, authority)
print 'userid = ',userid print 'KNN Type = ',KNNType with open(filepath+'User_'+userid+'/'+KNNType+"/KNN_"+userid+".out", 'r') as f: new_data = pickle.load(f) #Reading query, row values of the userid to which recommendation has to be done. with open(filepath+'User_'+userid+'/'+KNNType+'/'+userid+'.out', 'r') as f1: query = pickle.load(f1) query1 = np.array(query) svdInputMatrix = np.array(new_data,dtype=np.float) svdInputList= [] u,s,vt = sparse_svd(svdInputMatrix) print u.shape, s.shape, vt.shape print s #Calculating energy for row elimination energy = 0 for i in range(len(s)): energy = energy + (s[i]*s[i]) energy = (energy * 90)/100 print energy ######################################### V = np.transpose(vt) print V.shape
def svd(self, output):
    """
    Run an SVD on ``self.csc`` and build ``self.projectMatrix`` from it.

    ``self.csc`` is cast to float32 and decomposed with ``sparse_svd``,
    requesting only the left singular vectors and at most 100 components
    (never more than ``min(self.csc.shape) - 1``). ``self.csc`` is deleted
    afterwards. The stored matrix is ``inverse(diag(s)) . U^T`` as a CSR
    matrix.

    :param output: not used by this method.
    """
    n_components = min(100, min(self.csc.shape) - 1)
    left_vectors, singular_values, _ = sparse_svd(
        self.csc.astype(np.float32),
        k=n_components,
        return_singular_vectors='u',
    )
    del self.csc

    inverse_sigma = spares_inv(scipy.sparse.diags(singular_values))
    # Build the projection (mapping) matrix. It should be persisted.
    self.projectMatrix = scipy.sparse.csr_matrix(inverse_sigma.dot(left_vectors.T))
def fit_WFA(self, H, R, task_id_vec=[0], R_tasks_vec=None, P_vec=None, version='classic', sparse=False, return_P=False):
    '''
    General function for fitting the WFA.

    The Hankel data is decomposed by SVD, truncated to rank ``R``, and the
    WFA parameters (``alpha``, the transition matrices ``As`` and
    ``Omega``) are recovered through pseudo-inverses. One entry per task id
    is appended to ``self.alpha``, ``self.As`` and ``self.Omega``. When
    ``P_vec`` is given, ``self.convert_meta_task_Q_WFA`` is additionally
    called with deep copies of those lists.

    :param H: Hankel matrix (can be either meta or task-specific one)
    :param R: Desired rank of the SVD of H
    :param task_id_vec: the vector of task ids (only iterated, never mutated)
    :param R_tasks_vec: If executing multi_proj, set this parameter to desired task-specific rank
    :param P_vec: left singular vectors of each individual Hankel matrix corresponding to each task
    :param version: version of the Hankel matrix, currently only support 'classic'
        (not read inside this method)
    :param sparse: Boolean, if the Hankel matrix is in sparse matrix form
    :param return_P: if you want to return the singular vectors of the meta Hankel matrix
    :return: ``('Success', P)`` if ``return_P`` else ``('Success', [])``;
        ``('Failed', [])`` if any ``Exception`` is raised while fitting.
    '''
    try:
        if sparse:
            first_blocks = sps.hstack([H[task][0] for task in range(len(H))])
            U, D, VT = sparse_svd(first_blocks, k=R)
        else:
            H = np.array(H)
            H = H.transpose((1, 2, 0, 3))
            H_1 = flatten_k(H[0], 0)
            U, D, VT = np.linalg.svd(H_1)

        # Rank-R factorisation: P carries the singular values, S the right vectors.
        P = U[:, :R].dot(np.diag(D[:R]))
        S = VT[:R, :]
        P_inv = np.linalg.pinv(P)
        S_inv = np.linalg.pinv(S)
        alpha = P[0]

        if sparse:
            first_cols = sps.hstack([H[task][0][:, 0] for task in range(len(H))])
            Omega = P_inv @ first_cols
            As = []
            for sigma in range(1, len(H[0])):
                sigma_blocks = sps.hstack([H[task][sigma] for task in range(len(H))])
                As.append(P_inv @ sigma_blocks @ S_inv)
            As = np.array(As)
        else:
            Omega = P_inv @ H[0, :, :, 0]
            As = np.array([
                P_inv @ flatten_k(H[sigma], 0) @ S_inv
                for sigma in range(1, H.shape[0])
            ])

        for k in task_id_vec:
            self.alpha.append(alpha)
            self.As.append(As)
            self.Omega.append(Omega[:, k])

        if P_vec is not None:
            n_tasks = len(task_id_vec)
            alpha = copy.deepcopy(self.alpha)
            As = copy.deepcopy(self.As)
            Omega = copy.deepcopy(self.Omega)
            self.convert_meta_task_Q_WFA(alpha, As, Omega, n_tasks, P, P_vec, R_tasks_vec)
    except Exception:
        # Best-effort contract: any fitting error is reported as 'Failed'.
        # KeyboardInterrupt / SystemExit are deliberately NOT swallowed.
        return 'Failed', []

    if return_P:
        return 'Success', P
    return 'Success', []
query1 = np.array(query) svdInputMatrix = np.array(new_data,dtype=np.float) svdInputList= [] for i in range(len(svdInputMatrix)): correlation,p = pearsonr(query1,svdInputMatrix[i]) print "correlation = ", correlation if correlation > 0: svdInputList.append(svdInputMatrix[i]) #print array[1] svdInputMatrixWithPearson = np.array(svdInputList,dtype=np.float) print "new size after corelation check" print type(svdInputMatrixWithPearson) print svdInputMatrixWithPearson.shape u,s,vt = sparse_svd(svdInputMatrixWithPearson) "u,s,vt = sparse_svd(svdInputMatrix)" print u.shape, s.shape, vt.shape print s energy = 0 for i in range(len(s)): energy = energy + (s[i]*s[i]) energy = (energy * 90)/100 print energy V = np.transpose(vt) print V.shape print type(V) result = np.dot(query1,V)
# Pickle streams are binary: the files must be opened with 'rb'
# (pickle.load on a text-mode file fails on Python 3).
# NOTE(review): pickle.load executes arbitrary code from the file -- only
# use it on files this pipeline wrote itself.
with open(save_path + 'tf_idf.pkl', 'rb') as f:
    tf_idf = pickle.load(f)
with open(save_path + 'scores.pkl', 'rb') as f:
    scores = pickle.load(f)

if is_sparse:
    tf_idf = csc_matrix(tf_idf)
tf_idf_top = []
tf_idf = tf_idf.T

print('Doing svd...')
if is_sparse:
    # NOTE(review): the result is transposed below, so this sparse_svd
    # presumably returns U^T first -- confirm which implementation it is.
    U, D, V = sparse_svd(tf_idf, min(tf_idf.shape))  #, k = min(PPMI.shape) - 1)
    U = U.T
else:
    U, D, V = svd(tf_idf, full_matrices=False)
print('finished doing svd.')

# Scale each left singular vector by the square root of its singular value.
embeddings_U_tot = U * np.sqrt(D)
tot_l = 1
tot_u = min(vocabulary_size, D.shape[0])
for i in range(tot_l, tot_u):
    print(i)
    # Keep the first i columns of the scaled left singular vectors.
    keep_dim = i
    keep_dims = np.arange(keep_dim)
    embeddings_U = embeddings_U_tot[:, keep_dims]