def main():
    """Command-line entry point for RandNE.

    Parses the command-line options, reads the matrix stored under
    ``--matfile-variable-name`` from the ``--input`` .mat file, optionally
    rescales each row by the reciprocal of its row sum
    (``--use-trans-matrix``), runs ``randne_projection`` followed by
    ``randne_merge``, and saves the result to ``--output`` under the
    key ``'emb'``.
    """
    cli = ArgumentParser('randne',
                         formatter_class=ArgumentDefaultsHelpFormatter,
                         conflict_handler='resolve')
    cli.add_argument('--input', nargs='?', required=True,
                     help='Input graph file')
    cli.add_argument(
        '--matfile-variable-name', default='network',
        help='Variable name of adjacency matrix inside a .mat file')
    cli.add_argument('--output', required=True,
                     help='Output representation file')
    cli.add_argument(
        '--use-trans-matrix', default=False, action='store_true',
        help=('The input matrix for RandNE. Adjacency matrix is used by '
              'default; set this flag to use the transition matrix instead.'))
    cli.add_argument('-q', '--order', default=3, type=int,
                     help='Maximum order of adjacency matrix.')
    cli.add_argument(
        '-d', '--representation-size', default=128, type=int,
        help='Number of latent dimensions to learn for each node.')
    cli.add_argument(
        '--weights', nargs='+', required=True,
        help='Weights for each power of the adjacency matrix (or transition matrix).')
    opts = cli.parse_args()

    # Pull the requested variable out of the .mat file.
    adjacency = loadmat(opts.input)[opts.matfile_variable_name]

    if not opts.use_trans_matrix:
        input_mat = adjacency
    else:
        # Left-multiply by diag(1 / row sum) to rescale every row.
        num_nodes = adjacency.shape[0]
        inv_row_sums = np.squeeze(1.0 / csc_matrix.sum(adjacency, axis=1))
        input_mat = spdiags(inv_row_sums, 0, num_nodes, num_nodes) @ adjacency

    # Weights arrive as strings from argparse; convert before the heavy work
    # so a malformed value fails early.
    weights = [float(w) for w in opts.weights]

    # Run RandNE and persist the merged embedding.
    projections = randne_projection(input_mat,
                                    q=opts.order,
                                    dim=opts.representation_size)
    embedding = randne_merge(projections, weights)
    savemat(opts.output, {'emb': embedding})
def fastrp_projection(A, q=3, dim=128, projection_method='gaussian',
                      input_matrix='adj', alpha=None):
    """Randomly project a graph matrix and its successive powers.

    Args:
        A: Square sparse matrix (used as the adjacency matrix).
        q (int): Highest power to project. The returned list holds the
            projections for powers 1..q (always at least one entry).
        dim (int): Number of components of the random projection.
        projection_method (str): ``'gaussian'`` or ``'sparse'``; selects
            ``GaussianRandomProjection`` or ``SparseRandomProjection``.
        input_matrix (str): ``'adj'`` to use ``A`` as is, ``'trans'`` to use
            ``diag(1 / row sum) @ A``.
        alpha (float or None): When given, the fitted projection matrix is
            right-multiplied by ``diag(row_sum(A) ** alpha)`` before it is
            applied.

    Returns:
        list: ``[U_1, ..., U_q]`` where ``U_1 = transformer.transform(M)``
        and ``U_i = M @ U_{i-1}``.

    Raises:
        AssertionError: If ``input_matrix`` or ``projection_method`` is not
            one of the accepted values.
    """
    assert input_matrix in ('adj', 'trans')
    assert projection_method in ('gaussian', 'sparse')

    # N is needed by the alpha re-weighting below regardless of the input
    # matrix type; previously it was only defined in the 'trans' branch,
    # so input_matrix='adj' together with alpha raised NameError.
    N = A.shape[0]

    if input_matrix == 'adj':
        M = A
    else:
        normalizer = spdiags(
            np.squeeze(1.0 / csc_matrix.sum(A, axis=1)), 0, N, N)
        M = normalizer @ A

    # Fixed random_state keeps the projection reproducible across runs.
    if projection_method == 'gaussian':
        transformer = random_projection.GaussianRandomProjection(
            n_components=dim, random_state=42)
    else:
        transformer = random_projection.SparseRandomProjection(
            n_components=dim, random_state=42)

    # Fit generates the projection matrix (components_). The components_
    # update below is expected to affect `transformer` as well, since
    # scikit-learn's fit() returns the estimator itself.
    Y = transformer.fit(M)

    if alpha is not None:
        # Re-weight the columns of the projection matrix by row_sum(A)**alpha.
        Y.components_ = Y.components_ @ spdiags(
            np.squeeze(np.power(csc_matrix.sum(A, axis=1), alpha)), 0, N, N)

    # First-order projection, then repeatedly left-multiply by M.
    cur_U = transformer.transform(M)
    U_list = [cur_U]
    for _ in range(2, q + 1):
        cur_U = M @ cur_U
        U_list.append(cur_U)
    return U_list
def RefineBound(S_ori, S_add, Loss_ori, K):
    """Calculate the updated objective function (loss) bound.

    Loss_Bound = Loss_ori + trace_change(S x S^T) - eigs(delta(S x S^T), K)

    Args:
        S_ori (Sparse Matrix): Scipy sparse original adjacency matrix
        S_add (Sparse Matrix): Scipy sparse added adjacency matrix
        Loss_ori (float): Original loss value
        K (int): Embedding dimension

    Returns:
        Float: Newly calculated loss bound
    """
    # Calculate the trace change
    S_overlap = (S_add != 0).multiply(S_ori)
    S_temp = S_add + S_overlap
    trace_change = (
        S_temp.multiply(S_temp) - S_overlap.multiply(S_overlap)).sum()

    # Calculate eigenvalues sum of delta(S * S^T)
    S_temp = S_ori.dot(S_add)
    S_temp = S_temp + S_temp.transpose() + S_add.dot(S_add)

    n = S_temp.shape[0]
    num_eigs = min(2 * K, n)
    if num_eigs < n - 1:
        # Only the eigenvalues are used, so skip computing eigenvectors.
        temp_eigs = eigs(S_temp, num_eigs, return_eigenvectors=False)
    else:
        # ARPACK (eigs) requires k < N - 1 and refuses sparse input
        # otherwise; for such small matrices a dense solve is cheap.
        temp_eigs = np.linalg.eigvals(S_temp.toarray())

    temp_eigs = np.absolute(temp_eigs)
    # Magnitudes are never negative, so this only discards NaN entries.
    temp_eigs = temp_eigs[temp_eigs >= 0]
    # Largest magnitudes first; keep at most the number originally requested.
    temp_eigs = np.sort(temp_eigs)[::-1][:num_eigs]

    if len(temp_eigs) >= K:
        eigen_sum = np.sum(temp_eigs[:K])
    else:
        # Fewer than K eigenvalues available: pad the sum by repeating the
        # smallest one for each missing position.
        temp_l = len(temp_eigs)
        eigen_sum = np.sum(temp_eigs) + temp_eigs[temp_l - 1] * (K - temp_l)
    return Loss_ori + trace_change - eigen_sum
# Cluster count and convergence threshold are taken from the command line.
K = int(sys.argv[2])
convergeDist = float(sys.argv[3])

# Draw K starting centres from the data (presumably a Spark RDD; the
# takeSample arguments look like withReplacement=False, num=K, seed=1 —
# confirm against the RDD API).
kPoints = data.repartition(1).takeSample(False, K, 1)
tempDist = 1.0

# Iterate until the total squared movement of the centres drops to the
# threshold or below.
while tempDist > convergeDist:
    # Tag every point with the index returned by closestPoint.
    closest = data.map(
        lambda p: (closestPoint(p, kPoints), (p, 1)))
    # Per centre index: element-wise sum of points and number of points.
    pointStats = closest.reduceByKey(
        lambda p1_c1, p2_c2: (p1_c1[0] + p2_c2[0], p1_c1[1] + p2_c2[1]))
    # New centre = sum of assigned points / number of assigned points.
    newPoints = pointStats.map(
        lambda st: (st[0], st[1][0] / st[1][1])).collect()
    tempDist = sum(
        csc_matrix.sum((kPoints[iK] - p).power(2)) for (iK, p) in newPoints)
    for (iK, p) in newPoints:
        kPoints[iK] = p

# Number of non-zero entries in each final centre.
count = [len(point.nonzero()[0]) for point in kPoints]

# Write one count per line; the context manager guarantees the file is
# flushed and closed (the handle was previously never closed).
with open(sys.argv[4], "w") as opfile:
    opfile.writelines("%s\n" % c for c in count)

sc.stop()
def chol_approx(S, epsilon, ra):
    """Build a d x ra factor G column by column with pivoting.

    The entries come from ``Q_kj(mu, Var, mean_S, n, k, j)``; judging by
    the name this is an approximate (incomplete) Cholesky factorisation of
    the matrix whose (k, j) entry is ``Q_kj(...)`` — confirm against Q_kj.

    Args:
        S: Matrix of shape (n, d). It must support ``mean(0)``,
            ``transpose().dot`` and ``csc_matrix.sum`` (presumably a scipy
            sparse matrix).
        epsilon (float): Relative tolerance; it is multiplied by the sum
            of the diagonal values ``Q_kj(..., i, i)`` to obtain the
            stopping threshold.
        ra (int): Number of columns of G, i.e. the maximum number of
            steps.

    Returns:
        numpy.ndarray: G with its rows reordered by ``argsort(perm)``,
        where ``perm`` records the pivot swaps.
    """
    mu = S.mean(0)
    n, d = S.shape
    mean_S = csc_matrix.sum(S, 0)
    Var = S.transpose().dot(S)
    G = np.zeros((d, ra))

    # Diagonal values. Collected in a list: a numpy array has no append(),
    # so the previous `np.array([])` accumulator raised AttributeError.
    Qjj = []
    for i in range(d):
        start_time = timeit.default_timer()
        Qjj.append(Q_kj(mu, Var, mean_S, n, i, i))
        end_time = timeit.default_timer()
        print('The time used is')
        print(end_time - start_time)

    # Turn the relative tolerance into an absolute threshold.
    epsilon = epsilon * sum(Qjj)

    perm = list(range(d))
    ind = 0  # number of columns processed so far without being skipped
    for i in range(ra):
        # Residual diagonal: original diagonal minus the squares of the
        # entries already computed in the same row.
        for j in range(i, d):
            G[j, i] = Qjj[perm[j]]
            for m in range(i):
                G[j, i] = G[j, i] - G[j, m] * G[j, m]

        sump = np.sum(G[i:d, i])
        if ind == 0 and sump > epsilon and G[i, i] == 0:
            # Zero leading entry before any column was accepted: blank
            # this column and move on.
            G[i:d, i] = np.zeros((d - i, ))
            continue
        else:
            ind += 1

        if sump > epsilon:
            val = G[i:d, i].max()
            idx = G[i:d, i].argmax()
            if abs(val) < 1e-8:
                break
            idx = idx + i
            # The first accepted column keeps its own position as pivot.
            if ind == 1:
                idx = i

            # Swap the pivot into position i, both in the permutation and
            # in the already computed columns of G.
            perm[i], perm[idx] = perm[idx], perm[i]
            for m in range(i):
                G[idx, m], G[i, m] = G[i, m], G[idx, m]

            if ind == 1:
                G[i, i] = np.sqrt(G[i, i])
            else:
                G[i, i] = np.sqrt(val)

            # Fill the rest of column i below the diagonal.
            for m in range(i + 1, d):
                start_time = timeit.default_timer()
                G[m, i] = Q_kj(mu, Var, mean_S, n, perm[m], perm[i])
                G[m, i] -= np.dot(G[m, 0:i], G[i, 0:i])
                end_time = timeit.default_timer()
                print('The time used are')
                print(end_time - start_time)
            G[i + 1:d, i] = G[i + 1:d, i] / G[i, i]
        else:
            # Residual is at or below the threshold: stop adding columns.
            break

    # Undo the pivoting so that the rows follow the original ordering.
    per = np.argsort(perm)
    return G[per, :]
n, d = X_train_n.shape # X_train_n = sparse.hstack((X_train_n, np.ones((n, 1)))).tocsc() # print(1) # print(sparse.isspmatrix_csc(X_train_n)) X_train_p1 = matfile['data_tra_DFO'].X.toarray() # positive training X_train_n1 = matfile['data_tra_DFO'].Y.toarray() # negative training print(sparse.isspmatrix_csc(X_train_n)) CC = np.mean(X_train_n1, 0) mu = X_train_n.mean(0) k = 2 j = 3 S = X_train_n [n, d] = S.shape print(S.shape) mean_S = csc_matrix.sum(S, 0) print(mean_S.shape) print(type(mean_S)) print(type(S[:, 2])) start_time = timeit.default_timer() S[:, k].T * S[:, j] end_time = timeit.default_timer() print('1', end_time - start_time) start_time = timeit.default_timer() csc_matrix(mu[0, j] * np.ones((n, ))) * S[:, k] end_time = timeit.default_timer() print('2', end_time - start_time) start_time = timeit.default_timer()