def sparsedataload(data_set_name, train_test_validate_designation, data_file_extension, native_binary=True):
    if native_binary:
        loaded_sparse_data = AdamMesser.ImportSparse(data_set_name,
                                                     settype=train_test_validate_designation,
                                                     binary_formatted_input_file=True)
    else:
        # NOTE: data_file_name is built but never passed on; ImportSparse
        # presumably resolves the file from the name and set type itself.
        data_file_name = data_set_name + '_' + train_test_validate_designation + '.' + data_file_extension
        loaded_sparse_data = AdamMesser.ImportSparse(data_set_name,
                                                     settype=train_test_validate_designation)
    return loaded_sparse_data.todense()
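
# Minimal usage sketch (the dataset name and extension are hypothetical;
# assumes the AdamMesser importer used above is importable):
#
#   dense_train = sparsedataload('uci_adult', 'train', 'txt', native_binary=False)
#   print(dense_train.shape)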
def print_matrix(H):
    """Print a matrix, densifying it first if it is sparse."""
    if isinstance(H, csc_matrix):
        print(H.todense())
    else:
        print(H)
    return 0
def cosine_dis(word1, word2):
    """
    Calculate the cosine distance between two words, compared as
    character-level count vectors (scipy's `cosine` returns
    1 - cosine similarity, i.e. a distance).

    :param word1: (str) First word.
    :param word2: (str) Second word.
    :return: (float) Cosine distance in [0, 1] for count vectors.
    """
    from scipy.spatial.distance import cosine
    from sklearn.feature_extraction.text import CountVectorizer

    vectorizer = CountVectorizer(analyzer='char')
    word_vec = vectorizer.fit_transform([word1, word2])
    word_vec = word_vec.toarray()  # cosine() expects 1-D arrays, not matrix rows
    word_dis = cosine(word_vec[0], word_vec[1])
    return word_dis
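
# Example: character counts ignore order, so anagrams coincide, while words
# sharing no characters are maximally distant.
#
#   >>> round(cosine_dis('cat', 'tac'), 6)
#   0.0
#   >>> round(cosine_dis('abc', 'xyz'), 6)
#   1.0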
def main(args):
    dataset = args.dataset
    emb_output_dir = args.output
    epochs = args.epochs
    agg = args.agg
    p = args.p
    tr = args.tr
    lam = args.lam
    loss_func = args.loss

    # Preprocess dataset.
    adj, views_features = load_data(dataset, num_views=3)
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    # Calculate pairwise similarity.
    views_sim_matrix = {}
    views_feature_matrix = {}
    for view in list(views_features.keys()):
        feature_matrix = views_features[view].todense()
        views_feature_matrix.update({view: feature_matrix})

    kernel = "rbf"
    if loss_func == 'all':
        attr_sim = cal_attr_sim(views_feature_matrix, dataset)
    else:
        attr_sim = 0

    # Split nodes into train, validation and test sets,
    # removing test edges from the training adjacency matrix.
    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(dataset, adj)
    print("Masking edges Done!")
    adj = adj_train
    nx_G = nx.from_numpy_array(adj.toarray())
    num_nodes = adj.shape[0]
    adj_norm = preprocess_graph(adj)

    views_features_num = {}
    views_features_nonzero = {}
    for view in list(views_features.keys()):
        views_features[view] = sparse_to_tuple(views_features[view].tocoo())
        views_features_num.update({view: views_features[view][2][1]})
        views_features_nonzero.update({view: views_features[view][1].shape[0]})

    # Build one GAE per view.
    MagCAE = {}
    for view in list(views_features.keys()):
        x, y = views_features[view][2][0], views_features[view][2][1]
        model = GAE(y, views_features_nonzero[view], adj_norm, math.ceil(2 * p * y), math.ceil(p * y))
        MagCAE.update({view: model})

    # Loss function and optimizer.
    # pos_weight balances the loss contribution of positive (edge) entries;
    # norm follows the standard GAE normalization N^2 / (2 * #negative entries).
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
    optimizer = tf.keras.optimizers.Adam()

    adj_targ = adj_train + sp.eye(adj_train.shape[0])
    adj_targ = sparse_to_tuple(adj_targ)
    indices = np.array(adj_targ[0])
    values = np.array(adj_targ[1])
    dense_shape = np.array(adj_targ[2])
    sparse_targ = tf.SparseTensor(indices=indices, values=values, dense_shape=dense_shape)
    sparse_targ = tf.cast(sparse_targ, dtype=tf.float32)
    adj_targ = tf.sparse.to_dense(sparse_targ)
    adj_targ = tf.reshape(adj_targ, [-1])

    # Training loop. In each epoch:
    # views -> view embeddings -> aggregated embedding -> total loss -> gradient update.
    decoder = Decoder(100)
    for epoch in range(epochs):
        loss = 0
        start = time.time()
        with tf.GradientTape() as tape:
            ag_embedding = {}
            for VAE in list(MagCAE.keys()):
                v_embedding, a_hat = MagCAE[VAE](views_features[VAE])
                ag_embedding.update({VAE: v_embedding})

            # Aggregate the per-view embeddings.
            embedding, aggregator = aggregate_embeddings(ag_embedding, agg)

            # Reconstruct a_hat from the aggregated embedding.
            a_hat = decoder(embedding)
            loss += loss_function(a_hat, adj_targ, pos_weight, norm, attr_sim, embedding, num_nodes, lam, loss_func)

        variables = (MagCAE['view1'].trainable_variables
                     + MagCAE['view2'].trainable_variables
                     + MagCAE['view3'].trainable_variables)
        if agg == "weighted_concat":
            variables = variables + aggregator.trainable_variables
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))

        # Evaluate on the validation set.
        embedding = np.array(embedding)
        roc_cur, ap_cur, _, _ = evaluate(val_edges, val_edges_false, adj_orig, embedding)
        print("Epoch {}: Val_Roc {:.4f}, Val_AP {:.4f}, Time Consumed {:.2f} sec\n".format(
            epoch + 1, roc_cur, ap_cur, time.time() - start))

    print("Training Finished!")

    # Evaluation result on test edges.
    test_embedding = {}
    for VAE in list(MagCAE.keys()):
        v_embedding, a_hat = MagCAE[VAE](views_features[VAE])
        test_embedding.update({VAE: v_embedding})

    # Aggregate the per-view embeddings.
    embedding, aggregator = aggregate_embeddings(test_embedding, agg)
    embedding = np.array(embedding)  # embedding is a tensor; convert to an np array.

    test_roc, test_ap, fpr, tpr = evaluate(test_edges, test_edges_false, adj_orig, embedding)
    print("MagCAE test result on {}".format(dataset))
    print("Test Roc: {}, Test AP: {}, P: {}, Training Ratio: {}, Lambda: {}.".format(test_roc, test_ap, p, tr, lam))
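
# Hypothetical entry point: a sketch only. The flag names mirror how main()
# reads its arguments above, but the defaults are illustrative, not the
# authors' settings.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Train MagCAE on a multi-view graph dataset.')
    parser.add_argument('--dataset', type=str, required=True)
    parser.add_argument('--output', type=str, default='emb/')
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--agg', type=str, default='weighted_concat')
    parser.add_argument('--p', type=float, default=0.5)
    parser.add_argument('--tr', type=float, default=0.85)
    parser.add_argument('--lam', type=float, default=1.0)
    parser.add_argument('--loss', type=str, default='all')
    main(parser.parse_args())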
# NOTE: the first assignment below was truncated in the original; the source
# array name img_hr is an assumption, reconstructed from the slicing pattern
# of the neighboring patches.
patch_hr = img_hr[nrlu1:nrlu1 + patch_size_hr - 1,
                  nclu1:nclu1 + patch_size_hr - 1]
patch_feat = feat_lr[nrlu2:nrlu2 + patch_size_feat - 1,
                     nclu2:nclu2 + patch_size_feat - 1]
aggr_times_local = aggr_time[nrlu1:nrlu1 + patch_size_hr - 1,
                             nclu1:nclu1 + patch_size_hr - 1]

idx_nnz = (patch_hr[:] != 0)
local_mean = np.mean(patch_hr[idx_nnz])
patch_hr_data = (patch_hr[:] - local_mean) / dim_hr
patch_feat = patch_feat[:] / dim_feat
patch_data = [patch_hr_data[idx_nnz], patch_feat]
dict_temp = np.concatenate(
    (dictionary.dict_hr[idx_nnz, :], dictionary.dict_lr), axis=1)

if params.train_method == 'omp':
    alpha = spams.omp(patch_data, dict_temp)  # params.solve_param)
elif params.train_method == 'lasso':
    alpha = spams.lasso(patch_data, D=dict_temp)

patch_recov = dictionary.dict_hr * alpha.todense()
patch_recov = np.reshape(patch_recov,
                         (patch_size_hr, patch_size_hr)) * dim_hr + local_mean

# Assumed running-average blend over overlapping patches: the new recovery is
# averaged against what is already there, weighted by how many times each
# pixel has been written so far.
patch_recov = np.divide(patch_recov + np.multiply(patch_hr, aggr_times_local),
                        1 + aggr_times_local)
aggr_time[nrlu1:nrlu1 + patch_size_hr - 1,
          nclu1:nclu1 + patch_size_hr - 1] = aggr_times_local + 1
img_out[nrlu1:nrlu1 + patch_size_hr - 1,
        nclu1:nclu1 + patch_size_hr - 1] = patch_recov
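
# Scalar sanity check of the running-average blend assumed above
# (values are illustrative only):
#
#   >>> old, new, seen = 10.0, 12.0, 3
#   >>> (new + old * seen) / (1 + seen)
#   10.5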
def bose_Hamiltonian(**args):
    # The Hamiltonian needs as input:
    # ...... parameter: DIM_H
    if COMM.rank == 0:
        DIM_H = int(args.get("DIM_H"))
        ll = args.get("ll")
        nn = args.get("nn")

    # The Hamiltonian returns a sparse or dense matrix; SPARSE by default.
    mat_type = args.get("mat_type")
    if mat_type is None:
        mat_type = 'Sparse'

    # ............... MPI version
    if COMM.rank == 0:
        jobs = list(range(DIM_H))
        jobs = split(jobs, COMM.size)
    else:
        jobs = None

    jobs = COMM.scatter(jobs, root=0)

    XX = []
    YY = []
    AA = []
    for i in jobs:
        res = ham.evaluate_ham(i, **args)
        XX.append(res[0])
        YY.append(res[1])
        AA.append(res[2])

    XX0 = MPI.COMM_WORLD.gather(XX, root=0)
    YY0 = MPI.COMM_WORLD.gather(YY, root=0)
    AA0 = MPI.COMM_WORLD.gather(AA, root=0)

    if COMM.rank == 0:
        # Flatten the per-rank lists of lists, twice (rank -> job -> entries).
        X0 = [item for sublist in XX0 for item in sublist]
        Y0 = [item for sublist in YY0 for item in sublist]
        A0 = [item for sublist in AA0 for item in sublist]

        X1 = [item for sublist in X0 for item in sublist]
        Y1 = [item for sublist in Y0 for item in sublist]
        A1 = [item for sublist in A0 for item in sublist]

        Hamiltonian = csc_matrix((A1, (X1, Y1)), shape=(DIM_H, DIM_H), dtype=np.double)

        ff.print_matrix(Hamiltonian)

        if mat_type == 'Dense':
            Hamiltonian = Hamiltonian.todense()

        return Hamiltonian
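
# Hypothetical usage (keyword names follow the args.get(...) lookups above;
# launch under MPI, e.g. `mpirun -n 4 python this_script.py`):
#
#   H = bose_Hamiltonian(DIM_H=dim_h, ll=ll, nn=nn, mat_type='Sparse')
#
# Only rank 0 returns the assembled csc_matrix; all other ranks return None.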
train_y = UCIDataWrangling.textdataload(data_set_name_to_use, 'train', 'labels')
valid_y = UCIDataWrangling.textdataload(data_set_name_to_use, 'valid', 'labels')

from sklearn.random_projection import johnson_lindenstrauss_min_dim as jlmd

jlmd_start_time = default_timer()
num_samples = dense_trainData.shape[0]
train_number_of_features = dense_trainData.shape[1]
valid_number_of_features = dense_validData.shape[1]
if train_number_of_features != valid_number_of_features:
    num_features = min(dense_trainData.shape[1], dense_validData.shape[1])
    print('\n' + 'Analyzing how many of {0} possible features to keep out of training data set.'.format(num_features))
    if train_number_of_features < valid_number_of_features:
        # Trim the validation data down to the training feature count.
        dense_validData = matrix_delete(dense_validData, numpy.s_[train_number_of_features:valid_number_of_features], axis=1)
        temp_validData = sparse_validData.todense()
        temp_validData = matrix_delete(temp_validData, numpy.s_[train_number_of_features:valid_number_of_features], axis=1)
        sparse_validData = csc_matrix(temp_validData)
    else:
        # Trim the training data down to the validation feature count.
        dense_trainData = matrix_delete(dense_trainData, numpy.s_[valid_number_of_features:train_number_of_features], axis=1)
        temp_trainData = sparse_trainData.todense()
        temp_trainData = matrix_delete(temp_trainData, numpy.s_[valid_number_of_features:train_number_of_features], axis=1)
        sparse_trainData = csc_matrix(temp_trainData)
else:
    num_features = train_number_of_features

# Re-establish how many features are possible prior to running the Johnson-Lindenstrauss bound.
train_number_of_features = dense_trainData.shape[1]
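
# The bound imported above: johnson_lindenstrauss_min_dim(n_samples, eps)
# returns the minimum number of random-projection components that preserves
# pairwise distances within a (1 +/- eps) factor. Hypothetical call (the eps
# value is an illustrative choice, not taken from this script):
#
#   >>> jlmd(n_samples=num_samples, eps=0.1)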
def bose_Hamiltonian_parity_fast(**args):
    DIM_H = int(args.get("DIM_H"))
    BASE_bin = args.get("BASE_bin")
    BASE_bose = args.get("BASE_bose")
    mat_type = args.get("mat_type")
    b_p_inp = args.get("parity_index")
    b_p = np.asarray(b_p_inp)
    len_sym = args.get("sim_sec_len")
    len_b_p = len(b_p)
    len_asym = DIM_H - len_sym

    X0_s = []
    Y0_s = []
    A0_s = []
    X0_a = []
    Y0_a = []
    A0_a = []

    for i in range(len_b_p):
        if b_p[i, 0] < 0:
            continue

        X_1, Y_1, A_1 = ham.evaluate_ham(b_p[i, 1], **args)
        X_2, Y_2, A_2 = ham.evaluate_ham(b_p[i, 2], **args)

        X = [item for sublist in [X_1, X_2] for item in sublist]
        Y = [item for sublist in [Y_1, Y_2] for item in sublist]
        A = [item for sublist in [A_1, A_2] for item in sublist]

        # .... SYM SEC
        for j in range(len(A)):
            state_X_0 = BASE_bin[X[j]]
            state_Y_0 = BASE_bin[Y[j]]
            state_X_rev = state_X_0[::-1]
            state_Y_rev = state_Y_0[::-1]

            ind_X = X[j]
            ind_X_rev = ff.get_index(state_X_rev, **args)
            ind_Y = Y[j]
            ind_Y_rev = ff.get_index(state_Y_rev, **args)

            ind_col_X = b_p[min(ind_X, ind_X_rev), 0]
            ind_col_Y = b_p[min(ind_Y, ind_Y_rev), 0]

            coef_s = 2
            if ind_X == ind_X_rev:
                ind_col_X = b_p[ind_X, 0]
                coef_s = 2 * np.sqrt(2)
            X0_s.append(ind_col_X)

            if ind_Y == ind_Y_rev:
                ind_col_Y = b_p[ind_Y, 0]
                coef_s = 2 / np.sqrt(2)
                if ind_X == ind_X_rev:
                    coef_s = 2
            Y0_s.append(ind_col_Y)
            A0_s.append(A[j] / coef_s)

        # .... A_SYM SEC
        for j in range(len(A_1)):
            state_X_0 = BASE_bin[X[j]]
            state_Y_0 = BASE_bin[Y[j]]
            state_X_rev = state_X_0[::-1]
            state_Y_rev = state_Y_0[::-1]

            ind_X = X[j]
            ind_X_rev = ff.get_index(state_X_rev, **args)
            ind_Y = Y[j]
            ind_Y_rev = ff.get_index(state_Y_rev, **args)

            ind_col_X = b_p[min(ind_X, ind_X_rev), 3]
            ind_col_Y = b_p[min(ind_Y, ind_Y_rev), 3]

            if Y[j] > ind_Y_rev:
                coef_a = -1
            elif Y[j] < ind_Y_rev:
                coef_a = 1
            else:
                continue

            X0_a.append(ind_col_X + len_sym)
            Y0_a.append(ind_col_Y + len_sym)
            A0_a.append(A[j] * coef_a)

    X = [item for sublist in [X0_a, X0_s] for item in sublist]
    Y = [item for sublist in [Y0_a, Y0_s] for item in sublist]
    A = [item for sublist in [A0_a, A0_s] for item in sublist]

    Hamiltonian = csc_matrix((A, (X, Y)), shape=(DIM_H, DIM_H), dtype=np.double)
    # Hamiltonian_sym = csc_matrix((A0_s, (X0_s, Y0_s)), shape=(len_sym, len_sym), dtype=np.double)
    # Hamiltonian_asym = csc_matrix((A0_a, (X0_a, Y0_a)), shape=(len_asym, len_asym), dtype=np.double)

    if mat_type == 'Dense':
        Hamiltonian = Hamiltonian.todense()
        # Hamiltonian_sym = Hamiltonian_sym.todense()
        # Hamiltonian_asym = Hamiltonian_asym.todense()

    # ff.print_matrix(Hamiltonian)
    # ff.print_matrix(Hamiltonian_sym)
    # ff.print_matrix(Hamiltonian_asym)

    return Hamiltonian  # optionally also Hamiltonian_sym, Hamiltonian_asym
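
# Hypothetical usage (keyword names mirror the args.get(...) lookups above;
# parity_index is the symmetric/antisymmetric bookkeeping table built
# elsewhere in this repo):
#
#   H = bose_Hamiltonian_parity_fast(DIM_H=dim_h, BASE_bin=base_bin,
#                                    BASE_bose=base_bose, mat_type='Sparse',
#                                    parity_index=b_p_table, sim_sec_len=n_sym)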