def build_graph(cls, args):
    number_edges = args.number_edges
    metric = args.metric
    normalized_laplacian = args.normalized_laplacian
    coarsening_levels = args.coarsening_levels

    def grid_graph(m, corners=False):
        z = graph.grid(m)
        # Compute pairwise distances to the k nearest neighbors.
        dist, idx = graph.distance_sklearn_metrics(z, k=number_edges, metric=metric)
        A = graph.adjacency(dist, idx)  # build the adjacency matrix

        # Connections are only vertical or horizontal on the grid.
        # Corner vertices are connected to 2 neighbors only.
        if corners:
            A = A.toarray()
            A[A < A.max() / 1.5] = 0
            A = scipy.sparse.csr_matrix(A)
            print('{} edges'.format(A.nnz))

        print("{} > {} edges".format(A.nnz // 2, number_edges * m**2 // 2))
        return A

    g = grid_graph(28, corners=False)
    g = graph.replace_random_edges(g, 0)
    graphs, perm = coarsening.coarsen(g, levels=coarsening_levels, self_connections=False)
    # Use the flag read from args rather than hard-coding normalized=True.
    laplacians = [graph.laplacian(g, normalized=normalized_laplacian) for g in graphs]
    cls.perm = perm
    cls.graphs = graphs
    cls.laplacians = laplacians
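# Usage sketch (not in the original source): once build_graph has run, input
# images must be reordered with the same permutation before training, so that
# pooling on the coarsened graphs acts on contiguous node pairs. This assumes
# the cnn_graph-style coarsening.perm_data(x, indices) helper; train_images and
# test_images are hypothetical arrays of shape (n_samples, 28*28).
train_data = coarsening.perm_data(train_images, cls.perm)
test_data = coarsening.perm_data(test_images, cls.perm)
# perm_data zero-pads the feature axis for the fake nodes added by coarsening,
# so train_data.shape[1] >= 28*28.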
def __init__(self, A, n_out=10):
    super(GraphCNN, self).__init__()
    # Precompute the coarsened graphs.
    graphs, pooling_inds = coarsening.coarsen(A, levels=4)
    # In order to simulate 2x2 max pooling, combine the 4 levels
    # of graphs into 2 levels by combining pooling indices.
    graphs, pooling_inds = coarsening.combine(graphs, pooling_inds, 2)

    self.graph_layers = []
    sizes = [32, 64]
    for i, (g, inds, s) in enumerate(zip(graphs, pooling_inds, sizes)):
        f = GraphConvolution(None, s, g, K=25)
        self.add_link('gconv{}'.format(i), f)
        p = GraphMaxPoolingFunction(inds)
        self.graph_layers.append((f, p))

    self.linear_layers = []
    sizes = [512]
    for i, s in enumerate(sizes):
        f = L.Linear(None, s)
        self.add_link('l{}'.format(i), f)
        self.linear_layers.append(f)

    self.add_link('cls_layer', L.Linear(None, n_out))
    self.train = True
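# A minimal forward-pass sketch for this model (an assumption, not part of the
# original snippet), intended as a GraphCNN method: each (conv, pool) pair is
# applied with a ReLU in between, then the fully connected layers and the
# classifier. F is assumed to be chainer.functions; the dropout placement is
# illustrative.
def __call__(self, x):
    h = x
    for conv, pool in self.graph_layers:
        h = pool(F.relu(conv(h)))         # graph convolution -> ReLU -> graph max pooling
    for linear in self.linear_layers:
        h = F.dropout(F.relu(linear(h)))  # fully connected block
    return self.cls_layer(h)              # n_out class scores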
def build_laplacian(k):
    # Load the full interaction graph and keep its top-left k x k block.
    fullgraph = pickle.load(open(r"C:\Users\veronica\Desktop\study\Deep Learning\Project\full_interactions_graph", 'rb'))[0:k, 0:k]
    A = csr_matrix(fullgraph).astype(np.float32)
    graphs, perm = coarsening.coarsen(A, levels=3, self_connections=False)
    L = [graph.laplacian(W, normalized=True) for W in graphs]
    # Cache the Laplacians and the permutation to disk.
    pickle.dump(L, open("L" + str(k), 'wb'))
    pickle.dump(perm, open("perm" + str(k), 'wb'))
    return L, perm
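# Usage sketch (an assumption, not from the source): build and reload the
# cached Laplacians for a 5000-node subgraph; the file names follow the
# convention used in build_laplacian above.
L, perm = build_laplacian(5000)
L_cached = pickle.load(open("L5000", 'rb'))
perm_cached = pickle.load(open("perm5000", 'rb'))
assert len(L_cached) == len(L)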
def gene_graph(self, A):
    coarsening_levels = 4
    L, perm = coarsen(A, coarsening_levels)
    # Reindex node features to match the coarsened graphs.
    train_data = perm_data(copy.copy(self.train_data), perm)
    test_data = perm_data(copy.copy(self.test_data), perm)
    self.layer1 = L[0].shape
    self.D = self.layer1[0]
    print(self.D)
    # Largest eigenvalue of the Laplacian at each coarsening level.
    lmax = []
    for i in range(coarsening_levels + 1):
        lmax.append(lmax_L(L[i]))
    return train_data, test_data, self.train_label, self.test_label, L, lmax
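# Why lmax is returned: Chebyshev graph filters expect eigenvalues in [-1, 1],
# so each Laplacian is rescaled as L' = 2 L / lmax - I before filtering. A
# minimal sketch of that rescaling (assuming SciPy sparse Laplacians),
# mirroring the rescale_L helper that usually accompanies lmax_L:
import scipy.sparse

def rescale_laplacian(L, lmax):
    I = scipy.sparse.identity(L.shape[0], format='csr', dtype=L.dtype)
    return (2.0 / lmax) * L - I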
def build_graph(cls, args):
    number_edges = args.number_edges
    metric = args.metric
    normalized_laplacian = args.normalized_laplacian
    coarsening_levels = args.coarsening_levels

    data_dir = 'data/20news'
    embed_path = os.path.join(data_dir, 'embeddings.npy')
    graph_data = np.load(embed_path).astype(np.float32)

    dist, idx = graph.distance_sklearn_metrics(graph_data, k=number_edges, metric=metric)
    adj_matrix = graph.adjacency(dist, idx)
    print("{} > {} edges".format(adj_matrix.nnz // 2,
                                 number_edges * graph_data.shape[0] // 2))
    adj_matrix = graph.replace_random_edges(adj_matrix, 0)

    graphs, perm = coarsening.coarsen(adj_matrix, levels=coarsening_levels,
                                      self_connections=False)
    laplacians = [
        graph.laplacian(g, normalized=normalized_laplacian) for g in graphs
    ]
    cls.perm = perm
    cls.graphs = graphs
    cls.laplacians = laplacians
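# For reference, a rough sklearn-only equivalent of the kNN graph construction
# above (a sketch under assumptions about graph.adjacency's Gaussian weighting;
# the real helpers may differ in details such as the kernel width):
import numpy as np
import scipy.sparse
from sklearn.neighbors import NearestNeighbors

def knn_adjacency(X, k, metric='euclidean'):
    nn = NearestNeighbors(n_neighbors=k + 1, metric=metric).fit(X)
    dist, idx = nn.kneighbors(X)
    dist, idx = dist[:, 1:], idx[:, 1:]  # drop the self-neighbor
    sigma2 = np.mean(dist[:, -1]) ** 2
    w = np.exp(-dist ** 2 / sigma2)      # Gaussian edge weights
    n = X.shape[0]
    rows = np.repeat(np.arange(n), k)
    A = scipy.sparse.coo_matrix((w.ravel(), (rows, idx.ravel())), shape=(n, n))
    return A.maximum(A.T).tocsr()        # symmetrize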
X_train = X[:n_train, ...]
X_val = X[n_train:, ...]
y_train = y[:n_train, ...]
y_val = y[n_train:, ...]

A = np.load('/Neutron9/joyneel.misra/npys/meanFC_d' + str(d) + '.npy')
A = A - np.min(A)
A = scipy.sparse.csr_matrix(A)
d = X.shape[1]
assert A.shape == (d, d)
print('d = |V| = {}, k|V| < |E| = {}'.format(d, A.nnz))

graphs, perm = coarsening.coarsen(A, levels=3, self_connections=False)
X_train = coarsening.perm_data(X_train, perm)
X_val = coarsening.perm_data(X_val, perm)
L = [graph.laplacian(W, normalized=True) for W in graphs]
L = [elem.astype(np.float32) for elem in L]

params = dict()
params['dir_name'] = 'demo'
params['num_epochs'] = 10
params['batch_size'] = 40
params['eval_frequency'] = 100
# Building blocks.
params['filter'] = 'chebyshev5'
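# The remaining hyperparameters and the training call would follow the
# cnn_graph demo pattern; the values below are illustrative assumptions, not
# taken from the source.
params['brelu'] = 'b1relu'
params['pool'] = 'mpool1'
params['F'] = [32, 64]                 # graph convolutional filters per layer
params['K'] = [20, 20]                 # Chebyshev polynomial orders
params['p'] = [4, 2]                   # pooling sizes (product <= 2**levels)
params['M'] = [512, int(y.max() + 1)]  # fully connected layers + classifier
params['regularization'] = 5e-4
params['dropout'] = 1
params['learning_rate'] = 1e-3
params['decay_rate'] = 0.95
params['momentum'] = 0.9
params['decay_steps'] = n_train / params['batch_size']

model = models.cgcnn(L, **params)
accuracy, loss, t_step = model.fit(X_train, y_train, X_val, y_val)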
        # Tail of grid_graph(m, corners): threshold weak edges when corners=True.
        A = A.toarray()
        A[A < A.max() / 1.5] = 0
        A = scipy.sparse.csr_matrix(A)
        print('{} edges'.format(A.nnz))

    print("{} > {} edges".format(A.nnz // 2, FLAGS.number_edges * m**2 // 2))
    return A

t_start = time.process_time()
A = grid_graph(28, corners=False)
A = graph.replace_random_edges(A, 0)  # assign the result; A is not modified in place
graphs, perm = coarsening.coarsen(A, levels=FLAGS.coarsening_levels, self_connections=False)
L = [graph.laplacian(W, normalized=True, renormalized=True) for W in graphs]
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))
graph.plot_spectrum(L)
print("DONE")
del A

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(FLAGS.dir_data, one_hot=False)

train_data = mnist.train.images.astype(np.float32)
val_data = mnist.validation.images.astype(np.float32)
test_data = mnist.test.images.astype(np.float32)
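# graph.plot_spectrum visualizes how coarsening preserves the Laplacian
# spectrum. A rough sketch of the idea (an assumption about the helper; dense
# eigendecomposition, so only practical for small graphs):
import matplotlib.pyplot as plt

def plot_spectrum_sketch(laplacians):
    for i, lap in enumerate(laplacians):
        eigvals = np.linalg.eigvalsh(lap.toarray())  # sorted ascending
        plt.plot(eigvals, '.', markersize=2, label='level {}'.format(i))
    plt.legend(loc='lower right')
    plt.xlabel('eigenvalue index')
    plt.ylabel('eigenvalue')
    plt.show()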
# ----- Load adjacency matrix of LB-operator or graph Laplacian ---------------
# L = D - W
print('Loading adjacency matrix ...')
f_adjacency = h5py.File(Adjfilename, 'r')
W = sparse.csr_matrix(
    (f_adjacency["W"]["data"], f_adjacency["W"]["ir"], f_adjacency["W"]["jc"]))
W = W.astype(np.float32)
print('Size of W: ', W.shape, '\n')

# ----- Graph coarsening ------------------------------------------------------
print('Graph coarsening ...')
print('Original: |V| = {} nodes, |E| = {} edges'.format(
    W.shape[0], int(W.nnz / 3)))
graphs, perm = coarsening.coarsen(W, levels=coarsen_level, self_connections=False)
# Exchange node ids so that binary unions form the clustering tree.
data_train = coarsening.perm_data(data_train, perm)
data_valid = coarsening.perm_data(data_valid, perm)
data_test = coarsening.perm_data(data_test, perm)

# ----- Update LB-operator or graph Laplacian for each coarsened level --------
L = [
    graph.laplacian(W2, normalized=params['normalized']).transpose()
    for W2 in graphs
]

# ----- Training and validation -----------------------------------------------
n_train = data_train.shape[0]  # Number of train samples.
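# Note (an assumption about the data layout, not stated in the source): MATLAB
# stores sparse matrices in compressed sparse *column* order, where "ir" holds
# row indices and "jc" column pointers. SciPy's (data, indices, indptr)
# constructor interprets the same triple according to the target class, so:
W_csc = sparse.csc_matrix((f_adjacency["W"]["data"], f_adjacency["W"]["ir"],
                           f_adjacency["W"]["jc"]))  # faithful to MATLAB's layout
# csr_matrix with the same triple yields the transpose; for a symmetric W the
# two coincide, which is presumably why the csr_matrix form above is safe.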
def main():
    createFolder('Result')
    config_file = sys.argv[1]
    with open(config_file, 'r') as f:
        config = yaml.safe_load(f)

    PPI_data = config["PPI_data"]
    Response_data = config["Response_data"]
    Gene_data = config["Gene_data"]
    n_fold = config["n_fold"]
    test_size = config["test_size"]
    num_epochs = config["num_epochs"]
    batch_size = config["batch_size"]
    brelu = config["brelu"]
    pool = config["pool"]
    regularization = config["regularization"]
    dropout = config["dropout"]
    learning_rate = config["learning_rate"]
    decay_rate = config["decay_rate"]
    momentum = config["momentum"]
    Name = config["Name"]
    F = config["F"]
    K = config["K"]
    p = config["p"]
    M = config["M"]

    # Load the protein-protein interactions, drug responses, and gene expression.
    data_PPI = pd.read_csv(PPI_data)
    data_PPI.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_IC50 = pd.read_csv(Response_data)
    data_IC50.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_Gene = pd.read_csv(Gene_data)
    data_Gene.drop(['Unnamed: 0'], axis='columns', inplace=True)
    data_Gene = np.array(data_Gene)

    df = np.array(data_PPI)
    A = coo_matrix(df, dtype=np.float32)
    print(A.nnz)
    graphs, perm = coarsening.coarsen(A, levels=6, self_connections=False)
    L = [graph.laplacian(W, normalized=True) for W in graphs]
    graph.plot_spectrum(L)

    PCC = []
    SPC = []
    RMSE = []
    X_train, X_test, Y_train, Y_test = train_test_split(
        data_Gene, data_IC50, test_size=test_size, shuffle=True, random_state=20)

    for cv in range(n_fold):
        Y_pred = np.zeros([Y_test.shape[0], Y_test.shape[1]])
        Y_test = np.zeros([Y_test.shape[0], Y_test.shape[1]])
        j = 0
        for i in range(Y_test.shape[1]):
            # Min-max normalize the responses for drug i, ignoring NaNs.
            data1 = data_IC50.iloc[:, i]
            data1 = np.array(data1)
            data_minmax = data1[~np.isnan(data1)]
            min_val = data_minmax.min()
            max_val = data_minmax.max()
            data1 = (data1 - min_val) / (max_val - min_val)

            train_data_split, test_data_split, train_labels_split, test_labels_split = train_test_split(
                data_Gene, data1, test_size=test_size, shuffle=True, random_state=20)
            train_data = np.array(
                train_data_split[~np.isnan(train_labels_split)]).astype(np.float32)
            list_train, list_val = Validation(n_fold, train_data, train_labels_split)
            train_data_V = train_data[list_train[cv]]
            val_data = train_data[list_val[cv]]
            test_data = np.array(test_data_split[:]).astype(np.float32)
            train_labels = np.array(
                train_labels_split[~np.isnan(train_labels_split)]).astype(np.float32)
            train_labels_V = train_labels[list_train[cv]]
            val_labels = train_labels[list_val[cv]]
            test_labels = np.array(test_labels_split[:]).astype(np.float32)

            # Reindex the samples' features to match the coarsened graphs.
            train_data_V = coarsening.perm_data(train_data_V, perm)
            val_data = coarsening.perm_data(val_data, perm)
            test_data = coarsening.perm_data(test_data, perm)

            common = {}
            common['num_epochs'] = num_epochs
            common['batch_size'] = batch_size
            common['decay_steps'] = train_data.shape[0] / common['batch_size']
            common['eval_frequency'] = 10 * common['num_epochs']
            common['brelu'] = brelu
            common['pool'] = pool
            common['regularization'] = regularization
            common['dropout'] = dropout
            common['learning_rate'] = learning_rate
            common['decay_rate'] = decay_rate
            common['momentum'] = momentum
            common['F'] = F
            common['K'] = K
            common['p'] = p
            common['M'] = M

            name = Name
            params = common.copy()
            model = models.cgcnn(L, **params)
            loss, t_step = model.fit(train_data_V, train_labels_V, val_data, val_labels)
            Y_pred[:, j] = model.predict(test_data)
            Y_test[:, j] = test_labels
            j = j + 1

        np.savez(('Result/GraphCNN_CV_{}'.format(cv)), Y_true=Y_test, Y_pred=Y_pred)
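# The PCC/SPC/RMSE lists above are declared but never filled; a sketch of how
# the saved per-fold predictions could be scored per drug (assumed metrics,
# using scipy.stats):
from scipy import stats

def score_fold(path):
    res = np.load(path)
    y_true, y_pred = res['Y_true'], res['Y_pred']
    for j in range(y_true.shape[1]):
        mask = ~np.isnan(y_true[:, j])
        pcc, _ = stats.pearsonr(y_true[mask, j], y_pred[mask, j])
        spc, _ = stats.spearmanr(y_true[mask, j], y_pred[mask, j])
        rmse = np.sqrt(np.mean((y_true[mask, j] - y_pred[mask, j]) ** 2))
        print('drug {}: PCC={:.3f} SPC={:.3f} RMSE={:.3f}'.format(j, pcc, spc, rmse))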
def prepare(dataset):
    # ----- MNIST -----
    if dataset == 'mnist':
        mnist = input_data.read_data_sets('datasets', one_hot=False)  # load data in folder datasets/
        train_data = mnist.train.images.astype(np.float32)
        val_data = mnist.validation.images.astype(np.float32)
        test_data = mnist.test.images.astype(np.float32)
        train_labels = mnist.train.labels
        val_labels = mnist.validation.labels
        test_labels = mnist.test.labels
        print(train_data.shape)
        print(train_labels.shape)
        print(val_data.shape)
        print(val_labels.shape)
        print(test_data.shape)
        print(test_labels.shape)

        # Construct the graph of the Euclidean grid.
        t_start = time.time()
        grid_side = 28  # MNIST images are 28x28, so the grid must have 784 nodes
        number_edges = 8
        metric = 'euclidean'
        A = grid_graph(grid_side, number_edges, metric)
        print(A.shape)
    elif dataset == 'adni':
        t_start = time.time()
        train_data, test_data, train_labels, test_labels, A = load_data(
            train_rate=train_rate, thresh=thresh, binary=binary,
            num=num, state=state)  # 0.35
        # Alternative precomputed graphs:
        #A = np.load(saved_path + "_graph_A_2.npy")
        #A = np.load(saved_path + "_init_graph.npy")
        #A = np.load("UTA/multi_30_20_3_300_found_graph.npy")
        #A = np.load("UTA/multi_30_20_3_800_found_graph_left_candidate_465.npy")
        #A = scipy.sparse.coo_matrix(A)
        #np.save(saved_path + "_init_graph.npy", A.toarray())

        if verbose:
            # Plot the adjacency matrix as a heat map.
            fig = plt.figure()
            # A 1x1 subplot grid; draw in the first position.
            ax = fig.add_subplot(111)
            # Use the reversed 'hot' colormap for the heat map fill.
            im = ax.imshow(A.toarray(), cmap=plt.cm.hot_r)
            # Add the color scale bar on the right.
            plt.colorbar(im)
            plt.title("This is the original graph")
            plt.show()

    # Compute coarsened graphs.
    coarsening_levels = 4
    L, perm = coarsen(A, coarsening_levels)
    global layer1
    layer1 = L[0].shape

    # Compute the max eigenvalue of each graph Laplacian.
    lmax = []
    for i in range(coarsening_levels):
        lmax.append(lmax_L(L[i]))

    # Reindex nodes to satisfy a binary tree structure.
    train_data = perm_data(train_data, perm)
    # val_data = perm_data(val_data, perm)
    test_data = perm_data(test_data, perm)

    #print('Execution time: {:.2f}s'.format(time.time() - t_start))
    del perm
    return train_data, train_labels, test_data, test_labels, L, lmax, A
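# lmax_L returns the largest eigenvalue of a graph Laplacian; a minimal sketch
# of such a helper (an assumption about its implementation), using SciPy's
# sparse symmetric eigensolver:
from scipy.sparse.linalg import eigsh

def lmax_L_sketch(L):
    # Largest-magnitude eigenvalue of a symmetric sparse Laplacian.
    return float(eigsh(L, k=1, which='LM', return_eigenvectors=False)[0])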
def cross_validate_convNN(X, y, adjacency, name_param, value_param, k, num_levels=5):
    split_index = split_test_train_for_cv(X.shape[0], k_fold=k)
    graphs, perm = coarsening.coarsen(
        sp.csr_matrix(adjacency.astype(np.float32)),
        levels=num_levels, self_connections=False)
    accuracy = []
    loss = []
    for param_val in value_param:
        accuracy_param = []
        loss_param = []
        for k_ in range(k):
            test_samples = split_index[k_]
            train_samples = split_index[~(np.arange(split_index.shape[0]) == k_)].flatten()
            X_train = X[train_samples]
            X_test = X[test_samples]
            y_train = y[train_samples]
            y_test = y[test_samples]
            X_train = coarsening.perm_data(X_train, perm)
            X_test = coarsening.perm_data(X_test, perm)
            n_train = X_train.shape[0]
            L = [graph.laplacian(A, normalized=True) for A in graphs]

            # Conv NN parameters.
            params = dict()
            params['dir_name'] = 'demo'
            params['num_epochs'] = 30
            params['batch_size'] = 30
            params['eval_frequency'] = 30
            # Building blocks.
            params['filter'] = 'chebyshev5'
            params['brelu'] = 'b1relu'
            params['pool'] = 'apool1'
            # Number of classes.
            C = y.max() + 1
            assert C == np.unique(y).size
            # Architecture.
            params['F'] = [4, 8]    # Number of graph convolutional filters.
            params['K'] = [3, 3]    # Polynomial orders.
            params['p'] = [2, 8]    # Pooling sizes.
            params['M'] = [256, C]  # Output dimensionality of fully connected layers.
            # Optimization.
            params['regularization'] = 4e-5
            params['dropout'] = 1
            params['learning_rate'] = 3e-3
            params['decay_rate'] = 0.9
            params['momentum'] = 0.8
            params['decay_steps'] = n_train / params['batch_size']
            params[name_param] = param_val

            model = models.cgcnn(L, **params)
            test_acc, train_loss, t_step = model.fit(X_train, y_train, X_test, y_test)
            accuracy_param.append([max(test_acc), np.mean(test_acc)])
            loss_param.append([max(train_loss), np.mean(train_loss)])

        print(np.array(accuracy_param))
        pm = np.mean(np.array(accuracy_param), axis=0)
        pl = np.mean(np.array(loss_param), axis=0)
        print("IIIII Accuracy: %0.2f (max) %0.2f (mean)  Loss: %0.2f (max) %0.2f (mean)"
              % (pm[0], pm[1], pl[0], pl[1]))
        accuracy.append(pm)
        loss.append(pl)
    return accuracy, loss
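# Usage sketch (illustrative values, not from the source): sweep the dropout
# keep-probability over three settings with 5-fold cross-validation.
accuracy, loss = cross_validate_convNN(X, y, adjacency,
                                       name_param='dropout',
                                       value_param=[0.5, 0.75, 1.0],
                                       k=5)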