def process_data(self):
    data = load_data('cora')
    adj, feas = data[:2]
    self.adj = adj.todense()
    self.normed_adj = preprocess_adj(adj)
    self.feas = preprocess_features(feas, False)
    self.y_train, self.y_val, self.y_test = data[2:5]
    self.train_mask, self.val_mask, self.test_mask = data[5:]
from time import perf_counter

def sgc_precompute(features, adj, K=2):
    adj = preprocess_adj(adj).tocoo()
    features = features.tocoo()
    t = perf_counter()
    # SGC-style precomputation: propagate features K times, i.e. features <- adj^K @ features
    for _ in range(K):
        features = adj.dot(features)
    precompute_time = perf_counter() - t
    return features, precompute_time
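# Hedged usage sketch (added, not from the source): a minimal call to
# sgc_precompute, assuming preprocess_adj accepts a scipy sparse adjacency,
# as the .tocoo() call above suggests. The toy path graph, random features,
# and K=2 are illustrative only.
import numpy as np
import scipy.sparse as sp

toy_adj = sp.csr_matrix(np.array([[0, 1, 0],
                                  [1, 0, 1],
                                  [0, 1, 0]], dtype=np.float32))
toy_features = sp.csr_matrix(np.random.rand(3, 4).astype(np.float32))

smoothed, t = sgc_precompute(toy_features, toy_adj, K=2)  # smoothed ~ adj^2 @ X
print(f"precompute took {t:.4f}s; output shape: {smoothed.shape}")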
def run(args):
    (adj, features, y_train, y_val, y_test,
     train_mask, val_mask, test_mask, train_size, test_size) = load_corpus(args.select_data)

    # fold the validation split into training
    train_mask = train_mask + val_mask
    y_train = y_train + y_val

    adj_dense = preprocess_adj(adj).toarray().astype(np.float32)
    features_dense = preprocess_features(features).toarray().astype(np.float32)
    y_train = y_train.astype(np.float32)
    y_test = y_test.astype(np.float32)
    train_mask = train_mask.astype(np.float32)
    test_mask = test_mask.astype(np.float32)

    gcn_model = GCN(
        tf.convert_to_tensor(adj_dense),
        layers=args.layers,
        hidden_size=args.hidden_size,
        dropout=args.dropout,
    )
    loss_fn = masked_softmax_cross_entropy
    # acc_fn = masked_accuracy
    optimizer = Adam(learning_rate=args.lr)
    # print("Model Layers: ", gcn_model.trainable_variables)

    model_textGCN = TextGCN(model=gcn_model, loss=loss_fn, optimizer=optimizer, args=args)
    model_textGCN.train(features_dense, y_train, train_mask)

    sns.distplot(model_textGCN.train_accuracy)
    plt.savefig("train_acc.png")
    plt.clf()
    sns.distplot(model_textGCN.train_losses)
    plt.savefig("train_losses.png")

    eval_result = model_textGCN.evaluate(features_dense, y_test, test_mask)
    print(f"Final Evaluation Result: {eval_result}")
def __init__(self, adj, x, y, W, b, K=2, normalize_grad=True):
    self.num_classes = y.max() + 1
    self.normalize_grad = normalize_grad
    self.num_nodes = adj.shape[0]
    self.K = K
    self.surrogate = Surrogate(x @ W, b, K=K)
    self.shape = (self.num_nodes, self.num_nodes)
    self.adj = adj
    self.adj_sparse = utils.sparse_to_tuple(utils.preprocess_adj(adj))
    self.y = y
    self.y_onehot = np.eye(int(self.num_classes))[y]
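# Note (added): np.eye(num_classes)[y] above is the standard NumPy one-hot
# trick; a quick self-contained illustration:
import numpy as np

y_example = np.array([0, 2, 1, 2])
onehot = np.eye(3)[y_example]
# onehot == [[1., 0., 0.],
#            [0., 0., 1.],
#            [0., 1., 0.],
#            [0., 0., 1.]]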
def __init__(self, args):
    print("prepare data")
    self.graph_path = "data/graph"
    self.args = args

    # graph
    graph = nx.read_weighted_edgelist(f"{self.graph_path}/{args.dataset}.txt",
                                      nodetype=int)
    print_graph_detail(graph)
    adj = nx.to_scipy_sparse_matrix(graph,
                                    nodelist=list(range(graph.number_of_nodes())),
                                    weight='weight',
                                    dtype=float)  # np.float is removed in modern NumPy
    # symmetrize: keep the larger of w(i, j) and w(j, i)
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    self.adj = preprocess_adj(adj, is_sparse=True)

    # features: sparse identity matrix (one-hot node features)
    self.nfeat_dim = graph.number_of_nodes()
    row = list(range(self.nfeat_dim))
    col = list(range(self.nfeat_dim))
    value = [1.] * self.nfeat_dim
    shape = (self.nfeat_dim, self.nfeat_dim)
    indices = th.from_numpy(np.vstack((row, col)).astype(np.int64))
    values = th.FloatTensor(value)
    shape = th.Size(shape)
    self.features = th.sparse.FloatTensor(indices, values, shape)

    # target
    target_fn = f"data/text_dataset/{self.args.dataset}.txt"
    target = np.array(pd.read_csv(target_fn, sep="\t", header=None)[2])
    target2id = {label: indx for indx, label in enumerate(set(target))}
    self.target = [target2id[label] for label in target]
    self.nclass = len(target2id)

    # train / val / test split
    self.train_lst, self.test_lst = get_train_test(target_fn)
def main(args):
    save_dir = args.save_dir  # was commented out, but used below
    log_dir = args.log_dir
    train_dir = args.data_dir
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = utils.load_data(
        args.data_type)
    features = utils.preprocess_features(features)
    support = [utils.preprocess_adj(adj)]
    args.num_supports = 1
    args.input_size, args.features_size = features[2][1], features[2]
    args.output_size = y_train.shape[1]

    config_proto = utils.get_config_proto()
    sess = tf.Session(config=config_proto)
    model = GCN(args, sess, name="gcn")
    summary_writer = tf.summary.FileWriter(log_dir)

    for epoch in range(1, args.nb_epoch + 1):
        epoch_start_time = time.time()
        feed_dict = utils.construct_feed_dict(model, features, support, y_train, train_mask)
        _, train_loss, train_acc, summaries = model.train(feed_dict)

        if epoch % args.summary_epoch == 0:
            summary_writer.add_summary(summaries, epoch)

        if epoch % args.print_epoch == 0:
            feed_dict_val = utils.construct_feed_dict(model, features, support, y_val, val_mask)
            val_loss, val_acc = model.evaluate(feed_dict_val)
            print("epoch %d, train_loss %f, train_acc %f, val_loss %f, val_acc %f, time %.5fs" %
                  (epoch, train_loss, train_acc, val_loss, val_acc, time.time() - epoch_start_time))

        if args.anneal and epoch >= args.anneal_start:
            sess.run(model.lr_decay_op)

    model.saver.save(sess, os.path.join(save_dir, "model.ckpt"))
    print("Model stored....")
def mics_graph_matrix(num_subject, graph_folder, GRAPH_ADJ, FILTER, MAX_DEGREE):
    """Generate graph matrix for GCNN

    Args:
        num_subject (int): number of subjects in the data
        graph_folder (str): location of the graph folder
        GRAPH_ADJ (str): filename of the graph
        FILTER (str): type of GCNN filter
        MAX_DEGREE (int): degree of the Chebyshev polynomial

    Returns:
        Tuple: the graph_matrix and the number of supports used for GCNN

    Raises:
        Exception: invalid FILTER type
    """
    SYM_NORM = True  # symmetric (True) vs. left-only (False) normalization

    # build the graph
    A = load_graph(dimension=num_subject, path=graph_folder, graph=GRAPH_ADJ)

    # estimate the Laplacian
    if FILTER == 'localpool':
        # Local pooling filters (see 'renormalization trick' in Kipf & Welling, arXiv 2016)
        print('Using local pooling filters...')
        A_ = preprocess_adj(A, SYM_NORM)
        support = 1
        graph_matrix = [A_]
    elif FILTER == 'chebyshev':
        # Chebyshev polynomial basis filters (Defferrard et al., NIPS 2016)
        print('Using Chebyshev polynomial basis filters...')
        L = normalized_laplacian(A, SYM_NORM)
        L_scaled = rescale_laplacian(L)
        T_k = chebyshev_polynomial(L_scaled, MAX_DEGREE)
        support = MAX_DEGREE + 1
        graph_matrix = T_k
    else:
        raise Exception('Invalid filter type.')
    return graph_matrix, support
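# Hedged usage sketch (added, not from the source): the argument values below
# are placeholders and must match whatever load_graph expects in this codebase.
graph_matrix, support = mics_graph_matrix(
    num_subject=100,
    graph_folder='data/graphs',
    GRAPH_ADJ='graph_adj.npy',
    FILTER='chebyshev',
    MAX_DEGREE=5,
)
# 'chebyshev' yields MAX_DEGREE + 1 supports: T_0(L_scaled) .. T_5(L_scaled)
assert support == 6 and len(graph_matrix) == 6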
def get_feature(dataset):
    Features_decrease, adj_decrease, edge_decrease, full_feature_decrease = [], [], [], []
    Interactions, smiles = [], []
    for x, label, w, smile in dataset.itersamples():
        # the SMILES string is used to extract molecular fingerprints
        smiles.append(smile)
        interaction = label
        Interactions.append(interaction)
        mol = Chem.MolFromSmiles(smile)
        if not mol:
            raise ValueError("Could not parse SMILES string:", smile)

        # increased order
        feature_increase = x.get_atom_features()
        iAdjTmp_increase = create_adjacency(mol)

        # decreased order: turn the data upside down
        feature_decrease = flip(feature_increase, 0)
        iAdjTmp_decrease = flip(iAdjTmp_increase, 0)

        # obtain fixed-size molecular input data
        iFeature_decrease, adjacency_decrease = fix_input(feature_decrease, iAdjTmp_decrease)
        Features_decrease.append(np.array(iFeature_decrease))
        normed_adj_decrease = preprocess_adj(adjacency_decrease)
        adj_decrease.append(normed_adj_decrease)

        # transform the data into PyTorch Geometric's edge-index format
        index = np.array(np.where(iAdjTmp_decrease == 1))
        edge_index = torch.from_numpy(index).long()
        edge_decrease.append(edge_index)
        feature = torch.from_numpy(feature_decrease.copy()).float()
        full_feature_decrease.append(feature)
    return Features_decrease, adj_decrease, edge_decrease, full_feature_decrease, Interactions, smiles
def build_model(adj, features, n_classes):
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32,
                                          shape=tf.constant(features[2], dtype=tf.int64)),
        'labels': tf.placeholder(tf.float32, shape=(None, n_classes)),
        'labels_mask': tf.placeholder(tf.int32),
        'noise': tf.placeholder(tf.float32, shape=()),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'alfa': tf.placeholder(tf.float32, shape=()),
        'beta': tf.placeholder(tf.float32, shape=()),
    }

    if FLAGS.model == 'COOL':
        support = [sparse_to_tuple(preprocess_adj(adj))]
        model_func = GCN
    elif FLAGS.model == 'COOLnorm':
        support = [sparse_to_tuple(
            preprocess_high_order_adj(adj, FLAGS.order, FLAGS.threshold))]
        model_func = GCN
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    placeholders['support'] = [tf.sparse_placeholder(tf.float32) for _ in support]

    model = model_func(placeholders)
    return model, support, placeholders
def __init__(self, smiles):
    featurizer = dc.feat.graph_features.ConvMolFeaturizer()
    self.Full_features, self.Full_normed_adj, self.Full_fringer, self.Full_interactions = [], [], [], []
    for i, smile in enumerate(smiles):
        mol = Chem.MolFromSmiles(str(smile))
        if not mol:
            raise ValueError("Could not parse SMILES string:", smile)
        x = featurizer.featurize([mol])[0]

        # increased order
        feature_increase = x.get_atom_features()
        iAdjTmp_increase = create_adjacency(mol)

        # decreased order: turn the data upside down
        feature_decrease = flip(feature_increase, 0)
        iAdjTmp_decrease = flip(iAdjTmp_increase, 0)

        # obtain fixed-size molecular input data
        iFeature_decrease, adjacency_decrease = fix_input(feature_decrease, iAdjTmp_decrease)
        Features_decrease = np.array(iFeature_decrease)
        adj_decrease = preprocess_adj(adjacency_decrease)
        fingerprints = calc(mol)[:fingerprint_size]

        self.Full_features.append(Features_decrease)
        self.Full_normed_adj.append(adj_decrease)
        self.Full_fringer.append(fingerprints)
        self.Full_interactions.append([0])

    self.Full_features = tensoring(self.Full_features)
    self.Full_normed_adj = tensoring(self.Full_normed_adj)
    self.Full_fringer = tensoring(self.Full_fringer)
    self.Full_interactions = tensoring(self.Full_interactions)
    self.dataset = list(zip(np.array(self.Full_features),
                            np.array(self.Full_normed_adj),
                            np.array(self.Full_fringer),
                            np.array(self.Full_interactions)))
def get_data(dataset):
    # Load data
    (adj, y_train, y_val, y_test,
     train_mask, val_mask, test_mask, train_size, test_size) = utils.load_data(dataset)

    # featureless setting: identity matrix as node features
    features = sparse.identity(adj.shape[1])

    # Some preprocessing
    features = utils.preprocess_features(features)
    support = [utils.preprocess_adj(adj)]

    # Convert to torch tensors
    t_features = torch.from_numpy(features)
    t_y_train = torch.from_numpy(y_train)
    t_y_val = torch.from_numpy(y_val)
    t_y_test = torch.from_numpy(y_test)
    t_train_mask = torch.from_numpy(train_mask.astype(np.float32))
    t_support = []
    for i in range(len(support)):
        t_support.append(torch.Tensor(support[i]))

    return (t_features, t_y_train, t_y_val, t_y_test, t_train_mask,
            t_support, val_mask, test_mask, train_size, test_size)
def get_model_and_support(model_string, adj, initial_train, train_mask, val_mask,
                          test_mask, with_test):
    if model_string == 'gcn':
        support = [preprocess_adj(adj)]
        num_supports = 1
        model_func = GCN
        if with_test:
            sub_sampled_support = support
        else:
            # cut the test and validation features
            initial_sample_list = get_list_from_mask(initial_train)
            sub_sampled_support = get_masked_adj(adj, initial_sample_list)
            sub_sampled_support = [preprocess_adj(sub_sampled_support)]
            # A = adj.toarray()[initial_sample_list]
            # one_hop_sample_list = np.argwhere(np.sum(A, axis=0))
            # sub_sampled_support_second = [
            #     get_sub_sampled_support(complete_support=support[0], node_to_keep=one_hop_sample_list)
            # ]
        return model_func, support, sub_sampled_support, num_supports
    elif model_string == 'gcn_cheby':
        support = chebyshev_polynomials(adj, FLAGS.max_degree)
        sub_sampled_support = support
        num_supports = 1 + FLAGS.max_degree
        model_func = GCN
    elif model_string == 'dense':
        support = [preprocess_adj(adj)]  # Not used
        sub_sampled_support = support  # Not used
        num_supports = 1
        model_func = MLP
    elif model_string == 'k-nn':
        support = [preprocess_adj(adj)]  # Not used
        sub_sampled_support = support  # Not used
        num_supports = 1
        model_func = None
    elif model_string == 'gcn_subsampled':  # FLOFLO's making
        support = [preprocess_adj(adj)]
        num_supports = 1
        model_func = GCN
        if with_test:
            initial_sample_list = get_list_from_mask(train_mask + val_mask + test_mask)
        else:
            initial_sample_list = get_list_from_mask(train_mask)
        sub_sampled_support = get_masked_adj(adj, initial_sample_list)
        sub_sampled_support = [preprocess_adj(sub_sampled_support)]
        return model_func, support, sub_sampled_support, num_supports
    else:
        # report the argument this function actually dispatches on
        raise ValueError('Invalid argument for model: ' + str(model_string))
    return model_func, support, sub_sampled_support, num_supports
# =============================================================================
# Data loading
# =============================================================================
train_df = utils.datafeeder2(np.load("trainX.npy"), np.load("trainY.npy"))
valid_df = utils.datafeeder2(np.load("validX.npy"), np.load("validY.npy"))
test_df = utils.datafeeder2(np.load("testX.npy"), np.load("testY.npy"))

# =============================================================================
# PPI network loading and D^(-1/2) A D^(-1/2) matrix calculation
# =============================================================================
# Real PPI
ppi_matrix = np.load("ppi2.npy")
nom_adj_matrix = utils.preprocess_adj(ppi_matrix)
nom_adj_matrix2 = utils.preprocess_adj2(ppi_matrix)

# Fake (randomized) PPI
fake_ppi = utils.ransomize_ppi(ppi_matrix)
nom_fake = utils.preprocess_adj(fake_ppi)
nom_fake2 = utils.preprocess_adj2(fake_ppi)

# No interactions at all
nom_noitx = utils.preprocess_adj(np.zeros((16559, 16559)))
nom_noitx2 = utils.preprocess_adj2(np.zeros((16559, 16559)))

# =============================================================================
# Training models
# =============================================================================
model1 = PPiConv(nom_adj_matrix, gc.convolutionGraph)
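# For reference (added sketch, not this repo's implementation): the
# D^(-1/2) A D^(-1/2) normalization named in the banner above, on a dense
# adjacency. Whether utils.preprocess_adj / preprocess_adj2 add self-loops
# first (Kipf & Welling's renormalization trick) is an assumption to verify.
import numpy as np

def sym_normalize_dense(adj, add_self_loops=True):
    """Return D^(-1/2) (A [+ I]) D^(-1/2) for a dense adjacency matrix."""
    if add_self_loops:
        adj = adj + np.eye(adj.shape[0])
    deg = adj.sum(axis=1).astype(float)
    d_inv_sqrt = np.zeros_like(deg)
    nonzero = deg > 0
    d_inv_sqrt[nonzero] = deg[nonzero] ** -0.5  # guard against isolated nodes
    return adj * d_inv_sqrt[:, None] * d_inv_sqrt[None, :]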
def __init__(self, adj, x, y, hidden=16, name="", with_relu=True,
             params_dict={'dropout': 0.5}, gpu_id=None, seed=None):
    adj = utils.preprocess_adj(adj)
    num_features = x.shape[1]
    num_classes = y.max() + 1

    self.graph = tf.Graph()
    with self.graph.as_default():
        if seed:
            tf.set_random_seed(seed)

        with tf.variable_scope(name) as scope:
            w_init = glorot_uniform
            self.name = name
            self.dropout = params_dict.get('dropout', 0.)
            if not with_relu:
                self.dropout = 0
            self.learning_rate = params_dict.get('learning_rate', 0.01)
            self.weight_decay = params_dict.get('weight_decay', 5e-4)
            self.N, self.D = x.shape

            self.node_ids = tf.placeholder(tf.int32, [None], 'node_ids')
            self.node_labels = tf.placeholder(tf.int32, [None, num_classes], 'node_labels')

            # bool placeholder to turn on dropout during training
            self.training = tf.placeholder_with_default(False, shape=())

            self.labels = np.eye(num_classes)[y]
            self.adj = tf.SparseTensor(*utils.sparse_to_tuple(adj))
            self.adj = tf.cast(self.adj, tf.float32)
            self.X_sparse = tf.SparseTensor(*utils.sparse_to_tuple(x))
            self.X_sparse = tf.cast(self.X_sparse, tf.float32)
            self.X_dropout = sparse_dropout(self.X_sparse, 1 - self.dropout,
                                            (int(self.X_sparse.values.get_shape()[0]),))
            # only use dropout during training
            self.X_comp = tf.cond(self.training,
                                  lambda: self.X_dropout,
                                  lambda: self.X_sparse) if self.dropout > 0. else self.X_sparse

            self.W1 = tf.get_variable('W1', [self.D, hidden], tf.float32, initializer=w_init())
            self.b1 = tf.get_variable('b1', dtype=tf.float32, initializer=tf.zeros(hidden))

            self.h1 = spdot(self.adj, spdot(self.X_comp, self.W1))
            if with_relu:
                self.h1 = tf.nn.relu(self.h1 + self.b1)

            self.h1_dropout = tf.nn.dropout(self.h1, rate=self.dropout)
            self.h1_comp = tf.cond(self.training,
                                   lambda: self.h1_dropout,
                                   lambda: self.h1) if self.dropout > 0. else self.h1

            self.W2 = tf.get_variable('W2', [hidden, num_classes], tf.float32, initializer=w_init())
            self.b2 = tf.get_variable('b2', dtype=tf.float32, initializer=tf.zeros(num_classes))

            self.logits = spdot(self.adj, dot(self.h1_comp, self.W2))
            if with_relu:
                self.logits += self.b2
            self.logits_gather = tf.gather(self.logits, self.node_ids)

            self.predictions = tf.nn.softmax(self.logits_gather)

            self.loss_per_node = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=self.logits_gather, labels=self.node_labels)
            self.loss = tf.reduce_mean(self.loss_per_node)

            # weight decay only on the first layer, to match the original implementation
            if with_relu:
                self.loss += self.weight_decay * tf.add_n(
                    [tf.nn.l2_loss(v) for v in [self.W1, self.b1]])

            var_l = [self.W1, self.W2]
            if with_relu:
                var_l.extend([self.b1, self.b2])
            self.train_op = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate).minimize(self.loss, var_list=var_l)

            self.varlist = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
            self.local_init_op = tf.variables_initializer(self.varlist)

            if gpu_id is None:
                config = tf.ConfigProto(device_count={'GPU': 0})
            else:
                gpu_options = tf.GPUOptions(visible_device_list='{}'.format(gpu_id),
                                            allow_growth=True)
                config = tf.ConfigProto(gpu_options=gpu_options)

            self.session = tf.Session(config=config)
            self.init_op = tf.global_variables_initializer()
            self.session.run(self.init_op)
def build_model(adj, features, n_classes, subgraphs):
    perturbation = None
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32,
                                          shape=tf.constant(features[2], dtype=tf.int64)),
        'labels': tf.placeholder(tf.float32, shape=(None, n_classes)),
        'labels_mask': tf.placeholder(tf.int32),
        'noise': tf.placeholder(tf.float32, shape=()),
        'dropout': tf.placeholder_with_default(0., shape=()),
    }

    if FLAGS.model == 'gcn':
        support = [sparse_to_tuple(preprocess_adj(adj))]
        model_func = GCN
    elif FLAGS.model == 'gcnR':
        support = [sparse_to_tuple(adj)]
        model_func = GCN
    elif FLAGS.model == 'gcnT':
        support = [sparse_to_tuple(
            preprocess_high_order_adj(adj, FLAGS.order, FLAGS.threshold))]
        model_func = GCN
    elif FLAGS.model in ('fishergcn', 'fishergcnT'):
        if FLAGS.model == 'fishergcn':
            A = preprocess_adj(adj)
        else:
            A = preprocess_high_order_adj(adj, FLAGS.order, FLAGS.threshold)

        N = adj.shape[0]
        L = sp.eye(N) - A

        if FLAGS.fisher_freq == 0:
            #nsubgraphs = subgraphs.shape[1]
            #V = block_krylov(A, FLAGS.fisher_rank + nsubgraphs)
            #V = V[:, :FLAGS.fisher_rank]
            V = block_krylov(A, FLAGS.fisher_rank)
            w = (sp.csr_matrix.dot(L, V) * V).sum(0)
        elif FLAGS.fisher_freq == 1:
            # if the graph contains one large component and small isolated components,
            # only perturb the largest connected component
            subgraph_sizes = subgraphs.sum(0)
            largest_idx = np.argmax(subgraph_sizes)
            isolated = np.nonzero(1 - subgraphs[:, largest_idx])[0]

            L = L.tolil()
            L[:, isolated] = 0
            L[isolated, :] = 0
            L = L.tocsr()

            V = block_krylov(L, FLAGS.fisher_rank)
            w = (sp.csr_matrix.dot(L, V) * V).sum(0)
        elif FLAGS.fisher_freq == 2:
            V, _ = np.linalg.qr(np.random.randn(N, FLAGS.fisher_rank))
            w = np.ones(FLAGS.fisher_rank)
        else:
            print('unknown frequency:', FLAGS.fisher_freq)
            sys.exit(0)

        perturbation = make_perturbation(V, w, placeholders['noise'], FLAGS.fisher_adversary)
        support = [sparse_to_tuple(A)]
        model_func = GCN
    elif FLAGS.model == 'chebynet':
        support = chebyshev_polynomials(adj, FLAGS.max_degree)
        model_func = GCN
    elif FLAGS.model == 'mlp':
        support = [sparse_to_tuple(preprocess_adj(adj))]
        model_func = MLP
    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    try:
        _, _values, _shape = support[0]
        print("sparsity: {0:.2f}%".format(
            100 * (_values > 0).sum() / (_shape[0] * _shape[1])))
    except Exception:
        pass

    placeholders['support'] = [tf.sparse_placeholder(tf.float32) for _ in support]

    model = model_func(placeholders, perturbation=perturbation, subgraphs=subgraphs)
    return model, support, placeholders
val_mask = np.zeros(n, dtype=bool)
test_mask = np.zeros(n, dtype=bool)
train_mask[train_index[0:val_cut]] = True
val_mask[train_index[val_cut:]] = True
test_mask[test_index] = True

y_train = np.zeros(labels.shape, dtype=int)
y_val = np.zeros(labels.shape, dtype=int)
y_test = np.zeros(labels.shape, dtype=int)
y_train[train_mask, :] = labels[train_mask, :]
y_val[val_mask, :] = labels[val_mask, :]
y_test[test_mask, :] = labels[test_mask, :]

# remove links to non-training nodes for the first (masked) adjacency
masked_adjacency = get_masked_adj(adj, train_index[0:val_cut])
masked_adjacency = preprocess_adj(masked_adjacency)
adjacency = preprocess_adj(adj)

hyperparam_search = []

# Define model evaluation function
def evaluate(sess, features, adjacency, masked_adjacency, labels, mask, placeholders):
    t_test = time.time()
    feed_dict_val = construct_feed_dict(features, adjacency, labels, mask,
                                        masked_adjacency, placeholders)
    outs_val = sess.run([model.loss, model.accuracy, model.predict()],
                        feed_dict=feed_dict_val)
def train(G):
    print(G.graph)

    print('Extracting user graph...')
    userG = user_graph(G)

    # labels
    print('Obtaining labels...')
    future = pd.read_csv(constants.DATA_HOME +
                         "user_scores/{}_2014_wf{:02d}.csv".format("politics", 1))
    future = future.set_index("user")
    future['label'] = np.sign(future["sum"] - np.percentile(future["sum"], 90) - 1e-10)
    labels = []
    for userid in userG.nodes():
        if userid in future.index:
            labels.append(future.loc[userid]['label'])
        else:
            #print('%s does not have label.' % userid)
            userG.remove_node(userid)
    labels = np.array(labels, dtype=int)  # np.int is removed in modern NumPy
    # convert to 0/1 labels
    labels = [0 if l < 0 else 1 for l in labels]

    print('Extracting user features...')
    max_deg = max(G.degree(userG.nodes()).values())
    print('Max user degree: ', max_deg)
    features = []
    neighbor_dict = {}
    feature_size = 0
    for node in userG.nodes():
        #feature_size, feature_idx = extract_user_features_simple(G, node, max_deg, neighbor_dict)
        feature_size, feature_idx = extract_user_features_simple(G, node, max_deg)
        # a list of all features corresponding to posts/comments
        features.append(feature_idx)
    #neighbor_features = np.zeros((len(neighbor_dict), feature_size))
    #for idx, vec in neighbor_dict.values():
    #    neighbor_features[idx] = vec
    features = np.stack(features, axis=0)
    print('Feature dimensions: ', features.shape)

    # data split: 70% train / 10% val / 20% test
    n = userG.number_of_nodes()
    n1 = int(math.ceil(n * 0.7))
    n2 = int(math.ceil(n * 0.8))
    train_mask = np.array([1 if i < n1 else 0 for i in range(n)])
    val_mask = np.array([1 if n1 <= i < n2 else 0 for i in range(n)])
    test_mask = np.array([1 if n2 <= i else 0 for i in range(n)])
    train_labels = np.zeros((n, 2))
    train_labels[np.arange(n1), labels[:n1]] = 1
    val_labels = np.zeros((n, 2))
    val_labels[np.arange(n1, n2), labels[n1:n2]] = 1
    test_labels = np.zeros((n, 2))
    test_labels[np.arange(n2, n), labels[n2:]] = 1

    adj = nx.adjacency_matrix(userG)

    # Define placeholders
    placeholders = {
        'support': [tf.sparse_placeholder(tf.float32)],
        'features': tf.placeholder(tf.float32,
                                   shape=(None, features.shape[1], features.shape[2])),
        #'features': tf.placeholder(tf.float32, shape=(None, features.shape[1])),
        #'neighbor_features': tf.placeholder(tf.float32, shape=neighbor_features.shape),
        'labels': tf.placeholder(tf.float32, shape=(None, train_labels.shape[1])),
        'labels_mask': tf.placeholder(tf.int32),
        'dropout': tf.placeholder_with_default(0., shape=()),
    }

    # neural network model
    layer_sizes = [features.shape[2], 10, 1]
    model = GCN_multipartite(placeholders, layer_sizes, logging=True)

    # Memory usage options
    config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = GPU_MEM_FRACTION

    init = tf.global_variables_initializer()
    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()

    # Start running operations on the Graph.
    sess = tf.Session(config=config)
    sess.run(init)
    summary_writer = tf.summary.FileWriter(FLAGS.train_log_dir, sess.graph)

    cost_val = []

    # Train model
    print('Training...')
    for epoch in range(FLAGS.epochs):
        # the normalized support is identical every epoch; could be hoisted out of the loop
        support = [utils.preprocess_adj(adj)]

        # Construct feed dictionary
        feed_dict = utils.construct_feed_dict(features, support, train_labels, train_mask,
                                              placeholders, sparse_inputs=False)
        feed_dict.update({placeholders['dropout']: FLAGS.dropout})

        # Training step
        start_time = time.time()
        outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)
        duration = time.time() - start_time
        print(duration)

        # Validation
        cost, acc, y_pred, duration_val = evaluate(sess, model, features, support,
                                                   val_labels, val_mask, placeholders)
        cost_val.append(cost)
        y_true = np.argmax(val_labels, 1)
        y_pred = y_pred[n1:n2]
        y_true = y_true[n1:n2]
        precision = sk.metrics.precision_score(y_true, y_pred)
        recall = sk.metrics.recall_score(y_true, y_pred)
        f1 = sk.metrics.f1_score(y_true, y_pred)

        # Print results
        print("Epoch:", '%04d' % (epoch + 1),
              "train_loss=", "{:.5f}".format(outs[1]),
              "train_acc=", "{:.5f}".format(outs[2]),
              "train_time=", "{:.5f}".format(duration),
              "val_acc=", "{:.5f}".format(acc),
              'val_f1=', '{:.5f}'.format(f1),
              "val_time=", "{:.5f}".format(duration_val))

        summary_str = sess.run(summary_op, feed_dict=feed_dict)
        summary_writer.add_summary(summary_str, epoch)
if dataset == 'nell.0.001':
    features = load_nell(dataset)[1]
else:
    features = load_data(dataset)[1]

with open(changedadj_path, 'rb') as load_cha_adj:
    changed_adj = pickle.load(load_cha_adj)

# Some preprocessing
if FLAGS.features == 0:
    # featureless: use the rows of the self-looped adjacency as features
    changed_features = preprocess_features(changed_adj + sp.eye(changed_adj.shape[0]))
else:
    changed_features = preprocess_features(features)
support = [preprocess_adj(changed_adj)]
num_supports = 1
model_func = GCN

# Define placeholders
placeholders = {
    'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    'features': tf.sparse_placeholder(tf.float32,
                                      shape=tf.constant(changed_features[2], dtype=tf.int64)),
    'dropout': tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero': tf.placeholder(tf.int32),
placeholders = {
    'adjacency': tf.sparse_placeholder(tf.float32),
    'features': tf.sparse_placeholder(tf.float32,
                                      shape=tf.constant(features[2], dtype=tf.int64)),
    'labels': tf.placeholder(tf.float32, shape=(None, labels.shape[1])),
    'labels_mask': tf.placeholder(tf.int32),
    'dropout': tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero': tf.placeholder(tf.int32),  # helper variable for sparse dropout
}

adjacency = preprocess_adj(adj)

# Create model
model = GCNN(placeholders, input_dim=features[2][1])

# Initialize session
sess = tf.Session()

# Define model evaluation function
def evaluate(features, adjacency, labels, mask, placeholders):
    t_test = time.time()
    feed_dict_val = construct_feed_dict(features, adjacency, labels, mask,
                                        adjacency, placeholders)
    outs_val = sess.run([model.loss, model.accuracy, model.predict()],
                        feed_dict=feed_dict_val)
    return outs_val[0], outs_val[1], (time.time() - t_test), outs_val[2]
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')
flags.DEFINE_float('dropout', 0.5, 'Dropout rate (1 - keep probability).')
flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_string('gpu', '1', 'GPU selection.')

os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

# Load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(
    '../data', 'cora')

# Some preprocessing
features_dense, features = preprocess_features(features)
support = [preprocess_adj(adj)]
num_supports = 1
model_func = GCN

# Define placeholders
placeholders = {
    'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    'features': tf.placeholder(tf.float32, shape=features[2]),
    'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
    'labels_mask': tf.placeholder(tf.int32),
    'dropout': tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero': tf.placeholder(tf.int32),  # helper variable for sparse dropout
}
flags.DEFINE_float('weight_decay', 5e-4,
                   'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 10, 'Tolerance for early stopping (# of epochs).')

# Load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(
    FLAGS.dataset)
total_edges = adj.data.shape[0]
n_node = adj.shape[0]

# Some preprocessing
features = preprocess_features(features)
# densify the features (non-sparse model inputs)
features = sp.coo_matrix((features[1], (features[0][:, 0], features[0][:, 1])),
                         shape=features[2]).toarray()
support = preprocess_adj(adj)
# densify the support (non-sparse model inputs)
support = [sp.coo_matrix((support[1], (support[0][:, 0], support[0][:, 1])),
                         shape=support[2]).toarray()]
num_supports = 1
model_func = GCN

save_name = 'nat_' + FLAGS.dataset
if not os.path.exists(save_name):
    os.makedirs(save_name)

# Define placeholders
placeholders = {
    's': [
        tf.sparse_placeholder(tf.float32, shape=(n_node, n_node))
import torch
import numpy as np
import pickle
from utils import load_data, preprocess_features, preprocess_adj, tuple_to_torchSparseTensor
from gcn_model import GCN

adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data("cora")
adj_hat = preprocess_adj(adj)
features = preprocess_features(features)
# features[0].shape == (49216, 2)
# features[1].shape == (49216,)
# features[2] == (2708, 1433)

# Convert to torch.Tensor
sparse_adj_hat = tuple_to_torchSparseTensor(adj_hat)
sparse_features = tuple_to_torchSparseTensor(features)
y_train = torch.FloatTensor(y_train)  # dtype = torch.float32
y_val = torch.FloatTensor(y_val)
y_test = torch.FloatTensor(y_test)
train_mask = torch.from_numpy(train_mask)  # dtype = torch.bool
val_mask = torch.from_numpy(val_mask)
test_mask = torch.from_numpy(test_mask)

model_file = 'training_dir/gcn_model.pkl'
model = torch.load(model_file)
output = model(sparse_adj_hat, sparse_features)
test_loss = model.loss(output, y_test, test_mask)
test_acc = model.accuracy(output, y_test, test_mask)
print("model_file={}, test_loss={}, test_acc={}".format(
    model_file, test_loss.item(), test_acc.item()))
flags.DEFINE_integer('early_stopping', 10, 'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')
flags.DEFINE_string('gpu', '1', 'GPU selection.')
flags.DEFINE_string('method', args.method, 'Adversarial attack method')

os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

# Load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(
    FLAGS.dataset_dir, FLAGS.dataset)

# Some preprocessing
features_dense, features = preprocess_features(features)
if FLAGS.model == 'gcn':
    support = [preprocess_adj(adj)]
    num_supports = 1
    model_func = GCN
elif FLAGS.model == 'gcn_cheby':
    support = chebyshev_polynomials(adj, FLAGS.max_degree)
    num_supports = 1 + FLAGS.max_degree
    model_func = GCN
elif FLAGS.model == 'dense':
    support = [preprocess_adj(adj)]  # Not used
    num_supports = 1
    model_func = MLP
else:
    raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

# Define placeholders
placeholders = {
import numpy as np
from tensorflow.python.keras.callbacks import ModelCheckpoint
from tensorflow.python.keras.optimizers import Adam
from tensorflow.python.keras.layers import Lambda
from tensorflow.python.keras.models import Model

from gcn import GCN
from utils import preprocess_adj, plot_embeddings, load_data_v1

if __name__ == "__main__":
    FEATURE_LESS = False

    A, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data_v1('cora')

    A = preprocess_adj(A)
    # row-normalize the features
    features /= features.sum(axis=1).reshape(-1, 1)

    if FEATURE_LESS:
        X = np.arange(A.shape[-1])
        feature_dim = A.shape[-1]
    else:
        X = features
        feature_dim = X.shape[-1]
    model_input = [X, A]

    # Compile model
    model = GCN(A.shape[-1], feature_dim, 16, y_train.shape[1],
                dropout_rate=0.5, l2_reg=2.5e-4, feature_less=FEATURE_LESS)
    model.compile(optimizer=Adam(0.01), loss='categorical_crossentropy',
                  weighted_metrics=['categorical_crossentropy', 'acc'])
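    # Hedged next step (added, not from the source): full-batch training, with
    # the boolean train_mask as per-node sample weights so that only training
    # nodes contribute to the loss and weighted metrics. NB_EPOCH is hypothetical.
    NB_EPOCH = 200
    model.fit(model_input, y_train,
              sample_weight=np.asarray(train_mask, dtype=np.float32),
              batch_size=A.shape[-1],  # the whole graph in one batch
              epochs=NB_EPOCH,
              shuffle=False,
              verbose=2)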
print("dropout:", FLAGS.dropout) sys.stdout.flush() print_args(FLAGS) # Load data adj, total_train_x,total_train_y,\ total_val_x,total_val_y,total_test_x,total_test_y,inputs_features = load_data(FLAGS.dataset,FLAGS.filepath) print(type(adj), adj.shape) print("total number of samples in train,val and test:", len(total_train_x), len(total_val_x), len(total_test_x)) sys.stdout.flush() support = preprocess_adj(adj, FLAGS.normalize) (init_indices, init_values, shape) = support model_func = CoupledGNN placeholders = { 'support_indices': tf.placeholder(tf.int64, shape=(None, 2)), 'Xs': tf.placeholder(tf.float32, shape=(None, adj.shape[0], 1)), 'y': tf.placeholder(tf.float32, shape=(None, adj.shape[0])), 'dropout': tf.placeholder_with_default(0., shape=()), } # Create model model = model_func(FLAGS, init_values, placeholders,