def load_data_del_edges(prob_del=0.4, seed=0, to_dense=True,
                        enforce_connected=True, dataset_name='cora'):
    """Load ``dataset_name`` and pass it through ``graph_delete_connections``.

    Everything returned by ``load_data(dataset_name)`` is unpacked and
    forwarded positionally after ``prob_del`` and ``seed``; ``to_dense`` and
    ``enforce_connected`` are forwarded as keyword arguments.

    Returns:
        The value returned by ``graph_delete_connections``, unchanged.
    """
    loaded = load_data(dataset_name)
    return graph_delete_connections(
        prob_del,
        seed,
        *loaded,
        to_dense=to_dense,
        enforce_connected=enforce_connected,
    )
def main(sample_name, epochs=200, get_probs=False):
    """Train a single-output GCN and run inference on the test split.

    Args:
        sample_name: Forwarded unchanged to ``gcn_inference``.
        epochs: Number of times ``train`` is called.
        get_probs: Forwarded to ``gcn_inference`` as ``get_probs``.

    Returns:
        Whatever ``gcn_inference`` returns.
    """
    # Training settings
    no_cuda = False
    seed = 42
    lr = 1e-2
    weight_decay = 1e-5
    hidden = 32
    dropout = 0.5

    cuda = not no_cuda and torch.cuda.is_available()

    # Seed every random number generator used below.
    np.random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        torch.cuda.manual_seed(seed)

    # Load data
    adj, features, labels, y_test, idx_train, idx_val, idx_test = load_data()

    # Model and optimizer
    model = GCN(nfeat=features.shape[1],
                nhid=hidden,
                nclass=1,
                dropout=dropout)
    optimizer = optim.Adam(model.parameters(),
                           lr=lr,
                           weight_decay=weight_decay)

    if cuda:
        model.cuda()
        features, adj, labels, idx_train, idx_val, idx_test, y_test = (
            tensor.cuda()
            for tensor in (features, adj, labels, idx_train, idx_val,
                           idx_test, y_test)
        )

    # Training model
    torch.set_grad_enabled(True)
    t_total = time.time()
    model.eval()
    print("------- Training GCN")
    last_epoch = epochs - 1
    for epoch in range(epochs):
        # The validation flag is only raised for the final epoch.
        valid = epoch == last_epoch
        train(model, optimizer, epoch, adj, features, labels, idx_train,
              idx_val, valid)
    print("Optimization Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

    # Testing
    return gcn_inference(sample_name, model, adj, features, y_test, idx_test,
                         get_probs=get_probs)
def loadGcnData(dataset):
    """Load ``dataset`` via ``utils.load_data`` and derive class labels.

    Returns:
        A 4-tuple ``(graph, toIGraph(graph), labels, n_communities)`` where
        ``labels`` is the per-row argmax of the label matrix and
        ``n_communities`` is the number of columns of that matrix.
    """
    graph, label_matrix = utils.load_data(dataset)
    print(f'\t\t {len(graph)} nodes')
    if DEBUG:
        print("Graph", graph, len(graph))
        print("y", np.shape(label_matrix))

    # Collapse each row of the label matrix to the index of its maximum.
    labels = np.argmax(label_matrix, axis=1)
    if DEBUG:
        print(label_matrix, labels)

    n_communities = label_matrix.shape[1]
    print(f'\t\t Ground Truth: {n_communities} communities')
    return graph, toIGraph(graph), labels, n_communities
def sample_pubmed(self):
    """Split a random permutation of the 'pubmed' nodes into sub-graphs.

    The adjacency is symmetrised and clipped to 0/1, the node order is
    shuffled with ``np.random.permutation``, and five consecutive slices of
    4000 indices are taken.  Each slice is restricted to the nodes selected
    by ``utils.largest_connected_components``.

    Returns:
        A list with one ``[adjacency (float32), features (csr_matrix),
        labels]`` entry per slice.
    """
    A, X, Y = load_data('pubmed')
    A = A + A.T
    A[A > 1] = 1

    nb_node = 4000
    nb_graph = 5
    nodes = np.random.permutation(A.shape[0])

    graphs = []
    for start in range(0, nb_node * nb_graph, nb_node):
        chunk = nodes[start:start + nb_node]
        sub_Y = Y[chunk]
        sub_A = A[chunk][:, chunk]
        sub_X = X[chunk]

        # Keep only the nodes picked by the connected-component helper.
        keep = utils.largest_connected_components(sub_A)
        sub_A = sub_A[keep][:, keep]
        sub_X = sub_X[keep]
        sub_Y = sub_Y[keep]

        print('Pubmed: num node {}, num edge {}'.format(len(list(keep)),
                                                        sub_A.sum()))
        graphs.append([sub_A.astype(np.float32), sp.csr_matrix(sub_X), sub_Y])
    return graphs
def load_data_test():
    """Return the result of ``gut.load_data()`` unchanged."""
    return gut.load_data()
help='Weight decay (L2 loss on parameters).') parser.add_argument('--hidden', type=int, default=16, help='Number of hidden units.') parser.add_argument('--dropout', type=float, default=0.5, help='Dropout rate (1 - keep probability).') args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() np.random.seed(args.seed) torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) # Load data adj, features, labels, idx_train, idx_val, idx_test = load_data() print(adj) # Model and optimizer model = GCN(nfeat=features.shape[1], nhid=args.hidden, nclass=labels.max().item() + 1, dropout=args.dropout) optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) if args.cuda: model.cuda() features = features.cuda() adj = adj.cuda() labels = labels.cuda()
# Parse the settings and decide whether CUDA is used.
args = Parser()
args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

kwargs = {}

# load data
remote_dataset = [[], []]

# Used for plotting: five training series and four validation series.
loss_train_list = [[] for _ in range(5)]
accuracy_train_list = [[] for _ in range(5)]
loss_val_list = [[] for _ in range(4)]
accuracy_val_list = [[] for _ in range(4)]

(adj, features, labels, idx_bob, idx_alice,
 idx_test, idx_val, idx_train) = load_data()

if args.cuda:
    (adj, features, labels, idx_bob, idx_alice,
     idx_test, idx_val, idx_train) = (
        tensor.cuda()
        for tensor in (adj, features, labels, idx_bob, idx_alice,
                       idx_test, idx_val, idx_train)
    )

# Send the graph, the features and bob's share of labels/indices to ``bob``.
bob_adj = adj.send(bob)
bob_features = features.send(bob)
bob_labels = labels[idx_bob].send(bob)
bob_idx = idx_bob.send(bob)
acc = [] acc_class = [] time_per_step = [] smoothing = [] total_time = [] for r in range(configuration['repeating']): # Set random seed seed = model_config['random_seed'] np.random.seed(seed) model_config[ 'random_seed'] = np.random.random_integers( 1073741824) # Load data adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = \ load_data(model_config['dataset'], train_size=model_config['train_size'], validation_size=model_config['validation_size'], model_config=model_config, shuffle=model_config['shuffle'], repeat_state=r) print('train size = ', model_config['train_size'], '\n', 'lam = ', la, '\n', model_config['epochs']) # Initialize session with tf.Graph().as_default(): tf.set_random_seed(seed) gpu_options = tf.GPUOptions( allow_growth=True) with tf.Session(config=tf.ConfigProto( intra_op_parallelism_threads= model_config['threads'], inter_op_parallelism_threads= 2, # model_config['threads'],
self.cost = norm * tf.reduce_mean( tf.nn.weighted_cross_entropy_with_logits( logits=preds_sub, targets=labels_sub, pos_weight=pos_weight)) self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) # Adam Optimizer self.opt_op = self.optimizer.minimize(self.cost) self.grads_vars = self.optimizer.compute_gradients(self.cost) # Given a training set of protein-protein interactions in yeast S. cerevisiae, our goal is to take these interactions # and train a GCN model that can predict new protein-protein interactions. That is, we would like to predict new # edges in the yeast protein interaction network. print("Start") # Check if regenerate_training_date is set to True: regenerate training/validation/test data adj, adj_train, val_edges, val_edges_false, test_edges, test_edges_false = load_data() num_nodes = adj.shape[0] num_edges = adj.sum() # # Simple GCN: no node features (featureless). Substitute the identity matrix for the feature matrix: X = I # features = sparse_to_tuple(sp.identity(num_nodes)) num_features = features[2][1] features_nonzero = features[1].shape[0] # # Store original adjacency matrix (without diagonal entries) for later # adj_orig = (adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape))
flags.DEFINE_integer("op", 1, "Training or Test") ############################### if_drop_edge = True if_save_model = False # if train the discriminator if_train_dis = True restore_trained_our = False showed_target_idx = 0 # the target index group of targets you want to show run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True) ################################### ### read and process the graph model_str = FLAGS.model dataset_str = FLAGS.dataset # Load data # _A_obs, _X_obs, _z_obs = utils.load_npz('data/citeseer.npz') adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data( "citeseer") # _A_obs = _A_obs + _A_obs.T #变GCN_ori as GCN # _A_obs[_A_obs > 1] = 1 # adj = _A_obs adj_norm, adj_norm_sparse = preprocess_graph(adj) #_K = _z_obs.max()+1 #类别个数 _K = y_train.shape[1] features_normlize = normalize(features.tocsr(), axis=0, norm='max') features = sp.csr_matrix(features_normlize) # adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj) # adj = adj_train if FLAGS.features == 0:
def train(model_config, sess, seed, repeat_state, data_split=None):
    """Prepare the data selected by ``model_config``, build a ``GCN_MLP`` and train it.

    Args:
        model_config: Dict of settings.  Keys read here include ``'name'``,
            ``'dataset'``, ``'Model'``, ``'conv'``, ``'connection'``,
            ``'epochs'``, ``'train'``, ``'validate'``, ``'early_stopping'``,
            ``'logdir'``, ``'dropout'`` and ``'noise_sigma'``.  The dict is
            mutated: ``'t'`` may be replaced by a computed array and
            ``'name'`` gets a ``_k<k>`` suffix for Model 12.
        sess: TensorFlow session used to initialise and run the model.
        seed: Seed passed to ``tf.set_random_seed`` for nested runs
            (Models 16 and 19).
        repeat_state: Forwarded to ``load_data`` and to nested ``train`` calls.
        data_split: Optional dict of already loaded data (``'adj'``,
            ``'features'``, ``'y_train'``, ``'y_val'``, ``'y_test'``,
            ``'train_mask'``, ``'val_mask'``, ``'test_mask'``, ``'triplet'``
            and, optionally, ``'size_of_each_class'`` / ``'stored_A'``).  When
            given, loading and smoothing are skipped.

    Returns:
        ``(test_acc, test_acc_of_class, prediction, size_of_each_class,
        elapsed_seconds)``.  Model 18 returns only the first three items.

    Raises:
        ValueError: On an unsupported ``model_config['Model']``,
            ``model_config['classifier']`` or ``model_config['conv']``.
    """
    # Print model_config
    very_begining = time.time()
    print('',
          'name : {}'.format(model_config['name']),
          'logdir : {}'.format(model_config['logdir']),
          'dataset : {}'.format(model_config['dataset']),
          'train_size : {}'.format(model_config['train_size']),
          'learning_rate : {}'.format(model_config['learning_rate']),
          'feature : {}'.format(model_config['feature']),
          'logging : {}'.format(model_config['logging']),
          sep='\n')

    if data_split:
        adj = data_split['adj']
        features = data_split['features']
        y_train = data_split['y_train']
        y_val = data_split['y_val']
        y_test = data_split['y_test']
        train_mask = data_split['train_mask']
        val_mask = data_split['val_mask']
        test_mask = data_split['test_mask']
        triplet = data_split['triplet']
        # These two names used to be bound only in the ``else`` branch, so a
        # caller supplying ``data_split`` hit a NameError further down.
        size_of_each_class = data_split.get('size_of_each_class')
        stored_A = data_split.get('stored_A', model_config['dataset'])
    else:
        # Load data
        adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, size_of_each_class, triplet = \
            load_data(model_config['dataset'],
                      train_size=model_config['train_size'],
                      validation_size=model_config['validation_size'],
                      model_config=model_config,
                      shuffle=model_config['shuffle'],
                      repeat_state=repeat_state)
        stored_A = model_config['dataset']
        if model_config['drop_inter_class_edge']:
            adj = drop_inter_class_edge(adj)
            stored_A = model_config['dataset'] + '_drop'
        # preprocess_features
        begin = time.time()
        features = smooth(features,
                          adj,
                          model_config['smoothing'],
                          model_config,
                          stored_A=stored_A + '_A_I')
        print(time.time() - begin, 's')
        data_split = {
            'adj': adj,
            'features': features,
            'y_train': y_train,
            'y_val': y_val,
            'y_test': y_test,
            'train_mask': train_mask,
            'val_mask': val_mask,
            'test_mask': test_mask,
            'triplet': triplet,
            # Carried along so nested ``train`` calls can reuse them.
            'size_of_each_class': size_of_each_class,
            'stored_A': stored_A,
        }

    # Degree matrix minus adjacency, stored as float32 COO.
    laplacian = sparse.diags(adj.sum(1).flat, 0) - adj
    laplacian = laplacian.astype(np.float32).tocoo()

    # A negative integer ``t`` requests a per-class value derived from the data.
    if isinstance(model_config['t'], int) and model_config['t'] < 0:
        eta = adj.shape[0] / (adj.sum() / adj.shape[0])**len(
            model_config['connection'])
        model_config['t'] = (y_train.sum(axis=0) * 3 * eta /
                             y_train.sum()).astype(np.int64)
        print('t=', model_config['t'])

    # origin_adj = adj
    if model_config['Model'] == 0:
        pass
    elif model_config['Model'] in [1, 2, 3, 4]:
        # absorption probability
        print('Calculating Absorption Probability...',
              # 's :{}'.format(model_config['s']),
              'alpha :{}'.format(model_config['alpha']),
              'type :{}'.format(model_config['absorption_type']),
              sep='\n')
        if model_config['Model'] == 1:
            adj = Model1(adj, model_config['t'], model_config['alpha'],
                         model_config['absorption_type'])
        elif model_config['Model'] == 2:
            adj = Model2(adj, model_config['s'], model_config['alpha'],
                         y_train)
        elif model_config['Model'] == 3:
            # original_y_train = y_train
            y_train, train_mask = Model3(adj, model_config['s'],
                                         model_config['alpha'], y_train,
                                         train_mask)
        elif model_config['Model'] == 4:
            y_train, train_mask = Model4(adj, model_config['s'],
                                         model_config['alpha'], y_train,
                                         train_mask)
    elif model_config['Model'] == 5:
        adj = Model5(features, adj, model_config['mu'])
    elif model_config['Model'] == 6:
        adj = Model6(adj)
    elif model_config['Model'] == 7:
        y_train, train_mask = Model7(adj, model_config['s'],
                                     model_config['alpha'], y_train,
                                     train_mask, features)
    elif model_config['Model'] == 8:
        # original_y_train = y_train
        y_train, train_mask = Model8(adj, model_config['s'],
                                     model_config['alpha'], y_train,
                                     train_mask)
    elif model_config['Model'] == 9:
        y_train, train_mask = Model9(adj,
                                     model_config['t'],
                                     model_config['alpha'],
                                     y_train,
                                     train_mask,
                                     stored_A=stored_A + '_A_I')
    elif model_config['Model'] == 10:
        y_train, train_mask = Model10(adj,
                                      model_config['s'],
                                      model_config['t'],
                                      model_config['alpha'],
                                      y_train,
                                      train_mask,
                                      features,
                                      stored_A=stored_A + '_A_H')
    elif model_config['Model'] == 11:
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 12:
        pass
    elif model_config['Model'] == 13:
        y_train, train_mask = Model9(adj,
                                     model_config['t'],
                                     model_config['alpha'],
                                     y_train,
                                     train_mask,
                                     stored_A=stored_A + '_A_I')
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 14:
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 15:
        y_train, train_mask = Model9(adj,
                                     model_config['t'],
                                     model_config['alpha'],
                                     y_train,
                                     train_mask,
                                     stored_A=stored_A + '_A_I')
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 16:
        with tf.Graph().as_default():
            with tf.Session(config=tf.ConfigProto(
                    intra_op_parallelism_threads=model_config['threads'])
                            ) as sub_sess:
                tf.set_random_seed(seed)
                # ``repeat_state`` is a required positional parameter and the
                # nested call returns more than three items, so pass it
                # through and keep only the first three results.
                sub_result = train(model_config['Model_to_add_label'],
                                   sub_sess,
                                   seed,
                                   repeat_state,
                                   data_split=data_split)
                test_acc, test_acc_of_class, prediction = sub_result[:3]
        y_train, train_mask = Model16(prediction, model_config['t'], y_train,
                                      train_mask)
        # From here on the settings of the predicting model are used.
        model_config = model_config['Model_to_predict']
        print('',
              'name : {}'.format(model_config['name']),
              'logdir : {}'.format(model_config['logdir']),
              'dataset : {}'.format(model_config['dataset']),
              'train_size : {}'.format(model_config['train_size']),
              'learning_rate : {}'.format(model_config['learning_rate']),
              'feature : {}'.format(model_config['feature']),
              'logging : {}'.format(model_config['logging']),
              sep='\n')
    elif model_config['Model'] == 17:
        if model_config['smoothing'] is not None:
            stored_A = None
            adj = construct_knn_graph(features, model_config['k'])
        else:
            stored_A = stored_A + '_A_I'
        if model_config['drop_inter_class_edge']:
            stored_A = None
        test_acc, test_acc_of_class, prediction = Model17(
            adj,
            model_config['alpha'],
            y_train,
            train_mask,
            y_test,
            stored_A=stored_A)
        print("Test set results: accuracy= {:.5f}".format(test_acc))
        print("accuracy of each class=", test_acc_of_class)
        print("Total time={}s".format(time.time() - very_begining))
        return test_acc, test_acc_of_class, prediction, size_of_each_class, time.time(
        ) - very_begining
    elif model_config['Model'] == 18:
        y_train, train_mask = Model9(adj,
                                     model_config['t'],
                                     model_config['alpha'],
                                     y_train,
                                     train_mask,
                                     stored_A=stored_A + '_A_I')
        alpha = 1e-6
        test_acc, test_acc_of_class, prediction = Model17(
            adj,
            alpha,
            y_train,
            train_mask,
            y_test,
            stored_A=stored_A + '_A_I')
        print("Test set results: accuracy= {:.5f}".format(test_acc))
        print("accuracy of each class=", test_acc_of_class)
        # NOTE(review): this branch returns three items while every other
        # exit returns five; left unchanged in case callers rely on it.
        return test_acc, test_acc_of_class, prediction
    elif model_config['Model'] == 19:
        with tf.Graph().as_default():
            with tf.Session(config=tf.ConfigProto(
                    intra_op_parallelism_threads=model_config['threads'])
                            ) as sub_sess:
                tf.set_random_seed(seed)
                # Same call fix as for Model 16.
                sub_result = train(model_config['Model_to_add_label'],
                                   sub_sess,
                                   seed,
                                   repeat_state,
                                   data_split=data_split)
                test_acc, test_acc_of_class, prediction = sub_result[:3]
        stored_A = stored_A + '_A_I'
        # print(time.time()-very_begining)
        y_train, train_mask = Model19(prediction, model_config['t'], y_train,
                                      train_mask, adj, model_config['alpha'],
                                      stored_A, model_config['Model19'])
        # print(time.time()-very_begining)
        model_config = model_config['Model_to_predict']
        print('',
              'name : {}'.format(model_config['name']),
              'logdir : {}'.format(model_config['logdir']),
              'dataset : {}'.format(model_config['dataset']),
              'train_size : {}'.format(model_config['train_size']),
              'learning_rate : {}'.format(model_config['learning_rate']),
              'feature : {}'.format(model_config['feature']),
              'logging : {}'.format(model_config['logging']),
              sep='\n')
    elif model_config['Model'] == 20:
        pass
    elif model_config['Model'] == 21:
        pass
    elif model_config['Model'] == 22:
        alpha = model_config['alpha']
        stored_A = stored_A + '_A_I'
        features = Model22(adj, features, alpha, stored_A)
    elif model_config['Model'] == 23:
        # Non-GCN baselines trained directly on the (smoothed) features.
        if model_config['classifier'] == 'tree':
            clf = tree.DecisionTreeClassifier(
                max_depth=model_config['tree_depth'])
            t = time.time()
            clf.fit(features[train_mask],
                    np.argmax(y_train[train_mask], axis=1))
            t = time.time() - t
            prediction = clf.predict(features[test_mask])
        elif model_config['classifier'] == 'svm':
            clf = svm.SVC(
            )  # kernel='rbf', gamma=model_config['gamma'], class_weight='balanced', degree=model_config['svm_degree'])
            t = time.time()
            clf.fit(features[train_mask],
                    np.argmax(y_train[train_mask], axis=1))
            t = time.time() - t
            prediction = clf.predict(features[test_mask])
        elif model_config['classifier'] == 'cnn':
            prediction, t = cnn.train(model_config, features, train_mask,
                                      y_train, test_mask, y_test)
        else:
            # Message now lists 'cnn', which is accepted above.
            raise ValueError(
                "model_config['classifier'] should be in ['svm', 'tree', 'cnn']")
        test_acc = np.sum(prediction == np.argmax(
            y_test[test_mask], axis=1)) / np.sum(test_mask)
        # test_acc = test_acc[0]
        one_hot_prediction = np.zeros(y_test[test_mask].shape)
        one_hot_prediction[np.arange(one_hot_prediction.shape[0]),
                           prediction] = 1
        test_acc_of_class = np.sum(
            one_hot_prediction * y_test[test_mask], axis=0) / np.sum(
                y_test[test_mask], axis=0)  # TODO
        print("Test set results: cost= {:.5f} accuracy= {:.5f} time= {:.5f}".
              format(0., test_acc, 0.))
        print("accuracy of each class=", test_acc_of_class)
        print("Total time={}s".format(time.time() - very_begining))
        return test_acc, test_acc_of_class, prediction, size_of_each_class, t
    elif model_config['Model'] == 26:
        adj = Model26(adj,
                      model_config['t'],
                      model_config['alpha'],
                      y_train,
                      train_mask,
                      stored_A=stored_A + '_A_I')
    elif model_config['Model'] == 28:
        features = Model28(adj, features, stored_A, model_config['k'])
    else:
        # Message now reflects the values handled by the chain above.
        raise ValueError(
            "model_config['Model'] must be one of 0-23, 26 or 28, "
            "but is {} now".format(model_config['Model']))

    # Some preprocessing
    # When every layer is 'f', only the masked rows are fed to the model.
    is_mlp = model_config['connection'] == [
        'f' for i in range(len(model_config['connection']))
    ]
    if is_mlp:
        train_features = features[train_mask]
        val_features = features[val_mask]
        test_features = features[test_mask]
    else:
        train_features = features
        val_features = features
        test_features = features
    if sparse.issparse(features):
        train_features = sparse_to_tuple(train_features)
        val_features = sparse_to_tuple(val_features)
        test_features = sparse_to_tuple(test_features)
        features = sparse_to_tuple(features)

    if model_config['Model'] == 12:
        if model_config['k'] < 0:
            # Derive k from the labelled fraction ``eta``.
            if hasattr(model_config['train_size'], '__getitem__'):
                eta = sum(model_config['train_size'])
                eta /= adj.shape[0]
            else:
                eta = model_config['train_size'] / 100
            k = (1 / eta)**(1 / len(model_config['connection']))
            k = int(k)
        else:
            k = model_config['k']
        model_config['name'] += '_k{}'.format(k)
        support = Model12(adj, k)
        num_supports = len(support)
    elif model_config['conv'] == 'taubin':
        support = [
            sparse_to_tuple(
                taubin_smoothor(adj, model_config['taubin_lambda'],
                                model_config['taubin_mu'],
                                model_config['taubin_repeat']))
        ]
        num_supports = 1
    elif model_config['conv'] == 'test21':
        support = [
            sparse_to_tuple(
                Test21(adj,
                       model_config['alpha'],
                       beta=model_config['beta'],
                       stored_A=stored_A + '_A_I'))
        ]
        num_supports = 1
    elif model_config['conv'] == 'gcn':
        support = [preprocess_adj(adj)]
        num_supports = 1
    elif model_config['conv'] == 'gcn_unnorm':
        support = [sparse_to_tuple(adj.astype(np.float32))]
        num_supports = 1
    elif model_config['conv'] == 'gcn_noloop':
        support = [preprocess_adj(adj, loop=False)]
        num_supports = 1
    elif model_config['conv'] == 'gcn_rw':
        support = [preprocess_adj(adj, type='rw')]
        num_supports = 1
    elif model_config['conv'] in ['cheby', 'chebytheta']:
        # origin_adj_support = chebyshev_polynomials(origin_adj, model_config['max_degree'])
        support = chebyshev_polynomials(adj, model_config['max_degree'])
        num_supports = 1 + model_config['max_degree']
    else:
        raise ValueError('Invalid argument for model_config["conv"]: ' +
                         str(model_config['conv']))

    # Define placeholders
    placeholders = {
        'support': [
            tf.sparse_placeholder(tf.float32, name='support' + str(i))
            for i in range(num_supports)
        ],
        'features':
        tf.sparse_placeholder(tf.float32, name='features') if isinstance(
            features, tf.SparseTensorValue) else tf.placeholder(
                tf.float32, shape=[None, features.shape[1]], name='features'),
        'labels':
        tf.placeholder(tf.int32, name='labels',
                       shape=(None, y_train.shape[1])),
        'labels_mask':
        tf.placeholder(tf.int32, name='labels_mask'),
        'dropout':
        tf.placeholder_with_default(0., name='dropout', shape=()),
        'num_features_nonzero':
        tf.placeholder(tf.int32, name='num_features_nonzero'),
        # helper variable for sparse dropout
        'laplacian':
        tf.SparseTensor(indices=np.vstack([laplacian.row,
                                           laplacian.col]).transpose(),
                        values=laplacian.data,
                        dense_shape=laplacian.shape),
        'triplet':
        tf.placeholder(tf.int32, name='triplet', shape=(None, None)),
        'noise_sigma':
        tf.placeholder(tf.float32, name='noise_sigma'),
        'noise':
        tf.sparse_placeholder(tf.float32, name='features') if isinstance(
            features, tf.SparseTensorValue) else tf.placeholder(
                tf.float32, shape=[None, features.shape[1]], name='features')
    }
    if model_config['Model'] in [11, 13, 14, 15]:
        placeholders['label_per_sample'] = tf.placeholder(
            tf.float32,
            name='label_per_sample',
            shape=(None, label_per_sample.shape[1]))
        placeholders['sample2label'] = tf.placeholder(
            tf.float32,
            name='sample2label',
            shape=(label_per_sample.shape[1], y_train.shape[1]))

    # Create model
    model = GCN_MLP(model_config,
                    placeholders,
                    input_dim=train_features[2][1])

    # Random initialize
    sess.run(tf.global_variables_initializer())

    # Initialize FileWriter, saver & variables in graph
    # NOTE(review): these stay ``None`` for the whole function, yet the
    # ``model_config['logdir']`` code paths below call methods on them (and
    # read ``valid_summary`` before it is assigned) -- the logdir feature
    # looks unfinished; confirm before enabling it.
    train_writer = None
    valid_writer = None
    saver = None

    # Construct feed dictionary
    if is_mlp:
        # ``bool`` replaces the removed ``np.bool`` alias (same dtype).
        train_feed_dict = construct_feed_dict(
            train_features, support, y_train[train_mask],
            np.ones(train_mask.sum(), dtype=bool), triplet,
            model_config['noise_sigma'], placeholders)
        train_feed_dict.update(
            {placeholders['dropout']: model_config['dropout']})
        valid_feed_dict = construct_feed_dict(
            val_features, support, y_val[val_mask],
            np.ones(val_mask.sum(), dtype=bool), triplet, 0, placeholders)
        test_feed_dict = construct_feed_dict(
            test_features, support, y_test[test_mask],
            np.ones(test_mask.sum(), dtype=bool), triplet, 0, placeholders)
    else:
        train_feed_dict = construct_feed_dict(train_features, support, y_train,
                                              train_mask, triplet,
                                              model_config['noise_sigma'],
                                              placeholders)
        train_feed_dict.update(
            {placeholders['dropout']: model_config['dropout']})
        valid_feed_dict = construct_feed_dict(val_features, support, y_val,
                                              val_mask, triplet, 0,
                                              placeholders)
        test_feed_dict = construct_feed_dict(test_features, support, y_test,
                                             test_mask, triplet, 0,
                                             placeholders)

    if model_config['Model'] in [11, 13, 14, 15]:
        for feed_dict in (train_feed_dict, valid_feed_dict, test_feed_dict):
            feed_dict.update(
                {placeholders['label_per_sample']: label_per_sample})
            feed_dict.update({placeholders['sample2label']: sample2label})

    # tmp = sess.run([model.prediction, model.sample2label], feed_dict=test_feed_dict)

    # Some support variables
    valid_loss_list = []
    max_valid_acc = 0
    max_train_acc = 0
    # Evaluate once before training so the result variables are always bound.
    t_test = time.time()
    test_cost, test_acc, test_acc_of_class, prediction = sess.run(
        [
            model.loss, model.accuracy, model.accuracy_of_class,
            model.prediction
        ],
        feed_dict=test_feed_dict)
    test_duration = time.time() - t_test
    timer = 0
    begin = time.time()

    # print(time.time() - very_begining)
    if model_config['train']:
        # Train model
        print('training...')
        for step in range(model_config['epochs']):
            if model_config['Model'] in [
                    20, 21
            ] and step == model_config['epochs'] / 2:
                # Half-way through, relabel the training set from the current
                # predictions and restart the best-accuracy tracking.
                stored_A = stored_A + '_A_I'
                y_train, train_mask = Model20(prediction, model_config['t'],
                                              y_train, train_mask, adj,
                                              model_config['alpha'], stored_A)
                if model_config['Model'] == 21:
                    y_train, train_mask = Model16(prediction,
                                                  model_config['t2'], y_train,
                                                  train_mask)
                # ``triplet`` was missing here although every other
                # ``construct_feed_dict`` call passes it in this position.
                train_feed_dict = construct_feed_dict(
                    features, support, y_train, train_mask, triplet,
                    model_config['noise_sigma'], placeholders)
                train_feed_dict.update(
                    {placeholders['dropout']: model_config['dropout']})
                max_valid_acc = 0
                max_train_acc = 0

            # Training step
            if model_config['logdir'] and step % 100 == 0:
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                t = time.time()
                sess.run(model.opt_op,
                         feed_dict=train_feed_dict,
                         options=run_options,
                         run_metadata=run_metadata)
                t = time.time() - t
                train_writer.add_run_metadata(run_metadata, 'step%d' % step)
                # Create the Timeline object, and write it to a json
                with open(path.join(model_config['logdir'], 'timeline.json'),
                          'w') as f:
                    f.write(
                        timeline.Timeline(run_metadata.step_stats).
                        generate_chrome_trace_format())
            else:
                t = time.time()
                # Feed the training features with fresh Gaussian noise added.
                if isinstance(train_features, tf.SparseTensorValue):
                    train_feed_dict.update({
                        placeholders['features']:
                        tf.SparseTensorValue(
                            train_features.indices, train_features.values +
                            np.random.normal(0, model_config['noise_sigma'],
                                             train_features.indices.shape[0]),
                            train_features.dense_shape)
                    })
                else:
                    train_feed_dict.update({
                        placeholders['features']:
                        train_features +
                        np.random.normal(0, model_config['noise_sigma'],
                                         train_features.shape)
                    })
                sess.run(model.opt_op, feed_dict=train_feed_dict)
                t = time.time() - t
            timer += t
            train_loss, train_acc, train_summary = sess.run(
                [model.loss, model.accuracy, model.summary],
                feed_dict=train_feed_dict)

            # Logging
            if model_config['logdir']:
                global_step = model.global_step.eval(session=sess)
                train_writer.add_summary(train_summary, global_step)
                valid_writer.add_summary(valid_summary, global_step)

            # If it's best performance so far, evaluate on test set
            if model_config['validate']:
                valid_loss, valid_acc, valid_summary = sess.run(
                    [model.loss, model.accuracy, model.summary],
                    feed_dict=valid_feed_dict)
                valid_loss_list.append(valid_loss)
                if valid_acc >= max_valid_acc:
                    max_valid_acc = valid_acc
                    t_test = time.time()
                    test_cost, test_acc, test_acc_of_class = sess.run(
                        [model.loss, model.accuracy, model.accuracy_of_class],
                        feed_dict=test_feed_dict)
                    test_duration = time.time() - t_test
                    prediction = sess.run(model.prediction, train_feed_dict)
                    # ``args`` is not a parameter; it is expected to exist at
                    # module level.
                    if args.verbose:
                        print('*', end='')
            else:
                if train_acc >= max_train_acc:
                    max_train_acc = train_acc
                    t_test = time.time()
                    test_cost, test_acc, test_acc_of_class = sess.run(
                        [model.loss, model.accuracy, model.accuracy_of_class],
                        feed_dict=test_feed_dict)
                    test_duration = time.time() - t_test
                    prediction = sess.run(model.prediction, train_feed_dict)
                    if args.verbose:
                        print('*', end='')

            # Print results
            if args.verbose:
                print("Epoch: {:04d}".format(step),
                      "train_loss= {:.3f}".format(train_loss),
                      "train_acc= {:.3f}".format(train_acc),
                      end=' ')
                if model_config['validate']:
                    print("val_loss=",
                          "{:.3f}".format(valid_loss),
                          "val_acc= {:.3f}".format(valid_acc),
                          end=' ')
                print("time=", "{:.5f}".format(t))

            # Early stopping compares the latest validation loss with the mean
            # of the preceding window, so it needs validation to be enabled;
            # without the guard an empty list raised IndexError.
            if model_config['validate'] \
                    and 0 < model_config['early_stopping'] < step \
                    and valid_loss_list[-1] > np.mean(
                        valid_loss_list[-(model_config['early_stopping'] + 1):-1]):
                print("Early stopping...")
                break
        else:
            # Reached only when the loop was not left through ``break``.
            print("Optimization Finished!")

    # Testing
    print("Test set results:", "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc), "time=",
          "{:.5f}".format(test_duration))
    print("accuracy of each class=", test_acc_of_class)

    # Saving
    if model_config['logdir']:
        print('Save model to "{:s}"'.format(
            saver.save(sess=sess,
                       save_path=path.join(model_config['logdir'],
                                           'model.ckpt'),
                       global_step=global_step)))
    print("Total time={}s".format(time.time() - very_begining))
    return test_acc, test_acc_of_class, prediction, size_of_each_class, time.time(
    ) - begin
# In[2]:

from gcn.utils import load_data
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import networkx as nx
from scipy.sparse import coo_matrix
from scipy.sparse import csgraph
from tqdm import tqdm
from sklearn.svm import SVC

# In[3]:

# Load the 'cora' split and turn the feature matrix into a dense array.
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(
    'cora')
features = features.A

# In[4]:


def one_hot_to_cat(X):
    """Return, for every row of ``X``, the index of its largest entry.

    Args:
        X: Array-like of shape ``(n_samples, n_classes)``.

    Returns:
        1-D integer array of length ``n_samples``.
    """
    # One vectorised call instead of a Python-level lambda per row.
    return np.argmax(X, axis=1)


# In[34]:

# Linear SVM with a one-vs-rest decision function.
model = SVC(decision_function_shape='ovr', C=1, kernel='linear')
def train(model_config, sess, repeat_state):
    """Build, optionally train, and evaluate the model described by a config.

    Args:
        model_config: dict of settings. Keys read here: 'name', 'dataset',
            'train_size', 'validation_size', 'shuffle', 'Model', 'alpha',
            'smooth_config', 'connection', 'dropout', 'train', 'epochs',
            'validate' and 'logdir'. The whole dict is also forwarded to
            ``load_data`` and ``IGCN``.
        sess: session object used for every ``sess.run`` call (presumably a
            ``tf.Session`` -- confirm against the caller).
        repeat_state: forwarded unchanged to ``load_data``.

    Returns:
        A 5-tuple. For ``model_config['Model'] == 'LP'`` it is
        ``(test_acc, test_acc_of_class, 0, train_time, train_time)``.
        Otherwise it is ``(test_acc, test_acc_of_class, ms_per_epoch,
        smoothing_time, train_time + smoothing_time)``, where ``ms_per_epoch``
        is the mean wall-clock time of one optimisation step in milliseconds
        (0.0 when no epoch was run).
    """
    # Print model_name
    start_time = time.time()
    print('',
          'name : {}'.format(model_config['name']),
          'dataset : {}'.format(model_config['dataset']),
          sep='\n')

    # Load data
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = \
        load_data(model_config['dataset'],
                  train_size=model_config['train_size'],
                  validation_size=model_config['validation_size'],
                  model_config=model_config,
                  shuffle=model_config['shuffle'],
                  repeat_state=repeat_state)

    # The 'LP' model is evaluated directly by Model17; no graph is built.
    if model_config['Model'] == 'LP':
        train_time = time.time()
        test_acc, test_acc_of_class = Model17(adj, model_config['alpha'],
                                              y_train, y_test)
        train_time = time.time() - train_time
        print("Test set results: accuracy= {:.5f}".format(test_acc))
        print("Total time={}s".format(time.time() - start_time))
        return test_acc, test_acc_of_class, 0, train_time, train_time

    # A connection list made only of 'f' entries selects the MLP fast path.
    # Computed once; the same comparison was previously evaluated twice.
    connection = model_config['connection']
    is_mlp = connection == ['f'] * len(connection)

    # preprocess_features
    if model_config['smooth_config']['type'] is not None:
        if is_mlp:
            # Only rows selected by one of the three masks are smoothed; the
            # remaining rows of the feature matrix stay zero.
            fetch = train_mask + val_mask + test_mask
            new_features = np.zeros(features.shape, dtype=features.dtype)
            new_features[fetch], smoothing_time = graphconv(
                features, adj, model_config['smooth_config'], fetch=fetch)
            features = new_features
        else:
            features, smoothing_time = graphconv(
                features, adj, model_config['smooth_config'])
    else:
        smoothing_time = 0

    support = [preprocess_adj(adj)]
    num_supports = 1

    # Speed up for MLP: feed only the rows of each split.
    if is_mlp:
        train_features = features[train_mask]
        y_train = y_train[train_mask]
        y_train = y_train.astype(np.int32)
        val_features = features[val_mask]
        test_features = features[test_mask]
        labels_mask = np.ones(train_mask.sum(), dtype=np.int32)
    else:
        train_features = features
        val_features = features
        test_features = features
        labels_mask = train_mask.astype(np.int32)
        y_train = y_train.astype(np.int32)

    input_dim = features.shape[1]
    if sparse.issparse(features):
        train_features = sparse_to_tuple(train_features)
        val_features = sparse_to_tuple(val_features)
        test_features = sparse_to_tuple(test_features)
        features = sparse_to_tuple(features)

    # Define placeholders
    placeholders = {
        'labels': tf.placeholder_with_default(
            y_train, name='labels', shape=(None, y_train.shape[1])),
        'labels_mask': tf.placeholder_with_default(
            labels_mask, shape=(None), name='labels_mask'),
        'dropout': tf.placeholder_with_default(0., name='dropout', shape=()),
        'adj_nnz': tf.placeholder_with_default(
            support[0].values.shape, shape=(1), name='adj_nnz'),
    }
    if not is_mlp:
        placeholders['support'] = [
            tf.sparse_placeholder(tf.float32, name='support' + str(i))
            for i in range(num_supports)
        ]
    if isinstance(train_features, tf.SparseTensorValue):
        placeholders['num_features_nonzero'] = tf.placeholder_with_default(
            train_features[1].shape, shape=(1), name='num_features_nonzero')
        placeholders['features'] = tf.sparse_placeholder(tf.float32,
                                                         name='features')
    else:
        placeholders['num_features_nonzero'] = tf.placeholder_with_default(
            [0], shape=(1), name='num_features_nonzero')
        placeholders['features'] = tf.placeholder_with_default(
            train_features, shape=[None, features.shape[1]], name='features')

    # Create model
    model = IGCN(model_config, placeholders, input_dim=input_dim)

    # Random initialize
    sess.run(tf.global_variables_initializer())

    # Saver used to write the checkpoint at the end of the run.
    saver = tf.train.Saver()

    # Construct feed dictionary
    if is_mlp:
        if isinstance(features, tf.SparseTensorValue):
            train_feed_dict = {
                placeholders['features']: train_features,
                placeholders['dropout']: model_config['dropout'],
            }
        else:
            # Dense training features are the default value of the
            # 'features' placeholder, so only dropout has to be fed.
            train_feed_dict = {
                placeholders['dropout']: model_config['dropout']
            }
        # Builtin bool replaces the np.bool alias, which NumPy 1.24 removed.
        valid_feed_dict = construct_feed_dict(
            val_features, support, y_val[val_mask],
            np.ones(val_mask.sum(), dtype=bool), 0, placeholders)
        test_feed_dict = construct_feed_dict(
            test_features, support, y_test[test_mask],
            np.ones(test_mask.sum(), dtype=bool), 0, placeholders)
    else:
        train_feed_dict = construct_feed_dict(
            train_features, support, y_train, train_mask,
            model_config['dropout'], placeholders)
        valid_feed_dict = construct_feed_dict(
            val_features, support, y_val, val_mask, 0, placeholders)
        test_feed_dict = construct_feed_dict(
            test_features, support, y_test, test_mask, 0, placeholders)

    # Some support variables
    max_valid_acc = 0
    min_train_loss = 1000000

    # Baseline metrics before any optimisation step. `model.assign_data` is
    # run with a feed dict and the metrics are then fetched without one;
    # presumably it stores the fed split inside the model -- confirm in IGCN.
    t_test = time.time()
    sess.run(model.assign_data, feed_dict=test_feed_dict)
    test_cost, test_acc, test_acc_of_class = sess.run(
        [model.cross_entropy_loss, model.accuracy, model.accuracy_of_class])
    sess.run(model.assign_data, feed_dict=train_feed_dict)
    valid_loss, valid_acc, _ = sess.run(
        [model.cross_entropy_loss, model.accuracy, model.summary],
        feed_dict=valid_feed_dict)
    test_duration = time.time() - t_test

    train_time = 0
    epochs_run = 0  # number of optimisation steps actually executed
    if model_config['train']:
        # Train model
        print('training...')
        for step in range(model_config['epochs']):
            # Training step
            t = time.time()
            sess.run(model.opt_op)
            t = time.time() - t
            train_time += t
            epochs_run = step + 1
            train_loss, train_acc = sess.run(
                [model.cross_entropy_loss, model.accuracy])

            # Evaluate only every 20th step and during the last ~10% of
            # the epochs.
            if step > model_config['epochs'] * 0.9 or step % 20 == 0:
                # If it's the best performance so far, evaluate on test set
                if model_config['validate']:
                    sess.run(model.assign_data, feed_dict=valid_feed_dict)
                    valid_loss, valid_acc = sess.run(
                        [model.cross_entropy_loss, model.accuracy])
                    if valid_acc >= max_valid_acc:
                        max_valid_acc = valid_acc
                        t_test = time.time()
                        sess.run(model.assign_data, feed_dict=test_feed_dict)
                        test_cost, test_acc, test_acc_of_class = sess.run(
                            [model.cross_entropy_loss, model.accuracy,
                             model.accuracy_of_class])
                        test_duration = time.time() - t_test
                        if args.verbose:
                            print('*', end='')
                else:
                    if train_loss < min_train_loss:
                        min_train_loss = train_loss
                        t_test = time.time()
                        sess.run(model.assign_data, feed_dict=test_feed_dict)
                        test_cost, test_acc, test_acc_of_class = sess.run(
                            [model.cross_entropy_loss, model.accuracy,
                             model.accuracy_of_class])
                        test_duration = time.time() - t_test
                        if args.verbose:
                            print('*', end='')
                # Switch back to the training split before the next step.
                sess.run(model.assign_data, feed_dict=train_feed_dict)

            # Print results
            if args.verbose:
                print("Epoch: {:04d}".format(step),
                      "train_loss= {:.3f}".format(train_loss),
                      "train_acc= {:.3f}".format(train_acc),
                      end=' ')
                if model_config['validate']:
                    print("val_loss=", "{:.3f}".format(valid_loss),
                          "val_acc= {:.3f}".format(valid_acc),
                          end=' ')
                else:
                    print("test_loss=", "{:.3f}".format(test_cost),
                          "test_acc= {:.3f}".format(test_acc),
                          end=' ')
                print("time=", "{:.5f}".format(t))

    print("Test set results:",
          "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc),
          "time=", "{:.5f}".format(test_duration))

    # Saving
    if model_config['logdir']:
        print('Save model to "{:s}"'.format(
            saver.save(sess=sess,
                       save_path=path.join(model_config['logdir'],
                                           'model.ckpt'))))
    print("Total time={}s".format(time.time() - start_time))

    # Average over the number of executed steps. Dividing by the last loop
    # index was off by one and raised ZeroDivisionError for a single epoch.
    ms_per_epoch = train_time / epochs_run * 1000 if epochs_run else 0.0
    return (test_acc, test_acc_of_class, ms_per_epoch, smoothing_time,
            train_time + smoothing_time)
# Hyper-parameter flags. The trailing numeric comments are kept from the
# original source (presumably earlier default values -- confirm).
flags.DEFINE_float('learning_rate', 0.1, 'Initial learning rate.')  # 0.01
flags.DEFINE_integer('epochs', 50, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 16, 'Number of units in hidden layer 1.')  # 16
flags.DEFINE_integer('hidden2', 16, 'Number of units in hidden layer 2.')  # 16
flags.DEFINE_integer('hidden3', 16, 'Number of units in hidden layer 3.')  # 16
flags.DEFINE_integer('hidden4', 16, 'Number of units in hidden layer 4.')  # 16
flags.DEFINE_float('dropout', 0.01, 'Dropout rate (1 - keep probability).')  # 0.5
flags.DEFINE_float('weight_decay', 5e-4,
                   'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('early_stopping', 20,
                     'Tolerance for early stopping (# of epochs).')
flags.DEFINE_integer('max_degree', 3, 'Maximum Chebyshev polynomial degree.')

# Load data
(adj, features, y_train, y_val, y_test,
 train_mask, val_mask, test_mask) = utils.load_data(FLAGS.dataset)

# Some preprocessing
features = preprocess_features(features)

# Choose the support matrices and the model class from FLAGS.model.
if FLAGS.model in ('gcn', 'dense'):
    # Both variants build the same single support; the original source
    # marks it as "Not used" for the 'dense' model.
    support = [preprocess_adj(adj)]
    num_supports = 1
    model_func = models.MLP if FLAGS.model == 'dense' else models.GCN
elif FLAGS.model == 'gcn_cheby':
    support = chebyshev_polynomials(adj, FLAGS.max_degree)
    num_supports = FLAGS.max_degree + 1
    model_func = models.GCN
# parser.add_argument('--path', type=str, default='../data/cora/', help='data path') parser.add_argument('--dataset', type=str, default='cora', help='dataset name') parser.add_argument('--sub_dataset', type=str, default='', help='dataset name') opt = parser.parse_args() opt.cuda = not opt.no_cuda and torch.cuda.is_available() np.random.seed(opt.seed) torch.manual_seed(opt.seed) if opt.cuda: torch.cuda.manual_seed(opt.seed) # Download data download_data(opt.dataset) # Load data adj, features, labels, idx_train, idx_val, idx_test = load_data(opt.dataset,opt.sub_dataset) # Model and optimizer model = GCN(nfeat=features.shape[1], nhid=opt.hidden, nclass=labels.max().item() + 1, dropout=opt.dropout) optimizer = optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay) if opt.cuda: model.cuda() features = features.cuda() adj = adj.cuda() labels = labels.cuda() idx_train = idx_train.cuda() idx_val = idx_val.cuda()