def benchmark_task_val(args):
    """Train a soft-pooling GCN encoder once per validation fold (ten folds).

    Every node's ``'label'`` attribute is copied into its ``'feat'``
    attribute (as a NumPy array) before the folds are prepared.

    Args:
        args: Run options. Reads ``datadir``, ``bmname``, ``max_nodes``,
            ``hidden_dim``, ``output_dim``, ``num_classes``,
            ``num_gc_layers``, ``assign_ratio``, ``num_pool``, ``bn``,
            ``dropout`` and ``linkpred``; the whole object is also passed
            on to ``cross_val.prepare_val_data``, the encoder and
            ``train``.
    """
    dataset = load_data.read_graphfile(
        args.datadir, args.bmname, max_nodes=args.max_nodes)

    print('Using node labels')
    for graph in dataset:
        for node in graph.nodes():
            attrs = util.node_dict(graph)[node]
            attrs['feat'] = np.array(attrs['label'])

    for fold in range(10):
        fold_data = cross_val.prepare_val_data(
            dataset, args, fold, max_nodes=args.max_nodes)
        (train_dataset, val_dataset, max_num_nodes,
         input_dim, assign_input_dim) = fold_data

        model = encoders.SoftPoolingGcnEncoder(
            max_num_nodes,
            input_dim,
            args.hidden_dim,
            args.output_dim,
            args.num_classes,
            args.num_gc_layers,
            args.hidden_dim,
            assign_ratio=args.assign_ratio,
            num_pooling=args.num_pool,
            bn=args.bn,
            dropout=args.dropout,
            linkpred=args.linkpred,
            args=args,
            assign_input_dim=assign_input_dim,
        )
        train(fold, train_dataset, model, args, val_dataset)
def benchmark_task_val(args, writer=None, feat='node-label'):
    """Run ten validation folds with the AMG pooling encoder.

    For each fold index 0..9 the data is split with
    ``cross_val.prepare_val_data``, a fresh ``AmgPoolingGcnEncoder`` is
    trained, and the ``val_accs`` sequence returned by ``train`` is
    collected.  The element-wise mean over folds is printed together with
    its maximum and the index of that maximum.

    Args:
        args: Run options (``datadir``, ``bmname``, ``max_nodes``,
            ``method``, ``input_dim``, encoder hyper-parameters, ...).
        writer: Passed through to ``train`` unchanged.
        feat: Node feature source.  ``'node-feat'`` keeps the features
            already on the graphs (when ``'feat_dim'`` is present),
            ``'node-label'`` copies each node's ``'label'`` into
            ``'feat'``; anything else falls back to constant features of
            size ``args.input_dim``.

    Raises:
        ValueError: If ``args.method`` is not ``'amg-assign'``, the only
            encoder this function knows how to build.
    """
    # Fail fast: previously an unsupported method left ``model`` unbound
    # and crashed only after the dataset had been loaded and split.
    if args.method != 'amg-assign':
        raise ValueError(
            "Unsupported method {!r}; only 'amg-assign' is implemented"
            .format(args.method))

    all_vals = []
    graphs = load_data.read_graphfile(
        args.datadir, args.bmname, max_nodes=args.max_nodes)
    example_node = util.node_dict(graphs[0])[0]

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        # Features are already attached; the input dimension is taken from
        # prepare_val_data below.
        print('Using node features')
    elif feat == 'node-label' and 'label' in example_node:
        print('Using node labels')
        for G in graphs:
            node_attrs = util.node_dict(G)
            for u in G.nodes():
                node_attrs[u]['feat'] = np.array(node_attrs[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for i in range(10):
        (train_dataset, val_dataset, max_num_nodes,
         input_dim, assign_input_dim) = cross_val.prepare_val_data(
            graphs, args, i, max_nodes=args.max_nodes)

        print('Method: amg-assign')
        model = encoders.AmgPoolingGcnEncoder(
            max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
            args.num_classes, args.num_gc_layers, args.hidden_dim,
            assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
            bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
            args=args, assign_input_dim=assign_input_dim).cuda()

        _, val_accs = train(train_dataset, model, args,
                            val_dataset=val_dataset, test_dataset=None,
                            writer=writer)
        all_vals.append(np.array(val_accs))

    # Average the per-fold validation sequences position by position.
    all_vals = np.mean(np.vstack(all_vals), axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))
def benchmark_task(args, writer=None, feat='node-label'):
    """Train the AMG pooling encoder on a train/val/test split and evaluate.

    Args:
        args: Run options (``datadir``, ``bmname``, ``max_nodes``,
            ``method``, ``input_dim``, encoder hyper-parameters, ...).
        writer: Passed through to ``train`` unchanged.
        feat: Node feature source.  ``'node-feat'`` keeps the features
            already on the graphs (when ``'feat_dim'`` is present),
            ``'node-label'`` copies each node's ``'label'`` into
            ``'feat'``; anything else falls back to constant features of
            size ``args.input_dim``.

    Raises:
        ValueError: If ``args.method`` is not ``'amg-assign'``, the only
            encoder this function knows how to build.
    """
    # Fail fast: previously an unsupported method left ``model`` unbound
    # and crashed only after the dataset had been loaded and split.
    if args.method != 'amg-assign':
        raise ValueError(
            "Unsupported method {!r}; only 'amg-assign' is implemented"
            .format(args.method))

    def node_attrs(G):
        # ``Graph.node`` was removed in networkx 2.4; prefer it when it
        # exists (older releases) and fall back to ``Graph.nodes``.
        return G.node if hasattr(G, 'node') else G.nodes

    graphs = load_data.read_graphfile(
        args.datadir, args.bmname, max_nodes=args.max_nodes)

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        # Features are already attached; the input dimension is taken from
        # prepare_data below.
        print('Using node features')
    elif feat == 'node-label' and 'label' in node_attrs(graphs[0])[0]:
        print('Using node labels')
        for G in graphs:
            attrs = node_attrs(G)
            for u in G.nodes():
                attrs[u]['feat'] = np.array(attrs[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    (train_dataset, val_dataset, test_dataset, max_num_nodes,
     input_dim, assign_input_dim) = prepare_data(
        graphs, args, max_nodes=args.max_nodes)

    print('Method: amg-assign')
    model = encoders.AmgPoolingGcnEncoder(
        max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
        args.num_classes, args.num_gc_layers, args.hidden_dim,
        assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
        bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
        args=args, assign_input_dim=assign_input_dim).cuda()

    train(train_dataset, model, args, val_dataset=val_dataset,
          test_dataset=test_dataset, writer=writer)
    # NOTE(review): the test split is evaluated under the name
    # 'Validation' -- kept as-is; confirm whether the label is intended.
    evaluate(test_dataset, model, args, 'Validation')
def benchmark_task_val(args, feat='node-label', pred_hidden_dims=[50], device='cpu'):
    """Train and log one shuffle of the wave-pooling GCN benchmark.

    The function (1) loads the dataset and its coarsened graphs from a
    pickle cache under ``data/data_preprocessed/`` or builds and caches
    them, (2) attaches node features according to ``feat``, and (3) for
    the fold index equal to ``args.shuffle`` (only values 0..9 match)
    trains a ``WavePoolingGcnEncoder`` and writes validation/test results
    and a log under ``args.out_dir``.  If ``args.shuffle`` matches no
    index in ``range(10)`` nothing is trained.

    Args:
        args: Run options.  Besides the encoder hyper-parameters this
            reads ``bmname``, ``pool_sizes`` (underscore-separated ints),
            ``normalize``, ``datadir``, ``min_nodes``, ``max_nodes``,
            ``shuffle``, ``with_test``, ``out_dir`` and the fields used to
            build the output directory name.
        feat: Node feature source.  ``'node-feat'`` keeps the features
            already on the graphs (when ``'feat_dim'`` is present),
            ``'node-label'`` copies each node's ``'label'`` into
            ``'feat'``; anything else falls back to constant features of
            size ``args.input_dim``.
        pred_hidden_dims: Forwarded to the encoder as ``pred_hidden_dims``.
        device: Forwarded to the encoder and to ``train``.
    """
    # The default is a shared list object; hand the model its own copy so a
    # mutation there cannot leak into later calls.
    if isinstance(pred_hidden_dims, list):
        pred_hidden_dims = list(pred_hidden_dims)

    pool_sizes = [int(size) for size in args.pool_sizes.split('_')]

    # ---- locate the preprocessing cache ---------------------------------
    data_out_dir = ('data/data_preprocessed/' + args.bmname
                    + '/pool_sizes_' + args.pool_sizes)
    if args.normalize == 0:
        data_out_dir = data_out_dir + '_nor_' + str(args.normalize)
    data_out_dir = data_out_dir + '/'
    os.makedirs(data_out_dir, exist_ok=True)

    graph_list_file_name = data_out_dir + 'graphs_list.p'
    dataset_file_name = data_out_dir + 'dataset.p'

    if os.path.isfile(graph_list_file_name) and os.path.isfile(dataset_file_name):
        print('Files exist, reading from stored files....')
        print('Reading file from', data_out_dir)
        # NOTE(review): pickle executes arbitrary code on load; these cache
        # files must only ever come from this script's own dump below.
        with open(dataset_file_name, 'rb') as f:
            graphs = pickle.load(f)
        with open(graph_list_file_name, 'rb') as f:
            graphs_list = pickle.load(f)
        print('Data loaded!')
    else:
        print('No files exist, preprocessing datasets...')
        all_graphs = load_data.read_graphfile(
            args.datadir, args.bmname, max_nodes=args.max_nodes)
        print('Data length before filtering: ', len(all_graphs))
        print('pool_sizes: ', pool_sizes)

        # Build the kept list directly instead of calling list.remove()
        # per rejected graph (which rescans the list every time).
        graphs = []
        graphs_list = []
        for G in all_graphs:
            adj = nx.adjacency_matrix(G)
            num_rows = adj.shape[0]
            if (num_rows < args.min_nodes or num_rows > args.max_nodes
                    or num_rows != G.number_of_nodes()):
                continue
            coarsen_graph = gp(adj.todense().astype(float), pool_sizes)
            coarsen_graph.coarsening_pooling(args.normalize)
            graphs.append(G)
            graphs_list.append(coarsen_graph)

        print('Data length after filtering: ', len(graphs), len(graphs_list))
        print('Dataset preprocessed, dumping....')
        with open(dataset_file_name, 'wb') as f:
            pickle.dump(graphs, f)
        with open(graph_list_file_name, 'wb') as f:
            pickle.dump(graphs_list, f)
        print('Dataset dumped!')

    # ---- node features ---------------------------------------------------
    def node_attrs(G):
        # ``Graph.node`` was removed in networkx 2.4; prefer it when it
        # exists (older releases) and fall back to ``Graph.nodes``.
        return G.node if hasattr(G, 'node') else G.nodes

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        # Features are already attached; the input dimension is taken from
        # prepare_data below.
        print('Using node features')
    elif feat == 'node-label' and 'label' in node_attrs(graphs[0])[0]:
        print('Using node labels')
        for G in graphs:
            attrs = node_attrs(G)
            for u in G.nodes():
                attrs[u]['feat'] = np.array(attrs[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    # ---- train the requested shuffle -------------------------------------
    for i in range(10):
        if i != args.shuffle:
            continue

        # test_graphs=None when a test split is wanted, [] otherwise.
        train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim = \
            prepare_data(graphs, graphs_list, args,
                         test_graphs=None if args.with_test else [],
                         max_nodes=args.max_nodes, seed=i)

        out_dir = (args.bmname + '/tar_' + str(args.train_ratio)
                   + '_ter_' + str(args.test_ratio) + '/'
                   + 'num_shuffle' + str(args.num_shuffle) + '/'
                   + 'numconv_' + str(args.num_gc_layers)
                   + '_dp_' + str(args.dropout)
                   + '_wd_' + str(args.weight_decay)
                   + '_b_' + str(args.batch_size)
                   + '_hd_' + str(args.hidden_dim)
                   + '_od_' + str(args.output_dim)
                   + '_ph_' + str(args.pred_hidden)
                   + '_lr_' + str(args.lr)
                   + '_concat_' + str(args.concat))
        out_dir = (out_dir + '_ps_' + args.pool_sizes
                   + '_np_' + str(args.num_pool_matrix)
                   + '_nfp_' + str(args.num_pool_final_matrix)
                   + '_norL_' + str(args.normalize)
                   + '_mask_' + str(args.mask)
                   + '_ne_' + args.norm
                   + '_cf_' + str(args.con_final))

        # NOTE(review): the path uses args.feat, not the ``feat`` parameter.
        results_out_dir = (args.out_dir + '/' + args.bmname
                           + '/with_test' + str(args.with_test)
                           + '/using_feat_' + args.feat
                           + '/no_val_results/with_shuffles/' + out_dir + '/')
        log_out_dir = (args.out_dir + '/' + args.bmname
                       + '/with_test' + str(args.with_test)
                       + '/using_feat_' + args.feat
                       + '/no_val_logs/with_shuffles/' + out_dir + '/')
        os.makedirs(results_out_dir, exist_ok=True)
        os.makedirs(log_out_dir, exist_ok=True)

        results_out_file = results_out_dir + 'shuffle' + str(args.shuffle) + '.txt'
        log_out_file = log_out_dir + 'shuffle' + str(args.shuffle) + '.txt'
        results_out_file_2 = results_out_dir + 'test_shuffle' + str(args.shuffle) + '.txt'
        val_out_file = results_out_dir + 'val_result' + str(args.shuffle) + '.txt'
        print(results_out_file)

        with open(log_out_file, 'a') as f:
            f.write('Shuffle ' + str(i) + '====================================================================================\n')

        model = encoders.WavePoolingGcnEncoder(
            max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
            args.num_classes, args.num_gc_layers, args.num_pool_matrix,
            args.num_pool_final_matrix, pool_sizes=pool_sizes,
            pred_hidden_dims=pred_hidden_dims, concat=args.concat,
            bn=args.bn, dropout=args.dropout, mask=args.mask, args=args,
            device=device)

        _, val_accs, test_accs, best_val_result = train(
            train_dataset, model, args, val_dataset=val_dataset,
            test_dataset=test_dataset if args.with_test else None,
            log_dir=log_out_file, device=device)

        print('Shuffle ', i, '--------- best val result', best_val_result)
        if args.with_test:
            # Report the test accuracy at the epoch with the best validation
            # result.
            test_ac = test_accs[best_val_result['epoch']]
            print('Test accuracy: ', test_ac)
        best_val_ac = best_val_result['acc']
        print('Best val on shuffle ', (args.shuffle), best_val_ac)
        if args.with_test:
            print('Test on shuffle', args.shuffle, ' : ', test_ac)

        np.savetxt(val_out_file, val_accs)
        with open(results_out_file, 'w') as f:
            f.write('Best val on shuffle ' + str(args.shuffle) + ': ' + str(best_val_ac) + '\n')
        if args.with_test:
            with open(results_out_file_2, 'w') as f:
                f.write('Test accuracy on shuffle ' + str(args.shuffle) + ':' + str(test_ac) + '\n')

        with open(log_out_file, 'a') as f:
            f.write('Best val on shuffle ' + str(args.shuffle) + ' : ' + str(best_val_ac) + '\n')
            if args.with_test:
                f.write('Test on shuffle ' + str(args.shuffle) + ' : ' + str(test_ac) + '\n')
            f.write('------------------------------------------------------------------\n')
def benchmark_task_val(args, writer=None, feat='node-label'):
    """Run ten validation folds for the MxGNN / diffpool / base encoders.

    For each fold index 0..9 the data is split with
    ``cross_val.prepare_val_data``, the encoder selected by
    ``args.method`` (and, for ``'MxGNN'``, ``args.merge_method``) is
    trained, and the ``val_accs`` sequence returned by ``train`` is
    collected.  The element-wise mean over folds is printed with its
    maximum and argmax, and the maximum is appended to ``./result.txt``.

    Args:
        args: Run options (``datadir``, ``bmname``, ``max_nodes``,
            ``method``, ``merge_method``, ``num_aspect``, ``input_dim``,
            encoder hyper-parameters, ...).
        writer: Passed through to ``train`` unchanged.
        feat: Node feature source.  ``'node-feat'`` keeps the features
            already on the graphs (when ``'feat_dim'`` is present),
            ``'node-label'`` copies each node's ``'label'`` into
            ``'feat'``; anything else falls back to constant features of
            size ``args.input_dim``.
    """
    all_vals = []
    graphs = load_data.read_graphfile(
        args.datadir, args.bmname, max_nodes=args.max_nodes)

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        # Features are already attached; the input dimension is taken from
        # prepare_val_data below.
        print('Using node features')
    elif feat == 'node-label' and 'label' in graphs[0].nodes[0]:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                G.nodes[u]['feat'] = np.array(G.nodes[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for i in range(10):
        print("****************", i)
        (train_dataset, val_dataset, max_num_nodes,
         input_dim, assign_input_dim) = cross_val.prepare_val_data(
            graphs, args, i, max_nodes=args.max_nodes)

        # One copy of each dimension per aspect.  Built by repetition so the
        # fold index ``i`` is no longer shadowed by an inner loop variable.
        input_dim_aspect = [input_dim] * args.num_aspect
        assign_input_dim_aspect = [assign_input_dim] * args.num_aspect

        if args.method == 'MxGNN':
            print('Method: MxGNN')
            # Both merge variants take the same constructor arguments.
            if args.merge_method == 'cat':
                encoder_cls = MxGNNCat.SoftPoolingGcnEncoder
            else:
                encoder_cls = MxGNNSum.SoftPoolingGcnEncoder
            model = encoder_cls(
                max_num_nodes, args.num_aspect, args.multi_conv,
                args.multi_pool, input_dim_aspect, args.hidden_dim,
                args.output_dim, args.num_classes, args.num_gc_layers,
                args.hidden_dim, assign_ratio=args.assign_ratio,
                num_pooling=args.num_pool, bn=args.bn,
                dropout=args.dropout, linkpred=args.linkpred, args=args,
                assign_input_dim=assign_input_dim_aspect).cuda()
        elif args.method == 'diffpool':
            print('Method: diffpool')
            model = encoders.SoftPoolingGcnEncoder(
                max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
                args.num_classes, args.num_gc_layers, args.hidden_dim,
                assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
                args=args, assign_input_dim=assign_input_dim).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(
                input_dim, args.hidden_dim, args.output_dim,
                args.num_classes, args.num_gc_layers, bn=args.bn,
                dropout=args.dropout, args=args).cuda()

        _, val_accs = train(train_dataset, model, args,
                            val_dataset=val_dataset, test_dataset=None,
                            writer=writer)
        all_vals.append(np.array(val_accs))

    # Average the per-fold validation sequences position by position.
    all_vals = np.mean(np.vstack(all_vals), axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))

    # Context manager so the handle is closed even if a write fails.
    with open("./result.txt", 'a+') as f:
        f.write(str(np.max(all_vals)))
        f.write("\n")
model, train_dataloader, val_dataloader, device, writer=writer) return train_acc, val_acc if __name__ == "__main__": args = arg_parse.arg_parse() if torch.cuda.is_available(): device = torch.device('cuda:0') else: device = torch.device('cpu') graphs = load_data.read_graphfile('data', 'DD', max_nodes=args.max_nodes) #graphs = graphs[0:50] for G in graphs: for u in G.nodes(): G.node[u]['feat'] = np.array(G.node[u]['label']) feature_dim = len(graphs[0].node[0]['feat']) print("read file over!!") N = len(graphs) N1 = int(N * args.train_ratio) N2 = N - N1 random.seed(args.seed) random.shuffle(graphs) train_accs = [] val_accs = [] k = int(args.fold_id) if k == -1:
def benchmark_task_val(args, writer=None, feat='node-label'):
    """Cross-validate the encoder chosen by ``args.method`` and print a summary.

    The number of folds is ``min(10, max(v, 1))`` where ``v`` is
    ``int(len(graphs) * (1 - args.train_ratio - args.test_ratio))``.  For
    each fold the ``val_accs`` sequence returned by ``train`` is kept;
    afterwards the element-wise mean over folds, its maximum and the
    index of the maximum are printed.

    Args:
        args: Run options (``datadir``, ``bmname``, ``max_nodes``,
            ``train_ratio``, ``test_ratio``, ``method``, ``input_dim``,
            encoder hyper-parameters, ...).
        writer: Passed through to ``train`` unchanged.
        feat: Node feature source: ``'node-feat'``, ``'node-label'``, or
            anything else for constant features of size
            ``args.input_dim``.
    """
    fold_results = []
    graphs = load_data.read_graphfile(
        args.datadir, args.bmname, max_nodes=args.max_nodes)
    example_node = util.node_dict(graphs[0])[0]

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in example_node:
        print('Using node labels')
        for graph in graphs:
            for node in graph.nodes():
                util.node_dict(graph)[node]['feat'] = np.array(
                    util.node_dict(graph)[node]['label'])
    else:
        print('Using constant labels')
        const_gen = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        for graph in graphs:
            const_gen.gen_node_features(graph)

    # Revised from a fixed 10 folds: bounded by the validation share of the
    # dataset, but never fewer than one fold.
    val_share = int(len(graphs) * (1 - args.train_ratio - args.test_ratio))
    num_folds = min(10, max(val_share, 1))

    for fold in range(num_folds):
        split = cross_val.prepare_val_data(
            graphs, args, fold, max_nodes=args.max_nodes)
        (train_dataset, val_dataset, max_num_nodes,
         input_dim, assign_input_dim) = split

        if args.method == 'soft-assign':
            print('Method: soft-assign')
            encoder = encoders.SoftPoolingGcnEncoder(
                max_num_nodes,
                input_dim,
                args.hidden_dim,
                args.output_dim,
                args.num_classes,
                args.num_gc_layers,
                args.hidden_dim,
                assign_ratio=args.assign_ratio,
                num_pooling=args.num_pool,
                num_unpooling=args.num_unpool,
                unpool_ratio=args.ratio_unpool,
                bn=args.bn,
                dropout=args.dropout,
                linkpred=args.linkpred,
                args=args,
                assign_input_dim=assign_input_dim,
            )
        elif args.method == 'base-set2set':
            print('Method: base-set2set')
            encoder = encoders.GcnSet2SetEncoder(
                input_dim,
                args.hidden_dim,
                args.output_dim,
                args.num_classes,
                args.num_gc_layers,
                bn=args.bn,
                dropout=args.dropout,
                args=args,
            )
        else:
            print('Method: base')
            encoder = encoders.GcnEncoderGraph(
                input_dim,
                args.hidden_dim,
                args.output_dim,
                args.num_classes,
                args.num_gc_layers,
                bn=args.bn,
                dropout=args.dropout,
                args=args,
            )
        model = encoder.cuda()

        _, val_accs = train(
            train_dataset,
            model,
            args,
            val_dataset=val_dataset,
            test_dataset=None,
            writer=writer,
        )
        fold_results.append(np.array(val_accs))

    mean_vals = np.mean(np.vstack(fold_results), axis=0)
    print('all_vals: ', mean_vals)
    print(np.max(mean_vals))
    print(np.argmax(mean_vals))
bn=args.bn, dropout=args.dropout, alpha=args.alpha, args=args).cuda() print("start to train!") train_acc, test_acc = train(train_dataset_loader, model, args, val_dataset=val_dataset_loader, test_dataset=None, \ out_file=args.out_file, writer=writer,fold_id=fold_id) return train_acc, test_acc if __name__ == "__main__": args = arg_parse() graphs = load_data.read_graphfile(args.datadir, args.bmname, max_nodes=args.max_nodes) for G in graphs: for u in G.nodes(): G.node[u]['feat'] = np.array(G.node[u]['label']) print("read file over!!") N = len(graphs) N1 = int(N * args.train_ratio) N2 = N - N1 random.seed(args.seed) random.shuffle(graphs) train_accs = [] val_accs = [] #for k in range(10): k = int(args.fold_id) if k == -1: #10 fold in a process run!!
def benchmark_task_val(args, writer=None, feat='node-label'):
    """Run ten validation folds and append checkpoint statistics to ``log_``.

    For each fold index 0..9 the data is split with
    ``cross_val.prepare_val_data``, the encoder selected by
    ``args.method`` is trained, and the ``val_accs`` sequence returned by
    ``train`` is collected (one row per fold).  A CSV header and one CSV
    row are then appended to the file ``log_``: mean and standard
    deviation across folds at positions 10, 20, ..., 100 (1-based) of the
    validation sequence, followed by the maximum of the fold-averaged
    sequence.  Finally the fold-averaged sequence, its maximum and argmax
    are printed.

    Args:
        args: Run options (``datadir``, ``bmname``, ``max_nodes``,
            ``method``, ``input_dim``, encoder hyper-parameters, ...).
        writer: Passed through to ``train`` unchanged.
        feat: Node feature source.  ``'node-feat'`` keeps the features
            already on the graphs (when ``'feat_dim'`` is present),
            ``'node-label'`` copies each node's ``'label'`` into
            ``'feat'``; anything else falls back to constant features of
            size ``args.input_dim``.
    """
    def node_attrs(G):
        # ``Graph.node`` was removed in networkx 2.4; prefer it when it
        # exists (older releases) and fall back to ``Graph.nodes``.
        return G.node if hasattr(G, 'node') else G.nodes

    all_vals = []
    graphs = load_data.read_graphfile(
        args.datadir, args.bmname, max_nodes=args.max_nodes)

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        # Features are already attached; the input dimension is taken from
        # prepare_val_data below.
        print('Using node features')
    elif feat == 'node-label' and 'label' in node_attrs(graphs[0])[0]:
        print('Using node labels')
        for G in graphs:
            attrs = node_attrs(G)
            for u in G.nodes():
                attrs[u]['feat'] = np.array(attrs[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for i in range(10):
        (train_dataset, val_dataset, max_num_nodes,
         input_dim, assign_input_dim) = cross_val.prepare_val_data(
            graphs, args, i, max_nodes=args.max_nodes)

        if args.method == 'soft-assign':
            print('Method: soft-assign')
            model = encoders.SoftPoolingGcnEncoder(
                max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
                args.num_classes, args.num_gc_layers, args.hidden_dim,
                assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
                args=args, assign_input_dim=assign_input_dim).cuda()
        elif args.method == 'base-set2set':
            print('Method: base-set2set')
            model = encoders.GcnSet2SetEncoder(
                input_dim, args.hidden_dim, args.output_dim,
                args.num_classes, args.num_gc_layers, bn=args.bn,
                dropout=args.dropout, args=args).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(
                input_dim, args.hidden_dim, args.output_dim,
                args.num_classes, args.num_gc_layers, bn=args.bn,
                dropout=args.dropout, args=args).cuda()

        _, val_accs = train(train_dataset, model, args,
                            val_dataset=val_dataset, test_dataset=None,
                            writer=writer)
        all_vals.append(np.array(val_accs))

        # Drop this fold's objects before the next fold allocates its own.
        del train_dataset, val_dataset, model
        torch.cuda.empty_cache()

    all_vals = np.vstack(all_vals)

    header = ['method', 'gc', 'dim']
    row = [args.method, args.num_gc_layers, args.output_dim]
    num_recorded = all_vals.shape[1]
    for checkpoint in range(10, 101, 10):
        header += ['{}-mean'.format(checkpoint), '{}-std'.format(checkpoint)]
        if checkpoint <= num_recorded:
            column = all_vals[:, checkpoint - 1]
            row += [column.mean(), column.std()]
        else:
            # Shorter runs used to raise IndexError here, after all ten
            # folds had already been trained; log the gap as nan instead.
            row += [np.nan, np.nan]
    # NOTE(review): the last column is labelled 'std' but holds the maximum
    # of the fold-averaged sequence; label kept for log compatibility.
    header.append('std')
    row.append(np.max(np.mean(all_vals, axis=0)))

    with open('log_', 'a+') as f:
        f.write(','.join(header) + '\n')
        f.write(','.join(str(value) for value in row) + '\n')

    all_vals = np.mean(all_vals, axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))