def syn_community1v2(args, writer=None, export_graphs=False):
    # data
    graphs1 = datagen.gen_ba(range(40, 60), range(4, 5), 500,
                             featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float)))
    for G in graphs1:
        G.graph['label'] = 0
    if export_graphs:
        util.draw_graph_list(graphs1[:16], 4, 4, 'figs/ba')

    graphs2 = datagen.gen_2community_ba(range(20, 30), range(4, 5), 500, 0.3,
                                        [featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))])
    for G in graphs2:
        G.graph['label'] = 1
    if export_graphs:
        util.draw_graph_list(graphs2[:16], 4, 4, 'figs/ba2')

    graphs = graphs1 + graphs2

    train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim, assign_input_dim = \
        prepare_data(graphs, args)
    if args.method == 'soft-assign':
        print('Method: soft-assign')
        model = encoders.SoftPoolingGcnEncoder(
            max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
            args.num_classes, args.num_gc_layers, args.hidden_dim,
            assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
            bn=args.bn, linkpred=args.linkpred,
            assign_input_dim=assign_input_dim).cuda()
    elif args.method == 'base-set2set':
        print('Method: base-set2set')
        model = encoders.GcnSet2SetEncoder(
            input_dim, args.hidden_dim, args.output_dim, 2,
            args.num_gc_layers, bn=args.bn).cuda()
    else:
        print('Method: base')
        model = encoders.GcnEncoderGraph(
            input_dim, args.hidden_dim, args.output_dim, 2,
            args.num_gc_layers, bn=args.bn).cuda()

    train(train_dataset, model, args, val_dataset=val_dataset,
          test_dataset=test_dataset, writer=writer)
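# Hedged usage sketch (not part of the original pipeline): an argparse
# Namespace carrying the fields syn_community1v2 reads off `args`. The
# attribute names are taken from the function body above; the values are
# illustrative, not the authors' settings. Note that train() and the encoders
# may read additional fields not shown here, and .cuda() requires a GPU.
def _example_syn_community1v2_args():
    from argparse import Namespace
    return Namespace(input_dim=10, hidden_dim=20, output_dim=20,
                     num_classes=2, num_gc_layers=3, method='soft-assign',
                     assign_ratio=0.1, num_pool=1, bn=True, linkpred=False)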
def pkl_task(args, feat=None):
    with open(os.path.join(args.datadir, args.pkl_fname), 'rb') as pkl_file:
        data = pickle.load(pkl_file)
    graphs = data[0]
    labels = data[1]
    test_graphs = data[2]
    test_labels = data[3]

    for i in range(len(graphs)):
        graphs[i].graph['label'] = labels[i]
    for i in range(len(test_graphs)):
        test_graphs[i].graph['label'] = test_labels[i]

    if feat is None:
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)
        for G in test_graphs:
            featgen_const.gen_node_features(G)

    train_dataset, test_dataset, max_num_nodes = prepare_data(graphs, args, test_graphs=test_graphs)
    model = encoders.GcnEncoderGraph(
        args.input_dim, args.hidden_dim, args.output_dim, args.num_classes,
        args.num_gc_layers, bn=args.bn).cuda()
    train(train_dataset, model, args, test_dataset=test_dataset)
    evaluate(test_dataset, model, args, 'Validation')
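# Hedged sketch of the pickle layout pkl_task expects: a 4-tuple of
# (train graphs, train labels, test graphs, test labels), inferred from the
# data[0]..data[3] reads above. Toy graphs only; the real files ship with
# the dataset.
def _example_write_pkl(path='toy.pkl'):
    import pickle
    import networkx as nx
    graphs = [nx.path_graph(5), nx.cycle_graph(6)]
    labels = [0, 1]
    test_graphs = [nx.star_graph(4)]
    test_labels = [0]
    with open(path, 'wb') as f:
        pickle.dump((graphs, labels, test_graphs, test_labels), f)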
def benchmark_task_val(args, writer=None, feat='node-label'):
    all_vals = []
    graphs = load_data.read_graphfile(args.datadir, args.bmname, max_nodes=args.max_nodes)
    example_node = util.node_dict(graphs[0])[0]

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in example_node:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                util.node_dict(G)[u]['feat'] = np.array(util.node_dict(G)[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for i in range(10):
        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
            cross_val.prepare_val_data(graphs, args, i, max_nodes=args.max_nodes)
        if args.method == 'amg-assign':
            print('Method: amg-assign')
            model = encoders.AmgPoolingGcnEncoder(
                max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
                args.num_classes, args.num_gc_layers, args.hidden_dim,
                assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
                args=args, assign_input_dim=assign_input_dim).cuda()
        else:
            # this variant only supports amg-assign; fail early instead of
            # hitting a NameError on `model` below
            raise ValueError('unsupported method: ' + args.method)

        _, val_accs = train(train_dataset, model, args, val_dataset=val_dataset,
                            test_dataset=None, writer=writer)
        all_vals.append(np.array(val_accs))

    all_vals = np.vstack(all_vals)
    all_vals = np.mean(all_vals, axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))
def gen_er(n_range, p, num_graphs, feature_generator=None):
    graphs = []
    for i in np.random.choice(n_range, num_graphs):
        graphs.append(nx.erdos_renyi_graph(i, p))

    if feature_generator is None:
        feature_generator = featgen.ConstFeatureGen(0)
    for G in graphs:
        feature_generator.gen_node_features(G)
    return graphs
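# Hedged usage sketch for gen_er (assumes the module-level featgen/np imports):
# a handful of Erdos-Renyi graphs with 10-19 nodes, edge probability 0.5, and
# constant 8-dimensional node features. The feature dimension is illustrative.
def _example_gen_er():
    er_graphs = gen_er(range(10, 20), 0.5, 30,
                       featgen.ConstFeatureGen(np.ones(8, dtype=float)))
    print(len(er_graphs), er_graphs[0].number_of_nodes())
    return er_graphs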
def gen_ba(n_range, m_range, num_graphs, feature_generator=None):
    graphs = []
    for i in np.random.choice(n_range, num_graphs):
        # sample one attachment parameter m per graph
        j = np.random.choice(m_range)
        graphs.append(nx.barabasi_albert_graph(i, j))

    if feature_generator is None:
        feature_generator = featgen.ConstFeatureGen(0)
    for G in graphs:
        feature_generator.gen_node_features(G)
    return graphs
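# Hedged usage sketch for gen_ba: node counts sampled from [40, 60) and
# attachment parameter m fixed at 4, since range(4, 5) has a single element.
def _example_gen_ba():
    ba_graphs = gen_ba(range(40, 60), range(4, 5), 10,
                       featgen.ConstFeatureGen(np.ones(8, dtype=float)))
    assert all(40 <= G.number_of_nodes() < 60 for G in ba_graphs)
    return ba_graphs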
def benchmark_task(args, writer=None, feat='node-label'):
    graphs = load_data.read_graphfile(args.datadir, args.bmname, max_nodes=args.max_nodes)

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    # util.node_dict handles both the old G.node and the NetworkX 2.x G.nodes API
    elif feat == 'node-label' and 'label' in util.node_dict(graphs[0])[0]:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                util.node_dict(G)[u]['feat'] = np.array(util.node_dict(G)[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim, assign_input_dim = \
        prepare_data(graphs, args, max_nodes=args.max_nodes)
    if args.method == 'amg-assign':
        print('Method: amg-assign')
        model = encoders.AmgPoolingGcnEncoder(
            max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
            args.num_classes, args.num_gc_layers, args.hidden_dim,
            assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
            bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
            args=args, assign_input_dim=assign_input_dim).cuda()
    else:
        # only amg-assign is wired up here; fail early rather than with a
        # NameError on `model` below
        raise ValueError('unsupported method: ' + args.method)

    train(train_dataset, model, args, val_dataset=val_dataset,
          test_dataset=test_dataset, writer=writer)
    evaluate(test_dataset, model, args, 'Validation')
def syn_community2hier(args, writer=None):
    # data
    feat_gen = [featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))]
    graphs1 = datagen.gen_2hier(1000, [2, 4], 10, range(4, 5), 0.1, 0.03, feat_gen)
    graphs2 = datagen.gen_2hier(1000, [3, 3], 10, range(4, 5), 0.1, 0.03, feat_gen)
    graphs3 = datagen.gen_2community_ba(range(28, 33), range(4, 7), 1000, 0.25, feat_gen)

    for G in graphs1:
        G.graph['label'] = 0
    for G in graphs2:
        G.graph['label'] = 1
    for G in graphs3:
        G.graph['label'] = 2

    graphs = graphs1 + graphs2 + graphs3

    train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim, assign_input_dim = \
        prepare_data(graphs, args)
    if args.method == 'soft-assign':
        print('Method: soft-assign')
        model = encoders.SoftPoolingGcnEncoder(
            max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
            args.num_classes, args.num_gc_layers, args.hidden_dim,
            assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
            bn=args.bn, linkpred=args.linkpred, args=args,
            assign_input_dim=assign_input_dim).cuda()

    train(train_dataset, model, args, val_dataset=val_dataset,
          test_dataset=test_dataset, writer=writer)
def benchmark_task_val(args, feat='node-label', pred_hidden_dims=[50], device='cpu'):
    all_vals = []
    data_out_dir = 'data/data_preprocessed/' + args.bmname + '/pool_sizes_' + args.pool_sizes
    if args.normalize == 0:
        data_out_dir = data_out_dir + '_nor_' + str(args.normalize)
    data_out_dir = data_out_dir + '/'
    if not os.path.exists(data_out_dir):
        os.makedirs(data_out_dir)
    graph_list_file_name = data_out_dir + 'graphs_list.p'
    dataset_file_name = data_out_dir + 'dataset.p'

    if os.path.isfile(graph_list_file_name) and os.path.isfile(dataset_file_name):
        print('Files exist, reading from stored files....')
        print('Reading file from', data_out_dir)
        with open(dataset_file_name, 'rb') as f:
            graphs = pickle.load(f)
        with open(graph_list_file_name, 'rb') as f:
            graphs_list = pickle.load(f)
        print('Data loaded!')
    else:
        print('No files exist, preprocessing datasets...')
        graphs = load_data.read_graphfile(args.datadir, args.bmname, max_nodes=args.max_nodes)
        print('Data length before filtering: ', len(graphs))
        dataset_copy = graphs.copy()

        graphs_list = []
        pool_sizes = [int(s) for s in args.pool_sizes.split('_')]
        print('pool_sizes: ', pool_sizes)
        for i in range(len(dataset_copy)):
            adj = nx.adjacency_matrix(dataset_copy[i])
            # drop graphs outside the node-count bounds or with an adjacency
            # matrix inconsistent with the node count
            if (adj.shape[0] < args.min_nodes or adj.shape[0] > args.max_nodes
                    or adj.shape[0] != dataset_copy[i].number_of_nodes()):
                graphs.remove(dataset_copy[i])
            else:
                coarsen_graph = gp(adj.todense().astype(float), pool_sizes)
                coarsen_graph.coarsening_pooling(args.normalize)
                graphs_list.append(coarsen_graph)
        print('Data length after filtering: ', len(graphs), len(graphs_list))
        print('Dataset preprocessed, dumping....')
        with open(dataset_file_name, 'wb') as f:
            pickle.dump(graphs, f)
        with open(graph_list_file_name, 'wb') as f:
            pickle.dump(graphs_list, f)
        print('Dataset dumped!')

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in util.node_dict(graphs[0])[0]:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                util.node_dict(G)[u]['feat'] = np.array(util.node_dict(G)[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for i in range(10):
        # only the shuffle selected on the command line is run
        if i != args.shuffle:
            continue
        if args.with_test:
            train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim = \
                prepare_data(graphs, graphs_list, args, test_graphs=None,
                             max_nodes=args.max_nodes, seed=i)
        else:
            train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim = \
                prepare_data(graphs, graphs_list, args, test_graphs=[],
                             max_nodes=args.max_nodes, seed=i)

        out_dir = (args.bmname + '/tar_' + str(args.train_ratio)
                   + '_ter_' + str(args.test_ratio)
                   + '/num_shuffle' + str(args.num_shuffle)
                   + '/numconv_' + str(args.num_gc_layers)
                   + '_dp_' + str(args.dropout) + '_wd_' + str(args.weight_decay)
                   + '_b_' + str(args.batch_size) + '_hd_' + str(args.hidden_dim)
                   + '_od_' + str(args.output_dim) + '_ph_' + str(args.pred_hidden)
                   + '_lr_' + str(args.lr) + '_concat_' + str(args.concat))
        out_dir = (out_dir + '_ps_' + args.pool_sizes
                   + '_np_' + str(args.num_pool_matrix)
                   + '_nfp_' + str(args.num_pool_final_matrix)
                   + '_norL_' + str(args.normalize) + '_mask_' + str(args.mask)
                   + '_ne_' + args.norm + '_cf_' + str(args.con_final))
        results_out_dir = (args.out_dir + '/' + args.bmname
                           + '/with_test' + str(args.with_test)
                           + '/using_feat_' + args.feat
                           + '/no_val_results/with_shuffles/' + out_dir + '/')
        log_out_dir = (args.out_dir + '/' + args.bmname
                       + '/with_test' + str(args.with_test)
                       + '/using_feat_' + args.feat
                       + '/no_val_logs/with_shuffles/' + out_dir + '/')
        if not os.path.exists(results_out_dir):
            os.makedirs(results_out_dir, exist_ok=True)
        if not os.path.exists(log_out_dir):
            os.makedirs(log_out_dir, exist_ok=True)
        results_out_file = results_out_dir + 'shuffle' + str(args.shuffle) + '.txt'
        log_out_file = log_out_dir + 'shuffle' + str(args.shuffle) + '.txt'
        results_out_file_2 = results_out_dir + 'test_shuffle' + str(args.shuffle) + '.txt'
        val_out_file = results_out_dir + 'val_result' + str(args.shuffle) + '.txt'
        print(results_out_file)
        with open(log_out_file, 'a') as f:
            f.write('Shuffle ' + str(i) + '====================================================================================\n')

        pool_sizes = [int(s) for s in args.pool_sizes.split('_')]
        model = encoders.WavePoolingGcnEncoder(
            max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
            args.num_classes, args.num_gc_layers, args.num_pool_matrix,
            args.num_pool_final_matrix, pool_sizes=pool_sizes,
            pred_hidden_dims=pred_hidden_dims, concat=args.concat, bn=args.bn,
            dropout=args.dropout, mask=args.mask, args=args, device=device)

        if args.with_test:
            _, val_accs, test_accs, best_val_result = train(
                train_dataset, model, args, val_dataset=val_dataset,
                test_dataset=test_dataset, log_dir=log_out_file, device=device)
        else:
            _, val_accs, test_accs, best_val_result = train(
                train_dataset, model, args, val_dataset=val_dataset,
                test_dataset=None, log_dir=log_out_file, device=device)
        print('Shuffle ', i, '--------- best val result', best_val_result)
        if args.with_test:
            test_ac = test_accs[best_val_result['epoch']]
            print('Test accuracy: ', test_ac)
        best_val_ac = best_val_result['acc']

        print('Best val on shuffle ', args.shuffle, best_val_ac)
        if args.with_test:
            print('Test on shuffle', args.shuffle, ' : ', test_ac)

        np.savetxt(val_out_file, val_accs)
        with open(results_out_file, 'w') as f:
            f.write('Best val on shuffle ' + str(args.shuffle) + ': ' + str(best_val_ac) + '\n')
        if args.with_test:
            with open(results_out_file_2, 'w') as f:
                f.write('Test accuracy on shuffle ' + str(args.shuffle) + ':' + str(test_ac) + '\n')
        with open(log_out_file, 'a') as f:
            f.write('Best val on shuffle ' + str(args.shuffle) + ' : ' + str(best_val_ac) + '\n')
            if args.with_test:
                f.write('Test on shuffle ' + str(args.shuffle) + ' : ' + str(test_ac) + '\n')
            f.write('------------------------------------------------------------------\n')
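# Hedged sketch of the pool_sizes convention used above: a '_'-separated
# string of per-level cluster counts, which also names the preprocessing
# cache directory. 'DD' is a stand-in benchmark name, not a fixed choice.
def _example_pool_sizes(pool_sizes_arg='30_10', bmname='DD'):
    pool_sizes = [int(s) for s in pool_sizes_arg.split('_')]
    cache_dir = 'data/data_preprocessed/' + bmname + '/pool_sizes_' + pool_sizes_arg
    print(pool_sizes, cache_dir)  # [30, 10] data/data_preprocessed/DD/pool_sizes_30_10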
def benchmark_task_val(args, writer=None, feat='node-label'):
    all_vals = []
    graphs = load_data.read_graphfile(args.datadir, args.bmname, max_nodes=args.max_nodes)

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in graphs[0].nodes[0]:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                G.nodes[u]['feat'] = np.array(G.nodes[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for i in range(10):
        print("****************", i)
        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
            cross_val.prepare_val_data(graphs, args, i, max_nodes=args.max_nodes)

        # replicate the per-graph dimensions once per aspect
        input_dim_aspect = [input_dim] * args.num_aspect
        assign_input_dim_aspect = [assign_input_dim] * args.num_aspect

        if args.method == 'MxGNN':
            print('Method: MxGNN')
            if args.merge_method == 'cat':
                model = MxGNNCat.SoftPoolingGcnEncoder(
                    max_num_nodes, args.num_aspect, args.multi_conv, args.multi_pool,
                    input_dim_aspect, args.hidden_dim, args.output_dim,
                    args.num_classes, args.num_gc_layers, args.hidden_dim,
                    assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                    bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
                    args=args, assign_input_dim=assign_input_dim_aspect).cuda()
            else:
                model = MxGNNSum.SoftPoolingGcnEncoder(
                    max_num_nodes, args.num_aspect, args.multi_conv, args.multi_pool,
                    input_dim_aspect, args.hidden_dim, args.output_dim,
                    args.num_classes, args.num_gc_layers, args.hidden_dim,
                    assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                    bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
                    args=args, assign_input_dim=assign_input_dim_aspect).cuda()
        elif args.method == 'diffpool':
            print('Method: diffpool')
            model = encoders.SoftPoolingGcnEncoder(
                max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
                args.num_classes, args.num_gc_layers, args.hidden_dim,
                assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
                args=args, assign_input_dim=assign_input_dim).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(
                input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()

        _, val_accs = train(train_dataset, model, args, val_dataset=val_dataset,
                            test_dataset=None, writer=writer)
        all_vals.append(np.array(val_accs))

    all_vals = np.vstack(all_vals)
    all_vals = np.mean(all_vals, axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))

    with open('./result.txt', 'a+') as f:
        f.write(str(np.max(all_vals)) + '\n')
def benchmark_task_val(args, writer=None, feat='node-label'):
    all_vals = []
    graphs = load_data.read_graphfile(args.datadir, args.bmname, max_nodes=args.max_nodes)
    example_node = util.node_dict(graphs[0])[0]

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in example_node:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                util.node_dict(G)[u]['feat'] = np.array(util.node_dict(G)[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    # cap the number of folds at 10, but never exceed the number of graphs
    # left for the validation split (at least one fold always runs)
    num_folds = min(10, max(int(len(graphs) * (1 - args.train_ratio - args.test_ratio)), 1))
    for i in range(num_folds):
        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
            cross_val.prepare_val_data(graphs, args, i, max_nodes=args.max_nodes)
        if args.method == 'soft-assign':
            print('Method: soft-assign')
            model = encoders.SoftPoolingGcnEncoder(
                max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
                args.num_classes, args.num_gc_layers, args.hidden_dim,
                assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                num_unpooling=args.num_unpool, unpool_ratio=args.ratio_unpool,
                bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
                args=args, assign_input_dim=assign_input_dim).cuda()
        elif args.method == 'base-set2set':
            print('Method: base-set2set')
            model = encoders.GcnSet2SetEncoder(
                input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(
                input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()

        _, val_accs = train(train_dataset, model, args, val_dataset=val_dataset,
                            test_dataset=None, writer=writer)
        all_vals.append(np.array(val_accs))

    all_vals = np.vstack(all_vals)
    all_vals = np.mean(all_vals, axis=0)
    print('all_vals: ', all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))
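# Worked example of the fold cap above (values illustrative): with 1000
# graphs and train/test ratios of 0.8/0.1, roughly 10% of the graphs form
# each validation split, so all 10 folds run; a tiny dataset of 8 graphs
# falls back to a single fold.
def _example_fold_cap():
    print(min(10, max(int(1000 * (1 - 0.8 - 0.1)), 1)))  # 10
    print(min(10, max(int(8 * (1 - 0.8 - 0.1)), 1)))     # 1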
def benchmark_task_val(args, writer=None, feat='node-feat'):
    all_vals = []
    # read_graphfile2 returns a list of subgraphs (Hlist) plus a merged graph
    # (New_G) instead of the plain graph list used by the other variants
    Hlist, New_G = load_data.read_graphfile2(args.datadir, args.bmname, max_nodes=args.max_nodes)
    example_node = util.node_dict(New_G)[0]

    if feat == 'node-feat' and 'feat_dim' in New_G.graph:
        print('Using node features')
        input_dim = New_G.graph['feat_dim']
    # the fallback branches operate on the subgraph list
    elif feat == 'node-label' and 'label' in example_node:
        print('Using node labels')
        for G in Hlist:
            for u in G.nodes():
                util.node_dict(G)[u]['feat'] = np.array(util.node_dict(G)[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in Hlist:
            featgen_const.gen_node_features(G)

    # cross-validation (see cross_val.py)
    for i in range(10):
        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
            cross_val.prepare_val_data2(Hlist, New_G, args, i, max_nodes=args.max_nodes)
        if args.method == 'soft-assign':
            print('Method: soft-assign')
            model = encoders.SoftPoolingGcnEncoder(
                max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
                args.num_classes, args.num_gc_layers, args.hidden_dim,
                assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
                args=args, assign_input_dim=assign_input_dim).cuda()
        elif args.method == 'base-set2set':
            print('Method: base-set2set')
            model = encoders.GcnSet2SetEncoder(
                input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(
                input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()

        _, val_accs = train(train_dataset, model, args, val_dataset=val_dataset,
                            test_dataset=None, writer=writer)
        all_vals.append(np.array(val_accs))

    all_vals = np.vstack(all_vals)
    all_vals = np.mean(all_vals, axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))
def benchmark_task_val(args, writer=None, feat='node-label'):
    all_vals = []
    graphs = load_data.read_graphfile(args.datadir, args.bmname, max_nodes=args.max_nodes)

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in util.node_dict(graphs[0])[0]:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                util.node_dict(G)[u]['feat'] = np.array(util.node_dict(G)[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for i in range(10):
        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
            cross_val.prepare_val_data(graphs, args, i, max_nodes=args.max_nodes)
        if args.method == 'soft-assign':
            print('Method: soft-assign')
            model = encoders.SoftPoolingGcnEncoder(
                max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
                args.num_classes, args.num_gc_layers, args.hidden_dim,
                assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
                args=args, assign_input_dim=assign_input_dim).cuda()
        elif args.method == 'base-set2set':
            print('Method: base-set2set')
            model = encoders.GcnSet2SetEncoder(
                input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(
                input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()

        _, val_accs = train(train_dataset, model, args, val_dataset=val_dataset,
                            test_dataset=None, writer=writer)
        all_vals.append(np.array(val_accs))
        # free fold-specific memory before the next split
        del train_dataset, val_dataset, model
        torch.cuda.empty_cache()

    all_vals = np.vstack(all_vals)
    # log mean/std of the validation accuracy at every 10th epoch, plus the
    # best epoch-averaged accuracy (this last column tracks the mean, not a std)
    epochs = range(10, 101, 10)
    with open('log_', 'a+') as f:
        header = ['method', 'gc', 'dim']
        for ep in epochs:
            header += ['{}-mean'.format(ep), '{}-std'.format(ep)]
        header.append('best-mean')
        f.write(','.join(header) + '\n')
        row = [str(args.method), str(args.num_gc_layers), str(args.output_dim)]
        for ep in epochs:
            row += [str(all_vals[:, ep - 1].mean()), str(all_vals[:, ep - 1].std())]
        row.append(str(np.max(np.mean(all_vals, axis=0))))
        f.write(','.join(row) + '\n')

    all_vals = np.mean(all_vals, axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))
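# Hedged sketch of the cross-validation bookkeeping above: stack one
# validation-accuracy curve per fold, then report mean/std at fixed epoch
# checkpoints. Random curves stand in for real training output.
def _example_val_aggregation():
    folds = np.random.rand(10, 100)  # 10 folds x 100 epochs
    for ep in (10, 50, 100):
        col = folds[:, ep - 1]
        print(ep, col.mean(), col.std())
    print('best epoch:', np.argmax(folds.mean(axis=0)) + 1)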
# assumed module paths for the generators; adjust to this repo's layout
import numpy as np
import datagen
import featgen

from graph_sampler import GraphSampler
from torch.autograd import Variable

'''
TODO:
Run. Attach to encoder. Change to average pooling.
'''

# syn_community1v2
input_dim = 10

n_range = range(40, 60)
m_range = range(4, 5)
num_graphs = 500
feature_generator = featgen.ConstFeatureGen(np.ones(input_dim, dtype=float))
graphs1 = datagen.gen_ba(n_range, m_range, num_graphs, feature_generator)
for G in graphs1:
    G.graph['label'] = 0

n_range = range(20, 30)
m_range = range(4, 5)
num_graphs = 500
inter_prob = 0.3
feature_generators = [featgen.ConstFeatureGen(np.ones(input_dim, dtype=float))]
graphs2 = datagen.gen_2community_ba(n_range, m_range, num_graphs, inter_prob, feature_generators)
for G in graphs2:
    G.graph['label'] = 1

graphs = graphs1 + graphs2
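# Quick sanity check on the combined dataset built above: the two classes
# should be balanced at 500 graphs each.
from collections import Counter
print(Counter(G.graph['label'] for G in graphs))  # Counter({0: 500, 1: 500})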