def pkl_task(args, feat=None):
    """Train and evaluate a baseline GCN graph classifier on a pickled dataset.

    The pickle at ``args.datadir/args.pkl_fname`` is expected to hold an
    indexable container of four items: train graphs, train labels, test
    graphs, test labels (as the indexing below demonstrates).

    Args:
        args: namespace providing datadir, pkl_fname, input_dim, hidden_dim,
            output_dim, num_classes, num_gc_layers, bn, plus whatever
            prepare_data/train consume.
        feat: if None, constant all-ones node features of length
            args.input_dim are generated for every graph.
    """
    # NOTE(review): pickle.load is only safe on trusted, locally produced
    # files — never point this at untrusted input.
    with open(os.path.join(args.datadir, args.pkl_fname), 'rb') as pkl_file:
        data = pickle.load(pkl_file)
    graphs, labels = data[0], data[1]
    test_graphs, test_labels = data[2], data[3]

    # Attach the graph-level class label to each graph (zip replaces the
    # original index-based range(len(...)) loops).
    for G, label in zip(graphs, labels):
        G.graph['label'] = label
    for G, label in zip(test_graphs, test_labels):
        G.graph['label'] = label

    if feat is None:
        # No features supplied: fall back to constant all-ones node features.
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)
        for G in test_graphs:
            featgen_const.gen_node_features(G)

    train_dataset, test_dataset, max_num_nodes = prepare_data(
        graphs, args, test_graphs=test_graphs)
    model = encoders.GcnEncoderGraph(
        args.input_dim, args.hidden_dim, args.output_dim, args.num_classes,
        args.num_gc_layers, bn=args.bn).cuda()
    train(train_dataset, model, args, test_dataset=test_dataset)
    evaluate(test_dataset, model, args, 'Validation')
def syn_community1v2(args, writer=None, export_graphs=False):
    """Synthetic binary task: single-community BA graphs (label 0) vs
    two-community BA graphs (label 1).

    Args:
        args: run configuration (dims, method, pooling options, ...).
        writer: optional summary writer forwarded to train().
        export_graphs: when True, draw the first 16 graphs of each class.
    """
    # --- class 0: plain Barabasi-Albert graphs, constant node features ---
    graphs1 = datagen.gen_ba(
        range(40, 60), range(4, 5), 500,
        featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float)))
    for G in graphs1:
        G.graph['label'] = 0
    if export_graphs:
        util.draw_graph_list(graphs1[:16], 4, 4, 'figs/ba')

    # --- class 1: two BA communities, inter-community edge prob 0.3 ---
    graphs2 = datagen.gen_2community_ba(
        range(20, 30), range(4, 5), 500, 0.3,
        [featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))])
    for G in graphs2:
        G.graph['label'] = 1
    if export_graphs:
        util.draw_graph_list(graphs2[:16], 4, 4, 'figs/ba2')

    graphs = graphs1 + graphs2
    train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim, assign_input_dim = \
        prepare_data(graphs, args)

    # Select the encoder variant. NOTE(review): the soft-assign branch uses
    # args.num_classes while the other two hard-code 2 classes — confirm
    # this asymmetry is intentional.
    if args.method == 'soft-assign':
        print('Method: soft-assign')
        model = encoders.SoftPoolingGcnEncoder(
            max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
            args.num_classes, args.num_gc_layers, args.hidden_dim,
            assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
            bn=args.bn, linkpred=args.linkpred,
            assign_input_dim=assign_input_dim).cuda()
    elif args.method == 'base-set2set':
        print('Method: base-set2set')
        model = encoders.GcnSet2SetEncoder(
            input_dim, args.hidden_dim, args.output_dim, 2,
            args.num_gc_layers, bn=args.bn).cuda()
    else:
        print('Method: base')
        model = encoders.GcnEncoderGraph(
            input_dim, args.hidden_dim, args.output_dim, 2,
            args.num_gc_layers, bn=args.bn).cuda()

    train(train_dataset, model, args, val_dataset=val_dataset,
          test_dataset=test_dataset, writer=writer)
def benchmark_task_val(args, writer=None, feat='node-label'):
    """Run 10-fold cross-validation of a graph classifier on a benchmark
    dataset and print the epoch-wise mean validation accuracy curve.

    Args:
        args: run configuration (datadir, bmname, max_nodes, dims, method, ...).
        writer: optional summary writer forwarded to train().
        feat: 'node-feat' to use dataset features, 'node-label' to use node
            labels as features, anything else falls back to constant features.
    """
    all_vals = []
    graphs = load_data.read_graphfile(args.datadir, args.bmname,
                                      max_nodes=args.max_nodes)

    example_node = util.node_dict(graphs[0])[0]
    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in example_node:
        print('Using node labels')
        # Copy each node's label into its 'feat' attribute.
        for G in graphs:
            for u in G.nodes():
                util.node_dict(G)[u]['feat'] = np.array(util.node_dict(G)[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    # One model per fold; only validation accuracies are collected.
    for fold in range(10):
        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
            cross_val.prepare_val_data(graphs, args, fold, max_nodes=args.max_nodes)
        if args.method == 'soft-assign':
            print('Method: soft-assign')
            model = encoders.SoftPoolingGcnEncoder(
                max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
                args.num_classes, args.num_gc_layers, args.hidden_dim,
                assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
                args=args, assign_input_dim=assign_input_dim).cuda()
        elif args.method == 'base-set2set':
            print('Method: base-set2set')
            model = encoders.GcnSet2SetEncoder(
                input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                args.num_gc_layers, bn=args.bn, dropout=args.dropout,
                args=args).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(
                input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                args.num_gc_layers, bn=args.bn, dropout=args.dropout,
                args=args).cuda()

        _, val_accs = train(train_dataset, model, args, val_dataset=val_dataset,
                            test_dataset=None, writer=writer)
        all_vals.append(np.array(val_accs))

    # Average the per-epoch validation accuracy across folds.
    all_vals = np.mean(np.vstack(all_vals), axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))
def benchmark_task(args, writer=None, feat='node-label'):
    """Train and evaluate one model on a benchmark dataset (single split).

    Args:
        args: run configuration (datadir, bmname, max_nodes, dims, method, ...).
        writer: optional summary writer forwarded to train().
        feat: 'node-feat' to use dataset features, 'node-label' to use node
            labels as features, anything else falls back to constant features.
    """
    graphs = load_data.read_graphfile(args.datadir, args.bmname,
                                      max_nodes=args.max_nodes)

    # Fix: the original used graphs[0].node[0] / G.node[u] — the NetworkX 1.x
    # attribute removed in NetworkX 2.4. Use the util.node_dict compat
    # wrapper, as the sibling benchmark_task_val already does.
    example_node = util.node_dict(graphs[0])[0]
    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in example_node:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                util.node_dict(G)[u]['feat'] = np.array(util.node_dict(G)[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim, assign_input_dim = \
        prepare_data(graphs, args, max_nodes=args.max_nodes)
    if args.method == 'soft-assign':
        print('Method: soft-assign')
        model = encoders.SoftPoolingGcnEncoder(
            max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
            args.num_classes, args.num_gc_layers, args.hidden_dim,
            assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
            bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
            args=args, assign_input_dim=assign_input_dim).cuda()
    elif args.method == 'base-set2set':
        print('Method: base-set2set')
        model = encoders.GcnSet2SetEncoder(
            input_dim, args.hidden_dim, args.output_dim, args.num_classes,
            args.num_gc_layers, bn=args.bn, dropout=args.dropout,
            args=args).cuda()
    else:
        print('Method: base')
        model = encoders.GcnEncoderGraph(
            input_dim, args.hidden_dim, args.output_dim, args.num_classes,
            args.num_gc_layers, bn=args.bn, dropout=args.dropout,
            args=args).cuda()

    train(train_dataset, model, args, val_dataset=val_dataset,
          test_dataset=test_dataset, writer=writer)
    evaluate(test_dataset, model, args, 'Validation')
def syn_community2hier(args, writer=None):
    """Synthetic 3-class task over hierarchical community graphs.

    Class 0: 2-level hierarchy with [2,4] communities; class 1: 2-level
    hierarchy with [3,3] communities; class 2: flat 2-community BA graphs.

    Args:
        args: run configuration (dims, method, pooling options, ...).
        writer: optional summary writer forwarded to train().
    """
    # All classes share the same constant all-ones feature generator.
    feat_gen = [featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))]
    graphs1 = datagen.gen_2hier(1000, [2, 4], 10, range(4, 5), 0.1, 0.03, feat_gen)
    graphs2 = datagen.gen_2hier(1000, [3, 3], 10, range(4, 5), 0.1, 0.03, feat_gen)
    graphs3 = datagen.gen_2community_ba(range(28, 33), range(4, 7), 1000, 0.25, feat_gen)

    # Label each class and pool everything into one dataset.
    for class_idx, graph_set in enumerate((graphs1, graphs2, graphs3)):
        for G in graph_set:
            G.graph['label'] = class_idx
    graphs = graphs1 + graphs2 + graphs3

    train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim, assign_input_dim = \
        prepare_data(graphs, args)

    if args.method == 'soft-assign':
        print('Method: soft-assign')
        model = encoders.SoftPoolingGcnEncoder(
            max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
            args.num_classes, args.num_gc_layers, args.hidden_dim,
            assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
            bn=args.bn, linkpred=args.linkpred, args=args,
            assign_input_dim=assign_input_dim).cuda()
    elif args.method == 'base-set2set':
        print('Method: base-set2set')
        # NOTE(review): unlike the other drivers, this passes
        # assign_input_dim to GcnSet2SetEncoder — confirm the encoder
        # actually accepts it.
        model = encoders.GcnSet2SetEncoder(
            input_dim, args.hidden_dim, args.output_dim, 2,
            args.num_gc_layers, bn=args.bn, args=args,
            assign_input_dim=assign_input_dim).cuda()
    else:
        print('Method: base')
        model = encoders.GcnEncoderGraph(
            input_dim, args.hidden_dim, args.output_dim, 2,
            args.num_gc_layers, bn=args.bn, args=args).cuda()

    train(train_dataset, model, args, val_dataset=val_dataset,
          test_dataset=test_dataset, writer=writer)
def benchmark_task_val(args, writer=None, feat='node-label'):
    """10-fold cross-validation for the multi-aspect (MxGNN) models.

    Runs each fold, collects per-epoch validation accuracies, prints the
    fold-averaged curve, and appends the best mean accuracy to ./result.txt.

    Args:
        args: run configuration (datadir, bmname, max_nodes, num_aspect,
            merge_method, method, dims, pooling options, ...).
        writer: optional summary writer forwarded to train().
        feat: 'node-feat' to use dataset features, 'node-label' to use node
            labels as features, anything else falls back to constant features.
    """
    all_vals = []
    graphs = load_data.read_graphfile(args.datadir, args.bmname,
                                      max_nodes=args.max_nodes)

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in graphs[0].nodes[0]:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                G.nodes[u]['feat'] = np.array(G.nodes[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for fold in range(10):
        print("****************", fold)
        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
            cross_val.prepare_val_data(graphs, args, fold, max_nodes=args.max_nodes)

        # Fix: the original built these with a loop whose index shadowed the
        # outer fold variable; list multiplication is equivalent and safe
        # (the repeated element is an int, so sharing is harmless).
        input_dim_aspect = [input_dim] * args.num_aspect
        assign_input_dim_aspect = [assign_input_dim] * args.num_aspect

        if args.method == 'MxGNN':
            print('Method: MxGNN')
            if args.merge_method == 'cat':
                model = MxGNNCat.SoftPoolingGcnEncoder(
                    max_num_nodes, args.num_aspect, args.multi_conv,
                    args.multi_pool, input_dim_aspect, args.hidden_dim,
                    args.output_dim, args.num_classes, args.num_gc_layers,
                    args.hidden_dim, assign_ratio=args.assign_ratio,
                    num_pooling=args.num_pool, bn=args.bn,
                    dropout=args.dropout, linkpred=args.linkpred, args=args,
                    assign_input_dim=assign_input_dim_aspect).cuda()
            else:
                model = MxGNNSum.SoftPoolingGcnEncoder(
                    max_num_nodes, args.num_aspect, args.multi_conv,
                    args.multi_pool, input_dim_aspect, args.hidden_dim,
                    args.output_dim, args.num_classes, args.num_gc_layers,
                    args.hidden_dim, assign_ratio=args.assign_ratio,
                    num_pooling=args.num_pool, bn=args.bn,
                    dropout=args.dropout, linkpred=args.linkpred, args=args,
                    assign_input_dim=assign_input_dim_aspect).cuda()
        elif args.method == 'diffpool':
            print('Method: diffpool')
            model = encoders.SoftPoolingGcnEncoder(
                max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
                args.num_classes, args.num_gc_layers, args.hidden_dim,
                assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
                args=args, assign_input_dim=assign_input_dim).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(
                input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                args.num_gc_layers, bn=args.bn, dropout=args.dropout,
                args=args).cuda()

        _, val_accs = train(train_dataset, model, args, val_dataset=val_dataset,
                            test_dataset=None, writer=writer)
        all_vals.append(np.array(val_accs))

    all_vals = np.mean(np.vstack(all_vals), axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))

    # Fix: use a context manager instead of bare open()/close() so the file
    # is closed even if a write fails.
    with open("./result.txt", 'a+') as f:
        f.write(str(np.max(all_vals)))
        f.write("\n")
def benchmark_task_val(args, writer=None, feat='node-label'):
    """10-fold cross-validation with per-checkpoint CSV logging.

    Runs each fold, frees GPU memory between folds, appends mean/std of the
    validation accuracy at epochs 10, 20, ..., 100 to the file 'log_', and
    prints the fold-averaged accuracy curve.

    Args:
        args: run configuration (datadir, bmname, max_nodes, dims, method, ...).
        writer: optional summary writer forwarded to train().
        feat: 'node-feat' to use dataset features, 'node-label' to use node
            labels as features, anything else falls back to constant features.
    """
    all_vals = []
    graphs = load_data.read_graphfile(args.datadir, args.bmname,
                                      max_nodes=args.max_nodes)

    # Fix: the original used graphs[0].node[0] / G.node[u] — the NetworkX 1.x
    # attribute removed in NetworkX 2.4. Use the util.node_dict compat
    # wrapper, consistent with the other benchmark drivers in this file.
    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in util.node_dict(graphs[0])[0]:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                util.node_dict(G)[u]['feat'] = np.array(util.node_dict(G)[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for fold in range(10):
        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
            cross_val.prepare_val_data(graphs, args, fold, max_nodes=args.max_nodes)
        if args.method == 'soft-assign':
            print('Method: soft-assign')
            model = encoders.SoftPoolingGcnEncoder(
                max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
                args.num_classes, args.num_gc_layers, args.hidden_dim,
                assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                bn=args.bn, dropout=args.dropout, linkpred=args.linkpred,
                args=args, assign_input_dim=assign_input_dim).cuda()
        elif args.method == 'base-set2set':
            print('Method: base-set2set')
            model = encoders.GcnSet2SetEncoder(
                input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                args.num_gc_layers, bn=args.bn, dropout=args.dropout,
                args=args).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(
                input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                args.num_gc_layers, bn=args.bn, dropout=args.dropout,
                args=args).cuda()

        _, val_accs = train(train_dataset, model, args, val_dataset=val_dataset,
                            test_dataset=None, writer=writer)
        all_vals.append(np.array(val_accs))
        # Release fold-local tensors before the next fold to bound GPU memory.
        del train_dataset, val_dataset, model
        torch.cuda.empty_cache()

    all_vals = np.vstack(all_vals)

    # Append a header row plus the per-checkpoint mean/std of validation
    # accuracy at epochs 10..100 (requires >= 100 recorded epochs).
    # The checkpoint loop replaces 22 hand-written column expressions but
    # emits exactly the same 24 comma-separated fields per row.
    checkpoints = range(10, 101, 10)
    header = ['method', 'gc', 'dim']
    for ep in checkpoints:
        header += ['{}-mean'.format(ep), '{}-std'.format(ep)]
    header.append('std')

    stats = [args.method, args.num_gc_layers, args.output_dim]
    for ep in checkpoints:
        col = all_vals[:, ep - 1]
        stats += [col.mean(), col.std()]
    stats.append(np.max(np.mean(all_vals, axis=0)))

    # NOTE(review): the header is re-appended on every run, matching the
    # original behavior — confirm that is intended.
    with open('log_', 'a+') as f:
        f.write(','.join(str(v) for v in header) + '\n')
        f.write(','.join(str(v) for v in stats) + '\n')

    all_vals = np.mean(all_vals, axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))