Example #1
def syn_community1v2(args, writer=None, export_graphs=False):
    # data
    graphs1 = datagen.gen_ba(
        range(40, 60), range(4, 5), 500,
        featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float)))
    for G in graphs1:
        G.graph['label'] = 0
    if export_graphs:
        util.draw_graph_list(graphs1[:16], 4, 4, 'figs/ba')

    graphs2 = datagen.gen_2community_ba(
        range(20, 30), range(4, 5), 500, 0.3,
        [featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))])
    for G in graphs2:
        G.graph['label'] = 1
    if export_graphs:
        util.draw_graph_list(graphs2[:16], 4, 4, 'figs/ba2')

    graphs = graphs1 + graphs2

    train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim, assign_input_dim = prepare_data(
        graphs, args)
    if args.method == 'soft-assign':
        print('Method: soft-assign')
        model = encoders.SoftPoolingGcnEncoder(
            max_num_nodes,
            input_dim,
            args.hidden_dim,
            args.output_dim,
            args.num_classes,
            args.num_gc_layers,
            args.hidden_dim,
            assign_ratio=args.assign_ratio,
            num_pooling=args.num_pool,
            bn=args.bn,
            linkpred=args.linkpred,
            assign_input_dim=assign_input_dim).cuda()
    elif args.method == 'base-set2set':
        print('Method: base-set2set')
        model = encoders.GcnSet2SetEncoder(input_dim,
                                           args.hidden_dim,
                                           args.output_dim,
                                           2,  # num_classes: two synthetic graph classes
                                           args.num_gc_layers,
                                           bn=args.bn).cuda()
    else:
        print('Method: base')
        model = encoders.GcnEncoderGraph(input_dim,
                                         args.hidden_dim,
                                         args.output_dim,
                                         2,  # num_classes: two synthetic graph classes
                                         args.num_gc_layers,
                                         bn=args.bn).cuda()

    train(train_dataset,
          model,
          args,
          val_dataset=val_dataset,
          test_dataset=test_dataset,
          writer=writer)
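
The routine reads every hyperparameter from an `args` object. A minimal invocation sketch, assuming an `argparse.Namespace` and using only field names that appear in the function body above; the values are illustrative, and fields consumed further inside prepare_data and train (batch size, learning rate, epochs, ...) are omitted:

from argparse import Namespace

# Illustrative values; field names taken from the function body above.
args = Namespace(input_dim=10, hidden_dim=20, output_dim=20,
                 num_classes=2, num_gc_layers=3, method='soft-assign',
                 assign_ratio=0.1, num_pool=1, bn=True, linkpred=True)
syn_community1v2(args, writer=None, export_graphs=False)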
Example #2
def pkl_task(args, feat=None):
    with open(os.path.join(args.datadir, args.pkl_fname), 'rb') as pkl_file:
        data = pickle.load(pkl_file)
    graphs = data[0]
    labels = data[1]
    test_graphs = data[2]
    test_labels = data[3]

    for i in range(len(graphs)):
        graphs[i].graph['label'] = labels[i]
    for i in range(len(test_graphs)):
        test_graphs[i].graph['label'] = test_labels[i]

    if feat is None:
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)
        for G in test_graphs:
            featgen_const.gen_node_features(G)

    train_dataset, test_dataset, max_num_nodes = prepare_data(graphs, args, test_graphs=test_graphs)
    model = encoders.GcnEncoderGraph(
            args.input_dim, args.hidden_dim, args.output_dim, args.num_classes, 
            args.num_gc_layers, bn=args.bn).cuda()
    train(train_dataset, model, args, test_dataset=test_dataset)
    evaluate(test_dataset, model, args, 'Validation')
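
pkl_task expects the pickle to hold a sequence whose first four entries are the training graphs, training labels, test graphs, and test labels. A sketch of writing a compatible toy file (hypothetical path; any networkx graphs with integer labels work), which args.datadir='data' and args.pkl_fname='toy.pkl' would then point at:

import os
import pickle
import networkx as nx

graphs = [nx.path_graph(5), nx.cycle_graph(6)]   # training graphs
labels = [0, 1]                                  # training labels
test_graphs = [nx.star_graph(4)]                 # test graphs
test_labels = [0]                                # test labels
os.makedirs('data', exist_ok=True)
with open(os.path.join('data', 'toy.pkl'), 'wb') as f:
    pickle.dump((graphs, labels, test_graphs, test_labels), f)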
Example #3
def benchmark_task_val(args, writer=None, feat='node-label'):
    all_vals = []
    graphs = load_data.read_graphfile(args.datadir,
                                      args.bmname,
                                      max_nodes=args.max_nodes)

    example_node = util.node_dict(graphs[0])[0]

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in example_node:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                util.node_dict(G)[u]['feat'] = np.array(
                    util.node_dict(G)[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for i in range(10):
        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
                cross_val.prepare_val_data(graphs, args, i, max_nodes=args.max_nodes)
        if args.method == 'amg-assign':
            print('Method: amg-assign')
            model = encoders.AmgPoolingGcnEncoder(
                max_num_nodes,
                input_dim,
                args.hidden_dim,
                args.output_dim,
                args.num_classes,
                args.num_gc_layers,
                args.hidden_dim,
                assign_ratio=args.assign_ratio,
                num_pooling=args.num_pool,
                bn=args.bn,
                dropout=args.dropout,
                linkpred=args.linkpred,
                args=args,
                assign_input_dim=assign_input_dim).cuda()

        _, val_accs = train(train_dataset,
                            model,
                            args,
                            val_dataset=val_dataset,
                            test_dataset=None,
                            writer=writer)
        all_vals.append(np.array(val_accs))
    all_vals = np.vstack(all_vals)
    all_vals = np.mean(all_vals, axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))
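
The closing lines average the per-epoch validation curves over the ten folds and report the best epoch. A self-contained sketch of that selection step with two made-up folds:

import numpy as np

# Per-epoch validation accuracies from two hypothetical folds.
all_vals = np.vstack([np.array([0.60, 0.72, 0.70]),
                      np.array([0.58, 0.74, 0.69])])
mean_curve = np.mean(all_vals, axis=0)  # [0.59, 0.73, 0.695]
print(np.max(mean_curve))               # best fold-averaged accuracy: 0.73
print(np.argmax(mean_curve))            # epoch index achieving it: 1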
Example #4
def gen_er(n_range, p, num_graphs, feature_generator=None):
    graphs = []
    for i in np.random.choice(n_range, num_graphs):
        graphs.append(nx.erdos_renyi_graph(i, p))

    if feature_generator is None:
        feature_generator = featgen.ConstFeatureGen(0)
    for G in graphs:
        feature_generator.gen_node_features(G)
    return graphs
Example #5
def gen_ba(n_range, m_range, num_graphs, feature_generator=None):
    graphs = []
    for i in np.random.choice(n_range, num_graphs):
        j = np.random.choice(m_range)
        graphs.append(nx.barabasi_albert_graph(i, j))

    if feature_generator is None:
        feature_generator = featgen.ConstFeatureGen(0)
    for G in graphs:
        feature_generator.gen_node_features(G)
    return graphs
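
Both generators draw each graph's node count uniformly from n_range; gen_ba additionally samples the Barabási-Albert attachment parameter from m_range. A quick usage sketch, assuming the featgen module used throughout these examples:

import numpy as np

# 100 ER graphs on 20-29 nodes with edge probability 0.1 (default constant features);
# 100 BA graphs on 40-59 nodes, 4 edges per new node, 10-dim constant features.
er_graphs = gen_er(range(20, 30), 0.1, 100)
ba_graphs = gen_ba(range(40, 60), range(4, 5), 100,
                   featgen.ConstFeatureGen(np.ones(10, dtype=float)))
print(len(er_graphs), er_graphs[0].number_of_nodes())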
Example #6
def benchmark_task(args, writer=None, feat='node-label'):
    graphs = load_data.read_graphfile(args.datadir,
                                      args.bmname,
                                      max_nodes=args.max_nodes)

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in graphs[0].nodes[0]:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                G.nodes[u]['feat'] = np.array(G.nodes[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim, assign_input_dim = \
            prepare_data(graphs, args, max_nodes=args.max_nodes)

    if args.method == 'amg-assign':
        print('Method: amg-assign')
        model = encoders.AmgPoolingGcnEncoder(
            max_num_nodes,
            input_dim,
            args.hidden_dim,
            args.output_dim,
            args.num_classes,
            args.num_gc_layers,
            args.hidden_dim,
            assign_ratio=args.assign_ratio,
            num_pooling=args.num_pool,
            bn=args.bn,
            dropout=args.dropout,
            linkpred=args.linkpred,
            args=args,
            assign_input_dim=assign_input_dim).cuda()

    train(train_dataset,
          model,
          args,
          val_dataset=val_dataset,
          test_dataset=test_dataset,
          writer=writer)
    evaluate(test_dataset, model, args, 'Validation')
Example #7
def syn_community2hier(args, writer=None):

    # data
    feat_gen = [featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))]
    graphs1 = datagen.gen_2hier(1000, [2, 4], 10, range(4, 5), 0.1, 0.03,
                                feat_gen)
    graphs2 = datagen.gen_2hier(1000, [3, 3], 10, range(4, 5), 0.1, 0.03,
                                feat_gen)
    graphs3 = datagen.gen_2community_ba(range(28, 33), range(4, 7), 1000, 0.25,
                                        feat_gen)

    for G in graphs1:
        G.graph['label'] = 0
    for G in graphs2:
        G.graph['label'] = 1
    for G in graphs3:
        G.graph['label'] = 2

    graphs = graphs1 + graphs2 + graphs3

    train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim, assign_input_dim = prepare_data(
        graphs, args)

    if args.method == 'soft-assign':
        print('Method: soft-assign')
        model = encoders.SoftPoolingGcnEncoder(
            max_num_nodes,
            input_dim,
            args.hidden_dim,
            args.output_dim,
            args.num_classes,
            args.num_gc_layers,
            args.hidden_dim,
            assign_ratio=args.assign_ratio,
            num_pooling=args.num_pool,
            bn=args.bn,
            linkpred=args.linkpred,
            args=args,
            assign_input_dim=assign_input_dim).cuda()

    train(train_dataset,
          model,
          args,
          val_dataset=val_dataset,
          test_dataset=test_dataset,
          writer=writer)
Example #8
def benchmark_task_val(args, feat='node-label', pred_hidden_dims=[50], device='cpu'):
    all_vals = []

    data_out_dir = 'data/data_preprocessed/' + args.bmname + '/pool_sizes_' + args.pool_sizes
    if args.normalize == 0:
        data_out_dir = data_out_dir + '_nor_' + str(args.normalize)
    data_out_dir = data_out_dir + '/'
    if not os.path.exists(data_out_dir):
        os.makedirs(data_out_dir)

    graph_list_file_name = data_out_dir + 'graphs_list.p'
    dataset_file_name = data_out_dir + 'dataset.p'

    if os.path.isfile(graph_list_file_name) and os.path.isfile(dataset_file_name):
        print('Files exist, reading from stored files....')
        print('Reading file from', data_out_dir)
        with open(dataset_file_name, 'rb') as f:
            graphs = pickle.load(f)
        with open(graph_list_file_name, 'rb') as f:
            graphs_list = pickle.load(f)
        print('Data loaded!')
    else:
        print('No files exist, preprocessing datasets...')
        graphs = load_data.read_graphfile(args.datadir, args.bmname, max_nodes=args.max_nodes)
        print('Data length before filtering: ', len(graphs))

        dataset_copy = graphs.copy()
        len_data = len(graphs)
        graphs_list = []
        pool_sizes = [int(i) for i in args.pool_sizes.split('_')]
        print('pool_sizes: ', pool_sizes)

        for i in range(len_data):
            adj = nx.adjacency_matrix(dataset_copy[i])
            # Drop graphs outside [min_nodes, max_nodes] or whose adjacency
            # size disagrees with the node count.
            if adj.shape[0] < args.min_nodes or adj.shape[0] > args.max_nodes \
                    or adj.shape[0] != dataset_copy[i].number_of_nodes():
                graphs.remove(dataset_copy[i])
            else:
                # Coarsen the kept graph at the configured pool sizes.
                coarsen_graph = gp(adj.todense().astype(float), pool_sizes)
                coarsen_graph.coarsening_pooling(args.normalize)
                graphs_list.append(coarsen_graph)

        print('Data length after filtering: ', len(graphs), len(graphs_list))
        print('Dataset preprocessed, dumping....')
        with open(dataset_file_name, 'wb') as f:
            pickle.dump(graphs, f)
        with open(graph_list_file_name, 'wb') as f:
            pickle.dump(graphs_list, f)
        print('Dataset dumped!')

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in graphs[0].nodes[0]:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                G.nodes[u]['feat'] = np.array(G.nodes[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for i in range(10):
        # Only the shuffle selected via args.shuffle is actually run.
        if i == args.shuffle:
            if args.with_test:
                train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim = \
                        prepare_data(graphs, graphs_list, args, test_graphs=None, max_nodes=args.max_nodes, seed=i)
            else:
                train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim = \
                        prepare_data(graphs, graphs_list, args, test_graphs=[], max_nodes=args.max_nodes, seed=i)

            out_dir = (args.bmname + '/tar_' + str(args.train_ratio) + '_ter_' + str(args.test_ratio) + '/'
                       + 'num_shuffle' + str(args.num_shuffle) + '/'
                       + 'numconv_' + str(args.num_gc_layers) + '_dp_' + str(args.dropout)
                       + '_wd_' + str(args.weight_decay) + '_b_' + str(args.batch_size)
                       + '_hd_' + str(args.hidden_dim) + '_od_' + str(args.output_dim)
                       + '_ph_' + str(args.pred_hidden) + '_lr_' + str(args.lr)
                       + '_concat_' + str(args.concat))
            out_dir = (out_dir + '_ps_' + args.pool_sizes + '_np_' + str(args.num_pool_matrix)
                       + '_nfp_' + str(args.num_pool_final_matrix) + '_norL_' + str(args.normalize)
                       + '_mask_' + str(args.mask) + '_ne_' + args.norm + '_cf_' + str(args.con_final))

            results_out_dir = (args.out_dir + '/' + args.bmname + '/with_test' + str(args.with_test)
                               + '/using_feat_' + args.feat + '/no_val_results/with_shuffles/' + out_dir + '/')
            log_out_dir = (args.out_dir + '/' + args.bmname + '/with_test' + str(args.with_test)
                           + '/using_feat_' + args.feat + '/no_val_logs/with_shuffles/' + out_dir + '/')

            if not os.path.exists(results_out_dir):
                os.makedirs(results_out_dir, exist_ok=True)
            if not os.path.exists(log_out_dir):
                os.makedirs(log_out_dir, exist_ok=True)

            results_out_file = results_out_dir + 'shuffle' + str(args.shuffle) + '.txt'
            log_out_file = log_out_dir + 'shuffle' + str(args.shuffle) + '.txt'
            results_out_file_2 = results_out_dir + 'test_shuffle' + str(args.shuffle) + '.txt'
            val_out_file = results_out_dir + 'val_result' + str(args.shuffle) + '.txt'
            print(results_out_file)

            with open(log_out_file, 'a') as f:
                f.write('Shuffle ' + str(i) + '====================================================================================\n')

            pool_sizes = [int(i) for i in args.pool_sizes.split('_')]
            model = encoders.WavePoolingGcnEncoder(
                max_num_nodes, input_dim, args.hidden_dim, args.output_dim,
                args.num_classes, args.num_gc_layers, args.num_pool_matrix,
                args.num_pool_final_matrix, pool_sizes=pool_sizes,
                pred_hidden_dims=pred_hidden_dims, concat=args.concat, bn=args.bn,
                dropout=args.dropout, mask=args.mask, args=args, device=device)

            if args.with_test:
                _, val_accs, test_accs, best_val_result = train(
                    train_dataset, model, args, val_dataset=val_dataset,
                    test_dataset=test_dataset, log_dir=log_out_file, device=device)
            else:
                _, val_accs, test_accs, best_val_result = train(
                    train_dataset, model, args, val_dataset=val_dataset,
                    test_dataset=None, log_dir=log_out_file, device=device)

            print('Shuffle ', i, '--------- best val result', best_val_result)
            if args.with_test:
                test_ac = test_accs[best_val_result['epoch']]
                print('Test accuracy: ', test_ac)
            best_val_ac = best_val_result['acc']

            print('Best val on shuffle ', args.shuffle, best_val_ac)
            if args.with_test:
                print('Test on shuffle', args.shuffle, ' : ', test_ac)

    # These files and values come from the single shuffle executed above
    # (args.shuffle must therefore lie in range(10)).
    np.savetxt(val_out_file, val_accs)

    with open(results_out_file, 'w') as f:
        f.write('Best val on shuffle ' + str(args.shuffle) + ': ' + str(best_val_ac) + '\n')
    if args.with_test:
        with open(results_out_file_2, 'w') as f:
            f.write('Test accuracy on shuffle ' + str(args.shuffle) + ':' + str(test_ac) + '\n')

    with open(log_out_file, 'a') as f:
        f.write('Best val on shuffle ' + str(args.shuffle) + ' : ' + str(best_val_ac) + '\n')
        if args.with_test:
            f.write('Test on shuffle ' + str(args.shuffle) + ' : ' + str(test_ac) + '\n')
        f.write('------------------------------------------------------------------\n')
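
Throughout this example, args.pool_sizes is an underscore-separated string that both the preprocessing loop and the model constructor split into per-level coarsening sizes, for example:

# '10_5_2' configures three pooling levels of sizes 10, 5, and 2.
pool_sizes = [int(s) for s in '10_5_2'.split('_')]
print(pool_sizes)  # [10, 5, 2]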
Example #9
def benchmark_task_val(args, writer=None, feat='node-label'):
    all_vals = []
    graphs = load_data.read_graphfile(args.datadir,
                                      args.bmname,
                                      max_nodes=args.max_nodes)
    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in graphs[0].nodes[0]:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                G.nodes[u]['feat'] = np.array(G.nodes[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for i in range(10):
        print("****************", i)
        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
                cross_val.prepare_val_data(graphs, args, i, max_nodes=args.max_nodes)
        # Replicate the input dims once per aspect (using list multiplication
        # also avoids shadowing the outer fold index i).
        input_dim_aspect = [input_dim] * args.num_aspect
        assign_input_dim_aspect = [assign_input_dim] * args.num_aspect
        if args.method == 'MxGNN':
            print('Method: MxGNN')
            if args.merge_method == 'cat':
                model = MxGNNCat.SoftPoolingGcnEncoder(
                    max_num_nodes,
                    args.num_aspect,
                    args.multi_conv,
                    args.multi_pool,
                    input_dim_aspect,
                    args.hidden_dim,
                    args.output_dim,
                    args.num_classes,
                    args.num_gc_layers,
                    args.hidden_dim,
                    assign_ratio=args.assign_ratio,
                    num_pooling=args.num_pool,
                    bn=args.bn,
                    dropout=args.dropout,
                    linkpred=args.linkpred,
                    args=args,
                    assign_input_dim=assign_input_dim_aspect).cuda()
            else:
                model = MxGNNSum.SoftPoolingGcnEncoder(
                    max_num_nodes,
                    args.num_aspect,
                    args.multi_conv,
                    args.multi_pool,
                    input_dim_aspect,
                    args.hidden_dim,
                    args.output_dim,
                    args.num_classes,
                    args.num_gc_layers,
                    args.hidden_dim,
                    assign_ratio=args.assign_ratio,
                    num_pooling=args.num_pool,
                    bn=args.bn,
                    dropout=args.dropout,
                    linkpred=args.linkpred,
                    args=args,
                    assign_input_dim=assign_input_dim_aspect).cuda()
        elif args.method == 'diffpool':
            print('Method: diffpool')
            model = encoders.SoftPoolingGcnEncoder(
                max_num_nodes,
                input_dim,
                args.hidden_dim,
                args.output_dim,
                args.num_classes,
                args.num_gc_layers,
                args.hidden_dim,
                assign_ratio=args.assign_ratio,
                num_pooling=args.num_pool,
                bn=args.bn,
                dropout=args.dropout,
                linkpred=args.linkpred,
                args=args,
                assign_input_dim=assign_input_dim).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(input_dim,
                                             args.hidden_dim,
                                             args.output_dim,
                                             args.num_classes,
                                             args.num_gc_layers,
                                             bn=args.bn,
                                             dropout=args.dropout,
                                             args=args).cuda()

        _, val_accs = train(train_dataset,
                            model,
                            args,
                            val_dataset=val_dataset,
                            test_dataset=None,
                            writer=writer)
        all_vals.append(np.array(val_accs))

    all_vals = np.vstack(all_vals)
    all_vals = np.mean(all_vals, axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))
    result = "./result.txt"
    with open(result, 'a+') as f:
        f.write(str(np.max(all_vals)))
        f.write("\n")
Example #10
def benchmark_task_val(args, writer=None, feat='node-label'):
    all_vals = []
    graphs = load_data.read_graphfile(args.datadir,
                                      args.bmname,
                                      max_nodes=args.max_nodes)

    example_node = util.node_dict(graphs[0])[0]

    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in example_node:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                util.node_dict(G)[u]['feat'] = np.array(
                    util.node_dict(G)[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    # Revised from a fixed 10 folds: cap the fold count by the size of the
    # validation slice the train/test ratios leave room for, at minimum 1.
    num_folds = min(10, max(int(len(graphs) * (1 - args.train_ratio - args.test_ratio)), 1))
    for i in range(num_folds):

        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
                cross_val.prepare_val_data(graphs, args, i, max_nodes=args.max_nodes)
        if args.method == 'soft-assign':
            print('Method: soft-assign')
            model = encoders.SoftPoolingGcnEncoder(
                max_num_nodes,
                input_dim,
                args.hidden_dim,
                args.output_dim,
                args.num_classes,
                args.num_gc_layers,
                args.hidden_dim,
                assign_ratio=args.assign_ratio,
                num_pooling=args.num_pool,
                num_unpooling=args.num_unpool,
                unpool_ratio=args.ratio_unpool,
                bn=args.bn,
                dropout=args.dropout,
                linkpred=args.linkpred,
                args=args,
                assign_input_dim=assign_input_dim).cuda()
        elif args.method == 'base-set2set':
            print('Method: base-set2set')
            model = encoders.GcnSet2SetEncoder(input_dim,
                                               args.hidden_dim,
                                               args.output_dim,
                                               args.num_classes,
                                               args.num_gc_layers,
                                               bn=args.bn,
                                               dropout=args.dropout,
                                               args=args).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(input_dim,
                                             args.hidden_dim,
                                             args.output_dim,
                                             args.num_classes,
                                             args.num_gc_layers,
                                             bn=args.bn,
                                             dropout=args.dropout,
                                             args=args).cuda()

        _, val_accs = train(train_dataset,
                            model,
                            args,
                            val_dataset=val_dataset,
                            test_dataset=None,
                            writer=writer)
        all_vals.append(np.array(val_accs))
    all_vals = np.vstack(all_vals)
    all_vals = np.mean(all_vals, axis=0)
    print('all_vals: ', all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))
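
With the revised bound, the number of folds adapts to the dataset size. A worked check of the formula with illustrative ratios (train 0.8, test 0.1, so a 0.1 validation share):

for n in (1000, 15):
    print(n, '->', min(10, max(int(n * (1 - 0.8 - 0.1)), 1)))
# 1000 -> 10 (large datasets keep the full 10 folds)
# 15   -> 1  (tiny datasets fall back to a single fold)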
Example #11
def benchmark_task_val(args, writer=None, feat='node-feat'):
    all_vals = []
    #graphs = load_data.read_graphfile(args.datadir, args.bmname, max_nodes=args.max_nodes)
    Hlist, New_G = load_data.read_graphfile2(args.datadir,
                                             args.bmname,
                                             max_nodes=args.max_nodes)
    #example_node = util.node_dict(graphs[0])[0]
    example_node = util.node_dict(New_G)[0]

    #if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
    if feat == 'node-feat' and 'feat_dim' in New_G.graph:
        print('Using node features')
        #input_dim =graphs[0].graph['feat_dim']
        input_dim = New_G.graph['feat_dim']
    elif feat == 'node-label' and 'label' in example_node:
        print('Using node labels')
        # NOTE: the two fallback branches below still iterate over `graphs`,
        # which read_graphfile2 does not return here (only Hlist and New_G);
        # as written, only the 'node-feat' path above is usable.
        for G in graphs:
            for u in G.nodes():
                util.node_dict(G)[u]['feat'] = np.array(
                    util.node_dict(G)[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    # related to cross_val.py (13th row)
    # cross-validation (disabled):
    # for i in range(10):
    #     # train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
    #     #     cross_val.prepare_val_data(graphs, args, i, max_nodes=args.max_nodes)
    #     train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
    #         cross_val.prepare_val_data2(Hlist,New_G, args, i, max_nodes=args.max_nodes)
    #     if args.method == 'soft-assign':
    #         print('Method: soft-assign')
    #         model = encoders.SoftPoolingGcnEncoder(
    #             max_num_nodes,
    #             input_dim, args.hidden_dim, args.output_dim, args.num_classes, args.num_gc_layers,
    #             args.hidden_dim, assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
    #             bn=args.bn, dropout=args.dropout, linkpred=args.linkpred, args=args,
    #             assign_input_dim=assign_input_dim).cuda()
    #     elif args.method == 'base-set2set':
    #         print('Method: base-set2set')
    #         model = encoders.GcnSet2SetEncoder(
    #             input_dim, args.hidden_dim, args.output_dim, args.num_classes,
    #             args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()
    #     else:
    #         print('Method: base')
    #         model = encoders.GcnEncoderGraph(
    #             input_dim, args.hidden_dim, args.output_dim, args.num_classes,
    #             args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()
    #
    #     _, val_accs = train(train_dataset, model, args, val_dataset=val_dataset, test_dataset=None,
    #                         writer=writer)
    #     all_vals.append(np.array(val_accs))



    # Single split (the 10-fold loop above is commented out); pass shuffle
    # index 0 -- the original code passed the undefined loop variable `i` here.
    train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
        cross_val.prepare_val_data2(Hlist, New_G, args, 0, max_nodes=args.max_nodes)
    if args.method == 'soft-assign':
        print('Method: soft-assign')
        model = encoders.SoftPoolingGcnEncoder(
            max_num_nodes,
            input_dim,
            args.hidden_dim,
            args.output_dim,
            args.num_classes,
            args.num_gc_layers,
            args.hidden_dim,
            assign_ratio=args.assign_ratio,
            num_pooling=args.num_pool,
            bn=args.bn,
            dropout=args.dropout,
            linkpred=args.linkpred,
            args=args,
            assign_input_dim=assign_input_dim).cuda()
    elif args.method == 'base-set2set':
        print('Method: base-set2set')
        model = encoders.GcnSet2SetEncoder(input_dim,
                                           args.hidden_dim,
                                           args.output_dim,
                                           args.num_classes,
                                           args.num_gc_layers,
                                           bn=args.bn,
                                           dropout=args.dropout,
                                           args=args).cuda()
    else:
        print('Method: base')
        model = encoders.GcnEncoderGraph(input_dim,
                                         args.hidden_dim,
                                         args.output_dim,
                                         args.num_classes,
                                         args.num_gc_layers,
                                         bn=args.bn,
                                         dropout=args.dropout,
                                         args=args).cuda()

    _, val_accs = train(train_dataset,
                        model,
                        args,
                        val_dataset=val_dataset,
                        test_dataset=None,
                        writer=writer)
    all_vals.append(np.array(val_accs))
    all_vals = np.vstack(all_vals)
    all_vals = np.mean(all_vals, axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))
Example #12
def benchmark_task_val(args, writer=None, feat='node-label'):
    all_vals = []
    graphs = load_data.read_graphfile(args.datadir, args.bmname, max_nodes=args.max_nodes)
    
    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in graphs[0].nodes[0]:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                G.nodes[u]['feat'] = np.array(G.nodes[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for i in range(10):
        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
                cross_val.prepare_val_data(graphs, args, i, max_nodes=args.max_nodes)
        if args.method == 'soft-assign':
            print('Method: soft-assign')
            model = encoders.SoftPoolingGcnEncoder(
                    max_num_nodes, 
                    input_dim, args.hidden_dim, args.output_dim, args.num_classes, args.num_gc_layers,
                    args.hidden_dim, assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                    bn=args.bn, dropout=args.dropout, linkpred=args.linkpred, args=args,
                    assign_input_dim=assign_input_dim).cuda()
        elif args.method == 'base-set2set':
            print('Method: base-set2set')
            model = encoders.GcnSet2SetEncoder(
                    input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                    args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(
                    input_dim, args.hidden_dim, args.output_dim, args.num_classes, 
                    args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()

        _, val_accs = train(train_dataset, model, args, val_dataset=val_dataset, test_dataset=None,
            writer=writer)
        all_vals.append(np.array(val_accs))
        del train_dataset, val_dataset, model
        torch.cuda.empty_cache()

    all_vals = np.vstack(all_vals)

    with open('log_', 'a+') as f:
        f.write('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
            'method', 'gc', 'dim', 
            '10-mean', '10-std',
            '20-mean', '20-std',
            '30-mean', '30-std',
            '40-mean', '40-std',
            '50-mean', '50-std',
            '60-mean', '60-std',
            '70-mean', '70-std',
            '80-mean', '80-std',
            '90-mean', '90-std',
            '100-mean', '100-std', 'std'))

        # Accuracies at epochs 10, 20, ..., 100 (assumes at least 100 epochs per fold).
        f.write('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
            args.method, args.num_gc_layers, args.output_dim,
            all_vals[:, 10-1].mean(), all_vals[:, 10-1].std(),
            all_vals[:, 20-1].mean(), all_vals[:, 20-1].std(),
            all_vals[:, 30-1].mean(), all_vals[:, 30-1].std(),
            all_vals[:, 40-1].mean(), all_vals[:, 40-1].std(),
            all_vals[:, 50-1].mean(), all_vals[:, 50-1].std(),
            all_vals[:, 60-1].mean(), all_vals[:, 60-1].std(),
            all_vals[:, 70-1].mean(), all_vals[:, 70-1].std(),
            all_vals[:, 80-1].mean(), all_vals[:, 80-1].std(),
            all_vals[:, 90-1].mean(), all_vals[:, 90-1].std(),
            all_vals[:, 100-1].mean(), all_vals[:, 100-1].std(),
            np.max(np.mean(all_vals, axis=0))))

    all_vals = np.mean(all_vals, axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))
Example #13
# NOTE: the featgen/datagen import paths below assume the DiffPool repo
# layout (gen/feat.py, gen/data.py); adjust to your project structure.
import numpy as np
import gen.feat as featgen
import gen.data as datagen
from graph_sampler import GraphSampler
from torch.autograd import Variable
'''
Run.
Attach to encoder.
Change to average pooling.
'''

# syn_community1v2

input_dim = 10

n_range = range(40, 60)
m_range = range(4, 5)
num_graphs = 500
feature_generator = featgen.ConstFeatureGen(np.ones(input_dim, dtype=float))
graphs1 = datagen.gen_ba(n_range, m_range, num_graphs, feature_generator)
for G in graphs1:
    G.graph['label'] = 0

n_range = range(20, 30)
m_range = range(4, 5)
num_graphs = 500
inter_prob = 0.3
feature_generators = [featgen.ConstFeatureGen(np.ones(input_dim, dtype=float))]
graphs2 = datagen.gen_2community_ba(n_range, m_range, num_graphs, inter_prob,
                                    feature_generators)
for G in graphs2:
    G.graph['label'] = 1

graphs = graphs1 + graphs2
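
A quick sanity check on the assembled dataset before handing graphs to prepare_data, confirming the 500/500 class split and the node-count range:

from collections import Counter

label_counts = Counter(G.graph['label'] for G in graphs)
print(label_counts)  # expect Counter({0: 500, 1: 500})
print(min(G.number_of_nodes() for G in graphs),
      max(G.number_of_nodes() for G in graphs))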