Ejemplo n.º 1
0
def pkl_task(args, feat=None):
    with open(os.path.join(args.datadir, args.pkl_fname), 'rb') as pkl_file:
        data = pickle.load(pkl_file)
    graphs = data[0]
    labels = data[1]
    test_graphs = data[2]
    test_labels = data[3]

    for i in range(len(graphs)):
        graphs[i].graph['label'] = labels[i]
    for i in range(len(test_graphs)):
        test_graphs[i].graph['label'] = test_labels[i]

    if feat is None:
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)
        for G in test_graphs:
            featgen_const.gen_node_features(G)

    train_dataset, test_dataset, max_num_nodes = prepare_data(graphs, args, test_graphs=test_graphs)
    model = encoders.GcnEncoderGraph(
            args.input_dim, args.hidden_dim, args.output_dim, args.num_classes, 
            args.num_gc_layers, bn=args.bn).cuda()
    train(train_dataset, model, args, test_dataset=test_dataset)
    evaluate(test_dataset, model, args, 'Validation')
Ejemplo n.º 2
0
def syn_community1v2(args, writer=None, export_graphs=False):
    # data
    graphs1 = datagen.gen_ba(
        range(40, 60), range(4, 5), 500,
        featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float)))
    for G in graphs1:
        G.graph['label'] = 0
    if export_graphs:
        util.draw_graph_list(graphs1[:16], 4, 4, 'figs/ba')

    graphs2 = datagen.gen_2community_ba(
        range(20, 30), range(4, 5), 500, 0.3,
        [featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))])
    for G in graphs2:
        G.graph['label'] = 1
    if export_graphs:
        util.draw_graph_list(graphs2[:16], 4, 4, 'figs/ba2')

    graphs = graphs1 + graphs2

    train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim, assign_input_dim = prepare_data(
        graphs, args)
    if args.method == 'soft-assign':
        print('Method: soft-assign')
        model = encoders.SoftPoolingGcnEncoder(
            max_num_nodes,
            input_dim,
            args.hidden_dim,
            args.output_dim,
            args.num_classes,
            args.num_gc_layers,
            args.hidden_dim,
            assign_ratio=args.assign_ratio,
            num_pooling=args.num_pool,
            bn=args.bn,
            linkpred=args.linkpred,
            assign_input_dim=assign_input_dim).cuda()
    elif args.method == 'base-set2set':
        print('Method: base-set2set')
        model = encoders.GcnSet2SetEncoder(input_dim,
                                           args.hidden_dim,
                                           args.output_dim,
                                           2,
                                           args.num_gc_layers,
                                           bn=args.bn).cuda()
    else:
        print('Method: base')
        model = encoders.GcnEncoderGraph(input_dim,
                                         args.hidden_dim,
                                         args.output_dim,
                                         2,
                                         args.num_gc_layers,
                                         bn=args.bn).cuda()

    train(train_dataset,
          model,
          args,
          val_dataset=val_dataset,
          test_dataset=test_dataset,
          writer=writer)
Ejemplo n.º 3
0
def benchmark_task_val(args, writer=None, feat='node-label'):
    all_vals = []
    graphs = load_data.read_graphfile(args.datadir, args.bmname, max_nodes=args.max_nodes)

    example_node = util.node_dict(graphs[0])[0]
    
    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in example_node:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                util.node_dict(G)[u]['feat'] = np.array(util.node_dict(G)[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for i in range(10):
        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
                cross_val.prepare_val_data(graphs, args, i, max_nodes=args.max_nodes)
        if args.method == 'soft-assign':
            print('Method: soft-assign')
            model = encoders.SoftPoolingGcnEncoder(
                    max_num_nodes, 
                    input_dim, args.hidden_dim, args.output_dim, args.num_classes, args.num_gc_layers,
                    args.hidden_dim, assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                    bn=args.bn, dropout=args.dropout, linkpred=args.linkpred, args=args,
                    assign_input_dim=assign_input_dim).cuda()
        elif args.method == 'base-set2set':
            print('Method: base-set2set')
            model = encoders.GcnSet2SetEncoder(
                    input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                    args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(
                    input_dim, args.hidden_dim, args.output_dim, args.num_classes, 
                    args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()

        _, val_accs = train(train_dataset, model, args, val_dataset=val_dataset, test_dataset=None,
            writer=writer)
        all_vals.append(np.array(val_accs))
    all_vals = np.vstack(all_vals)
    all_vals = np.mean(all_vals, axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))
Ejemplo n.º 4
0
def benchmark_task(args, writer=None, feat='node-label'):
    graphs = load_data.read_graphfile(args.datadir, args.bmname, max_nodes=args.max_nodes)
    
    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in graphs[0].node[0]:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                G.node[u]['feat'] = np.array(G.node[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim, assign_input_dim = \
            prepare_data(graphs, args, max_nodes=args.max_nodes)
    if args.method == 'soft-assign':
        print('Method: soft-assign')
        model = encoders.SoftPoolingGcnEncoder(
                max_num_nodes, 
                input_dim, args.hidden_dim, args.output_dim, args.num_classes, args.num_gc_layers,
                args.hidden_dim, assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                bn=args.bn, dropout=args.dropout, linkpred=args.linkpred, args=args,
                assign_input_dim=assign_input_dim).cuda()
    elif args.method == 'base-set2set':
        print('Method: base-set2set')
        model = encoders.GcnSet2SetEncoder(
                input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()
    else:
        print('Method: base')
        model = encoders.GcnEncoderGraph(
                input_dim, args.hidden_dim, args.output_dim, args.num_classes, 
                args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()

    train(train_dataset, model, args, val_dataset=val_dataset, test_dataset=test_dataset,
            writer=writer)
    evaluate(test_dataset, model, args, 'Validation')
Ejemplo n.º 5
0
def syn_community2hier(args, writer=None):

    # data
    feat_gen = [featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))]
    graphs1 = datagen.gen_2hier(1000, [2,4], 10, range(4,5), 0.1, 0.03, feat_gen)
    graphs2 = datagen.gen_2hier(1000, [3,3], 10, range(4,5), 0.1, 0.03, feat_gen)
    graphs3 = datagen.gen_2community_ba(range(28, 33), range(4,7), 1000, 0.25, feat_gen)

    for G in graphs1:
        G.graph['label'] = 0
    for G in graphs2:
        G.graph['label'] = 1
    for G in graphs3:
        G.graph['label'] = 2

    graphs = graphs1 + graphs2 + graphs3

    train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim, assign_input_dim = prepare_data(graphs, args)

    if args.method == 'soft-assign':
        print('Method: soft-assign')
        model = encoders.SoftPoolingGcnEncoder(
                max_num_nodes, 
                input_dim, args.hidden_dim, args.output_dim, args.num_classes, args.num_gc_layers,
                args.hidden_dim, assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                bn=args.bn, linkpred=args.linkpred, args=args, assign_input_dim=assign_input_dim).cuda()
    elif args.method == 'base-set2set':
        print('Method: base-set2set')
        model = encoders.GcnSet2SetEncoder(input_dim, args.hidden_dim, args.output_dim, 2,
                args.num_gc_layers, bn=args.bn, args=args, assign_input_dim=assign_input_dim).cuda()
    else:
        print('Method: base')
        model = encoders.GcnEncoderGraph(input_dim, args.hidden_dim, args.output_dim, 2,
                args.num_gc_layers, bn=args.bn, args=args).cuda()
    train(train_dataset, model, args, val_dataset=val_dataset, test_dataset=test_dataset,
            writer=writer)
Ejemplo n.º 6
0
def benchmark_task_val(args, writer=None, feat='node-label'):
    all_vals = []
    graphs = load_data.read_graphfile(args.datadir,
                                      args.bmname,
                                      max_nodes=args.max_nodes)
    #args.max_nodes = 600, maxnodes
    #print("maxnodes=",maxnodes),features
    #compute_matching_degree(features)
    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in graphs[0].nodes[0]:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                G.nodes[u]['feat'] = np.array(G.nodes[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for i in range(10):
        print("****************", i)
        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
                cross_val.prepare_val_data(graphs, args, i, max_nodes=args.max_nodes)
        temp_input_dim = []
        temp_assign_input_dim = []
        for i in range(args.num_aspect):
            temp_input_dim.append(input_dim)
            temp_assign_input_dim.append(assign_input_dim)
        input_dim_aspect = temp_input_dim
        assign_input_dim_aspect = temp_assign_input_dim
        #assign_input_di, max_graph_node_numm_aspect = [assign_input_dim, assign_input_dim, assign_input_dim]
        if args.method == 'MxGNN':
            print('Method: MxGNN')
            if args.merge_method == 'cat':
                model = MxGNNCat.SoftPoolingGcnEncoder(
                    max_num_nodes,
                    args.num_aspect,
                    args.multi_conv,
                    args.multi_pool,
                    input_dim_aspect,
                    args.hidden_dim,
                    args.output_dim,
                    args.num_classes,
                    args.num_gc_layers,
                    args.hidden_dim,
                    assign_ratio=args.assign_ratio,
                    num_pooling=args.num_pool,
                    bn=args.bn,
                    dropout=args.dropout,
                    linkpred=args.linkpred,
                    args=args,
                    assign_input_dim=assign_input_dim_aspect).cuda()
            else:
                model = MxGNNSum.SoftPoolingGcnEncoder(
                    max_num_nodes,
                    args.num_aspect,
                    args.multi_conv,
                    args.multi_pool,
                    input_dim_aspect,
                    args.hidden_dim,
                    args.output_dim,
                    args.num_classes,
                    args.num_gc_layers,
                    args.hidden_dim,
                    assign_ratio=args.assign_ratio,
                    num_pooling=args.num_pool,
                    bn=args.bn,
                    dropout=args.dropout,
                    linkpred=args.linkpred,
                    args=args,
                    assign_input_dim=assign_input_dim_aspect).cuda()
        elif args.method == 'diffpool':
            print('Method: diffpool')
            model = encoders.SoftPoolingGcnEncoder(
                max_num_nodes,
                input_dim,
                args.hidden_dim,
                args.output_dim,
                args.num_classes,
                args.num_gc_layers,
                args.hidden_dim,
                assign_ratio=args.assign_ratio,
                num_pooling=args.num_pool,
                bn=args.bn,
                dropout=args.dropout,
                linkpred=args.linkpred,
                args=args,
                assign_input_dim=assign_input_dim).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(input_dim,
                                             args.hidden_dim,
                                             args.output_dim,
                                             args.num_classes,
                                             args.num_gc_layers,
                                             bn=args.bn,
                                             dropout=args.dropout,
                                             args=args).cuda()

        _, val_accs = train(train_dataset,
                            model,
                            args,
                            val_dataset=val_dataset,
                            test_dataset=None,
                            writer=writer)
        all_vals.append(np.array(val_accs))

    all_vals = np.vstack(all_vals)
    all_vals = np.mean(all_vals, axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))
    result = "./result.txt"
    f = open(result, 'a+')
    f.write(str(np.max(all_vals)))
    f.write("\n")
    f.close()
Ejemplo n.º 7
0
def benchmark_task_val(args, writer=None, feat='node-label'):
    all_vals = []
    graphs = load_data.read_graphfile(args.datadir, args.bmname, max_nodes=args.max_nodes)
    
    if feat == 'node-feat' and 'feat_dim' in graphs[0].graph:
        print('Using node features')
        input_dim = graphs[0].graph['feat_dim']
    elif feat == 'node-label' and 'label' in graphs[0].node[0]:
        print('Using node labels')
        for G in graphs:
            for u in G.nodes():
                G.node[u]['feat'] = np.array(G.node[u]['label'])
    else:
        print('Using constant labels')
        featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
        for G in graphs:
            featgen_const.gen_node_features(G)

    for i in range(10):
        train_dataset, val_dataset, max_num_nodes, input_dim, assign_input_dim = \
                cross_val.prepare_val_data(graphs, args, i, max_nodes=args.max_nodes)
        if args.method == 'soft-assign':
            print('Method: soft-assign')
            model = encoders.SoftPoolingGcnEncoder(
                    max_num_nodes, 
                    input_dim, args.hidden_dim, args.output_dim, args.num_classes, args.num_gc_layers,
                    args.hidden_dim, assign_ratio=args.assign_ratio, num_pooling=args.num_pool,
                    bn=args.bn, dropout=args.dropout, linkpred=args.linkpred, args=args,
                    assign_input_dim=assign_input_dim).cuda()
        elif args.method == 'base-set2set':
            print('Method: base-set2set')
            model = encoders.GcnSet2SetEncoder(
                    input_dim, args.hidden_dim, args.output_dim, args.num_classes,
                    args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()
        else:
            print('Method: base')
            model = encoders.GcnEncoderGraph(
                    input_dim, args.hidden_dim, args.output_dim, args.num_classes, 
                    args.num_gc_layers, bn=args.bn, dropout=args.dropout, args=args).cuda()

        _, val_accs = train(train_dataset, model, args, val_dataset=val_dataset, test_dataset=None,
            writer=writer)
        all_vals.append(np.array(val_accs))
        del train_dataset, val_dataset, model
        torch.cuda.empty_cache()

    all_vals = np.vstack(all_vals)

    with open('log_', 'a+') as f:
        f.write('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
            'method', 'gc', 'dim', 
            '10-mean', '10-std',
            '20-mean', '20-std',
            '30-mean', '30-std',
            '40-mean', '40-std',
            '50-mean', '50-std',
            '60-mean', '60-std',
            '70-mean', '70-std',
            '80-mean', '80-std',
            '90-mean', '90-std',
            '100-mean', '100-std', 'std'))

        f.write('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
            args.method, args.num_gc_layers, args.output_dim,
            all_vals[:, 10-1].mean(), all_vals[:, 10-1].std(),
            all_vals[:, 20-1].mean(), all_vals[:, 20-1].std(),
            all_vals[:, 30-1].mean(), all_vals[:, 30-1].std(),
            all_vals[:, 40-1].mean(), all_vals[:, 40-1].std(),
            all_vals[:, 50-1].mean(), all_vals[:, 50-1].std(),
            all_vals[:, 60-1].mean(), all_vals[:, 60-1].std(),
            all_vals[:, 70-1].mean(), all_vals[:, 70-1].std(),
            all_vals[:, 80-1].mean(), all_vals[:, 80-1].std(),
            all_vals[:, 90-1].mean(), all_vals[:, 90-1].std(),
            all_vals[:, 100-1].mean(), all_vals[:, 100-1].std(),
            np.max(np.mean(all_vals, axis=0))))

    all_vals = np.mean(all_vals, axis=0)
    print(all_vals)
    print(np.max(all_vals))
    print(np.argmax(all_vals))