Example #1
# Shared imports for these examples; MeanAggregator, Encoder,
# IncrementSupervisedGraphSage, and Continuum come from the
# surrounding GraphSAGE project.
import random

import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import f1_score

def run_cora_incremental(feat_data, labels, adj_lists, args):
    # Cora: 2708 nodes, 1433 bag-of-words features, 7 classes.
    features = nn.Embedding(2708, 1433)
    features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)

    # Two-layer GraphSAGE encoder with mean aggregation, kept uniformly on CPU
    # (the original mixed cuda=True on agg1 with cuda=False everywhere else).
    agg1 = MeanAggregator(features, cuda=False)
    enc1 = Encoder(features, 1433, 128, adj_lists, agg1, gcn=True, cuda=False)
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=False)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(), enc1.embed_dim, 128, adj_lists, agg2,
            base_model=enc1, gcn=True, cuda=False)
    enc1.num_samples = 5
    enc2.num_samples = 5

    graphsage = IncrementSupervisedGraphSage(7, enc2, labels, args)

    val_data = Continuum(name="cora", data_type='val', download=True)
    val = val_data.nodes()
    # One task per class: train on each task in turn, validating after each.
    for t in range(7):
        incremental_data = Continuum(name="cora", data_type='train', download=True, task_type=t)
        train = incremental_data.nodes()
        random.shuffle(train)
        print("the size of task: %i" % len(train))
        for i in range(0, len(train), args.batch_size):
            # Slicing clamps at the end of the list, so the last partial
            # batch needs no special case.
            batch_nodes = train[i:i + args.batch_size]
            graphsage.observe(batch_nodes)

        val_output = graphsage.forward(val)
        print("Validation F1:", f1_score(labels[val], val_output.data.numpy().argmax(axis=1), average="micro"))
Example #2
def run_pubmed_incremental(feat_data, labels, adj_lists, args):
    evaluation_metrics = []
    # PubMed: 19717 nodes, 500 TF-IDF features, 3 classes.
    features = nn.Embedding(19717, 500)
    features.weight = nn.Parameter(torch.FloatTensor(feat_data),
                                   requires_grad=False)

    # Two-layer GraphSAGE encoder with mean aggregation, kept uniformly on CPU
    # (the original mixed cuda=True on agg1 with cuda=False everywhere else).
    agg1 = MeanAggregator(features, cuda=False)
    enc1 = Encoder(features, 500, 128, adj_lists, agg1, gcn=True, cuda=False)
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=False)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(),
                   enc1.embed_dim,
                   128,
                   adj_lists,
                   agg2,
                   base_model=enc1,
                   gcn=True,
                   cuda=False)
    enc1.num_samples = 10
    enc2.num_samples = 25

    graphsage = IncrementSupervisedGraphSage(3, enc2, labels, args)
    val_data = Continuum(name="pubmed", data_type='val', download=True)

    val = val_data.nodes()
    # One task per class: train on each task in turn, validating after each.
    for t in range(val_data.num_class):
        incremental_data = Continuum(name="pubmed",
                                     data_type='train',
                                     download=True,
                                     task_type=t)
        train = incremental_data.nodes()
        random.shuffle(train)
        print("the size of task: %i" % len(train))
        for i in range(0, len(train), args.batch_size):
            # Slicing clamps at the end of the list, so the last partial
            # batch needs no special case.
            batch_nodes = train[i:i + args.batch_size]
            graphsage.observe(batch_nodes)

        val_output = graphsage.forward(val)
        # Record (task index, task size, micro-F1 on the validation set).
        # Note: the original appended the inner loop variable i (the last
        # batch offset) here; the task index t is what was clearly intended.
        evaluation_metrics.append([
            t,
            len(train),
            f1_score(labels[val],
                     val_output.data.numpy().argmax(axis=1),
                     average="micro")
        ])
    return evaluation_metrics
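
A short sketch of consuming the return value; the [task, task_size, micro_f1] triple layout follows directly from the append above:

metrics = run_pubmed_incremental(feat_data, labels, adj_lists, args)
for task, task_size, micro_f1 in metrics:
    print("task %d (%d train nodes): micro-F1 = %.4f" % (task, task_size, micro_f1))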
Example #3
args = parser.parse_args()
print(args)
torch.manual_seed(args.seed)

if __name__ == "__main__":
    total_results = []
    # Sweep the episodic-memory budget, repeating each setting with fresh seeds.
    for memory in [50, 100, 200, 500]:
        args.memory_size = memory
        results = []
        for i in range(args.repeat):
            args.seed = i
            print(args)
            # Reseed every run; the original reseeded numpy and random here
            # but not torch, so torch's stream carried over between repeats.
            torch.manual_seed(args.seed)
            np.random.seed(args.seed)
            random.seed(args.seed)
            incremental_data = Continuum(name=args.dataset,
                                         data_type='all_train',
                                         download=True,
                                         task_type=0)
            num_nodes = len(incremental_data.labels)

            adj_lists = incremental_data.neighbors()
            feat_data = incremental_data.features.numpy()
            labels = incremental_data.labels
            # Dispatch on dataset; a single if/elif chain replaces the
            # original's separate if statements without changing behavior.
            if args.dataset == "pubmed":
                results.append(
                    run_pubmed_incremental(feat_data, labels, adj_lists, args))
            elif args.dataset == "cora" and args.task_incremental:
                results.append(
                    run_cora_incremental(feat_data, labels, adj_lists, args))
            elif args.dataset == "cora":
                results.append(run_cora(feat_data, labels, adj_lists, args))
        total_results.append(results)
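
This snippet assumes a parser defined earlier; below is a plausible reconstruction. Every flag name and default is a guess inferred from the attributes accessed above (dataset, seed, repeat, batch_size, memory_size, task_incremental), not the project's actual CLI:

import argparse

parser = argparse.ArgumentParser(description="Incremental GraphSAGE experiments")
parser.add_argument("--dataset", type=str, default="cora")  # "cora" or "pubmed"
parser.add_argument("--seed", type=int, default=0)          # overwritten per repeat
parser.add_argument("--repeat", type=int, default=10)       # runs per memory budget
parser.add_argument("--batch_size", type=int, default=256)
parser.add_argument("--memory_size", type=int, default=100)
parser.add_argument("--task_incremental", action="store_true")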