# Example #1
def run_bc_test_based_on_group(adj_lists_test, feat_data, test, model_name,
                               output, edge_count):
    """Evaluate a saved GraphSAGE regression model on a test node group.

    Rebuilds the two-layer GraphSAGE encoder over the *test* graph
    structure, loads the trained weights from ``model_name``, embeds the
    nodes in ``test``, prints the mean cosine similarity against the
    reference features in ``feat_data``, and writes the embeddings to
    ``output``.

    Args:
        adj_lists_test: Adjacency lists of the test graph.
        feat_data: Reference feature matrix, indexed by node id
            (presumably node2vec vectors — confirm against caller).
        test: Iterable of node ids to embed and evaluate.
        model_name: Path of the stored ``state_dict`` to load.
        edge_count: Neighbor sample size for the first encoder layer.
        output: Path the embedding matrix is written to (``np.savetxt``).
    """
    num_nodes = 10312
    feature_dim = 128
    embed_dim = 128

    # Input features are all-ones; the model relies purely on graph
    # structure.  Use the constants above instead of repeating the literals
    # (10312, 128) so a single edit changes every dependent shape.
    feat_data_cp = np.ones((num_nodes, feature_dim))
    features = nn.Embedding(num_nodes, feature_dim)
    features.weight = nn.Parameter(torch.FloatTensor(feat_data_cp),
                                   requires_grad=False)

    # Set up the model using the testing graph structure.
    # NOTE(review): agg1 is built with cuda=True while every other component
    # uses cuda=False — kept as-is (consistent with the sibling functions in
    # this file), but verify this is intentional.
    agg1 = MeanAggregator(features, cuda=True)
    enc1 = Encoder(features,
                   feature_dim,
                   embed_dim,
                   adj_lists_test,
                   agg1,
                   gcn=False,
                   cuda=False)
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=False)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(),
                   enc1.embed_dim,
                   embed_dim,
                   adj_lists_test,
                   agg2,
                   base_model=enc1,
                   gcn=False,
                   cuda=False)

    # Possible additional layers
    # agg3 = MeanAggregator(lambda nodes: enc2(nodes).t(), cuda=False)
    # enc3 = Encoder(lambda nodes: enc2(nodes).t(), enc2.embed_dim, embed_dim, adj_lists_test, agg3,
    #                base_model=enc2, gcn=False, cuda=False)
    # agg4 = MeanAggregator(lambda nodes: enc3(nodes).t(), cuda=False)
    # enc4 = Encoder(lambda nodes: enc3(nodes).t(), enc3.embed_dim, embed_dim, adj_lists_test, agg4,
    #                base_model=enc3, gcn=False, cuda=False)

    # First layer samples `edge_count` neighbors; second layer uses a fixed 10.
    enc1.num_sample = edge_count
    enc2.num_sample = 10
    # enc3.num_sample = 15
    # enc4.num_sample = 20

    # Initialize the model and load the stored parameters from the file.
    graphsage = RegressionGraphSage(enc2)
    graphsage.load_state_dict(torch.load(model_name))
    graphsage.eval()

    # Compute the cosine similarity between the produced embeddings and the
    # reference features of the same nodes.
    embed_output = graphsage.forward(test)
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    print("Average Validation Cosine Similarity:",
          cos(embed_output, torch.FloatTensor(feat_data[test])).mean(0).item())

    # Save Embedding to file
    np.savetxt(output, embed_output.data.numpy())
# Example #2
def run_bc_test(adj_lists_test, feat_data, test, model_name, output,
                edge_count):
    """Evaluate a saved GraphSAGE regression model on a degree group read
    from ``../BlogCatalog-data/data_id0.txt``.

    Rebuilds the two-layer encoder over the test graph, loads the trained
    weights, embeds the node ids listed in the data file, prints the mean
    cosine similarity against ``feat_data``, saves the embeddings to
    ``output`` and the evaluated ids to ``test_id<edge_count>.txt``.

    Args:
        adj_lists_test: Adjacency lists of the test graph.
        feat_data: Reference feature matrix, indexed by node id.
        test: Unused here; the node set is read from the data file instead.
            Kept for signature compatibility with callers.
        model_name: Path of the stored ``state_dict`` to load.
        output: Path the embedding matrix is written to (``np.savetxt``).
        edge_count: Neighbor sample size for both encoder layers.
    """
    num_nodes = 10312
    feature_dim = 128
    embed_dim = 128

    features = nn.Embedding(num_nodes, feature_dim)
    features.weight = nn.Parameter(torch.FloatTensor(feat_data),
                                   requires_grad=False)

    agg1 = MeanAggregator(features, cuda=True)
    enc1 = Encoder(features,
                   feature_dim,
                   embed_dim,
                   adj_lists_test,
                   agg1,
                   gcn=False,
                   cuda=False)
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=False)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(),
                   enc1.embed_dim,
                   embed_dim,
                   adj_lists_test,
                   agg2,
                   base_model=enc1,
                   gcn=False,
                   cuda=False)
    enc1.num_sample = edge_count
    enc2.num_sample = edge_count
    graphsage = RegressionGraphSage(enc2)

    graphsage.load_state_dict(torch.load(model_name))
    graphsage.eval()

    # Load the test node ids of this degree group.
    # BUG FIX: the original appended the ids to the parameter `test` while
    # forwarding the still-empty `test_data`, so the model was run on no
    # nodes.  Ids are also parsed to int (skipping empty tokens from the
    # trailing separator) so they can index `feat_data`.
    test_data = []
    with open("../BlogCatalog-data/data_id0.txt", "r") as f:
        for x in f.readline().split(" "):
            x = x.strip()
            if x:
                test_data.append(int(x))

    embed_output = graphsage.forward(test_data)
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    print("Average Validation Cosine Similarity:",
          cos(embed_output,
              torch.FloatTensor(feat_data[test_data])).mean(0).item())

    # Save Embedding to file
    np.savetxt(output, embed_output.data.numpy())

    # Record which ids were evaluated for this sample size.
    with open("test_id" + str(edge_count) + ".txt", "w") as f:
        for item in test_data:
            f.write(str(item) + " ")
# Example #3
def run_bc(sample_count, model_name, output):
    """Train a two-layer GraphSAGE regression model on BlogCatalog.

    Builds separate encoder stacks over the training and test graph
    structures, trains for 1000 epochs with SGD against node2vec target
    features, prints a cosine-similarity evaluation every 10 epochs, and
    saves the final ``state_dict`` to ``model_name``.

    Args:
        sample_count: Upper bound on the first-layer neighbor sample size;
            each epoch draws a random size in [1, sample_count].
        model_name: Path the trained ``state_dict`` is saved to.
        output: Output path (only used by the commented-out final
            evaluation call; currently unused).
    """
    # np.random.seed(1)
    #random.seed(1)
    num_nodes = 10312
    feature_dim = 128
    embed_dim = 128
    # Select training&test set from qualified nodes
    selected_id = get_partial_list(1500)

    # Load node2vec features
    adj_lists, adj_lists_empty, features_node2vec = load_blog_catalog(
        selected_id)

    # Build the graph
    adj_lists_train, adj_lists_test, train, test, adj_lists = preprocessing(
        selected_id, 300, sample_count, adj_lists, adj_lists_empty, True)

    # Init the input feature of every node into all-one vector
    feat_data = np.ones((num_nodes, feature_dim))

    features = nn.Embedding(num_nodes, feature_dim)
    features.weight = nn.Parameter(torch.FloatTensor(feat_data),
                                   requires_grad=False)

    # Training model: two stacked encoder layers over the training graph.
    # NOTE(review): agg1 uses cuda=True while everything else uses
    # cuda=False — consistent with the sibling functions in this file, but
    # verify it is intentional.
    agg1 = MeanAggregator(features, cuda=True)
    enc1 = Encoder(features,
                   feature_dim,
                   embed_dim,
                   adj_lists_train,
                   agg1,
                   gcn=False,
                   cuda=False)
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=False)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(),
                   enc1.embed_dim,
                   embed_dim,
                   adj_lists_train,
                   agg2,
                   base_model=enc1,
                   gcn=False,
                   cuda=False)

    # Possible additional layers
    # agg3 = MeanAggregator(lambda nodes: enc2(nodes).t(), cuda=False)
    # enc3 = Encoder(lambda nodes: enc2(nodes).t(), enc2.embed_dim, embed_dim, adj_lists_train, agg3,
    #                base_model=enc2, gcn=False, cuda=False)
    # agg4 = MeanAggregator(lambda nodes: enc3(nodes).t(), cuda=False)
    # enc4 = Encoder(lambda nodes: enc3(nodes).t(), enc3.embed_dim, embed_dim, adj_lists_train, agg4,
    #                base_model=enc3, gcn=False, cuda=False)

    # Initial sample sizes; enc1's is re-randomized inside the epoch loop.
    enc1.num_sample = sample_count
    enc2.num_sample = 10
    # enc3.num_sample = 15
    # enc4.num_sample = 20

    # Initialize the Graph-SAGE model
    graphsage = RegressionGraphSage(enc2)

    # Prepare the input data for the testing model
    feat_data_test = np.ones((10312, 128))
    features_test = nn.Embedding(num_nodes, feature_dim)
    features_test.weight = nn.Parameter(torch.FloatTensor(feat_data_test),
                                        requires_grad=False)

    # Set up the model with testing graph structure (a parallel encoder
    # stack over adj_lists_test; weights are copied in from the training
    # model at evaluation time).
    agg1_test = MeanAggregator(features_test, cuda=True)
    enc1_test = Encoder(features_test,
                        feature_dim,
                        embed_dim,
                        adj_lists_test,
                        agg1_test,
                        gcn=False,
                        cuda=False)
    agg2_test = MeanAggregator(lambda nodes: enc1_test(nodes).t(), cuda=False)
    enc2_test = Encoder(lambda nodes: enc1_test(nodes).t(),
                        enc1_test.embed_dim,
                        embed_dim,
                        adj_lists_test,
                        agg2_test,
                        base_model=enc1_test,
                        gcn=False,
                        cuda=False)

    # agg3_test = MeanAggregator(lambda nodes: enc2_test(nodes).t(), cuda=False)
    # enc3_test = Encoder(lambda nodes: enc2_test(nodes).t(), enc2_test.embed_dim, embed_dim, adj_lists_test, agg3_test,
    #                base_model=enc2_test, gcn=False, cuda=False)
    # agg4_test = MeanAggregator(lambda nodes: enc3_test(nodes).t(), cuda=False)
    # enc4_test = Encoder(lambda nodes: enc3_test(nodes).t(), enc3_test.embed_dim, embed_dim, adj_lists_test, agg4_test,
    #                     base_model=enc3_test, gcn=False, cuda=False)
    enc1_test.num_sample = sample_count
    enc2_test.num_sample = 10
    # enc3_test.num_sample = 10
    # enc4_test.num_sample = 10

    # Optimize only trainable parameters (the all-one feature embedding is
    # frozen with requires_grad=False above).
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       graphsage.parameters()),
                                lr=0.3)
    times = []
    for epoch in range(1000):
        # NOTE(review): the batch is sliced BEFORE shuffling, so each epoch
        # trains on a prefix of the previous epoch's shuffle — confirm this
        # ordering is intended.
        batch_nodes = train[:256]
        random.shuffle(train)
        start_time = time.time()
        optimizer.zero_grad()

        # Select a random number of the sample size of the first layer and build the training graph according to this
        sample_count_epoch = random.randint(1, sample_count)
        adj_lists_train_1, _, _, _, _ = preprocessing(selected_id, 300,
                                                      sample_count_epoch,
                                                      adj_lists,
                                                      adj_lists_empty, False)
        # adj_lists_train_2, _, _, _, _ = preprocessing(selected_id, 300, 10, adj_lists, adj_lists_empty, True)

        # Configure the hyperparameters in each epoch: both encoder layers
        # switch to the freshly resampled graph, and the first layer's
        # neighbor sample size matches the new graph.
        enc1.adj_lists = adj_lists_train_1
        enc2.adj_lists = adj_lists_train_1
        enc1.num_sample = sample_count_epoch

        # Calculate the loss and back propagate it; the regression target is
        # the node2vec feature vector of each batch node.
        loss = graphsage.loss(
            batch_nodes,
            Variable(
                torch.FloatTensor(features_node2vec[np.array(batch_nodes)])))
        loss.backward()
        optimizer.step()
        end_time = time.time()
        times.append(end_time - start_time)
        print(epoch, loss)

        # Every 10 epochs, use the model to run the test graph and data, and compute the current cosine similarity
        if epoch % 10 == 9:
            # Copy the current training weights into the test-graph encoder
            # stack and evaluate on the held-out nodes.
            graphsage_test = RegressionGraphSage(enc2_test)
            graphsage_test.load_state_dict(graphsage.state_dict())
            graphsage_test.eval()

            embed_output = graphsage_test.forward(test)
            cos = nn.CosineSimilarity(dim=1, eps=1e-6)
            print(
                "Cosine similarity: ",
                cos(embed_output,
                    torch.FloatTensor(features_node2vec[test])).mean(0).item())

    # Save model
    torch.save(graphsage.state_dict(), model_name)
    # run_bc_test_based_on_group(adj_lists_test, feat_data, test, model_name, output, sample_count)

    # embed_output = graphsage.forward(test)
    # cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    # print("Average Validation Cosine Similarity:", cos(embed_output, torch.FloatTensor(feat_data[test])).mean(0).item())
    # # print("Validation F1:", f1_score(labels[val], val_output.data.numpy().argmax(axis=1), average="micro"))
    print("Average batch time:", np.mean(times))