def run_bc_test_based_on_group(adj_lists_test, feat_data, test, model_name, output, edge_count):
    """Evaluate a saved RegressionGraphSage model on the test graph.

    Rebuilds the two-layer GraphSAGE encoder over ``adj_lists_test``,
    loads the trained weights from ``model_name``, embeds the ``test``
    node ids, prints the mean cosine similarity against
    ``feat_data[test]``, and writes the embeddings to ``output``.

    ``edge_count`` is used as the layer-1 neighbour sample size.
    """
    num_nodes = 10312
    feature_dim = 128
    embed_dim = 128

    # Input features are deliberately all-ones (the real feat_data is only
    # used as the cosine-similarity target below).
    ones_feats = np.ones((10312, 128))
    feat_embed = nn.Embedding(num_nodes, feature_dim)
    feat_embed.weight = nn.Parameter(torch.FloatTensor(ones_feats), requires_grad=False)

    # Layer 1: aggregate raw (all-one) features over the test adjacency.
    layer1_agg = MeanAggregator(feat_embed, cuda=True)
    layer1_enc = Encoder(feat_embed, feature_dim, embed_dim, adj_lists_test,
                         layer1_agg, gcn=False, cuda=False)

    # Layer 2: aggregate layer-1 embeddings (transposed to node-major).
    layer2_agg = MeanAggregator(lambda nodes: layer1_enc(nodes).t(), cuda=False)
    layer2_enc = Encoder(lambda nodes: layer1_enc(nodes).t(), layer1_enc.embed_dim,
                         embed_dim, adj_lists_test, layer2_agg,
                         base_model=layer1_enc, gcn=False, cuda=False)

    # Neighbour sampling fan-out per layer.
    layer1_enc.num_sample = edge_count
    layer2_enc.num_sample = 10

    # Restore the trained model and switch to inference mode.
    graphsage = RegressionGraphSage(layer2_enc)
    graphsage.load_state_dict(torch.load(model_name))
    graphsage.eval()

    # Embed the test nodes and report cosine similarity vs. the targets.
    embed_output = graphsage.forward(test)
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    similarity = cos(embed_output, torch.FloatTensor(feat_data[test])).mean(0).item()
    print("Average Validation Cosine Similarity:", similarity)

    # Save Embedding to file
    np.savetxt(output, embed_output.data.numpy())
def run_bc_test(adj_lists_test, feat_data, test, model_name, output, edge_count):
    """Evaluate a saved RegressionGraphSage model on a degree-group node list.

    Rebuilds the two-layer encoder over ``adj_lists_test`` (with
    ``feat_data`` as input features), loads the weights from
    ``model_name``, reads the group-specific node ids from
    ``../BlogCatalog-data/data_id0.txt``, embeds them, prints the mean
    cosine similarity against ``feat_data`` for those nodes, saves the
    embeddings to ``output`` and the evaluated ids to
    ``test_id<edge_count>.txt``.

    Bug fix vs. the original: the file-read loop appended raw string
    tokens into the ``test`` parameter instead of ``test_data``, so the
    model was run on an empty list and the similarity/id-file outputs
    were computed from a corrupted ``test``. The ids are now parsed as
    ints into ``test_data`` and used consistently throughout.
    """
    num_nodes = 10312
    feature_dim = 128
    embed_dim = 128

    features = nn.Embedding(num_nodes, feature_dim)
    features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)

    # Two-layer GraphSAGE encoder over the test graph structure.
    agg1 = MeanAggregator(features, cuda=True)
    enc1 = Encoder(features, feature_dim, embed_dim, adj_lists_test, agg1,
                   gcn=False, cuda=False)
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=False)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(), enc1.embed_dim, embed_dim,
                   adj_lists_test, agg2, base_model=enc1, gcn=False, cuda=False)
    enc1.num_sample = edge_count
    enc2.num_sample = edge_count

    # Load the trained model and switch to inference mode.
    graphsage = RegressionGraphSage(enc2)
    graphsage.load_state_dict(torch.load(model_name))
    graphsage.eval()

    # Test data based on degree (group): one space-separated line of node ids.
    # Skip empty tokens (e.g. from a trailing space) and parse ids as ints.
    test_data = []
    with open("../BlogCatalog-data/data_id0.txt", "r") as f:
        vecs = f.readline().split(" ")
        for x in vecs:
            if x.strip():
                test_data.append(int(x))

    embed_output = graphsage.forward(test_data)
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    # Compare against the targets of the nodes that were actually embedded.
    print("Average Validation Cosine Similarity:",
          cos(embed_output, torch.FloatTensor(feat_data[test_data])).mean(0).item())

    # Save Embedding to file
    np.savetxt(output, embed_output.data.numpy())
    # Record which node ids were evaluated for this edge_count.
    with open("test_id" + str(edge_count) + ".txt", "w") as f:
        for item in test_data:
            f.write(str(item) + " ")
def run_bc(sample_count, model_name, output):
    """Train a two-layer RegressionGraphSage model on BlogCatalog.

    Trains for 1000 SGD epochs to regress node2vec features from
    all-one input features, re-sampling the training graph each epoch
    with a random layer-1 fan-out in [1, sample_count]. Every 10 epochs
    the current weights are copied into a parallel encoder built on the
    test graph and the cosine similarity on the held-out ``test`` nodes
    is printed. The final state_dict is saved to ``model_name``.

    NOTE(review): ``output`` is currently unused here — it is only
    consumed by the commented-out run_bc_test_based_on_group call.
    """
    # np.random.seed(1)
    #random.seed(1)
    num_nodes = 10312
    feature_dim = 128
    embed_dim = 128
    # Select training&test set from qualified nodes
    selected_id = get_partial_list(1500)
    # Load node2vec features
    adj_lists, adj_lists_empty, features_node2vec = load_blog_catalog(
        selected_id)
    # Build the graph
    adj_lists_train, adj_lists_test, train, test, adj_lists = preprocessing(
        selected_id, 300, sample_count, adj_lists, adj_lists_empty, True)
    # Init the input feature of every node into all-one vector
    feat_data = np.ones((num_nodes, feature_dim))
    features = nn.Embedding(num_nodes, feature_dim)
    features.weight = nn.Parameter(torch.FloatTensor(feat_data),
                                   requires_grad=False)
    # Two-layer GraphSAGE encoder over the *training* adjacency.
    agg1 = MeanAggregator(features, cuda=True)
    enc1 = Encoder(features, feature_dim, embed_dim, adj_lists_train, agg1,
                   gcn=False, cuda=False)
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=False)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(), enc1.embed_dim, embed_dim,
                   adj_lists_train, agg2, base_model=enc1, gcn=False,
                   cuda=False)
    # Possible additional layers
    # agg3 = MeanAggregator(lambda nodes: enc2(nodes).t(), cuda=False)
    # enc3 = Encoder(lambda nodes: enc2(nodes).t(), enc2.embed_dim, embed_dim, adj_lists_train, agg3,
    #                base_model=enc2, gcn=False, cuda=False)
    # agg4 = MeanAggregator(lambda nodes: enc3(nodes).t(), cuda=False)
    # enc4 = Encoder(lambda nodes: enc3(nodes).t(), enc3.embed_dim, embed_dim, adj_lists_train, agg4,
    #                base_model=enc3, gcn=False, cuda=False)
    # Neighbour sampling fan-out per layer (enc1's is re-set every epoch below).
    enc1.num_sample = sample_count
    enc2.num_sample = 10
    # enc3.num_sample = 15
    # enc4.num_sample = 20
    # Initialize the Graph-SAGE model
    graphsage = RegressionGraphSage(enc2)
    # Prepare the input data for the testing model (also all-one features).
    feat_data_test = np.ones((10312, 128))
    features_test = nn.Embedding(num_nodes, feature_dim)
    features_test.weight = nn.Parameter(torch.FloatTensor(feat_data_test),
                                        requires_grad=False)
    # Set up a parallel model with the *testing* graph structure; the trained
    # weights are copied into it every 10 epochs for evaluation.
    agg1_test = MeanAggregator(features_test, cuda=True)
    enc1_test = Encoder(features_test, feature_dim, embed_dim, adj_lists_test,
                        agg1_test, gcn=False, cuda=False)
    agg2_test = MeanAggregator(lambda nodes: enc1_test(nodes).t(), cuda=False)
    enc2_test = Encoder(lambda nodes: enc1_test(nodes).t(),
                        enc1_test.embed_dim, embed_dim, adj_lists_test,
                        agg2_test, base_model=enc1_test, gcn=False, cuda=False)
    # agg3_test = MeanAggregator(lambda nodes: enc2_test(nodes).t(), cuda=False)
    # enc3_test = Encoder(lambda nodes: enc2_test(nodes).t(), enc2_test.embed_dim, embed_dim, adj_lists_test, agg3_test,
    #                     base_model=enc2_test, gcn=False, cuda=False)
    # agg4_test = MeanAggregator(lambda nodes: enc3_test(nodes).t(), cuda=False)
    # enc4_test = Encoder(lambda nodes: enc3_test(nodes).t(), enc3_test.embed_dim, embed_dim, adj_lists_test, agg4_test,
    #                     base_model=enc3_test, gcn=False, cuda=False)
    enc1_test.num_sample = sample_count
    enc2_test.num_sample = 10
    # enc3_test.num_sample = 10
    # enc4_test.num_sample = 10
    # Only the trainable parameters go to the optimizer (feature embeddings
    # are frozen via requires_grad=False).
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       graphsage.parameters()), lr=0.3)
    times = []
    for epoch in range(1000):
        # NOTE(review): the batch is sliced *before* shuffling, so epoch 0
        # always trains on the first 256 nodes; subsequent epochs see a
        # freshly shuffled prefix. This mirrors the upstream GraphSAGE
        # reference example — confirm before changing.
        batch_nodes = train[:256]
        random.shuffle(train)
        start_time = time.time()
        optimizer.zero_grad()
        # Select a random number of the sample size of the first layer and
        # build the training graph according to this
        sample_count_epoch = random.randint(1, sample_count)
        adj_lists_train_1, _, _, _, _ = preprocessing(selected_id, 300,
                                                      sample_count_epoch,
                                                      adj_lists,
                                                      adj_lists_empty, False)
        # adj_lists_train_2, _, _, _, _ = preprocessing(selected_id, 300, 10, adj_lists, adj_lists_empty, True)
        # Configure the hyperparameters in each epoch: both layers read the
        # freshly re-sampled adjacency, layer 1 uses this epoch's fan-out.
        enc1.adj_lists = adj_lists_train_1
        enc2.adj_lists = adj_lists_train_1
        enc1.num_sample = sample_count_epoch
        # Calculate the loss and back propagate it; targets are the node2vec
        # features of the batch nodes.
        loss = graphsage.loss(
            batch_nodes,
            Variable(
                torch.FloatTensor(features_node2vec[np.array(batch_nodes)])))
        loss.backward()
        optimizer.step()
        end_time = time.time()
        times.append(end_time - start_time)
        print(epoch, loss)
        # Every 10 epochs, use the model to run the test graph and data, and
        # compute the current cosine similarity
        if epoch % 10 == 9:
            # Copy the current weights into the test-graph model and evaluate.
            graphsage_test = RegressionGraphSage(enc2_test)
            graphsage_test.load_state_dict(graphsage.state_dict())
            graphsage_test.eval()
            embed_output = graphsage_test.forward(test)
            cos = nn.CosineSimilarity(dim=1, eps=1e-6)
            print(
                "Cosine similarity: ",
                cos(embed_output,
                    torch.FloatTensor(
                        features_node2vec[test])).mean(0).item())
    # Save model
    torch.save(graphsage.state_dict(), model_name)
    # run_bc_test_based_on_group(adj_lists_test, feat_data, test, model_name, output, sample_count)
    # embed_output = graphsage.forward(test)
    # cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    # print("Average Validation Cosine Similarity:", cos(embed_output, torch.FloatTensor(feat_data[test])).mean(0).item())
    # # print("Validation F1:", f1_score(labels[val], val_output.data.numpy().argmax(axis=1), average="micro"))
    print("Average batch time:", np.mean(times))