# Imports inferred from usage below. networkx/numpy are standard; the rest
# (rw, dataloader, evaluate, util, metrics, GraphWave, HSD, MultiHSD) are
# project-local modules whose exact paths are assumed.
import networkx as nx
import numpy as np

import dataloader
import evaluate
import metrics
import rw
import util
import GraphWave
from HSD import HSD            # assumed module layout
from MultiHSD import MultiHSD  # assumed module layout


def evaluate_multi_HSD_embeddings():
    """Grid-search embedding dimensionality for multi-HSD on the europe graph
    and report the best KNN score for both the SIR and PageRank labels.

    Renamed from evaluate_embeddings: a second function of the same name
    below would otherwise silently shadow this one."""
    method = "multi-HSD"
    graphName = "europe"
    candidates = list(range(1, 17))
    candidates.extend([32, 64, 128])  # , 256, 512, 1024])

    # The label files do not depend on the dimension, so read them once.
    SIR_label_dict = dataloader.read_label(f"data/label/{graphName}.label")
    PageRank_label_dict = dataloader.read_label(
        f"data/label/{graphName}_PageRank.label")

    SIR_val = 0.0
    PageRank_val = 0.0
    for dimension in candidates:
        embedding_dict = rw.read_vectors(
            f"output/{method}_{graphName}_{dimension}.csv")
        embeddings, SIR_labels, PageRank_labels = [], [], []
        for node, vector in embedding_dict.items():
            embeddings.append(vector)
            SIR_labels.append(SIR_label_dict[node])
            PageRank_labels.append(PageRank_label_dict[node])

        print(f"{method}, {graphName}, dimension: {dimension}")
        SIR_val = max(
            SIR_val,
            evaluate.KNN_evaluate(embeddings, SIR_labels, cv=10,
                                  n_neighbor=20))
        PageRank_val = max(
            PageRank_val,
            evaluate.KNN_evaluate(embeddings, PageRank_labels, cv=10,
                                  n_neighbor=20))
        # evaluate.LR_evaluate(embeddings, labels)
    print(f"max score, SIR:{SIR_val}, PageRank:{PageRank_val}\n")
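# For reference, a minimal sketch of what evaluate.KNN_evaluate is assumed to
# do here: k-fold cross-validated KNN classification accuracy over the
# embeddings. This illustrates the assumed contract, not the project's actual
# implementation. The precomputed-distance variant used in base_HSD_Test
# below would additionally need matched row/column slicing of the distance
# matrix per fold, which plain cross_val_score does not do.
def knn_evaluate_sketch(embeddings, labels, cv=10, n_neighbor=20):
    from sklearn.model_selection import cross_val_score
    from sklearn.neighbors import KNeighborsClassifier

    clf = KNeighborsClassifier(n_neighbors=n_neighbor)
    scores = cross_val_score(clf, np.asarray(embeddings),
                             np.asarray(labels), cv=cv)
    return scores.mean()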
def multi_HSD_Test(graphName, hop=3, n_scales=200, cv=5, n_neighbor=10):
    """Embed a graph with multi-HSD and score the embedding with KNN against
    both the SIR labels and the PageRank labels."""
    graph = nx.read_edgelist(f"data/graph/{graphName}.edgelist",
                             create_using=nx.Graph,
                             edgetype=float,
                             data=[('weight', float)])
    PageRank_label_dict = dataloader.read_label(
        f"data/label/{graphName}_PageRank.label")
    SIR_label_dict = dataloader.read_label(f"data/label/{graphName}.label")

    model = MultiHSD(graph, graphName, hop, n_scales)
    model.init()
    embedding_dict = model.parallel_embed(n_workers=10)

    embeddings, SIR_labels, PageRank_labels = [], [], []
    for node, vector in embedding_dict.items():
        embeddings.append(vector)
        SIR_labels.append(SIR_label_dict[node])
        PageRank_labels.append(PageRank_label_dict[node])

    SIR_val = evaluate.KNN_evaluate(embeddings, SIR_labels, cv=cv,
                                    n_neighbor=n_neighbor)
    PageRank_val = evaluate.KNN_evaluate(embeddings, PageRank_labels, cv=cv,
                                         n_neighbor=n_neighbor)
    return SIR_val, PageRank_val
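# A minimal sketch of the label-file format dataloader.read_label is assumed
# to parse: one whitespace-separated "node label" pair per line, returned as
# a dict mapping node id to label. Hypothetical; the project's parser may
# differ, e.g. in how it types the label column or handles comments.
def read_label_sketch(path):
    label_dict = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.split()
            if len(parts) >= 2:
                node, label = parts[0], parts[1]
                label_dict[node] = label
    return label_dict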
def base_HSD_Test(graphName, hop=3, metric="wasserstein"):
    """Run single-scale HSD over a small grid of wavelet scales and return
    the best KNN score together with the scale that produced it."""
    graph = nx.read_edgelist(f"data/graph/{graphName}.edgelist",
                             create_using=nx.Graph,
                             edgetype=float,
                             data=[('weight', float)])
    label_dict = dataloader.read_label(f"data/label/{graphName}.label")

    model = HSD(graph, graphName, 0, hop, metric)
    model.construct_hierarchy()
    labels = [label_dict[node] for node in model.nodes]

    model.eigenvalues, model.eigenvectors = np.linalg.eigh(model.laplacian)
    scale_min, scale_max = util.recommend_scale_range(list(model.eigenvalues))

    score_max, scale_opt = 0, 0
    # np.float was removed in NumPy 1.24; use the builtin float instead.
    for scale in np.linspace(scale_min, scale_max, num=5, dtype=float):
        model.scale = scale
        model.calculate_wavelets(model.scale, approx=True)
        dists = model.parallel_calculate_HSD(n_workers=10)
        knn_score = evaluate.KNN_evaluate(dists, labels, metric="precomputed",
                                          cv=10, n_neighbor=20)
        if knn_score > score_max:
            score_max = knn_score
            scale_opt = scale
    print(f"max score: {score_max}, optimal scale: {scale_opt}\n")
    return score_max, scale_opt
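# calculate_wavelets above is assumed to build graph heat wavelets from the
# Laplacian eigendecomposition. The exact (non-approximated) form is the
# standard heat kernel Psi_s = U diag(exp(-s * lambda)) U^T; a minimal dense
# sketch, assuming eigenvalues/eigenvectors come from np.linalg.eigh as above:
def heat_wavelets_sketch(eigenvalues, eigenvectors, scale):
    # Row a of the result describes how heat injected at node a has diffused
    # over the graph after "time" scale.
    filt = np.exp(-scale * np.asarray(eigenvalues))
    return eigenvectors @ np.diag(filt) @ eigenvectors.T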
def run(graph_name, label_type):
    """Grid-search GraphWave over wavelet scales and characteristic-function
    sample ranges, logging each accuracy and reporting the maximum."""
    graph, label_dict = dataloader.load_data(graph_name, label_type)
    graphwave = GraphWave.GraphWave(graph)

    scale_min, scale_max = GraphWave.recommend_scale_range(
        graphwave.eigenvalues)
    candidate_scales = np.linspace(scale_min, scale_max * 2, 10)
    candidate_sample_points = [
        np.linspace(0, upper, 100) for upper in range(10, 100, 10)
    ]

    nodes_dict = {node: node for node in graphwave.nodes}

    max_accuracy = 0.0
    with open(f"{graph_name}_accuracy.txt", mode="w+",
              encoding="utf-8") as accuracy_file:
        for scale in candidate_scales:
            graphwave.calculate_wavelets(scale, approx=True)
            for points in candidate_sample_points:
                embedding_dict = graphwave.embed(points)
                nodes, vectors, labels = util.merge_dicts_to_lists(
                    nodes_dict, embedding_dict, label_dict)
                acc = evaluate.KNN_evaluate(vectors, labels, cv=5,
                                            n_neighbor=20)
                max_accuracy = max(acc, max_accuracy)
                accuracy_file.write(f"scale: {scale}, "
                                    f"sample_upper: {points[-1]}, "
                                    f"accuracy: {acc}\n")
                accuracy_file.flush()
    print(f"max accuracy: {max_accuracy}")
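# graphwave.embed(points) is assumed to follow the GraphWave construction:
# treat each node's wavelet coefficients as a distribution, sample its
# empirical characteristic function phi_a(t) = mean_m exp(i * t * Psi[m, a])
# at the given points, and concatenate the real and imaginary parts. A
# minimal sketch under that assumption, with wavelets as produced by
# heat_wavelets_sketch above:
def graphwave_embed_sketch(wavelets, sample_points):
    embeddings = []
    for a in range(wavelets.shape[0]):
        coeffs = wavelets[:, a]  # wavelet coefficients centered at node a
        vec = []
        for t in sample_points:
            phi = np.mean(np.exp(1j * t * coeffs))
            vec.extend([phi.real, phi.imag])
        embeddings.append(vec)
    return np.asarray(embeddings)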
def multi_HSD_single_label_Test(graphName, hop=3, n_scales=200):
    """Variant that scores a single label set with both KNN and logistic
    regression. Renamed from multi_HSD_Test so it no longer shadows the
    two-label definition above."""
    graph = nx.read_edgelist(f"data/graph/{graphName}.edgelist",
                             create_using=nx.Graph,
                             edgetype=float,
                             data=[('weight', float)])
    label_dict = dataloader.read_label(f"data/label/{graphName}.label")

    model = MultiHSD(graph, graphName, hop, n_scales)
    model.init()
    embedding_dict = model.parallel_embed(n_workers=10)
    # rw.save_vectors_dict(embedding_dict,
    #                      f"output/multi_HSD_{graphName}_{n_scales}.csv")

    embeddings, labels = [], []
    for node, vector in embedding_dict.items():
        embeddings.append(vector)
        labels.append(label_dict[node])

    knn_score = evaluate.KNN_evaluate(embeddings, labels)
    lr_score = evaluate.LR_evaluate(embeddings, labels)

    # Alternative: score precomputed pairwise Hellinger distances instead of
    # the raw embeddings (see the sketch of hellinger_distance below).
    # dists = np.zeros((model.n_node, model.n_node), dtype=float)
    # step = hop + 1
    # for idx1 in range(model.n_node):
    #     for idx2 in range(idx1 + 1, model.n_node):
    #         cur_idx = 0
    #         while cur_idx + step <= len(embeddings[0]):
    #             dists[idx1][idx2] += metrics.hellinger_distance(
    #                 p=embeddings[idx1][cur_idx:cur_idx + step],
    #                 q=embeddings[idx2][cur_idx:cur_idx + step])
    #             cur_idx += step
    #         dists[idx2][idx1] = dists[idx1][idx2]
    # print("hellinger")
    # knn_score = evaluate.KNN_evaluate(dists, labels, metric="precomputed")
    return knn_score, lr_score
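# A minimal sketch of the metrics.hellinger_distance referenced in the
# commented-out block above, assuming p and q are discrete distributions
# (non-negative, summing to 1): H(p, q) = ||sqrt(p) - sqrt(q)||_2 / sqrt(2).
def hellinger_distance_sketch(p, q):
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    return np.linalg.norm(np.sqrt(p) - np.sqrt(q)) / np.sqrt(2.0)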
def evaluate_rolx_embeddings():
    """Evaluate precomputed RolX embeddings on the usa graph across a range
    of dimensions with both KNN and logistic regression. Renamed from
    evaluate_embeddings so it no longer shadows the multi-HSD variant
    above."""
    method = "rolx"
    graphName = "usa"
    candidates = list(range(1, 17))
    candidates.extend([32, 64, 128, 256, 512, 1024])

    # The label file does not depend on the dimension, so read it once.
    label_dict = dataloader.read_label(f"data/label/{graphName}.label")
    for dimension in candidates:
        embedding_dict = rw.read_vectors(
            f"output/{method}_{graphName}_{dimension}.csv")
        embeddings, labels = [], []
        for node, vector in embedding_dict.items():
            embeddings.append(vector)
            labels.append(label_dict[node])
        print(f"{method}, {graphName}, dimension: {dimension}")
        evaluate.KNN_evaluate(embeddings, labels)
        evaluate.LR_evaluate(embeddings, labels)
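# Illustrative entry point, assuming the edgelist, label, and embedding files
# exist under data/ and output/ as the functions above expect.
if __name__ == "__main__":
    SIR_val, PageRank_val = multi_HSD_Test("europe", hop=3, n_scales=200)
    print(f"multi-HSD europe, SIR: {SIR_val}, PageRank: {PageRank_val}")
    base_HSD_Test("europe", hop=3, metric="wasserstein")
    evaluate_rolx_embeddings()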