def L_P_WACT(network, num_add=0):
    """Add edges chosen by the weighted Average Commute Time (ACT) index.

    Each unordered node pair ``(x, y)`` is scored as
    ``1 / (l_xx + l_yy - 2 * l_xy)``, where ``l`` is the Moore-Penrose
    pseudoinverse of the weighted graph Laplacian.  The scores are
    normalised into probabilities and handed, together with ``num_add``, to
    ``calculate_param.prob_select``; every pair it returns is added to
    ``network`` as an edge.

    Args:
        network: Graph accepted by ``nx.laplacian_matrix``; edge weights are
            read from the ``'weight'`` attribute.  Modified in place.
        num_add: Count forwarded to ``calculate_param.prob_select``
            (presumably the number of edges to pick -- confirm against that
            helper).  Defaults to 0, the value that was previously
            hard-coded.

    Returns:
        ``True`` once the selected edges have been added.
    """
    # Local import keeps this block self-contained; numpy is already a
    # requirement of the matrix helpers used below.
    import numpy as np

    nodes = list(network.nodes())
    nodes_pair = []  # (node_a, node_b, score) for every scored pair
    total_score = 0.0

    if len(nodes) > 1:
        # Built once: the matrix does not depend on the pair being scored.
        # Row/column order follows the graph's node iteration order, which is
        # also the order of ``nodes``.  A failure here propagates instead of
        # being silently swallowed for every pair.
        laplacian = nx.laplacian_matrix(network, None, 'weight').toarray()
        l_plus = np.linalg.pinv(laplacian.astype(float))

        for i, elei in enumerate(nodes):
            for j in range(i + 1, len(nodes)):
                denominator = l_plus[i, i] + l_plus[j, j] - 2 * l_plus[i, j]
                # Skip degenerate pairs (zero, negative or NaN denominator)
                # rather than feeding inf/NaN into the normalisation.
                if not denominator > 0:
                    continue
                score = float(1 / denominator)
                total_score += score
                nodes_pair.append((elei, nodes[j], score))

    # calculate the probabilities of edges to be added
    if total_score:
        probability_add = [score / total_score for _, _, score in nodes_pair]
    else:
        # No score mass at all: use a uniform distribution instead of
        # dividing by zero (empty when there are no pairs).
        probability_add = [1.0 / len(nodes_pair) for _ in nodes_pair]

    # select edges to be added according to probabilities
    edges_add = calculate_param.prob_select(nodes_pair, probability_add, num_add)
    for a, b, _ in edges_add:
        network.add_edge(a, b)  # add selected edges
    return True
def L_P_WCN(network: nx.Graph, num_add):
    """Select candidate edges using the weighted Common Neighbours index.

    For every unordered node pair ``(x, y)`` the score is the sum, over each
    common neighbour ``z``, of ``w(x, z) + w(z, y)`` where ``w`` is the
    ``'weight'`` edge attribute.  Scores are normalised into probabilities
    and passed with ``num_add`` to ``calculate_param.prob_select``.

    The graph itself is not modified; the caller decides what to do with the
    selected pairs.

    Args:
        network: Graph to score.
        num_add: Count forwarded to ``calculate_param.prob_select``
            (presumably the number of edges to pick -- confirm against that
            helper).

    Returns:
        Whatever ``calculate_param.prob_select`` returns for the scored
        ``(node_a, node_b, score)`` tuples.
    """
    nodes = list(network.nodes())
    nodes_pair = []  # (node_a, node_b, score) for every scored pair
    total_score = 0.0

    for i, elei in enumerate(nodes):
        for elej in nodes[i + 1:]:
            try:
                score = 0.0
                for z in nx.common_neighbors(network, elei, elej):
                    w_elei_z = network.get_edge_data(elei, z).get('weight')
                    w_z_elej = network.get_edge_data(z, elej).get('weight')
                    score += w_elei_z + w_z_elej
            except (TypeError, AttributeError, nx.NetworkXException):
                # Best effort: a pair whose edges lack a usable 'weight'
                # (or that networkx cannot handle) is left out of the
                # candidate list.
                continue
            total_score += score
            nodes_pair.append((elei, elej, score))

    # calculate the probabilities of edges to be added
    if total_score:
        probability_add = [score / total_score for _, _, score in nodes_pair]
    else:
        # No pair shares a weighted neighbour: use a uniform distribution
        # instead of dividing by zero (empty when there are no pairs).
        probability_add = [1.0 / len(nodes_pair) for _ in nodes_pair]

    # select edges to be added according to probabilities
    edges_add = calculate_param.prob_select(nodes_pair, probability_add, num_add)
    return edges_add
def L_P_WRWWR(network, num_add=0):
    """Add edges chosen by a symmetrised ``CCD`` score.

    Each unordered node pair is scored as the mean of
    ``CCD(network, x, y, i, j, 0.15)`` and ``CCD(network, y, x, j, i, 0.15)``
    where ``i``/``j`` are the 1-based positions of the nodes in the graph's
    node order.  (Judging by the function name this is a weighted random
    walk with restart and 0.15 its restart parameter -- ``CCD`` is defined
    elsewhere, confirm there.)  Scores are normalised into probabilities and
    handed with ``num_add`` to ``calculate_param.prob_select``; every pair it
    returns is added to ``network`` as an edge.

    Args:
        network: Graph to score.  Modified in place.
        num_add: Count forwarded to ``calculate_param.prob_select``
            (presumably the number of edges to pick -- confirm against that
            helper).  Defaults to 0, the value that was previously
            hard-coded.

    Returns:
        ``True`` once the selected edges have been added.
    """
    nodes = list(network.nodes())
    nodes_pair = []  # (node_a, node_b, score) for every scored pair
    total_score = 0.0

    # Positions are 1-based because that is what CCD has always been given.
    for i, elei in enumerate(nodes, 1):
        for j, elej in enumerate(nodes[i:], i + 1):
            try:
                score = (CCD(network, elei, elej, i, j, 0.15)
                         + CCD(network, elej, elei, j, i, 0.15)) / 2
            except Exception:
                # CCD's failure modes are not visible from here, so any
                # ordinary error skips the pair; KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                continue
            total_score += score
            nodes_pair.append((elei, elej, score))

    # calculate the probabilities of edges to be added
    if total_score:
        probability_add = [score / total_score for _, _, score in nodes_pair]
    else:
        # No score mass at all: use a uniform distribution instead of
        # dividing by zero (empty when there are no pairs).
        probability_add = [1.0 / len(nodes_pair) for _ in nodes_pair]

    # select edges to be added according to probabilities
    edges_add = calculate_param.prob_select(nodes_pair, probability_add, num_add)
    for a, b, _ in edges_add:
        network.add_edge(a, b)  # add selected edges
    return True
def L_P_WJC(network, num_add=0):
    """Add or reinforce edges chosen by a weighted Jaccard coefficient.

    For an unordered node pair ``(x, y)`` with common neighbours ``CN``::

        score = sum(w(x, z) + w(z, y) for z in CN)
                / (s(x) + s(y) - sum(min(w(x, z), w(z, y)) for z in CN))

    where ``w`` is the ``'weight'`` edge attribute and ``s(n)`` is the sum of
    the weights of the edges incident to ``n``.  Scores are normalised into
    probabilities and handed with ``num_add`` to
    ``calculate_param.prob_select``.  For every returned pair an existing
    edge has its ``'weight'`` incremented by 1; otherwise a new edge is
    added.

    Args:
        network: Graph to score.  Modified in place.
        num_add: Count forwarded to ``calculate_param.prob_select``
            (presumably the number of edges to pick -- confirm against that
            helper).  Defaults to 0, the value that was previously
            hard-coded.

    Returns:
        ``True`` once the selected edges have been applied.
    """
    nodes = list(network.nodes())
    nodes_pair = []  # (node_a, node_b, score) for every scored pair
    total_score = 0.0

    for i, elei in enumerate(nodes):
        for elej in nodes[i + 1:]:
            try:
                total_common_w = 0
                total_min_w = 0
                # Both sums are gathered in one pass, so it does not matter
                # whether common_neighbors yields a one-shot generator.
                for z in nx.common_neighbors(network, elei, elej):
                    w_elei_z = network.get_edge_data(elei, z).get('weight')
                    w_z_elej = network.get_edge_data(z, elej).get('weight')
                    total_common_w += w_elei_z + w_z_elej
                    total_min_w += min(w_elei_z, w_z_elej)

                strength_elei = sum(
                    network.get_edge_data(z, elei).get('weight')
                    for z in network.neighbors(elei)
                )
                strength_elej = sum(
                    network.get_edge_data(z, elej).get('weight')
                    for z in network.neighbors(elej)
                )

                # The whole sum is the denominator, hence the parentheses.
                score = total_common_w / (
                    strength_elei + strength_elej - total_min_w
                )
            except (TypeError, AttributeError, ZeroDivisionError,
                    nx.NetworkXException):
                # Best effort: pairs with unusable weights or a zero
                # denominator (e.g. two isolated nodes) are left out.
                continue
            total_score += score
            nodes_pair.append((elei, elej, score))

    # calculate the probabilities of edges to be added
    if total_score:
        probability_add = [score / total_score for _, _, score in nodes_pair]
    else:
        # No score mass at all: use a uniform distribution instead of
        # dividing by zero (empty when there are no pairs).
        probability_add = [1.0 / len(nodes_pair) for _ in nodes_pair]

    # select edges to be added according to probabilities
    edges_add = calculate_param.prob_select(nodes_pair, probability_add, num_add)
    for a, b, _ in edges_add:
        if network.has_edge(a, b):
            network[a][b]['weight'] += 1
        else:
            network.add_edge(a, b)
    return True
def take_one_model_to_predict(graph: nx.Graph, weighted: bool, num_predict: int, model_index: int):
    """Run one link-prediction model and select ``num_predict`` edges.

    The model name is looked up by ``model_index`` in
    ``emn.weighted_model_map`` (when ``weighted`` is true) or
    ``emn.unweighted_model_map``; the attribute of ``emn`` with that name is
    called with ``graph`` and must return ``(nodes_pair_list,
    probability_list)``.  Those are passed with ``num_predict`` to
    ``cp.prob_select``.

    Args:
        graph: Graph handed to the model.
        weighted: Chooses which model map is consulted.
        num_predict: Count forwarded to ``cp.prob_select``.
        model_index: Key into the chosen model map.

    Returns:
        The value returned by ``cp.prob_select``.

    Raises:
        ValueError: If ``graph`` is ``None`` or ``model_index`` is ``None``
            or an empty string.
        RuntimeError: If ``cp.prob_select`` returns ``None``.
    """
    if graph is None:
        raise ValueError("graph is None!")
    if model_index is None or model_index == '':
        raise ValueError("model_index is not valid!")

    if weighted:
        model_name = emn.weighted_model_map[model_index]
        print("use {0} model to predict {1} edges...".format(model_name, num_predict))
    else:
        model_name = emn.unweighted_model_map[model_index]

    # Plain attribute lookup instead of eval(): same dispatch for simple
    # function names, without executing arbitrary text from the map.
    model = getattr(emn, model_name)
    nodes_pair_list, probability_list = model(graph)
    edges_predict = cp.prob_select(nodes_pair_list, probability_list, num_predict)

    if edges_predict is None:
        raise RuntimeError("model predicting error!")
    return edges_predict
def find_best_fit_model(graph: nx.Graph, weighted: bool, num_predict: int, ground_truth_list: list):
    """Return the key of the weighted model with the best average recall.

    For every ``index -> name`` entry of ``emn.weighted_model_map`` the
    attribute ``emn.<name>`` is called with ``graph`` and must return
    ``(nodes_pair_list, probability_list)``.  ``cp.prob_select`` is then run
    ``prob_select_times`` times (a module-level setting) and each selection
    is scored with ``cp.calculate_recall`` against ``ground_truth_list``.
    The model whose mean recall is strictly greater than every earlier one
    wins.  Progress is printed to stdout.

    Args:
        graph: Graph handed to every model.
        weighted: When false nothing is evaluated (unweighted models are
            still a TODO) and 0 is returned.
        num_predict: Count forwarded to ``cp.prob_select``.
        ground_truth_list: Reference edges forwarded to
            ``cp.calculate_recall``.

    Returns:
        The map key of the best model, or 0 if no model achieved a recall
        above 0.0 or ``weighted`` is false.

    Raises:
        ValueError: If ``graph`` or ``num_predict`` is ``None``.
        ZeroDivisionError: If ``prob_select_times`` yields no selection
            rounds, since the mean recall is then undefined.
    """
    if graph is None:
        raise ValueError("graph is None!")
    if num_predict is None:
        raise ValueError("num_predict is None! Please set the parameter!")

    best_model_index = 0
    best_recall_rate = 0.0

    # unweighted TODO
    if not weighted:
        return best_model_index

    # use every model to predict and calculate recall rate to find the best one
    print("total number of models:{0}, start evaluating.".format(emn.weighted_model_num))
    for index, model_name in emn.weighted_model_map.items():
        print("try {0}th model {1}:".format(index, model_name))
        # Plain attribute lookup instead of eval(): same dispatch for simple
        # function names, without executing arbitrary text from the map.
        nodes_pair_list, probability_list = getattr(emn, model_name)(graph)

        print("repeat prob_select {0} times to calculate average recall rate...".format(prob_select_times))
        recall_rate_list = []
        # repeat prob_select to calculate average recall rate for every model
        for i in range(prob_select_times):
            print("the {0}th time of selection:".format(i + 1))
            predict_edge_list = cp.prob_select(nodes_pair_list, probability_list, num_predict)
            recall_rate_list.append(cp.calculate_recall(predict_edge_list, ground_truth_list))

        average_recall_rate = float(sum(recall_rate_list)) / len(recall_rate_list)
        print("average recall rate for {0} is {1}\n".format(model_name, average_recall_rate))

        if average_recall_rate > best_recall_rate:
            print("{0} > {1}, better model found:{2}:{3}\n".format(average_recall_rate, best_recall_rate,
                                                                    index, model_name))
            best_model_index = index
            best_recall_rate = average_recall_rate

    return best_model_index
def L_P_WAA(network, num_add=0):
    """Add edges chosen by the weighted Adamic-Adar index.

    For an unordered node pair ``(x, y)`` the score is the sum, over each
    common neighbour ``z``, of ``(w(x, z) + w(z, y)) / log(1 + s(z))`` where
    ``w`` is the ``'weight'`` edge attribute and ``s(z)`` is the sum of the
    weights of the edges incident to ``z``.  Scores are normalised into
    probabilities and handed with ``num_add`` to
    ``calculate_param.prob_select``; every pair it returns is added to
    ``network`` as an edge.

    Args:
        network: Graph to score.  Modified in place.
        num_add: Count forwarded to ``calculate_param.prob_select``
            (presumably the number of edges to pick -- confirm against that
            helper).  Defaults to 0, the value that was previously
            hard-coded.

    Returns:
        ``True`` once the selected edges have been added.
    """
    nodes = list(network.nodes())
    nodes_pair = []  # (node_a, node_b, score) for every scored pair
    total_score = 0.0

    for i, elei in enumerate(nodes):
        for elej in nodes[i + 1:]:
            try:
                # Start from zero for every pair so earlier pairs (or a
                # partially processed failing pair) cannot leak in.
                score = 0.0
                for z in nx.common_neighbors(network, elei, elej):
                    w_elei_z = network.get_edge_data(elei, z).get('weight')
                    w_z_elej = network.get_edge_data(z, elej).get('weight')
                    # The strength belongs to this common neighbour only.
                    strength_z = sum(
                        network.get_edge_data(x, z).get('weight')
                        for x in network.neighbors(z)
                    )
                    score += (w_elei_z + w_z_elej) / math.log(1 + strength_z)
            except (TypeError, AttributeError, ValueError, ZeroDivisionError,
                    nx.NetworkXException):
                # Best effort: pairs with unusable weights, or a neighbour
                # strength for which the logarithm is zero or undefined,
                # are left out.
                continue
            total_score += score
            nodes_pair.append((elei, elej, score))

    # calculate the probabilities of edges to be added
    if total_score:
        probability_add = [score / total_score for _, _, score in nodes_pair]
    else:
        # No score mass at all: use a uniform distribution instead of
        # dividing by zero (empty when there are no pairs).
        probability_add = [1.0 / len(nodes_pair) for _ in nodes_pair]

    # select edges to be added according to probabilities
    edges_add = calculate_param.prob_select(nodes_pair, probability_add, num_add)
    for a, b, _ in edges_add:
        network.add_edge(a, b)  # add selected edges
    return True
def L_P_WKatzIndex(network, num_add=0):
    """Add edges chosen by a truncated weighted Katz index.

    For an unordered node pair ``(x, y)`` the score is
    ``sum(B ** l * (A ** l)[x, y])`` for ``l = 1 .. max_length - 1``, where
    ``A`` is the weighted adjacency matrix, ``A ** l`` is a matrix power and
    ``B = 0.1`` damps longer paths.  Summation for a pair stops early when
    ``is_katz_converge(score, previous_score)`` is true.  ``max_length`` and
    ``is_katz_converge`` are defined elsewhere in the module.  Scores are
    normalised into probabilities and handed with ``num_add`` to
    ``calculate_param.prob_select``; every pair it returns is added to
    ``network`` as an edge.

    Args:
        network: Graph accepted by ``nx.adjacency_matrix``; edge weights are
            read from the ``'weight'`` attribute.  Modified in place.
        num_add: Count forwarded to ``calculate_param.prob_select``
            (presumably the number of edges to pick -- confirm against that
            helper).  Defaults to 0, the value that was previously
            hard-coded.

    Returns:
        ``True`` once the selected edges have been added.
    """
    # a free parameter to control path weights. The longer the path is, the
    # less contribution the path made to the similarity
    B = 0.1

    nodes = list(network.nodes())
    nodes_pair = []  # (node_a, node_b, score) for every scored pair
    total_score = 0.0

    if len(nodes) > 1:
        # Built once: the matrix does not depend on the pair being scored.
        # Row/column order follows the graph's node iteration order, which is
        # also the order of ``nodes``.  Floats avoid silent integer overflow
        # in the matrix powers.  A failure here propagates instead of being
        # silently swallowed for every pair.
        adjacency = nx.adjacency_matrix(network, None, 'weight').toarray().astype(float)
        powers = [adjacency]  # powers[k] holds the matrix power A ** (k + 1)

        for i, elei in enumerate(nodes):
            for j in range(i + 1, len(nodes)):
                # Start from zero for every pair so earlier pairs cannot
                # leak into this one.
                score = 0.0
                try:
                    for l in range(1, max_length):
                        score_old = score
                        # Extend the cache lazily; ``@`` is a true matrix
                        # product, unlike the element-wise pow() on arrays.
                        while len(powers) < l:
                            powers.append(powers[-1] @ adjacency)
                        score += B ** l * powers[l - 1][i][j]
                        if is_katz_converge(score, score_old):
                            break
                except Exception:
                    # is_katz_converge lives elsewhere and its failure modes
                    # are not visible here, so any ordinary error skips the
                    # pair; KeyboardInterrupt and SystemExit still propagate.
                    continue
                score = float(score)
                total_score += score
                nodes_pair.append((elei, nodes[j], score))

    # calculate the probabilities of edges to be added
    if total_score:
        probability_add = [score / total_score for _, _, score in nodes_pair]
    else:
        # No score mass at all: use a uniform distribution instead of
        # dividing by zero (empty when there are no pairs).
        probability_add = [1.0 / len(nodes_pair) for _ in nodes_pair]

    # select edges to be added according to probabilities
    edges_add = calculate_param.prob_select(nodes_pair, probability_add, num_add)
    for a, b, _ in edges_add:
        network.add_edge(a, b)  # add selected edges
    return True
def L_P_WSimRan(network, C=0.8, max_iter=100, num_add=0):
    """Add edges chosen by an iterative, strength-normalised SimRank score.

    Similarities start at 1 for a node with itself and 0 otherwise.  In each
    round, for every ordered pair of distinct nodes ``(a, b)``::

        sim[a][b] = C * sum(sim_old[u][v] for u in N(a) for v in N(b))
                    / (s(a) * s(b))

    where ``N(n)`` are the neighbours of ``n`` and ``s(n)`` is the sum of the
    ``'weight'`` attributes of its incident edges.  Rounds stop after
    ``max_iter`` iterations or as soon as ``_is_sim_converge(sim, sim_old)``
    (defined elsewhere in the module) is true.  The final similarities of the
    unordered pairs are normalised into probabilities and handed with
    ``num_add`` to ``calculate_param.prob_select``; every pair it returns is
    added to ``network`` as an edge.

    Args:
        network: Graph to score.  Modified in place.
        C: float, 0 < C <= 1, the decay factor which represents the relative
            importance between indirect neighbours and direct neighbours.
        max_iter: Maximum number of SimRank iterations.
        num_add: Count forwarded to ``calculate_param.prob_select``
            (presumably the number of edges to pick -- confirm against that
            helper).  Defaults to 0, the value that was previously
            hard-coded.

    Returns:
        ``True`` once the selected edges have been added.
    """
    nodes = list(network.nodes())
    nodes_pair = []  # (node_a, node_b, score) for every scored pair
    total_score = 0.0

    # init. vars
    sim = defaultdict(list)      # the similarity between two nodes
    sim_old = defaultdict(list)  # the similarity in the previous iteration
    for n in nodes:
        sim[n] = defaultdict(int)
        sim[n][n] = 1
        sim_old[n] = defaultdict(int)
        sim_old[n][n] = 0

    # Neighbourhoods and strengths do not change between iterations, so they
    # are gathered once.  A node whose edges lack a usable weight gets
    # ``None`` and every pair involving it is skipped (best effort).
    neighbours = {n: list(network.neighbors(n)) for n in nodes}
    strength = {}
    for n in nodes:
        try:
            strength[n] = sum(
                network.get_edge_data(z, n).get('weight') for z in neighbours[n]
            )
        except (TypeError, AttributeError):
            strength[n] = None

    # iteratively calculate simrank
    for _ in range(max_iter):
        if _is_sim_converge(sim, sim_old):
            break
        sim_old = copy.deepcopy(sim)
        for i, elei in enumerate(nodes):
            strength_elei = strength[elei]
            if strength_elei is None:
                continue
            for j, elej in enumerate(nodes):
                if i == j:
                    continue
                strength_elej = strength[elej]
                if strength_elej is None:
                    continue
                # Normalise by the PRODUCT of both strengths; a zero product
                # (isolated node) leaves the previous similarity untouched.
                denominator = strength_elei * strength_elej
                if denominator == 0:
                    continue
                s_elei_elej = 0.0
                for u in neighbours[elei]:
                    for v in neighbours[elej]:
                        s_elei_elej += sim_old[u][v]
                sim[elei][elej] = C * s_elei_elej / denominator

    # collect the score of each unordered pair of nodes
    for i, elei in enumerate(nodes):
        for elej in nodes[i + 1:]:
            score = sim[elei][elej]
            total_score += score
            nodes_pair.append((elei, elej, score))

    # calculate the probabilities of edges to be added
    if total_score:
        probability_add = [score / total_score for _, _, score in nodes_pair]
    else:
        # No score mass at all: use a uniform distribution instead of
        # dividing by zero (empty when there are no pairs).
        probability_add = [1.0 / len(nodes_pair) for _ in nodes_pair]

    # select edges to be added according to probabilities
    edges_add = calculate_param.prob_select(nodes_pair, probability_add, num_add)
    for a, b, _ in edges_add:
        network.add_edge(a, b)  # add selected edges
    return True