def compute_indexes(G: nx.Graph, method, negative, positive):
    """Score the negative and positive node pairs with one similarity index.

    Args:
        G: Graph the scores are computed on.
        method: Name of the index. One of 'resource_allocation',
            'jaccard_coefficient', 'adamic_adar', 'preferential_attachment',
            'sorensen_neighbours' or 'community'.
        negative: Iterable of (u, v) pairs scored first.
        positive: Iterable of (u, v) pairs scored second.

    Returns:
        A 2-tuple (negative_scores, positive_scores). For the four networkx
        indexes each element is whatever the networkx function returns; for
        'sorensen_neighbours' and 'community' each element is a list of
        (u, v, score) tuples.

    Raises:
        NameError: If `method` is not one of the supported names.
    """
    networkx_scorers = (
        ('resource_allocation', nx.resource_allocation_index),
        ('jaccard_coefficient', nx.jaccard_coefficient),
        ('adamic_adar', nx.adamic_adar_index),
        ('preferential_attachment', nx.preferential_attachment),
    )
    for name, scorer in networkx_scorers:
        if method == name:
            return scorer(G, negative), scorer(G, positive)

    if method == 'sorensen_neighbours':
        def pair_score(u, v):
            return sorensen_index(G, u, v)
    elif method == 'community':
        # The partition is computed once and shared by every pair.
        partition = louvain(G)
        labels = partition.communities
        node_to_community = partition.to_node_community_map()

        def pair_score(u, v):
            return community_index(G, u, v, labels, node_to_community)
    else:
        raise NameError('The given method is not supported')

    return ([(u, v, pair_score(u, v)) for u, v in negative],
            [(u, v, pair_score(u, v)) for u, v in positive])
def get_link_pred_auc(graph, pos_test, neg_test):
    """Return ROC AUC of Jaccard and resource-allocation scores on test pairs.

    Args:
        graph: Graph the indexes are computed on.
        pos_test: Node pairs labelled 1.
        neg_test: Node pairs labelled 0.

    Returns:
        (jaccard_auc, resource_allocation_auc).
    """
    def _auc(index_fn):
        pos_pred = index_fn(graph, pos_test)
        neg_pred = index_fn(graph, neg_test)
        pos_scores = [score for _, _, score in pos_pred]
        neg_scores = [score for _, _, score in neg_pred]
        labels = [1] * len(pos_scores) + [0] * len(neg_scores)
        return metrics.roc_auc_score(labels, pos_scores + neg_scores)

    jaccard_auc = _auc(nx.jaccard_coefficient)
    resource_allocation_auc = _auc(nx.resource_allocation_index)
    return jaccard_auc, resource_allocation_auc
def get_feature(nonedge, G, df):
    """Add one column per similarity index to `df` and print each result.

    For every index the scored pairs are computed, reduced with
    `get_list_value`, stored in `df` under the index's column name, and then
    passed to `print_output`. `df` is modified in place; nothing is returned.

    Args:
        nonedge: Node pairs to score.
        G: Graph the indexes are computed on.
        df: Frame that receives the columns 'common_neigh', 'jaccard',
            'resource_alloc', 'adamic_adar' and 'pref_attach'.
    """
    # Each entry is evaluated lazily so the compute / store / print order of
    # the individual indexes stays strictly sequential.
    feature_sources = (
        ('common_neigh', lambda: compute_common_neigh(G, nonedge)),
        ('jaccard', lambda: list(nx.jaccard_coefficient(G, nonedge))),
        ('resource_alloc',
         lambda: list(nx.resource_allocation_index(G, nonedge))),
        ('adamic_adar', lambda: list(nx.adamic_adar_index(G, nonedge))),
        ('pref_attach', lambda: list(nx.preferential_attachment(G, nonedge))),
    )
    for column, compute in feature_sources:
        scored_pairs = compute()
        df[column] = get_list_value(scored_pairs)
        print_output(scored_pairs)
def Link_Precision(Graph):
    """Score missing links with the resource-allocation index and evaluate.

    Steps: build the network, remove part of its edges via `delete_link`,
    score the node pairs with `nx.resource_allocation_index`, then compute
    AUC and precision with `AUC_Indx` and `Precision_Index`.

    Args:
        Graph: The network to evaluate. It is handed to `delete_link` as-is
            (no copy is made here).

    Returns:
        (auc_result, precision_result).
    """
    # Step 1: build the network and delete part of its edges.
    G = Graph
    test_set, non_edges_set = delete_link(G)

    # Step 2: predict a score for the missing edges. Other indexes such as
    # nx.adamic_adar_index or nx.jaccard_coefficient can be swapped in here.
    preds = nx.resource_allocation_index(G)
    # Sort the pairs by predicted score, highest first.
    preds = sorted(preds, key=itemgetter(2), reverse=True)

    # Step 3: compute the AUC and precision metrics.
    auc_result = AUC_Indx(preds, test_set, non_edges_set)
    precision_result = Precision_Index(preds, test_set)

    # Output the result. A single pre-formatted argument prints the same text
    # whether `print` is a statement or a function.
    print("AUC: {0} Precision: {1}".format(auc_result, precision_result))
    return auc_result, precision_result
def feature_calculate(g, data, column_names, save_to):
    """Append graph features to every row of `data` and write it as CSV.

    For each row, whose first two items are the node pair, four values are
    inserted just before the row's last item, in this order: shortest-path
    length, Jaccard coefficient, preferential attachment, resource-allocation
    index. A header row built from `column_names` is then inserted at the
    front of `data` and the whole list is written with `ut.write_list_csv`.

    `data` and its rows are modified in place.

    Args:
        g: Graph the features are computed on.
        data: List of mutable rows; row[0] and row[1] are the two nodes.
        column_names: Original column titles (copied, not modified).
        save_to: Destination passed to `ut.write_list_csv`.
    """
    def _pair_score(index_fn, pair):
        # The networkx index functions are lazy generators: when they were
        # zipped over all pairs at once, a failure surfaced inside the `for`
        # statement and the per-row fallback could never run. Scoring one
        # pair at a time confines a failure to the row that caused it.
        try:
            return next(iter(index_fn(g, [pair])))[2]
        except Exception:  # deliberate best-effort: -1 marks "no score"
            return -1

    total = len(data)
    for current, row in enumerate(data):
        # Read the pair before the inserts below shift the row's contents.
        pair = (row[0], row[1])
        this_jaccard = _pair_score(nx.jaccard_coefficient, pair)
        this_preferential = _pair_score(nx.preferential_attachment, pair)
        this_rai = _pair_score(nx.resource_allocation_index, pair)

        if current % 1000 == 0:
            ut.log("calculating {}/{}...".format(current, total))

        # The stored value is the number of nodes on the weighted shortest
        # path; 99999 is the sentinel used when no path can be computed.
        try:
            path_length = len(nx.shortest_path(g, pair[0], pair[1], 'weight'))
        except Exception:  # deliberate best-effort, as for the indexes
            path_length = 99999

        row.insert(-1, path_length)
        row.insert(-1, this_jaccard)
        row.insert(-1, this_preferential)
        row.insert(-1, this_rai)

    original_columns_titles = list(column_names)
    for title in ("shortest_path_count", "jaccard", "preferential", "rai"):
        original_columns_titles.insert(-1, title)
    data.insert(0, original_columns_titles)
    ut.write_list_csv(save_to, data)
def sort_edges_by_resource_allocation(graph, edges):
    """Sort node pairs by resource-allocation index, highest first.

    Scores are compared with `compare_with_ties`, an old-style two-argument
    comparison function applied to the scores themselves.

    Args:
        graph: Graph the index is computed on.
        edges: Node pairs to score.

    Returns:
        (pairs, scores): two parallel lists in descending score order, the
        first holding (u, v) tuples and the second the matching scores.
    """
    from functools import cmp_to_key

    # `sorted` lost its `cmp=` argument in Python 3. Wrapping the comparator
    # with cmp_to_key and applying it to the score reproduces the old
    # "cmp over key values" behaviour on both Python 2.7 and 3.
    score_key = cmp_to_key(compare_with_ties)
    edges_sorted = sorted(
        nx.resource_allocation_index(graph, edges),
        key=lambda row: score_key(row[2]),
        reverse=True,
    )
    return ([(row[0], row[1]) for row in edges_sorted],
            [row[2] for row in edges_sorted])
def link_prediction_with_metrics(subgraph, tuples, df):
    """Print ROC AUC and average precision for four similarity indexes.

    Each index is computed for `tuples` on `subgraph`, converted with
    `create_test_data`, and evaluated against the labels in `df['link']`.

    Args:
        subgraph: Graph the indexes are computed on.
        tuples: Node pairs to score.
        df: Frame whose 'link' column holds the true labels.
    """
    indexes = (
        ("Jaccard Coefficient", nx.jaccard_coefficient),
        ("Adamic Adar Index", nx.adamic_adar_index),
        ("Preferential Attachment", nx.preferential_attachment),
        ("Resource Allocation Index", nx.resource_allocation_index),
    )
    for label, index_fn in indexes:
        scored_pairs = list(index_fn(subgraph, tuples))
        y_test = create_test_data(scored_pairs)
        roc_auc = roc_auc_score(df['link'], y_test)
        avg_precision = average_precision_score(df['link'], y_test)
        print(
            f"ROC AUC Score with {label}: {roc_auc}\n"
            f"Average Precision with {label}: {avg_precision}"
        )
def new_connections_predictions():
    """Predict the probability of each unlabelled future connection.

    Adds four similarity features to the module-level `future_connections`
    frame (in place), fits a logistic regression on the rows whose
    'Future Connection' is known, and scores the rows where it is missing.

    Returns:
        A Series of probabilities rounded to two decimals, indexed like the
        unlabelled rows and sorted in descending order.
    """
    df = future_connections
    pairs = df.index

    networkx_features = (
        ('jaccard_coefficient', nx.jaccard_coefficient),
        ('resource_allocation_index', nx.resource_allocation_index),
        ('preferential_attachment', nx.preferential_attachment),
    )
    for column, index_fn in networkx_features:
        df[column] = [scored[2] for scored in index_fn(G, pairs)]
    df['common_neighbors'] = pairs.map(
        lambda pair: len(list(nx.common_neighbors(G, pair[0], pair[1]))))
    print('.......we have extracted all the features......')

    unlabelled = pd.isnull(df['Future Connection'])
    df_train = df[~unlabelled]
    df_test = df[unlabelled]

    features = [
        'jaccard_coefficient', 'resource_allocation_index',
        'preferential_attachment', 'common_neighbors'
    ]
    X_train = df_train[features]
    Y_train = df_train['Future Connection']
    X_test = df_test[features]

    # The scaler is fitted on the training rows only and reused for the test.
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    clf = LogisticRegression(solver='liblinear', random_state=14)
    clf.fit(X_train_scaled, Y_train)

    probabilities = np.round(clf.predict_proba(X_test_scaled)[:, 1], 2)
    return pd.Series(data=probabilities,
                     index=X_test.index).sort_values(ascending=False)
def extract_features(self, prediction_set=None):
    """Compute four similarity indexes for a set of node pairs.

    For each index a timestamped progress line is printed, the index is
    computed on `self.G`, and the result is merged into a shared feature
    dictionary through `self.append_features`.

    Args:
        prediction_set: Node pairs to score, forwarded to networkx as
            `ebunch`. The default None is passed through unchanged.

    Returns:
        (df, feature_names) as produced by `self.feature_dict_to_df`.
    """
    edge_features = defaultdict(dict)

    index_functions = (
        ('res_alloc', nx.resource_allocation_index),
        ('jaccard_coef', nx.jaccard_coefficient),
        ('adamic_adar', nx.adamic_adar_index),
        ('pref_attachment', nx.preferential_attachment),
    )
    for feature_name, index_fn in index_functions:
        # print() with a single pre-formatted argument replaces the Python 2
        # print statement, which is a syntax error on Python 3.
        print('{0} | extract_features: {1}'.format(str(datetime.now()),
                                                   feature_name))
        scores = index_fn(self.G, ebunch=prediction_set)
        self.append_features(edge_features,
                             feature_name=feature_name,
                             feature_list=scores)

    # Reformat the feature dictionary into a dataframe object.
    df, feature_names = self.feature_dict_to_df(edge_features)
    return df, feature_names
def SimilarityMeasures(G):
    """Print several similarity measures for a fixed set of node pairs.

    Prints, in order: resource-allocation index, the sorted common
    neighbours of nodes 1 and 2, Jaccard coefficient, Adamic-Adar index and
    preferential attachment. Each section ends with a separator line.

    Args:
        G: Graph the measures are computed on.
    """
    sample_pairs = [(1, 2), (3, 4), (1, 4), (5, 6), (3, 5)]
    separator = '****************************'

    def show(index_fn):
        for u, v, p in index_fn(G, sample_pairs):
            print('(%d, %d) -> %.8f' % (u, v, p))
        print(separator)

    show(nx.resource_allocation_index)

    # Common neighbours of nodes 1 and 2.
    print(sorted(nx.common_neighbors(G, 1, 2)))
    print(separator)

    show(nx.jaccard_coefficient)
    show(nx.adamic_adar_index)
    show(nx.preferential_attachment)
def L_P_RA(network):
    """Add edges to `network`, chosen by resource-allocation link prediction.

    Every unordered pair of distinct nodes without an edge is scored with
    `nx.resource_allocation_index`. The scores are normalised into selection
    probabilities and `calculate_param.prob_select_distinct` picks `num_add`
    of the pairs, which are then added to `network` in place.

    Args:
        network: Graph to extend.

    Returns:
        True, always.
    """
    from itertools import combinations

    num_add = 0  # the number of edges to be added
    nodes_pair_without_edge = []  # (u, v, score) for each unconnected pair
    total_score_without_edge = 0.0  # sum of the scores of those pairs

    # The original nested enumerate() loops called list(nodes, 1), which
    # raises TypeError. combinations() visits the same i < j pairs in the
    # same order.
    for elei, elej in combinations(list(network.nodes()), 2):
        if network.has_edge(elei, elej):
            continue
        try:
            scored = nx.resource_allocation_index(network, [(elei, elej)])
            score = next(iter(scored))[2]
        except Exception:  # deliberate best-effort: skip unscorable pairs
            continue
        total_score_without_edge += score
        nodes_pair_without_edge.append((elei, elej, score))

    # Probability of each pair being added.
    if total_score_without_edge > 0:
        probability_add = [
            score / total_score_without_edge
            for _, _, score in nodes_pair_without_edge
        ]
    else:
        # All scores are zero (or there are no candidates): the index gives
        # no preference, so fall back to a uniform distribution instead of
        # dividing by zero.
        candidates = len(nodes_pair_without_edge)
        probability_add = [1.0 / candidates] * candidates

    # Select the edges to be added according to the probabilities.
    edges_add = calculate_param.prob_select_distinct(nodes_pair_without_edge,
                                                     probability_add, num_add)
    for a, b, _ in edges_add:
        network.add_edge(a, b)
    return True
def new_connections_predictions():
    """Predict the probability of each unlabelled future connection.

    Adds four similarity features to the module-level `future_connections`
    frame (in place), fits a random forest on the fully populated rows, and
    scores the rows whose 'Future Connection' is missing.

    Returns:
        A Series named 'pred' with the predicted probability of the positive
        class, indexed like the unlabelled rows.
    """
    from sklearn.ensemble import RandomForestClassifier

    pairs = future_connections.index
    future_connections['preferential_attachment'] = [
        scored[2] for scored in nx.preferential_attachment(G, pairs)
    ]
    future_connections['Common Neighbors'] = pairs.map(
        lambda pair: len(list(nx.common_neighbors(G, pair[0], pair[1]))))
    future_connections['resource_allocation'] = [
        scored[2] for scored in nx.resource_allocation_index(G, pairs)
    ]
    future_connections['jaccard'] = [
        scored[2] for scored in nx.jaccard_coefficient(G, pairs)
    ]

    # drop() returns a new frame, so adding the 'pred' column below never
    # writes into a slice of `future_connections`.
    unlabelled = future_connections['Future Connection'].isnull()
    final_test = future_connections[unlabelled].drop(['Future Connection'],
                                                     axis=1)

    train = future_connections.dropna()
    X = train.drop(['Future Connection'], axis=1)
    y = train['Future Connection']

    # The model is fitted on all labelled rows; the train/test split that
    # used to be computed here was never used and has been removed.
    clf = RandomForestClassifier(n_estimators=100,
                                 max_depth=5,
                                 max_features=None,
                                 random_state=0)
    clf.fit(X, y)

    final_test['pred'] = clf.predict_proba(final_test)[:, 1]
    return final_test['pred']
def get_test_features():
    """Build a feature list for every pair in the module-level `test_edges`.

    Each feature list holds the resource-allocation index followed by the
    Jaccard coefficient of the pair, computed on the module-level graph `G`.
    If scoring fails, an error line is printed and the list keeps whatever
    was collected before the failure (it may be shorter than two items).

    Returns:
        A list with one feature list per test pair, in input order.
    """
    features = []
    print("Generating test data features......")
    for position, temp_data in enumerate(test_edges):
        if position % 100 == 0:
            print(position)
        feature = []
        try:
            for index_fn in (nx.resource_allocation_index,
                             nx.jaccard_coefficient):
                for _, _, score in index_fn(G, [temp_data]):
                    feature.append(score)
        except Exception:
            # Best-effort per pair, but no longer swallows KeyboardInterrupt
            # or SystemExit as the bare `except:` did. The message keeps the
            # original 1-based position.
            print("one error at: " + str(position + 1))
        features.append(feature)
    print("positive features: " + str(len(features)))
    return features
def link_scores(graph, all_dfs, labels, g_undirected):
    """Add preferential-attachment and resource-allocation columns.

    Both indexes are computed for the existing edges of `g_undirected`,
    turned into a {(u, v): score} lookup, and mapped onto every row of
    `all_dfs` with `map_predictions_to_df`.

    Args:
        graph: Unused.
        all_dfs: Frame that receives the 'Preferential_Attachment' and
            'Resource_allocation' columns (modified in place).
        labels: Unused.
        g_undirected: Graph whose edges are scored.

    Returns:
        `all_dfs`. If a ZeroDivisionError occurs while handling the
        resource-allocation scores, a message is printed and the
        'Resource_allocation' column is not added.
    """
    pa_by_pair = {
        (u, v): score
        for u, v, score in nx.preferential_attachment(g_undirected,
                                                      g_undirected.edges())
    }
    all_dfs['Preferential_Attachment'] = all_dfs.apply(
        lambda row: map_predictions_to_df(pa_by_pair, row), axis=1)

    ra_scores = nx.resource_allocation_index(g_undirected,
                                             g_undirected.edges())
    try:
        ra_by_pair = {(u, v): score for u, v, score in ra_scores}
        all_dfs['Resource_allocation'] = all_dfs.apply(
            lambda row: map_predictions_to_df(ra_by_pair, row), axis=1)
    except ZeroDivisionError:
        print("ZeroDivisionError: float division by zero")
    return all_dfs
def extract_network_based(data):
    """Add four graph-similarity columns ('nx1'..'nx4') to `data`.

    A graph is built with one node per integer in [0, max id] and one edge
    per (tid1, tid2) row. 'nx1', 'nx2' and 'nx3' hold the resource-allocation
    index, Jaccard coefficient and preferential attachment of each row's
    pair. An extra node linked to every other node is then added before the
    Adamic-Adar index is computed for 'nx4'.

    Args:
        data: Frame with integer 'tid1' and 'tid2' columns; modified in
            place.

    Returns:
        `data`.
    """
    print("Extracting Network Based Fearure...")
    tid1 = data['tid1'].values
    tid2 = data['tid2'].values
    num_nodes = np.max((tid1.max(), tid2.max())) + 1
    pairs = list(zip(tid1, tid2))

    G = nx.Graph()
    G.add_nodes_from(range(num_nodes))
    G.add_edges_from(
        zip(np.concatenate([tid1, tid2]), np.concatenate([tid2, tid1])))

    basic_indexes = (
        ('nx1', nx.resource_allocation_index),
        ('nx2', nx.jaccard_coefficient),
        ('nx3', nx.preferential_attachment),
    )
    for column, index_fn in basic_indexes:
        data[column] = [score for _, _, score in index_fn(G, pairs)]

    # Connect one extra hub node to every existing node before scoring
    # Adamic-Adar. NOTE(review): presumably this keeps every degree above 1
    # so the index's 1/log(degree) term is defined - confirm intent.
    hub = num_nodes
    G.add_node(hub)
    G.add_edges_from((node, hub) for node in range(num_nodes))

    data['nx4'] = [score for _, _, score in nx.adamic_adar_index(G, pairs)]
    return data
def prepare_data_ft():
    """Add four similarity columns to the module-level `future_connections`.

    The networkx index functions yield (node_a, node_b, coefficient)
    triples; only the coefficient is stored.

    Returns:
        `future_connections` itself, with the columns 'AAI', 'JCI', 'RA' and
        'PA' added in place.
    """
    df = future_connections
    triadic_measures = (
        ('AAI', nx.adamic_adar_index),
        ('JCI', nx.jaccard_coefficient),
        ('RA', nx.resource_allocation_index),
        ('PA', nx.preferential_attachment),
    )
    for column, index_fn in triadic_measures:
        df[column] = [triple[2] for triple in index_fn(G, df.index)]
    return df
def resource_allocation(G, feat, ledge):
    """Fill `feat['R_Allocation']` with the resource-allocation index.

    The column is initialised to 0.0. For every row whose 'Node_1' and
    'Node_2' are both nodes of `G`, the index of that pair is stored; other
    rows keep 0.0. `feat` is modified in place and nothing is returned.

    Args:
        G: Graph the index is computed on.
        feat: Frame with 'Node_1' and 'Node_2' columns.
        ledge: Unused.
    """
    feat['R_Allocation'] = 0.0
    for row_label in feat.index.values:
        node_1 = feat.at[row_label, 'Node_1']
        node_2 = feat.at[row_label, 'Node_2']
        if not (G.has_node(node_1) and G.has_node(node_2)):
            continue
        for _, _, score in nx.resource_allocation_index(G,
                                                        [(node_1, node_2)]):
            # .at writes straight into `feat`. The previous chained form
            # feat['R_Allocation'][i] = ... may assign to a temporary copy
            # and leave the frame unchanged.
            feat.at[row_label, 'R_Allocation'] = score
def new_connections_predictions():
    """Predict the probability of each unlabelled future connection.

    Copies every node's 'Department' attribute to 'community' on the
    module-level graph `G`, computes four similarity features for all
    non-edges of `G`, joins them onto `future_connections`, and trains a
    random forest and a gradient-boosting model on half of the labelled
    rows. The ROC AUC of both models on the other half is printed.

    Returns:
        A Series with the gradient-boosting probability of the positive
        class for every row whose 'Future Connection' is missing.
    """
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import roc_auc_score

    # cn_soundarajan_hopcroft reads the 'community' node attribute.
    # G.nodes[...] replaces G.node[...], which NetworkX 2.4 removed.
    for node in G.nodes():
        G.nodes[node]['community'] = G.nodes[node]['Department']

    # Score one explicit list of pairs with every index so that all feature
    # columns are aligned with the frame's index by construction, instead of
    # relying on positional assignment after an outer join.
    pairs = list(nx.non_edges(G))
    df = pd.DataFrame(index=pairs)
    feature_sources = (
        ('preferential_attachment', nx.preferential_attachment),
        ('cn_soundarajan_hopcroft', nx.cn_soundarajan_hopcroft),
        ('resource_allocation_index', nx.resource_allocation_index),
        ('jaccard_coefficient', nx.jaccard_coefficient),
    )
    for column, index_fn in feature_sources:
        df[column] = [score for _, _, score in index_fn(G, pairs)]

    df = future_connections.join(df, how='outer')
    unlabelled = pd.isnull(df['Future Connection'])
    df_train = df[~unlabelled]
    df_test = df[unlabelled]

    features = [
        'cn_soundarajan_hopcroft', 'preferential_attachment',
        'resource_allocation_index', 'jaccard_coefficient'
    ]
    df_test = df_test[features]
    X_train, X_test, y_train, y_test = train_test_split(
        df_train[features],
        df_train['Future Connection'],
        random_state=0,
        test_size=0.5)

    clf_RF = RandomForestClassifier(max_features=3,
                                    random_state=0,
                                    max_depth=3,
                                    min_samples_leaf=3,
                                    criterion='entropy')
    clf_RF.fit(X_train, y_train)
    clf_GDBT = GradientBoostingClassifier(learning_rate=0.01,
                                          max_depth=8,
                                          random_state=0,
                                          n_estimators=30)
    clf_GDBT.fit(X_train, y_train)

    roc_score_forest = roc_auc_score(y_test,
                                     clf_RF.predict_proba(X_test)[:, 1])
    roc_score = roc_auc_score(y_test, clf_GDBT.predict_proba(X_test)[:, 1])
    print(roc_score_forest)
    print(roc_score)

    preds = pd.Series(data=clf_GDBT.predict_proba(df_test)[:, 1],
                      index=df_test.index)
    return preds
def new_connections_predictions():
    """Predict the probability of each unlabelled future connection.

    Computes six similarity features for all non-edges of the module-level
    graph `G`, joins them onto `future_connections`, fits a random forest on
    the labelled rows and scores the rows whose 'Future Connection' is
    missing.

    Returns:
        A Series with the predicted probability of the positive class,
        indexed like the unlabelled rows.
    """
    # Score one explicit list of pairs with every index so that all feature
    # columns are aligned with the frame's index by construction, instead of
    # relying on positional assignment after a chain of outer joins.
    pairs = list(nx.non_edges(G))
    df = pd.DataFrame(index=pairs)
    df['pref_attch'] = [
        score for _, _, score in nx.preferential_attachment(G, pairs)
    ]
    df['common_neigh'] = [
        len(list(nx.common_neighbors(G, u, v))) for u, v in pairs
    ]
    df['community_common_neigh'] = [
        score for _, _, score in nx.cn_soundarajan_hopcroft(
            G, pairs, community='Department')
    ]
    df['community_res_alloc'] = [
        score for _, _, score in nx.ra_index_soundarajan_hopcroft(
            G, pairs, community='Department')
    ]
    df['res_alloc'] = [
        score for _, _, score in nx.resource_allocation_index(G, pairs)
    ]
    df['jaccard_coeff'] = [
        score for _, _, score in nx.jaccard_coefficient(G, pairs)
    ]

    features = [
        'jaccard_coeff', 'res_alloc', 'pref_attch', 'common_neigh',
        'community_common_neigh', 'community_res_alloc'
    ]
    df = future_connections.join(df, how='outer')
    unlabelled = pd.isnull(df['Future Connection'])
    df_train = df[~unlabelled]
    df_test = df[unlabelled]
    X_train = df_train[features]
    X_test = df_test[features]
    y_train = df_train['Future Connection']

    # Fit the scaler on the training rows only. Calling fit_transform on the
    # test rows re-fitted it, so the two matrices were scaled differently.
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    clf = RandomForestClassifier(n_estimators=100,
                                 n_jobs=-1,
                                 max_depth=10,
                                 random_state=0).fit(X_train_scaled, y_train)
    test_proba = clf.predict_proba(X_test_scaled)[:, 1]
    predictions = pd.Series(test_proba, X_test.index)
    return predictions
def new_connections_predictions():
    """Predict the probability of each unlabelled future connection.

    Copies every node's 'Department' attribute to 'community' on the
    module-level graph `G`, adds six similarity columns to the module-level
    `future_connections` frame (in place), fits an MLP classifier on the
    labelled rows and scores the rows whose 'Future Connection' is missing.

    Returns:
        A Series with the predicted probability of the positive class,
        indexed like the unlabelled rows.
    """
    # cn_soundarajan_hopcroft reads the 'community' node attribute.
    # G.nodes[...] replaces G.node[...], which NetworkX 2.4 removed.
    for n in G.nodes():
        G.nodes[n]['community'] = G.nodes[n]['Department']

    pairs = future_connections.index
    future_connections['common_neighbors'] = [
        len(list(nx.common_neighbors(G, pair[0], pair[1]))) for pair in pairs
    ]
    # One batched call per index instead of one networkx call per pair; the
    # scores come back in the order of `pairs`.
    networkx_features = (
        ('jaccard_coefficient', nx.jaccard_coefficient),
        ('resource_allocation_index', nx.resource_allocation_index),
        ('adamic_adar_index', nx.adamic_adar_index),
        ('preferential_attachment', nx.preferential_attachment),
        ('cn_soundarajan_hopcroft', nx.cn_soundarajan_hopcroft),
    )
    for column, index_fn in networkx_features:
        future_connections[column] = [
            score for _, _, score in index_fn(G, pairs)
        ]
    future_connections['cn_soundarajan_hopcroft'] = future_connections[
        'cn_soundarajan_hopcroft'].fillna(value=0)

    # 'common_neighbors' is stored on the frame but is not a model feature.
    features = [
        'jaccard_coefficient', 'resource_allocation_index',
        'adamic_adar_index', 'preferential_attachment',
        'cn_soundarajan_hopcroft'
    ]
    unlabelled = pd.isnull(future_connections['Future Connection'])
    X_train = future_connections.loc[~unlabelled, features]
    # A 1-D target: a single-column frame makes scikit-learn emit a
    # DataConversionWarning before flattening it.
    y_train = future_connections.loc[~unlabelled, 'Future Connection']
    X_test = future_connections.loc[unlabelled, features]

    classifier = MLPClassifier(hidden_layer_sizes=[10, 5],
                               solver='lbfgs',
                               alpha=10)
    classifier.fit(X_train, y_train)
    y_predicted = classifier.predict_proba(X_test)[:, 1]
    return pd.Series(y_predicted, X_test.index)
def resource_allocation_index(graph, train_set, test_set):
    """Cache resource-allocation scores of the train and test pairs as CSV.

    For each set whose CSV file does not exist yet, the index of every pair
    is computed and written as one row per pair. Progress and elapsed time
    are printed. A set whose file already exists is skipped.

    Args:
        graph: Graph the index is computed on.
        train_set: Indexable collection of pairs, written to
            ./data/resource_allocation_training.csv.
        test_set: Indexable collection of pairs, written to
            ./data/resource_allocation_testing.csv.
    """
    def _export(pairs, path, banner):
        # Compute and write the scores of one set unless `path` exists.
        if os.path.isfile(path):
            return
        print(banner)
        t0 = clock()
        results = []
        # Positional indexing is kept on purpose: it matches how callers'
        # containers were accessed before, whatever their iteration
        # protocol yields.
        for i in range(0, len(pairs)):
            preds = nx.resource_allocation_index(
                graph, [(pairs[i][0], pairs[i][1])])
            results.append([float(p) for u, v, p in preds])
            if i % 5000 == 0:
                print(i)
        print("Some elements of results:", results[0:5])
        # newline='' is what the csv module requires of its output file;
        # without it rows are separated by blank lines on Windows.
        with open(path, 'w', newline='') as csv_file:
            csv_out = csv.writer(csv_file)
            for i in range(0, len(pairs)):
                csv_out.writerow(results[i])
                if i % 100000 == 0:
                    print(i)
        print(clock() - t0)

    _export(train_set, "./data/resource_allocation_training.csv",
            "Computing training resource-allocation index")
    _export(test_set, "./data/resource_allocation_testing.csv",
            "Computing testing resource-allocation index")
def edge_feature(self, e):
    """Return the feature vector of the node pair `e` on `self.G`.

    The six features are, in order: square root of the number of common
    neighbours, absolute difference of the clustering coefficients, Jaccard
    coefficient, resource-allocation index, ratio of the smaller to the
    larger neighbour count, and a constant 1.0.

    Args:
        e: Sequence whose first two items are the two nodes.

    Returns:
        A numpy array of six floats.
    """
    u, v = e[0], e[1]
    graph = self.G
    neighbours_u = graph[u]
    neighbours_v = graph[v]

    shared = set(neighbours_u).intersection(neighbours_v)
    clustering_gap = abs(nx.clustering(graph, u) - nx.clustering(graph, v))
    jaccard = list(nx.jaccard_coefficient(graph, [(u, v)]))[0][2]
    resource_alloc = list(nx.resource_allocation_index(graph, [(u, v)]))[0][2]
    degree_u, degree_v = len(neighbours_u), len(neighbours_v)
    degree_ratio = (float(min(degree_u, degree_v)) /
                    float(max(degree_u, degree_v)))

    return numpy.array([
        math.sqrt(float(len(shared))),
        float(clustering_gap),
        float(jaccard),
        float(resource_alloc),
        degree_ratio,
        1.0,
    ])
def predict(self, node_pairs):
    """Return the resource-allocation results for `node_pairs` as a list.

    The pairs are scored on `self.graph` with `resource_allocation_index`.
    """
    scored = resource_allocation_index(self.graph, node_pairs)
    return list(scored)

def __repr__(self):
    """Return the same text as `__str__`."""
    return self.__str__()

def __str__(self):
    """Return the display name of this predictor."""
    return 'ResourceAllocation'
def new_connections_predictions():
    """Predict the probability of each unlabelled future connection.

    Builds one frame per similarity index over the module-level graph `G`,
    outer-joins them (missing values of the joined columns become 0), joins
    `future_connections`, fits an MLP classifier on the labelled rows and
    scores the rows whose 'Future Connection' is missing.

    Returns:
        A Series with the predicted probability of the positive class,
        indexed like the unlabelled rows.
    """
    def feature_frame(scored_pairs, column):
        # One-column frame indexed by the (u, v) pair of each scored triple.
        scored_pairs = list(scored_pairs)
        frame = pd.DataFrame(index=[(x[0], x[1]) for x in scored_pairs])
        frame[column] = [x[2] for x in scored_pairs]
        return frame

    new_df = feature_frame(nx.preferential_attachment(G),
                           "PrefrentialAttachment")

    # Each source is called only when its turn comes, so the indexes are
    # computed, joined and zero-filled one after another.
    joined_features = (
        ('CommunityCommonNeighbor',
         lambda: nx.cn_soundarajan_hopcroft(G, community="Department")),
        ('ResourceAllocationIndex', lambda: nx.resource_allocation_index(G)),
        ('JaccardCoeffiecient', lambda: nx.jaccard_coefficient(G)),
    )
    for column, compute in joined_features:
        new_df = new_df.join(feature_frame(compute(), column), how='outer')
        new_df[column] = new_df[column].fillna(value=0)

    new_df = new_df.join(future_connections, how='outer')
    unlabelled = new_df["Future Connection"].isnull()
    train_df = new_df[~unlabelled]
    test_df = new_df[unlabelled]

    features = [
        "PrefrentialAttachment", "CommunityCommonNeighbor",
        "ResourceAllocationIndex", "JaccardCoeffiecient"
    ]
    X_train = train_df[features]
    X_test = test_df[features]
    y_train = train_df["Future Connection"]

    # The scaler is fitted on the training rows only and reused for the test.
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    clf = MLPClassifier(hidden_layer_sizes=[10, 5],
                        alpha=5,
                        random_state=0,
                        solver='lbfgs',
                        verbose=0)
    clf.fit(X_train_scaled, y_train)
    probabilities = clf.predict_proba(X_test_scaled)[:, 1]
    return pd.Series(probabilities, index=X_test.index)
def resourceAllocation(G, X):
    """Return the resource-allocation index of every row pair in `X`.

    Args:
        G: Graph the index is computed on.
        X: Object with a `.shape` attribute whose rows hold the two nodes at
            positions 0 and 1.

    Returns:
        A list with one score per row; 0 for rows that could not be scored.
    """
    resource_allocation = []
    for i in range(X.shape[0]):
        try:
            scored = nx.resource_allocation_index(G, [(X[i][0], X[i][1])])
            score = next(iter(scored))[2]
        except Exception:
            # Best-effort per row, but no longer swallows KeyboardInterrupt
            # or SystemExit as the bare `except:` did.
            score = 0
        resource_allocation.append(score)
    return resource_allocation
def resourceAllocationIndex(self, buyerPairs, threshold=1.5):
    """Print the buyer pairs whose resource-allocation index is high.

    The index is computed on `self.networkG`. Only entries scoring strictly
    above `threshold` are printed, each distinct (u, v, score) entry once,
    in the order networkx produced them. No sorting is performed.

    Args:
        buyerPairs: Node pairs to score.
        threshold: Minimum score (exclusive) for a pair to be printed.
            Defaults to 1.5, the value that used to be hard-coded.
    """
    scored = nx.resource_allocation_index(self.networkG, buyerPairs)
    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # output is reproducible (set iteration order is arbitrary).
    allocation = dict.fromkeys(
        entry for entry in scored if entry[2] > threshold)

    # Print the allocation indexes.
    for u, v, p in allocation:
        print('Node pair: [', u, ']', '[', v, '] -> ', 'Index = ', p)
def calRA(nodeA, nodeB):
    """Return the resource-allocation index of (nodeA, nodeB) on `UDG`.

    Returns 0 when either node is not in the module-level graph `UDG` or
    when the computation raises ZeroDivisionError.
    """
    if not UDG.has_node(nodeA) or not UDG.has_node(nodeB):
        return 0
    try:
        scored = nx.resource_allocation_index(UDG, [(nodeA, nodeB)])
        first = next(iter(scored), None)
    except ZeroDivisionError:
        return 0
    # No scored triple at all yields None, as falling off the end would.
    return None if first is None else first[2]
def generate_algo(graph, X):
    """Return four lists of similarity scores for the pairs in `X`.

    Args:
        graph: Graph the indexes are computed on.
        X: Re-iterable collection of node pairs (it is traversed once per
            index).

    Returns:
        A 4-tuple of lists of floats, one value per pair in input order:
        (resource allocation, Jaccard, Adamic-Adar, preferential
        attachment).
    """
    def _scores(index_fn):
        # Read the score straight from each (u, v, score) triple. Stacking
        # the triples into a numpy array converted the scores to strings
        # whenever node labels were not numeric, and failed with IndexError
        # on an empty `X`.
        return [float(score) for _, _, score in index_fn(graph, X)]

    res_alloc_index = _scores(nx.resource_allocation_index)
    jac_coef = _scores(nx.jaccard_coefficient)
    ad_adar_idx = _scores(nx.adamic_adar_index)
    pref_att = _scores(nx.preferential_attachment)
    return res_alloc_index, jac_coef, ad_adar_idx, pref_att
def resourceAllocationIndex(self, buyerPairs):
    """Return the buyer pairs whose resource-allocation index is high.

    The index is computed on `self.networkG`. Only entries scoring strictly
    above `self.threshold` are kept. No sorting is performed.

    Args:
        buyerPairs: Node pairs to score.

    Returns:
        A list of distinct (u, v, score) tuples in the order networkx
        produced them.
    """
    print('Computing Similarity Scors using Resource Allocation Index ...')
    scored = nx.resource_allocation_index(self.networkG, buyerPairs)
    # dict.fromkeys drops duplicates while keeping first-seen order. The
    # former double list(set(...)) round-trip returned the same entries in
    # an arbitrary order.
    allocation = list(dict.fromkeys(
        entry for entry in scored if entry[2] > self.threshold))
    print('Computation Done!')
    print('\n')
    return allocation
def prob_in_net(n, m, graphs_test, train_edges_0, test_edges_0, matrixes, M_test):
    """Train one logistic regression per network and collect ROC AUC values.

    For each of the `k_net` networks, features are built by
    `similarity_Features` from a matrix combining the second and third
    powers of `M_test[i]`, a class-balanced logistic regression is fitted on
    the training edges, and the AUC of its predictions on the test edges is
    printed and recorded.

    NOTE(review): `k_net` and `matrix` are not defined in this function;
    presumably they are module-level names - confirm. `n` and `m` are not
    used.

    Returns:
        (roc_auc, matrix, pred_prob, truth_test), where `roc_auc` holds one
        AUC per network followed by their mean, `pred_prob` the
        positive-class probabilities per network and `truth_test` the test
        labels per network.
    """
    # NOTE(review): feature_train / feature_test are filled below but never
    # read; feature_train_1 / feature_test_1 are never used at all.
    feature_train = []
    feature_test = []
    feature_train_1 = []
    feature_test_1 = []
    pred_prob = []
    pred = []
    roc_auc = []
    roc_avg = 0
    truth_test = []
    for i in range(k_net):
        # Resource-allocation results for the train and test edges; they are
        # stored as returned by networkx and not used further here.
        feature_train.append(
            nx.resource_allocation_index(graphs_test[i], train_edges_0))
        feature_test.append(
            nx.resource_allocation_index(graphs_test[i], test_edges_0))
        # Second and third powers of M_test[i], the latter weighted by 0.001.
        # NOTE(review): whether `*` is a matrix or element-wise product
        # depends on the type of M_test[i] - confirm.
        A2 = M_test[i] * M_test[i]
        A3 = A2 * M_test[i]
        Lp_matrix = A2 + (0.001 * A3)
        (features_train, truth_train) = similarity_Features(Lp_matrix, matrixes[i], train_edges_0)
        (features_test, truth) = similarity_Features(Lp_matrix, matrixes[i], test_edges_0)
        truth_test.append(truth)
        LR = LogisticRegression(class_weight='balanced')
        LR.fit(features_train, truth_train)
        pred.append(LR.predict(features_test))
        x = LR.predict_proba(features_test)[:, 1]
        pred_prob.append(x)
        # The ROC curve is built from the hard class predictions (pred[i]),
        # not from the probabilities stored in pred_prob.
        fpr, tpr, thrshold = metrics.roc_curve(truth, pred[i])
        roc = metrics.auc(fpr, tpr)
        roc_auc.append(roc)
        print('Net', i, roc)
        roc_avg += roc
    # The last entry of roc_auc is the mean AUC over all networks.
    roc_auc.append(roc_avg / k_net)
    return (roc_auc, matrix, pred_prob, truth_test)
def sort_edges_by_resource_allocation(graph, edges):
    """Sort node pairs by resource-allocation index, highest first.

    Scores are compared with `compare_with_ties`, an old-style two-argument
    comparison function applied to the scores themselves.

    Args:
        graph: Graph the index is computed on.
        edges: Node pairs to score.

    Returns:
        (pairs, scores): two parallel lists in descending score order, the
        first holding (u, v) tuples and the second the matching scores.
    """
    from functools import cmp_to_key

    # `sorted` lost its `cmp=` argument in Python 3. Wrapping the comparator
    # with cmp_to_key and applying it to the score reproduces the old
    # "cmp over key values" behaviour on both Python 2.7 and 3.
    score_key = cmp_to_key(compare_with_ties)
    edges_sorted = sorted(
        nx.resource_allocation_index(graph, edges),
        key=lambda row: score_key(row[2]),
        reverse=True,
    )
    return ([(row[0], row[1]) for row in edges_sorted],
            [row[2] for row in edges_sorted])
def link_prediction(G, query_nodes, target_nodes, n_edges, start_dist, alg = "ra"):
    """Add the top-scoring predicted links and track a centrality score.

    Every (query node, target node) pair that is not already an edge is
    scored with a standard networkx link-prediction index. The pairs are
    sorted by score, and the first `n_edges` of them are added one by one to
    a copy of `G`; after each addition a score is computed from the updated
    fundamental matrix.

    Parameters
    ----------
    G : Networkx graph
        The input graph. It is copied; the copy receives the new edges.

    query_nodes : list
        First endpoints of the candidate links. Also used to select the rows
        and columns of the transition matrix.

    target_nodes : list
        Second endpoints of the candidate links.

    n_edges : integer
        The number of links to be added.

    start_dist : list
        The starting distribution over the query set.
        NOTE(review): it is used via `.dot(...)` and indexed with `[0,0]`,
        so presumably a 2-D matrix-like object rather than a list - confirm.

    alg : string
        The link-prediction index to use: "ra" (resource allocation),
        "pa" (preferential attachment), "jaccard" or "aa" (Adamic-Adar).

    Returns
    -------
    links_to_add : list
        All eligible (u, v, score) triples sorted by descending score, not
        only the first `n_edges` that were actually added.

    ac_scores : list
        The initial score followed by one score per added link.
    """
    # NOTE(review): assert statements are skipped under `python -O`.
    assert alg in ["ra", "pa", "jaccard", "aa"], "alg must be one of [\"ra\", \"pa\", \"jaccard\", \"aa\"]."

    H = G.copy()
    query_set_size = len(query_nodes)
    # Maps each query node to its position within query_nodes.
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    # Restrict the transition matrix to the query nodes.
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    # NOTE(review): F.sum() is called without an axis here, while the loop
    # below uses F_updated.sum(axis = 1) - confirm this is intended.
    row_sums = start_dist.dot(F.sum())[0,0]
    candidates = list(product(query_nodes, target_nodes))
    # Keep only the pairs that are not already connected.
    eligible = [candidates[i] for i in range(len(candidates)) if H.has_edge(candidates[i][0], candidates[i][1]) == False]
    links_to_add = []
    if alg == 'ra':
        preds = nx.resource_allocation_index(H, eligible)
    elif alg == 'jaccard':
        preds = nx.jaccard_coefficient(H, eligible)
    elif alg == 'aa':
        preds = nx.adamic_adar_index(H, eligible)
    elif alg == 'pa':
        preds = nx.preferential_attachment(H, eligible)

    for u,v,p in preds:
        links_to_add.append((u,v,p))
    # Highest-scoring links first.
    links_to_add.sort(key=lambda x: x[2], reverse = True)

    ac_scores = []
    ac_scores.append(row_sums)
    i = 0
    # NOTE(review): raises IndexError when n_edges exceeds the number of
    # eligible pairs.
    while i < n_edges:
        # The fundamental matrix is updated before the edge is added to H.
        F_updated = update_fundamental_mat(F, H, map_query_to_org, links_to_add[i][0])
        H.add_edge(links_to_add[i][0], links_to_add[i][1])
        abs_cen = start_dist.dot(F_updated.sum(axis = 1))[0,0]
        F = F_updated
        ac_scores.append(abs_cen)
        i += 1
    return links_to_add, ac_scores