def get_AA_index(self, u, v, graph):
    """Return the Adamic-Adar index of the node pair ``(u, v)``.

    The index is the sum of ``1 / log(degree(n))`` over every common
    neighbour ``n`` of ``u`` and ``v``, and ``0`` when they share none.

    Args:
        self: Unused.
        u: First node of the pair.
        v: Second node of the pair.
        graph: Graph the pair is scored in; it is not modified.

    Returns:
        The Adamic-Adar score of the pair.
    """
    # The previous version removed the (u, v) edge before scoring and added
    # it back afterwards.  nx.common_neighbors never yields u or v
    # themselves, and the degree of any other node does not depend on the
    # (u, v) edge, so that round trip could not change the score.  What it
    # did do was drop the edge's attributes (add_edge re-created it bare)
    # and leave the edge missing whenever scoring raised.
    return sum(1 / math.log(nx.degree(graph, n))
               for n in nx.common_neighbors(graph, u, v))
def predict(u, v):
    """Community-restricted resource-allocation score of ``(u, v)``.

    Reads the free names ``G`` and ``community``.  Returns ``0`` when the
    two nodes are in different communities; otherwise adds ``1 / degree``
    for every common neighbour that is in the same community as ``u``.
    """
    Cu = _community(G, u, community)
    Cv = _community(G, v, community)
    if Cu == Cv:
        score = 0
        for w in nx.common_neighbors(G, u, v):
            if _community(G, w, community) == Cu:
                score += 1 / G.degree(w)
        return score
    return 0
def predict(self, node_pairs):
    """Score node pairs with the adjusted Rand index.

    For a pair ``(u, v)`` the nodes of ``self.graph`` are split into four
    groups: ``a`` neighbours of both, ``b`` neighbours of ``u`` only, ``c``
    neighbours of ``v`` only and ``d`` neighbours of neither.  The score is
    ``2 * (a*d - b*c) / ((a + b)*(b + d) + (a + c)*(c + d))``.

    Args:
        node_pairs: Iterable of pairs; items 0 and 1 of each are the nodes.

    Returns:
        A list of ``(u, v, score)`` tuples in input order.  The score is
        ``0`` when the denominator is zero.
    """
    node_count = len(self.graph)
    predictions = []
    for node_pair in node_pairs:
        u, v = node_pair[0], node_pair[1]
        u_neighbors = set(self.graph.neighbors(u))
        v_neighbors = set(self.graph.neighbors(v))
        a = len(list(common_neighbors(self.graph, u, v)))
        # b and c are the neighbours exclusive to one endpoint.  They used
        # to be set to the size of the union for both, which made the four
        # cells overlap and the score meaningless.
        b = len(u_neighbors - v_neighbors)
        c = len(v_neighbors - u_neighbors)
        d = node_count - len(u_neighbors | v_neighbors)
        denominator = (a + b) * (b + d) + (a + c) * (c + d)
        # 2.0 keeps this a true division on Python 2 as well.
        score = 2.0 * (a * d - b * c) / denominator if denominator != 0 else 0
        predictions.append((u, v, score))
    return predictions


def __repr__(self):
    """Return the same text as ``__str__``."""
    return self.__str__()


def __str__(self):
    """Return the name of this predictor."""
    return 'AdjustedRand'
def SimilarityMeasures(G):
    """Print several NetworkX similarity scores for a fixed list of pairs.

    In order: resource allocation index, the sorted common neighbours of
    nodes 1 and 2, Jaccard coefficient, Adamic-Adar index and preferential
    attachment.  A row of asterisks follows each section.
    """
    pairs = [(1, 2), (3, 4), (1, 4), (5, 6), (3, 5)]
    separator = '****************************'

    def report(scores):
        # One "(u, v) -> score" line per triple, then the separator.
        for u, v, p in scores:
            print('(%d, %d) -> %.8f' % (u, v, p))
        print(separator)

    report(nx.resource_allocation_index(G, pairs))

    print(sorted(nx.common_neighbors(G, 1, 2)))
    print(separator)

    report(nx.jaccard_coefficient(G, pairs))
    report(nx.adamic_adar_index(G, pairs))
    report(nx.preferential_attachment(G, pairs))
def predict(u, v):
    """Common-neighbour count over ``sqrt(deg(u) * deg(v))``.

    Reads the free name ``G``.  Returns ``0`` when the square root is zero.
    """
    shared = sum(1 for _ in nx.common_neighbors(G, u, v))
    norm = math.sqrt(G.degree(u) * G.degree(v))
    if norm == 0:
        return 0
    return shared / norm
def test_clustering_score(self):
    """
    Check the reported global clustering score against a direct computation.

    The expected value is the mean over all nodes v of the local score

                   2 Nv
        C(v) = ------------    Kv = degree of v
               Kv (Kv - 1)     Nv = number of edges between neighbours of v

    with C(v) taken as 0 when Kv <= 1.
    """
    fixture = os.path.join(self._fixtures_dir, 'les-miserables.csv')
    results = ctd.get_summary(fixture)
    graph = ctd.get_graph(fixture)

    local_scores = []
    for v in graph.nodes():
        k = graph.degree(v)
        # Each edge between two neighbours of v, stored once as a sorted pair.
        links = set()
        for u in nx.all_neighbors(graph, v):
            for w in nx.common_neighbors(graph, u, v):
                links.add(tuple(sorted((u, w))))
        if k > 1:
            local_scores.append(2 * len(links) / float(k * (k - 1)))
        else:
            local_scores.append(0)

    expected = sum(local_scores) / float(len(local_scores))
    self.assertAlmostEqual(results['clustering'], expected)
def new_connections_predictions():
    """Predict a link probability for every unlabelled pair.

    Adds four feature columns to ``future_connections`` (the same object is
    modified, not a copy), fits a logistic regression on the rows whose
    'Future Connection' is set and scores the rows where it is null.

    Returns:
        A Series of probabilities rounded to two decimals, indexed like the
        unlabelled rows and sorted in descending order.
    """
    df = future_connections

    scorers = (
        ('jaccard_coefficient', nx.jaccard_coefficient),
        ('resource_allocation_index', nx.resource_allocation_index),
        ('preferential_attachment', nx.preferential_attachment),
    )
    for column, scorer in scorers:
        # Every scorer yields (u, v, score); only the score is stored.
        df[column] = [triple[2] for triple in scorer(G, df.index)]
    df['common_neighbors'] = df.index.map(
        lambda pair: len(list(nx.common_neighbors(G, pair[0], pair[1]))))
    print('.......we have extracted all the features......')

    unlabelled = pd.isnull(df['Future Connection'])
    df_train = df[~unlabelled]
    df_test = df[unlabelled]

    features = [
        'jaccard_coefficient', 'resource_allocation_index',
        'preferential_attachment', 'common_neighbors'
    ]
    X_train = df_train[features]
    Y_train = df_train['Future Connection']
    X_test = df_test[features]

    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    clf = LogisticRegression(solver='liblinear', random_state=14)
    clf.fit(X_train_scaled, Y_train)

    probabilities = np.round(clf.predict_proba(X_test_scaled)[:, 1], 2)
    return pd.Series(data=probabilities,
                     index=X_test.index).sort_values(ascending=False)


# print (new_connections_predictions())
def linkProb(g, nodei: str, nodej: str):
    """Sum ``1 / log(k)`` over the common neighbours of two nodes.

    Both node ids are converted with ``str`` before the lookup; ``k`` is the
    number of neighbours of a common neighbour.  Returns ``0`` when the
    nodes share no neighbour.
    """
    shared = list(nx.common_neighbors(g, str(nodei), str(nodej)))
    neighbour_counts = [len(list(g.neighbors(node))) for node in shared]
    return sum(1 / math.log(count) for count in neighbour_counts)
def predict(u, v):
    """Common-neighbour count over the larger of the two degrees.

    Reads the free name ``G``.  Returns ``0`` when both degrees are zero.
    """
    shared = sum(1 for _ in nx.common_neighbors(G, u, v))
    larger_degree = max(G.degree(u), G.degree(v))
    if larger_degree == 0:
        return 0
    return shared / larger_degree
def create_features(self, G_train, edge_bunch):
    """Build one feature row per node pair in ``edge_bunch``.

    Each row holds, in order: degree of each endpoint, the product of the
    two degrees, the number of common neighbours, the Jaccard coefficient,
    the Adamic-Adar index and the PageRank of each endpoint.

    Args:
        G_train: Graph the features are computed on.
        edge_bunch: Iterable of node pairs.

    Returns:
        ``np.array`` with one row per pair.
    """
    # nx.pagerank_scipy was removed in NetworkX 3.0; use it where it still
    # exists and fall back to nx.pagerank otherwise.
    compute_page_rank = getattr(nx, 'pagerank_scipy', None) or nx.pagerank
    page_rank = compute_page_rank(G_train)

    X = []
    for i, pair in enumerate(edge_bunch, 1):
        commmon_neighbors = len(
            list(nx.common_neighbors(G_train, pair[0], pair[1])))
        # The built-in next() replaces the Python 2-only ``.next()`` method.
        jaccard_coefficient = next(
            iter(nx.jaccard_coefficient(G_train, [pair])))[2]
        adamic_adar = next(iter(nx.adamic_adar_index(G_train, [pair])))[2]
        degree_0 = nx.degree(G_train, pair[0])
        degree_1 = nx.degree(G_train, pair[1])
        X.append([
            degree_0,
            degree_1,
            degree_0 * degree_1,
            commmon_neighbors,
            jaccard_coefficient,
            adamic_adar,
            page_rank[pair[0]],
            page_rank[pair[1]],
        ])
        # Progress marker for very large edge bunches.
        if i % 1000000 == 0:
            print(i)
    return np.array(X)
def cal_net_features(self):
    """Summarise ``self.graph`` with three numbers.

    Returns:
        ``(com_mean, com_var, core_count)``: mean and variance of the number
        of common neighbours over all edges, and the number of nodes whose
        degree is strictly above the 0.75 quantile of the degrees.
    """
    common_neighbors = np.array([
        len(list(nx.common_neighbors(self.graph, u, v)))
        for u, v in self.graph.edges
    ])
    com_mean = np.mean(common_neighbors)
    com_var = np.var(common_neighbors)

    degrees = [d for _, d in self.graph.degree()]
    core_count = 0
    if degrees:
        # Compute the threshold once; it used to be recomputed for every
        # element of the degree list.
        threshold = np.quantile(degrees, 0.75)
        core_count = sum(1 for d in degrees if d > threshold)
    return com_mean, com_var, core_count
def predict(u, v):
    """Common-neighbour count over the size of the neighbourhood union.

    Reads the free name ``G``.  Returns ``0`` when the union is empty.
    """
    shared = sum(1 for _ in nx.common_neighbors(G, u, v))
    either = set(G[u])
    either.update(G[v])
    if not either:
        return 0
    return shared / len(either)
def update_dicts_of_common_neighbors_info(self, node):
    """Record common-neighbour counts between ``node`` and its neighbours.

    For every neighbour not yet recorded for ``node``, the number of common
    neighbours (i.e. triangles through that edge) is stored symmetrically in
    ``self.dict_common_neighbors`` and the running maxima in
    ``self.max_common_neighbors`` are raised for both endpoints.  Nodes seen
    for the first time start with an empty dict and a maximum of -1.
    """
    counts = self.dict_common_neighbors
    maxima = self.max_common_neighbors

    if node not in counts:
        counts[node] = {}
        maxima[node] = -1

    for neighbor in self.graph.neighbors(node):
        # Already counted from the other endpoint.
        if neighbor in counts[node]:
            continue
        if neighbor not in counts:
            counts[neighbor] = {}
            maxima[neighbor] = -1

        shared = len(list(nx.common_neighbors(self.graph, node, neighbor)))
        counts[node][neighbor] = shared
        counts[neighbor][node] = shared

        for endpoint in (node, neighbor):
            if shared > maxima[endpoint]:
                maxima[endpoint] = shared
def AdamicAdarIndex(g, edge):
    """Normalised Adamic-Adar index of ``edge`` on the undirected ``g``.

    The raw index sums ``1 / log10(degree)`` over the common neighbours of
    the two endpoints.  It is divided by ``count / log10(2)``, where
    ``count`` is the number of common neighbours.

    Returns:
        ``None`` when ``g`` or ``edge`` is ``None``, ``0`` when there is no
        common neighbour, otherwise the normalised index.
    """
    if g is None or edge is None:
        return None

    undirected = nx.to_undirected(g)
    source, dest = edge[0], edge[1]

    terms = [
        1/math.log(undirected.degree(neigh), 10)
        for neigh in nx.common_neighbors(undirected, source, dest)
    ]
    index = sum(terms, 0.0)

    # Maximum Adamic-Adar used for the normalisation.
    max_adamic_adar = (1/math.log(2, 10))*len(terms)
    if max_adamic_adar == 0:
        return 0
    return float(index)/float(max_adamic_adar)
def get_edge_embeddedness(graph, pairs):
    """Return a numerical ``Column`` mapping each pair to its embeddedness.

    The embeddedness of a pair is the number of common neighbours of its
    two nodes in ``graph``.
    """
    c = Column(1, 'numerical')
    c.value = {
        pair: sum(1 for _ in nx.common_neighbors(graph, pair[0], pair[1]))
        for pair in pairs
    }
    return c
def predict(u, v):
    """Sum ``1 / len(G[w])`` over the common neighbours ``w`` of ``u``, ``v``.

    Reads the free name ``G``.  Returns ``0`` when there is none.
    """
    return sum(1. / float(len(G[node]))
               for node in nx.common_neighbors(G, u, v))
# print_sim_nodes(g, k=10): for consecutive nodes (x, y) in list(g.nodes())
# order, collects the common-neighbour count and the Jaccard, Adamic-Adar
# and preferential-attachment scores, then prints each list sorted by score
# in descending order and cut to the first k entries.
# NOTE(review): the original indentation was lost, so it is not visible
# whether the "top k" prints run once after the loop or on every iteration;
# the code is left exactly as found.  If they run after the loop, x and y
# are unbound for an empty graph -- confirm against the original source.
def print_sim_nodes(g, k=10): CN = [] # common neighbors JC = [] # jaccard coefficient AA = [] # adamic_adar_index PA = [] # preferential attachment # get each node and the node after it nodes = list(g.nodes()) l = g.number_of_nodes() for i, x in enumerate(nodes): if i < (l - 1): y = nodes[i + 1] CN.append(tuple([x, y, len(list(nx.common_neighbors(g, x, y)))])) JC.append(list(nx.jaccard_coefficient(g, [(x, y)]))[0]) AA.append(list(nx.adamic_adar_index(g, [(x, y)]))[0]) PA.append(list(nx.preferential_attachment(g, [(x, y)]))[0]) # top k print("vertex pair:", x, "and", y) print("common neighbors") print(sorted(CN, key=lambda x: x[2], reverse=True)[:k]) print("Jaccard coefficient") print(sorted(JC, key=lambda x: x[2], reverse=True)[:k]) print("Adamic/Adar") print(sorted(AA, key=lambda x: x[2], reverse=True)[:k]) print("preferential attachment") print(sorted(PA, key=lambda x: x[2], reverse=True)[:k])
def common_neighbor_scores(g_train, train_test_split):
    """Evaluate the common-neighbours score as a link predictor.

    Args:
        g_train: Training graph; a directed graph is converted to undirected.
        train_test_split: Sequence whose first item is the training
            adjacency matrix; it is also passed on to ``get_ebunch`` and
            ``train_lr`` unchanged.

    Returns:
        Dict with keys ``'test_roc'``, ``'test_ap'`` and ``'runtime'`` (the
        seconds spent building the score matrix).
    """
    if g_train.is_directed():  # common_neighbors needs an undirected graph
        g_train = g_train.to_undirected()

    adj_train = train_test_split[0]

    start_time = time.time()
    cn_matrix = np.zeros(adj_train.shape)
    for u, v in get_ebunch(train_test_split):
        # (u, v) are node indices; the score is the common-neighbour count.
        cn = len(list(nx.common_neighbors(g_train, u, v)))
        cn_matrix[u, v] = cn
        cn_matrix[v, u] = cn  # keep the matrix symmetric

    # Normalise to [0, 1].  Skip it when every score is zero: dividing by a
    # zero maximum would fill the matrix with NaN.
    peak = cn_matrix.max() if cn_matrix.size else 0
    if peak > 0:
        cn_matrix = cn_matrix / peak
    runtime = time.time() - start_time

    _, _, test_roc, test_avg = train_lr(train_test_split, cn_matrix)

    return {
        'test_roc': test_roc,
        'test_ap': test_avg,
        'runtime': runtime,
    }
def computeNeighborOverlap(g):
    """Map every edge of ``g`` to its neighbourhood overlap.

    For an edge (u, v) the overlap is ``cuv / (ku + kv - 2 - cuv)``, where
    ``cuv`` is the number of common neighbours and ``ku``, ``kv`` count the
    neighbours of each endpoint.  The overlap is ``0`` when the denominator
    is zero.

    Returns:
        Dict keyed by the edge tuples produced by iterating ``g.edges``.
    """
    edgeOverlaps = {}
    for edge in g.edges:
        u, v = edge[0], edge[1]
        cuv = sum(1 for _ in nx.common_neighbors(g, u, v))
        ku = sum(1 for _ in nx.all_neighbors(g, u))
        kv = sum(1 for _ in nx.all_neighbors(g, v))
        denominator = ku + kv - 2 - cuv
        # Explicit zero test instead of a bare ``except: pass`` around the
        # division, which would also have hidden unrelated errors.
        edgeOverlaps[edge] = cuv / denominator if denominator != 0 else 0
    return edgeOverlaps
def collectAdarScores(G, train_edges_name):
    """Score every row of a training-edge CSV file.

    The file is read with pandas and NaN cells are replaced by the string
    'nan'.  For each row the score sums ``-1 / log(weighted degree)`` over
    the sorted common neighbours of ``node1`` and ``node2`` in ``G``.

    Returns:
        ``(list_pred_scores, list_real_labels)``.  The label list is always
        empty; nothing is appended to it.
    """
    df_train = pd.read_csv(train_edges_name).replace(np.nan, 'nan', regex=True)

    list_real_labels = []
    list_pred_scores = []
    for i_row in range(len(df_train.node1)):
        node1 = df_train.node1[i_row]
        node2 = df_train.node2[i_row]

        total_sum = 0
        for nbr in sorted(nx.common_neighbors(G, node1, node2)):
            curr_weight = G.degree(nbr, weight='weight')
            total_sum += -1/np.log(curr_weight)
        list_pred_scores.append(total_sum)

    return list_pred_scores, list_real_labels
def construct_relation_graph(B, set=0, weight_fn=lambda i1, i2, w: len(w), name=""):
    """Build a weighted graph relating the nodes of one side of ``B``.

    Args:
        B: Bipartite graph.
        set: Index (0 or 1) into ``nx.bipartite.sets(B)`` selecting the side.
        weight_fn: Called as ``weight_fn(i1, i2, common_neighbours)`` for
            every pair of nodes of that side; a ``None`` result means no
            edge is added for the pair.
        name: Suffix used in the cache file name.

    Returns:
        The constructed graph, which is also pickled into ``cache/``.
    """
    digest = hashlib.md5(nx.info(B).encode('utf-8')).hexdigest()
    cache_path = 'cache/' + 'graph_' + digest + '_' + name + '.pkl'

    # The trailing ``and False`` switches cache loading off.
    if os.path.isfile(cache_path) and False:
        with open(cache_path, 'rb') as f:
            dprint("Relation graph loaded...")
            return pickle.load(f)

    dprint("Constructing relation graph...")
    members = nx.bipartite.sets(B)[set]
    G = nx.empty_graph(len(members))

    weighted = []
    for i1, i2 in itertools.combinations(members, 2):
        weight = weight_fn(i1, i2, list(nx.common_neighbors(B, i1, i2)))
        if weight is not None:
            weighted.append((i1, i2, weight))
    G.add_weighted_edges_from(weighted)
    dprint("Relation graph constructed")

    with open(cache_path, 'wb') as f:
        pickle.dump(G, f)
    return G
def jaca_predict(graph, a, b):
    """Jaccard-style similarity of nodes ``a`` and ``b``.

    Returns:
        ``common / (deg_a + deg_b - common)``, where ``common`` is the
        number of common neighbours and the degrees are neighbour counts.
        ``0`` when the denominator is zero, i.e. neither node has a
        neighbour; this used to raise ZeroDivisionError.
    """
    number_x = len(list(nx.neighbors(graph, a)))
    number_y = len(list(nx.neighbors(graph, b)))
    common = len(list(nx.common_neighbors(graph, a, b)))
    denominator = number_x + number_y - common
    if denominator == 0:
        return 0
    return common / denominator
def L_P_WCN(network: nx.Graph, num_add):
    """Pick edges to add with the weighted common-neighbours score.

    The score of a pair is the sum, over its common neighbours ``z``, of the
    'weight' of the two edges joining ``z`` to the pair.  Each pair's
    selection probability is its share of the total score.  ``network`` is
    not modified.

    Args:
        network: Graph whose edges carry a numeric 'weight' attribute.
        num_add: Passed through to ``calculate_param.prob_select``.

    Returns:
        Whatever ``calculate_param.prob_select`` returns for the scored
        pairs.

    Raises:
        ZeroDivisionError: If the scored pairs are non-empty and all of
            them score zero.
    """
    nodes = list(network.nodes())
    nodes_pair = []      # (node, node, score) for every unordered pair
    total_score = 0.0

    for i, elei in enumerate(nodes):
        for elej in nodes[i + 1:]:
            score = 0.0
            try:
                for z in nx.common_neighbors(network, elei, elej):
                    w_elei_z = network.get_edge_data(elei, z).get('weight')
                    w_z_elej = network.get_edge_data(z, elej).get('weight')
                    score += w_elei_z + w_z_elej
            except (TypeError, AttributeError):
                # An edge without a numeric weight: leave the pair out.
                # Any other failure now propagates instead of being
                # swallowed by a bare ``except``.
                continue
            total_score += score
            nodes_pair.append((elei, elej, score))

    probability_add = [score / total_score for _, _, score in nodes_pair]
    return calculate_param.prob_select(nodes_pair, probability_add, num_add)
def predict(u, v):
    """Common-neighbour count with a same-community bonus.

    Reads the free names ``G`` and ``community``.  Every common neighbour
    counts once; when ``u`` and ``v`` are in the same community, each common
    neighbour in that community counts once more.
    """
    Cu = _community(G, u, community)
    Cv = _community(G, v, community)
    shared = list(nx.common_neighbors(G, u, v))
    score = len(shared)
    if Cu == Cv:
        score += sum(1 for w in shared if _community(G, w, community) == Cu)
    return score
def __init__(self, preparedParameters, filePathResults, filePathAnalyseResult, topRank): print "Starting Analysing the results", datetime.today() absFilePath = filePathResults absfilePathAnalyseResult = filePathAnalyseResult #FormatingDataSets.get_abs_file_path(filePathAnalyseResult) fResult = open(absFilePath, 'r') with open(absfilePathAnalyseResult, 'w') as fnodes: self.success = 0 element = 0 for line in fResult: element = element+1 FormatingDataSets.printProgressofEvents(element, topRank, "Analysing the results: ") cols = line.strip().replace('\n','').split('\t') if len(list(networkx.common_neighbors(preparedParameters.testGraph, cols[len(cols)-2] , cols[len(cols)-1] ))) != 0: self.success = self.success + 1 fnodes.write(cols[len(cols)-2] + '\t' + cols[len(cols)-1] + '\t' + 'SUCCESS \r\n') else: fnodes.write(cols[len(cols)-2] + '\t' + cols[len(cols)-1] + '\t' + 'FAILED \r\n') if element == topRank: break result = float(self.success) / float(topRank) *100 strResult = 'Final Result: \t' + str(result) + '%' fnodes.write(strResult) fnodes.write('\n#\t'+str(self.success)) fnodes.close() print "Analysing the results finished", datetime.today()
def common_neighbors(features, G):
    """Count the common neighbours of every ('From', 'To') row.

    Args:
        features: Table with 'From' and 'To' columns holding node ids.
        G: Graph the nodes belong to.

    Returns:
        List with the number of common neighbours of each row, in row order.
    """
    # Rows are walked positionally, so a table whose index is not 0..n-1
    # works too.  The counts no longer go through sorted(), which was
    # unnecessary and fails on node labels that cannot be ordered.
    return [
        len(list(nx.common_neighbors(G, a, b)))
        for a, b in zip(features['From'], features['To'])
    ]
def neighborhoodOverlapDistribution(graph):
    """Histogram of the neighbourhood overlap over all (node, neighbour) pairs.

    For each node ``n`` and each of its neighbours, the overlap is the
    number of common neighbours divided by the size of the union of both
    neighbourhoods minus 2.  It is ``0.0`` when that denominator is zero.
    Overlaps are bucketed with ``groupBy(overlap, 0.1)``.

    Returns:
        ``OrderedDict`` mapping bucket to count, sorted by bucket.
    """
    Dist = {}
    interval = 0.1
    # Walk every node of the graph.
    for n in graph.nodes():
        all_neighbors = list(nx.all_neighbors(graph, n))
        own_neighbors = set(all_neighbors)  # built once per node
        # Walk the neighbours of the node to compute each overlap.
        for neighbor in all_neighbors:
            shared = len(list(nx.common_neighbors(graph, n, neighbor)))
            union_size = len(own_neighbors | set(nx.all_neighbors(graph, neighbor)))
            # Only the division-by-zero case is handled; other errors are
            # no longer silently turned into an overlap of 0.0.
            try:
                calc = shared / (union_size - 2.0)
            except ZeroDivisionError:
                calc = 0.0
            bucket = groupBy(calc, interval)
            Dist[bucket] = Dist.get(bucket, 0) + 1
    return OrderedDict(sorted(Dist.items(), key=lambda t: t[0]))
def L_P_CN(network):
    """Add edges chosen with the common-neighbours link-prediction score.

    Every unconnected pair of nodes is scored with its number of common
    neighbours; the selection probability of a pair is its share of the
    total score.  The pairs returned by
    ``calculate_param.prob_select_distinct`` are added to ``network``.

    Returns:
        Always ``True``.
    """
    num_add = 0  # the number of edges to be added
    nodes = list(network.nodes())
    nodes_pair_without_edge = []  # (node, node, score) of unconnected pairs
    total_score_without_edge = 0.0

    for i, elei in enumerate(nodes):
        for elej in nodes[i + 1:]:
            if network.has_edge(elei, elej):
                continue
            # Count by iterating: common_neighbors returns a generator on
            # older NetworkX, where len() raised a TypeError that the former
            # bare ``except`` swallowed, so every pair was skipped.
            score = sum(1 for _ in nx.common_neighbors(network, elei, elej))
            total_score_without_edge += score
            nodes_pair_without_edge.append((elei, elej, score))

    # No pair shares a neighbour: there is nothing to choose from, and the
    # probabilities below would divide by zero.
    if total_score_without_edge == 0:
        return True

    probability_add = [
        score / total_score_without_edge
        for _, _, score in nodes_pair_without_edge
    ]
    edges_add = calculate_param.prob_select_distinct(
        nodes_pair_without_edge, probability_add, num_add)
    for a, b, _ in edges_add:
        network.add_edge(a, b)  # add selected edges
    return True
def predict(u, v):
    """Ratio of shared neighbours to the larger endpoint degree.

    Reads the free name ``G``.  Returns ``0`` when that degree is zero.
    """
    shared_count = len(list(nx.common_neighbors(G, u, v)))
    degree_u = G.degree(u)
    degree_v = G.degree(v)
    top_degree = max(degree_u, degree_v)
    return shared_count / top_degree if top_degree != 0 else 0
def predict(u, v):
    """Common-neighbour count over the product of the two degrees.

    Reads the free name ``G``.  Returns ``0`` when the product is zero.
    """
    shared = sum(1 for _ in nx.common_neighbors(G, u, v))
    degree_product = G.degree(u) * G.degree(v)
    if degree_product == 0:
        return 0
    return shared / degree_product
def simpleProximity(self, s, t):
    """Weighted share of the paths s - node - t in the degrees of s and t.

    ``s`` and ``t`` are node ids of ``self.mip`` (not user/object ids).  The
    'weight' of both edges of every two-step path through a common
    neighbour is summed and divided by the sum of the weighted degrees of
    ``s`` and ``t``; a tiny constant keeps the denominator non-zero.
    """
    graph = self.mip
    shared_weight = 0.0
    for node in nx.common_neighbors(graph, s, t):
        # Weight of the path joining s and t through this node.
        shared_weight = shared_weight + graph[s][node]['weight'] + graph[t][node]['weight']
    degree_total = graph.degree(s, weight='weight') + graph.degree(t, weight='weight')
    return shared_weight / (degree_total + 0.000000000001)
def predict(u, v):
    """Shared-neighbour count scaled by ``deg(u) * deg(v)``.

    Reads the free name ``G``.  Returns ``0`` when either degree is zero.
    """
    shared_count = len(list(nx.common_neighbors(G, u, v)))
    scale = G.degree(u) * G.degree(v)
    return shared_count / scale if scale != 0 else 0
def predict(u, v):
    """Twice the common-neighbour count over the sum of the two degrees.

    Reads the free name ``G``.  Returns ``0`` when both degrees are zero.
    """
    shared = sum(1 for _ in nx.common_neighbors(G, u, v))
    degree_sum = G.degree(u) + G.degree(v)
    if degree_sum == 0:
        return 0
    return (2*shared) / degree_sum
def get_TimeofLinks(self, graph, node1, node2):
    """Return the 'time' of every common neighbour, newest first.

    Args:
        graph: Graph whose nodes carry a 'time' attribute.
        node1: First node.
        node2: Second node.

    Returns:
        List of the 'time' values of the common neighbours of the two
        nodes, sorted in descending order.
    """
    common = list(networkx.common_neighbors(graph, node1, node2))
    if not common:
        return []
    # One pass over the node data instead of a full scan of all nodes for
    # every common neighbour.
    node_data = dict(graph.nodes(data=True))
    result = [node_data[node]['time'] for node in common]
    result.sort(reverse=True)
    return result
def predict(u, v):
    """Sum ``1 / |degree|`` over the common neighbours of ``u`` and ``v``.

    Reads the free name ``G``.  Neighbours of degree zero are skipped.
    """
    total = 0
    for w in list(nx.common_neighbors(G, u, v)):
        degree = G.degree(w)
        if degree != 0:
            total += 1/math.fabs(degree)
    return total
def adamicAdarProximity(self, s, t):
    """Weighted Adamic-Adar style proximity of ``s`` and ``t`` in ``self.mip``.

    For each common neighbour, the weight of the path s - node - t (sum of
    both edge 'weight' values) is divided by the log of the node's weighted
    degree plus a tiny constant, so shared neighbours with a small weighted
    degree contribute more.  Paths of total weight 0 are ignored.
    """
    proximity = 0.0
    for node in nx.common_neighbors(self.mip, s, t):
        # Weight of the path joining s and t through this node.
        weights = self.mip[s][node]['weight'] + self.mip[t][node]['weight']
        if weights == 0:  # 0 essentially means no connection
            continue
        log_degree = math.log(self.mip.degree(node, weight = 'weight'))
        # The small constant is there to avoid dividing by zero.
        proximity = proximity + (weights*(1/(log_degree+0.00000000000000000000000001)))
    return proximity
def predict(u, v):
    """Within- to inter-community ratio of the common neighbours.

    Reads the free names ``G``, ``community`` and ``delta``.  Returns ``0``
    when ``u`` and ``v`` are in different communities.  Otherwise the count
    of common neighbours in their community is divided by the count of the
    remaining common neighbours plus ``delta``.
    """
    Cu = _community(G, u, community)
    Cv = _community(G, v, community)
    if Cu == Cv:
        cnbors = set(nx.common_neighbors(G, u, v))
        within_count = len(
            {w for w in cnbors if _community(G, w, community) == Cu})
        inter_count = len(cnbors) - within_count
        return within_count / (inter_count + delta)
    return 0
def get_BagofWords(self, graph, node1, node2):
    """Collect the keywords of every common neighbour of two nodes.

    Each common neighbour's 'keywords' attribute is parsed with
    ``ast.literal_eval`` and its items are gathered.

    Returns:
        Set of all keywords found.
    """
    result = set()
    common = list(networkx.common_neighbors(graph, node1, node2))
    if not common:
        return result
    # One pass over the node data instead of a full scan of all nodes for
    # every common neighbour.
    node_data = dict(graph.nodes(data=True))
    for node in common:
        result.update(ast.literal_eval(node_data[node]['keywords']))
    return result
def get_adamic_adar_score(graph, pairs):
    """Return a numerical ``Column`` of smoothed Adamic-Adar scores.

    The score of a pair sums ``1 / log(k + 1)`` over its common neighbours,
    where ``k`` is the number of neighbours of the common neighbour.
    """
    c = Column(1, 'numerical')
    value = dict()
    for pair in pairs:
        score = 0.0
        for n in nx.common_neighbors(graph, pair[0], pair[1]):
            # len(graph[n]) is the neighbour count on every NetworkX
            # version; graph.neighbors(n) is an iterator from 2.0 on and
            # has no len().
            score += 1.0 / math.log(len(graph[n]) + 1)
        value[pair] = score
    c.value = value
    return c
def generateDataForCalculate(self):
    """Walk every ordered pair of training-graph nodes.

    Generates the training graph first if it is not set, and creates the
    '<filePathTrainingGraph>.calc.pdl' base with the fields 'pairNodes',
    'common', 'time' and 'domain'.

    NOTE(review): as visible here, ``common`` and ``arestas`` are computed
    but never used and nothing is written to ``adb``; the body looks
    truncated -- confirm against the full source.
    """
    if self.trainnigGraph == None:
        self.generating_Training_Graph()
    _nodes = sorted(self.trainnigGraph.nodes())
    adb = Base(self.filePathTrainingGraph + ".calc.pdl")
    adb.create('pairNodes', 'common', 'time', 'domain' )
    for node in sorted(_nodes):
        # Only the nodes that sort after ``node``: each pair is seen once.
        othernodes = set(n for n in _nodes if n > node)
        for other in othernodes:
            common = set(networkx.common_neighbors(self.trainnigGraph, node, other))
            # Edges incident to either node, with their data.
            arestas = self.trainnigGraph.edges([node, other], True)
def merge(G,edge):
    """Contract ``edge`` and return the resulting graph.

    The endpoint that is kept records the absorbed endpoint in its 'list'
    attribute (this is written to ``G`` itself).  If the second endpoint has
    type 'login' or 'email' the edge is flipped first, so such a node is
    always the one kept.  For neighbours common to both endpoints the two
    edge weights are added together, a la Stoer-Wagner.
    """
    if G.node[edge[1]]['type'] in ('login', 'email'):
        edge = edge[::-1]
    kept, absorbed = edge[0], edge[1]

    merged_list = G.node[kept]['list'] + [absorbed]
    nx.set_node_attributes(G, 'list', {kept: merged_list})

    # Contract without creating a self-loop.
    J = nx.contracted_edge(G, (kept, absorbed), self_loops = False)

    # A neighbour shared by both endpoints gets the sum of both weights.
    for shared in nx.common_neighbors(G, kept, absorbed):
        J[shared][kept]['weight'] = G[kept][shared]['weight'] + G[absorbed][shared]['weight']
    return J
def graph_stats(distance_couple, net):
    """Plot four similarity measures against distance for node pairs.

    Args:
        distance_couple: Iterable of ``((u, v), distance)`` items.
        net: Graph the similarity measures are computed on.

    Each series (distance, common neighbours, Jaccard, Adamic-Adar, edge
    betweenness) is rescaled so that its maximum is 10, then each measure is
    drawn with the distances on log-log axes and saved under
    ``node_similarity/``.
    """
    distances, common_neighbors, jaccard, adamic, edge_bet = [], [], [], [], []
    edge_betweeness = nx.edge_betweenness_centrality(net)

    for couple in distance_couple:
        distances.append(couple[1])
        pair = couple[0]
        u, v = pair[0], pair[1]
        common_neighbors.append(len(list(nx.common_neighbors(net, u, v))))
        jaccard.append(list(nx.jaccard_coefficient(net, [(u, v)]))[0][2])
        adamic.append(list(nx.adamic_adar_index(net, [(u, v)]))[0][2])
        try:
            edge_bet.append(edge_betweeness[pair])
        except KeyError:
            # The centrality dict may hold the edge in the other direction.
            edge_bet.append(edge_betweeness[(v, u)])

    def rescale(values):
        # Stretch the series so that its largest value becomes 10.
        ratio = 10.0/max(values)
        return [j * ratio for j in values]

    distances = rescale(distances)
    common_neighbors = rescale(common_neighbors)
    jaccard = rescale(jaccard)
    adamic = rescale(adamic)
    edge_bet = rescale(edge_bet)

    plots = (
        (common_neighbors, 'common_neighbors', 'node_similarity/stats_cm.png'),
        (jaccard, 'jaccard', 'node_similarity/stats_j.png'),
        (adamic, 'adamic', 'node_similarity/stats_aa.png'),
        (edge_bet, 'edge betwenness', 'node_similarity/stats_eb.png'),
    )
    for series, label, path in plots:
        plt.loglog(series, color='b', label=label)
        plt.loglog(distances, color='r', label='distances')
        plt.savefig(path, format='png')
        plt.close()
def get_TimeofLinks(self, graph, node1, node2):
    # Returns the 'time' of every common neighbour of node1 and node2,
    # sorted in descending order.  Times are memoised in self.times, so the
    # node data is only scanned for neighbours not seen before.
    # NOTE(review): Python 2 print statements; block left byte-identical.
    result = []
    for node in networkx.common_neighbors(graph, node1, node2):
        if node in self.times:
            if self.debugar:
                print "already found the time for paper ", node
        else:
            if self.debugar:
                print "rescuing time from paper: ", str(node)
            # Scans all nodes; d['node_type'] is read for every node, so a
            # node without that key raises KeyError here.
            paper = list(d for n,d in graph.nodes(data=True) if d['node_type'] == 'E' and n == node )
            if self.debugar:
                print paper[0]['time']
            self.times[node] = paper[0]['time']
        result.append(self.times[node])
    result.sort(reverse=True)
    return result
def calculateStability(self):
    """Print the share of balanced triangles in the signed graph.

    Every key of ``self.edgeSignDict`` is a "node1,node2" string.  For each
    common neighbour of the two nodes, the product of the three edge
    'weight' values of the triangle is taken; a missing edge counts as 10.
    The triangle is balanced when the product is strictly between 0 and 10.

    Raises:
        ZeroDivisionError: If no triangle is found at all.
    """
    balanceTriangle = 0
    totalTriangles = 0
    # Only the keys are needed; plain iteration works on Python 2 and 3,
    # unlike ``iteritems()``.
    for edge in self.edgeSignDict:
        ends = edge.split(",")
        node1 = int(float(ends[0]))
        node2 = int(float(ends[1]))
        # The sign of the edge itself is the same for all its triangles.
        sign12 = self.graph.get_edge_data(node1, node2, default={"weight": 10})["weight"]
        for inode in nx.common_neighbors(self.graph, node1, node2):
            sign1n = self.graph.get_edge_data(node1, inode, default={"weight": 10})["weight"]
            sign2n = self.graph.get_edge_data(node2, inode, default={"weight": 10})["weight"]
            mul = sign1n * sign2n * sign12
            if 0 < mul < 10:
                balanceTriangle += 1
            totalTriangles += 1
    # Parenthesised single argument: prints the same on Python 2 and 3.
    print("Balance percentage: " + str((1.0 * balanceTriangle) / totalTriangles))
# get_ObjectsofLinks(self, graph, node1, node2): for every common neighbour
# of node1 and node2, looks up (or fills) self.parameter.linkObjects[node]
# with [time, keywords] and collects those entries in a list.  A node is
# only cached when its 'time' is >= self.parameter.t0_ - 3.
# NOTE(review): the original indentation was lost, so it is not visible
# whether the final result.append(...) runs for every neighbour or only for
# newly cached ones; if it runs for every neighbour, a node that is too old
# to be cached raises KeyError.  Code left exactly as found.
# NOTE(review): eval() is applied to the node's 'keywords' attribute; if
# that data can come from an untrusted source, ast.literal_eval is the safe
# replacement.  Python 2 print statements throughout.
def get_ObjectsofLinks(self, graph, node1, node2): result = [] for node in networkx.common_neighbors(graph, node1, node2): if node in self.parameter.linkObjects: if self.debugar: print "already found the time for paper ", node else: if self.debugar: print "rescuing time from paper: ", str(node) MaxAmplitude = self.parameter.t0_ - 3 if self.debugar: print 'amplitude maxima:' , MaxAmplitude paper = list(d for n,d in graph.nodes(data=True) if d['node_type'] == 'E' and n == node ) if self.debugar: print 'Informacoes sobre o paper:' ,paper if paper[0]['time'] >= MaxAmplitude: self.parameter.linkObjects[node] = [paper[0]['time'], eval(paper[0]['keywords'])] if self.debugar: print 'Informacoes sobre o paper ja na memoria:' , self.parameter.linkObjects[node] result.append(self.parameter.linkObjects[node]) return result
def test_clustering_score(self):
    """
    Compare the summary's clustering score with a hand-computed value.

    The reference is the average of the local score of each node v:

                   2 Nv
        C(v) = ------------    Kv = degree of v
               Kv (Kv - 1)     Nv = number of edges between neighbours of v

    Nodes with Kv <= 1 contribute 0.
    """
    data_path = os.path.join(self._fixtures_dir, "les-miserables.csv")
    results = ctd.get_summary(data_path)
    graph = ctd.get_graph(data_path)

    def local_score(v):
        k = graph.degree(v)
        # Distinct edges between neighbours of v, each as a sorted pair.
        neighbor_links = {
            tuple(sorted((u, w)))
            for u in nx.all_neighbors(graph, v)
            for w in nx.common_neighbors(graph, u, v)
        }
        if k > 1:
            return 2 * len(neighbor_links) / float(k * (k - 1))
        return 0

    local_scores = [local_score(v) for v in graph.nodes()]
    self.assertAlmostEqual(results["clustering"],
                           sum(local_scores) / float(len(local_scores)))
def get_pair_nodes_not_linked(self, graph, file, min_papers):
    # Writes, for every selected node, the set of later-sorted selected
    # nodes it shares no neighbour with, as "<node>\t<repr of set>" lines.
    # A node is selected when its node_type is 'N' and it has at least
    # min_papers neighbours.
    # NOTE(review): Python 2 print statements, and the written repr depends
    # on set iteration order, so the block is left byte-identical.
    print "Starting getting pair of nodes that is not liked", datetime.today()
    results = []
    nodesinGraph =set(n for n,d in graph.nodes(data=True) if d['node_type'] == 'N')
    currentNodes = set()
    for n in nodesinGraph:
        papers = set(networkx.all_neighbors(graph, n))
        print papers
        if (len(papers) >= min_papers):
            currentNodes.add(n)
    print 'qty of authors: ', len(currentNodes)
    nodesOrdered = sorted(currentNodes)
    element = 0
    totalnodesOrdered = len(nodesOrdered)
    for node1 in nodesOrdered:
        element = element+1
        FormatingDataSets.printProgressofEvents(element, totalnodesOrdered, "Checking Node not liked: ")
        # Only nodes sorting after node1, so each pair is checked once.
        others = set(n for n in nodesOrdered if n > node1)
        notLinked = set()
        for other_node in others:
            if len(set(networkx.common_neighbors(graph, node1, other_node))) == 0:
                notLinked.add(other_node)
        results.append([node1, notLinked])
        # Flush to the file every 2000 nodes to bound memory use.
        if element % 2000 == 0:
            for item in results:
                file.write(str(item[0]) + '\t' + repr(item[1]) + '\n')
            results = []
    # Write whatever is left after the last full batch.
    for item in results:
        file.write(str(item[0]) + '\t' + repr(item[1]) + '\n')
    results = []
    print "getting pair of nodes that is not liked finished", datetime.today()
def predict(u, v):
    """Return the number of common neighbours of ``u`` and ``v`` in ``G``."""
    return sum(1 for _ in nx.common_neighbors(G, u, v))
def predict(u, v):
    """Sum ``1 / log(degree)`` over the common neighbours of ``u`` and ``v``.

    Reads the free name ``G``.  Returns ``0`` when there is none.
    """
    score = 0
    for w in nx.common_neighbors(G, u, v):
        score += 1 / math.log(G.degree(w))
    return score
# Script fragment: builds the actor/movie graph, keeps only nodes of degree
# other than 1, links nodes that share a neighbour, then removes every node
# that is not in top_100_actors and draws what remains.  ``G``, ``edges``
# and ``top_100_actors`` are defined outside this fragment.
G.add_edges_from(edges)
nx.write_gml(G, "actor_to_movie_all_movies.gml")
# Collect first, remove afterwards: nodes are not removed while iterating.
nodes_to_remove = []
for node in G.nodes():
    if G.degree(node) == 1:
        nodes_to_remove.append(node)
G.remove_nodes_from(nodes_to_remove)
nx.write_gml(G, "actor_to_movie_common_movies.gml")
for node in G.nodes():
    for node2 in G.nodes():
        if len(list(nx.common_neighbors(G, node, node2))) > 0:
            # NOTE(review): G.edge is indexed with a (node, node2) tuple
            # here, and the branch that creates the edge is taken when the
            # lookup succeeds while the increment branch is taken when it
            # yields None -- this looks inverted; confirm the intent and the
            # NetworkX version before relying on the weights.
            if G.edge[node, node2] is not None:
                G.add_edge(node, node2)
                G.edge[node, node2]['weight'] = 1
            else:
                G.edge[node, node2]['weight'] += 1
# Everything that is not one of the top actors is dropped.
movies = []
for node in G.nodes():
    if node not in top_100_actors:
        movies.append(node)
G.remove_nodes_from(movies)
nx.draw(G, with_labels=True)
plt.show()
# get_dyads(): request handler that reads 'offset' and 'network_type' from
# request.args, selects the prediction dyads whose c_time lies in a
# 10-minute window (2015-06-27 22:00 shifted by 10 minutes per offset step)
# and returns, via jsonify, the dyad points plus a node/edge network.
# network_type 'common' adds the common neighbours of each dyad,
# 'no-neighbors' adds none, anything else adds the neighbours of both users.
# Dyad members are coloured 'red', added neighbours 'blue'; edges are
# de-duplicated by an id derived from "from:to".
# NOTE(review): the original indentation was lost and the nesting inside the
# neighbour branches is ambiguous, so the code is left exactly as found.
# NOTE(review): friends_graph.pkl is unpickled on every call through an
# open() handle that is never closed; int(request.args.get('offset')) fails
# when the parameter is missing; ``ids`` is a list, so each membership test
# is a linear scan.
def get_dyads(): offset = int(request.args.get('offset')) network_type = str(request.args.get('network_type')) cur = g.db.cursor(cursor_factory=psycopg2.extras.DictCursor) start = datetime.datetime(2015, 6, 27, 22, 0, 0, 0) if offset != 0: start = start + datetime.timedelta(minutes=10*offset) end = start + datetime.timedelta(minutes=10) cur.execute(""" select pd.user_a, pd.user_b, lat, lon, c_time, dff.distinct_co_occurneces as distinct_grids, dff.same_concerts_jac, dff.same_camp_score from presentation_prediction_dyads pd inner join derived_friend_features dff on dff.user_a = pd.user_a and dff.user_b = pd.user_b where c_time between %s and %s """, (start, end, )) G = pickle.load(open("friends_graph.pkl", "rb")).to_undirected() nodes = [] edges = [] points = [] degrees = G.degree() nodes_added = set() for dyads in cur.fetchall(): points.append({ 'user_a': dyads['user_a'], 'user_b': dyads['user_b'], 'lat': float(dyads['lat']), 'lon': float(dyads['lon']) }) current_nodes = [dyads['user_a'], dyads['user_b']] # Add all neighbors if network_type == 'common': for neighbor in nx.common_neighbors(G, dyads['user_a'], dyads['user_b']): if neighbor not in nodes_added: nodes.append((neighbor, 'blue')) nodes_added.add(neighbor) for node in current_nodes: edges.append((node, neighbor, 1)) elif network_type=='no-neighbors': pass else: for node in current_nodes: for neighbor in G.neighbors(node): if neighbor not in nodes_added and neighbor not in current_nodes: nodes.append((neighbor, 'blue')) nodes_added.add(neighbor) edges.append((node, neighbor, 1)) for node in current_nodes: if node not in nodes_added: nodes.append((node, 'red')) nodes_added.add(node) edges.append((dyads['user_a'], dyads['user_b'], dyads['distinct_grids'])) new_nodes = [{'id': x[0], 'label':x[0], 'value': degrees[x[0]], 'color': x[1] } for x in list(set(nodes))] new_edges = [] ids = [] for edge in edges: _id = str(hash(':'.join([str(edge[0]), str(edge[1])]))) if not _id in ids: ids.append(_id) 
new_edges.append({'from': edge[0], 'to': edge[1], 'value': edge[2], 'id': _id}) print(str(sorted(Counter(ids).items()))) response = { 'points': points, 'network': { 'nodes': new_nodes, 'edges': new_edges }, 'start': start.strftime("%Y-%m-%d %H:%M:%S"), 'end': end.strftime("%Y-%m-%d %H:%M:%S") } return jsonify(response)
def predict(u, v):
    """Jaccard coefficient of the neighbourhoods of ``u`` and ``v``.

    Reads the free name ``G``.  Returns ``0`` when neither node has a
    neighbour.
    """
    either = set(G[u]).union(G[v])
    if not either:
        return 0
    shared = sum(1 for _ in nx.common_neighbors(G, u, v))
    return shared / len(either)