Example #1
 def get_AA_index(self, u, v, graph):
     # Adamic-Adar index of the pair (u, v); an existing edge is removed
     # temporarily so the pair is scored as if it were unobserved.
     def aa_sum(g):
         cn = list(nx.common_neighbors(g, u, v))
         return sum(1 / math.log(nx.degree(g, n)) for n in cn) if cn else 0

     if graph.has_edge(u, v):
         graph.remove_edge(u, v)
         value = aa_sum(graph)
         graph.add_edge(u, v)
     else:
         value = aa_sum(graph)
     return value
Example #2
 def predict(u, v):
     Cu = _community(G, u, community)
     Cv = _community(G, v, community)
     if Cu != Cv:
         return 0
     cnbors = nx.common_neighbors(G, u, v)
     return sum(1 / G.degree(w) for w in cnbors if _community(G, w, community) == Cu)
    def predict(self, node_pairs):
        predictions = []
        for node_pair in node_pairs:
            uNeighborhood = list(self.graph.neighbors(node_pair[0]))
            vNeighborhood = list(self.graph.neighbors(node_pair[1]))
            intersectionNeighbors = list(
                common_neighbors(self.graph, node_pair[0], node_pair[1]))
            unionNeighbors = set().union(uNeighborhood, vNeighborhood)

            # 2x2 contingency counts over the node set
            a = len(intersectionNeighbors)  # neighbors of both u and v
            b = len(uNeighborhood) - a  # neighbors of u only
            c = len(vNeighborhood) - a  # neighbors of v only
            d = len(self.graph) - len(unionNeighbors)  # neighbors of neither

            # Adjusted Rand similarity of the two neighborhoods
            denominator = ((a + b) * (b + d)) + ((a + c) * (c + d))

            predictions.append(
                (node_pair[0], node_pair[1],
                 2 * (a * d - b * c) / denominator if denominator != 0 else 0))
        return predictions

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        return 'AdjustedRand'
Example #4
def SimilarityMeasures(G):

    # resource_allocation_index
    preds = nx.resource_allocation_index(G, [(1, 2), (3, 4), (1, 4), (5, 6),
                                             (3, 5)])
    for u, v, p in preds:
        print('(%d, %d) -> %.8f' % (u, v, p))

    print('****************************')

    # Common neighbours
    print(sorted(nx.common_neighbors(G, 1, 2)))
    print('****************************')

    # jaccard coefficient
    preds = nx.jaccard_coefficient(G, [(1, 2), (3, 4), (1, 4), (5, 6), (3, 5)])
    for u, v, p in preds:
        print('(%d, %d) -> %.8f' % (u, v, p))

    print('****************************')

    # AdamicAdar
    preds = nx.adamic_adar_index(G, [(1, 2), (3, 4), (1, 4), (5, 6), (3, 5)])
    for u, v, p in preds:
        print('(%d, %d) -> %.8f' % (u, v, p))

    print('****************************')

    # Preferential Attachment (PA)
    preds = nx.preferential_attachment(G, [(1, 2), (3, 4), (1, 4), (5, 6),
                                           (3, 5)])
    for u, v, p in preds:
        print('(%d, %d) -> %.8f' % (u, v, p))

    print('****************************')
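A minimal driver for SimilarityMeasures (an illustrative sketch; the graph choice is an assumption, any undirected graph containing nodes 1 through 6 works):

import networkx as nx

G = nx.gnp_random_graph(10, 0.5, seed=1)  # nodes 0..9, so the queried pairs exist
SimilarityMeasures(G)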
Example #5
 def predict(u, v):
     cnbors = list(nx.common_neighbors(G, u, v))
     cosine_val = math.sqrt(G.degree(u) * G.degree(v))
     if cosine_val == 0:
         return 0
     else:
         return len(cnbors) / cosine_val
Example #6
    def test_clustering_score(self):
        """
        Test global clustering score with generalized formula

        This is the average of the local clustering scores for each node v:

                  2 Nv        where Kv = degree
        C(v) = ----------           Nv = number of edges between
               Kv (Kv - 1)               the neighbors of v
        """
        test_data_path = os.path.join(self._fixtures_dir, 'les-miserables.csv')
        results = ctd.get_summary(test_data_path)
        graph = ctd.get_graph(test_data_path)

        local_scores = []
        for v in graph.nodes():
            k = graph.degree(v)
            neighbor_links = []
            for u in nx.all_neighbors(graph, v):
                neighbor_links += [
                    tuple(sorted((u, w)))
                    for w in nx.common_neighbors(graph, u, v)
                ]
            n = len(list(set(neighbor_links)))
            local_scores.append(
                2 * n / float(k * (k - 1)) if k > 1 else 0)

        self.assertAlmostEqual(results['clustering'],
                               sum(local_scores) / float(len(local_scores)))
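As a cross-check (not part of the original test), NetworkX exposes the same averaged local clustering directly; a minimal sketch assuming a simple undirected graph:

import networkx as nx

graph = nx.les_miserables_graph()  # stand-in for the CSV fixture
print(nx.average_clustering(graph))  # equals the mean of the local scores computed above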
def new_connections_predictions():
    df = future_connections
    df['jaccard_coefficient'] = [
        x[2] for x in nx.jaccard_coefficient(G, df.index)
    ]
    df['resource_allocation_index'] = [
        x[2] for x in nx.resource_allocation_index(G, df.index)
    ]
    df['preferential_attachment'] = [
        x[2] for x in nx.preferential_attachment(G, df.index)
    ]
    df['common_neighbors'] = df.index.map(
        lambda ind: len(list(nx.common_neighbors(G, ind[0], ind[1]))))
    print('.......we have extracted all the features......')
    df_train = df[~pd.isnull(df['Future Connection'])]
    df_test = df[pd.isnull(df['Future Connection'])]
    features = [
        'jaccard_coefficient', 'resource_allocation_index',
        'preferential_attachment', 'common_neighbors'
    ]
    X_train = df_train[features]
    Y_train = df_train['Future Connection']
    X_test = df_test[features]
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    clf = LogisticRegression(solver='liblinear', random_state=14)
    clf.fit(X_train_scaled, Y_train)
    predictions = np.round(clf.predict_proba(X_test_scaled)[:, 1], 2)
    results = pd.Series(data=predictions, index=X_test.index)
    results = results.sort_values(ascending=False)
    return results


# print (new_connections_predictions())
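new_connections_predictions reads two module-level globals, G and future_connections. A minimal sketch of the expected shapes (the toy graph and rows are assumptions; the 'Future Connection' column name comes from the code above):

import networkx as nx
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression  # imports the function relies on
from sklearn.preprocessing import MinMaxScaler

G = nx.karate_club_graph()
future_connections = pd.DataFrame(
    {'Future Connection': [1.0, 0.0, np.nan]},  # NaN marks the pairs to predict
    index=pd.Index([(0, 9), (1, 33), (5, 11)]),  # candidate node pairs
)
print(new_connections_predictions())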
Example #8
def linkProb(g, nodei: str, nodej: str):
    ni_nj = list(nx.common_neighbors(g, str(nodei), str(nodej)))
    prob = [len(list(g.neighbors(i))) for i in ni_nj]
    total = 0
    for i in prob:
        total += 1 / math.log(i)
    return total
 def predict(u, v):
     cnbors = list(nx.common_neighbors(G, u, v))
     sumDg = max(G.degree(u), G.degree(v))
     if sumDg == 0:
         return 0
     else:
         return len(cnbors) / sumDg
Example #10
    def create_features(self, G_train, edge_bunch):
        i = 0
        X = []
        page_rank = nx.pagerank(G_train)
        for pair in edge_bunch:
            common_neighbors_count = len(
                list(nx.common_neighbors(G_train, pair[0], pair[1])))
            jaccard_coefficient = next(
                nx.jaccard_coefficient(G_train, [pair]))[2]
            adamic_adar = next(nx.adamic_adar_index(G_train, [pair]))[2]
            degree_0 = nx.degree(G_train, pair[0])
            degree_1 = nx.degree(G_train, pair[1])
            prod = degree_0 * degree_1
            page_rank_0 = page_rank[pair[0]]
            page_rank_1 = page_rank[pair[1]]

            f = [
                degree_0,
                degree_1,
                prod,
                common_neighbors_count,
                jaccard_coefficient,
                adamic_adar,
                page_rank_0,
                page_rank_1,
            ]

            X.append(f)

            i += 1
            if i % 1000000 == 0:
                print(i)

        return np.array(X)
 def cal_net_features(self):
     common_neighbors = [len(list(nx.common_neighbors(self.graph, u, v))) for u, v in self.graph.edges]
     com_mean = np.mean(np.array(common_neighbors))
     com_var = np.var(np.array(common_neighbors))
     degree_sequence = sorted([d for n, d in self.graph.degree()], reverse=True)
     core_count = len([i for i in degree_sequence if i > np.quantile(degree_sequence, 0.75)])
     return com_mean, com_var, core_count
Example #12
 def predict(u, v):
     cnbors = list(nx.common_neighbors(G, u, v))
     union_size = len(set(G[u]) | set(G[v]))
     if union_size == 0:
         return 0
     else:
         return len(cnbors) / union_size
Example #13
    def update_dicts_of_common_neighbors_info(self, node):
        # gathers info about the number / max number of common neighbors of the given node and its neighbors;
        # in other words: counts the number of triangles a node participates in, and the max number of
        # triangles it simultaneously participates in with a single neighbor node.

        # initializing for a node that has not been visited yet.
        if node not in self.dict_common_neighbors:
            self.dict_common_neighbors[node] = {}
            self.max_common_neighbors[node] = -1

        for neighbor in self.graph.neighbors(node):
            if neighbor in self.dict_common_neighbors[node]:
                continue
            if neighbor not in self.dict_common_neighbors:
                self.dict_common_neighbors[neighbor] = {}
                self.max_common_neighbors[neighbor] = -1

            number_common_neighbors = sum(
                1 for _ in nx.common_neighbors(self.graph, node, neighbor))
            self.dict_common_neighbors[node][neighbor] = number_common_neighbors
            self.dict_common_neighbors[neighbor][node] = number_common_neighbors

            if number_common_neighbors > self.max_common_neighbors[node]:
                self.max_common_neighbors[node] = number_common_neighbors
            if number_common_neighbors > self.max_common_neighbors[neighbor]:
                self.max_common_neighbors[neighbor] = number_common_neighbors
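For intuition, the quantity cached above is a per-edge triangle count: each triangle through an edge contributes one common neighbor of its endpoints. A small illustration (the graph is an assumption):

import networkx as nx

G = nx.complete_graph(4)
for u, v in G.edges():
    triangles = sum(1 for _ in nx.common_neighbors(G, u, v))
    print(u, v, triangles)  # every edge of K4 lies on exactly 2 triangles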
Example #14
def AdamicAdarIndex(g, edge):  
    
    if g is None or edge is None:
        return
  
    g_undirect = nx.to_undirected(g)
    
    source = edge[0]
    dest = edge[1]
    common = nx.common_neighbors(g_undirect, source, dest)
    
    index = 0.0
    number_of_neighbors = 0
    
    #Adamic-Adar:
    for neigh in common:
        index += 1/math.log(g_undirect.degree(neigh), 10)
        number_of_neighbors += 1
    
    #Maximum Adamic-Adar:
    max_adamic_adar = (1/math.log(2,10))*number_of_neighbors
    
    normalized_adamic_adar = (float(index)/float(max_adamic_adar)) if max_adamic_adar != 0 else 0
    
    return normalized_adamic_adar
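A usage sketch for AdamicAdarIndex (the example graph is an assumption). Note the function takes logs in base 10 before normalizing, while NetworkX's built-in adamic_adar_index uses the natural logarithm, so the raw sums differ by a factor of ln(10):

import networkx as nx

g = nx.karate_club_graph()
print(AdamicAdarIndex(g, (0, 33)))  # normalized to [0, 1]
print(next(nx.adamic_adar_index(g, [(0, 33)]))[2])  # unnormalized, natural log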
def get_edge_embeddedness(graph, pairs):
    c = Column(1, 'numerical')
    value = dict()
    for pair in pairs:
        value[pair] = len(list(nx.common_neighbors(graph, pair[0], pair[1])))
    c.value = value
    return c
Example #16
 def predict(u, v):
     result = 0
     for node in nx.common_neighbors(G, u, v):
         #result += 1. * distance(dic[node],dic[u],dic[v])
         #result += (1. * distance(dic[node],dic[u],dic[v])) / (np.log10(len(G[node])) *  averageDis)
         result += 1.0 / len(G[node])
     return result
def print_sim_nodes(g, k=10):
    CN = []  # common neighbors
    JC = []  # jaccard coefficient
    AA = []  # adamic_adar_index
    PA = []  # preferential attachment

    # walk consecutive node pairs (node i with node i+1)
    nodes = list(g.nodes())
    for x, y in zip(nodes, nodes[1:]):
        CN.append((x, y, len(list(nx.common_neighbors(g, x, y)))))
        JC.append(list(nx.jaccard_coefficient(g, [(x, y)]))[0])
        AA.append(list(nx.adamic_adar_index(g, [(x, y)]))[0])
        PA.append(list(nx.preferential_attachment(g, [(x, y)]))[0])

    # top k
    print("vertex pair:", x, "and", y)
    print("common neighbors")
    print(sorted(CN, key=lambda x: x[2], reverse=True)[:k])
    print("Jaccard coefficient")
    print(sorted(JC, key=lambda x: x[2], reverse=True)[:k])
    print("Adamic/Adar")
    print(sorted(AA, key=lambda x: x[2], reverse=True)[:k])
    print("preferential attachment")
    print(sorted(PA, key=lambda x: x[2], reverse=True)[:k])
def common_neighbor_scores(g_train,train_test_split):
    if g_train.is_directed():  # Only works for undirected graphs
        g_train = g_train.to_undirected()

    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
    test_edges, test_edges_false = train_test_split

    start_time = time.time()
    cn_scores = {}

    # Calculate scores
    cn_matrix = np.zeros(adj_train.shape)
    for u, v in get_ebunch(train_test_split):  # (u, v) = node index pairs
        cn = len(list(nx.common_neighbors(g_train, u, v)))
        cn_matrix[u][v] = cn
        cn_matrix[v][u] = cn  # make sure it's symmetric
    cn_matrix = cn_matrix / cn_matrix.max()  # Normalize matrix

    runtime = time.time() - start_time
    # cn_roc, cn_ap = get_roc_score(test_edges, test_edges_false, cn_matrix)
    val_roc, val_avg, test_roc, test_avg = train_lr(train_test_split, cn_matrix)

    cn_scores['test_roc'] = test_roc
    cn_scores['test_ap'] = test_avg
    cn_scores['runtime'] = runtime
    return cn_scores
def computeNeighborOverlap(g):
    # maps each edge to its neighborhood overlap value
    edgeOverlaps = {}
    for thisEdge in g.edges():
        to = thisEdge[0]  # first endpoint of the edge tuple
        fromm = thisEdge[1]  # second endpoint ('from' is a reserved word)

        # neighborhood overlap of the two endpoints:
        # cuv / (ku + kv - 2 - cuv)
        cuv = sum(
            1 for e in nx.common_neighbors(g, to, fromm)
        )  # shared neighbors of the endpoints
        ku = sum(1 for e in nx.all_neighbors(g, to))
        kv = sum(1 for e in nx.all_neighbors(g, fromm))
        overVal = 0
        try:
            overVal = cuv / (ku + kv - 2 - cuv)
        except ZeroDivisionError:
            pass
        edgeOverlaps.update({thisEdge: overVal})
    return edgeOverlaps
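A short usage sketch (the graph is an assumption); high overlap marks strongly embedded edges, values near zero mark bridge-like edges:

import networkx as nx

G = nx.karate_club_graph()
overlaps = computeNeighborOverlap(G)
print(sorted(overlaps.items(), key=lambda kv: kv[1], reverse=True)[:5])  # most embedded edges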
def collectAdarScores(G, train_edges_name):
    
    # find which edges are unconnected in the training
    df_train = pd.read_csv(train_edges_name)
    df_train = df_train.replace(np.nan, 'nan', regex=True)

    list_real_labels = []
    list_pred_scores = []
    for i_row in range(len(df_train.node1)): # for each training set data
        node1 = df_train.node1[i_row]
        node2 = df_train.node2[i_row]

        # Find all nbrs of node1 and node2 in training graph that overlap
        list_nbrs = sorted(nx.common_neighbors(G, node1, node2))
            
        total_sum = 0
        # accumulate one term per common neighbor, using weighted degree in place of plain degree
        for nbr in list_nbrs:
            curr_weight = G.degree(nbr, weight='weight')
            total_sum += -1 / np.log(curr_weight)

        list_real_labels.append(df_train.labels[i_row])
        list_pred_scores.append(total_sum)

    return list_pred_scores, list_real_labels
Example #21
def construct_relation_graph(B,
                             set=0,
                             weight_fn=lambda i1, i2, w: len(w),
                             name=""):
    hash = hashlib.md5(nx.info(B).encode('utf-8')).hexdigest()
    filename = 'graph_' + hash + '_' + name + '.pkl'
    if os.path.isfile('cache/' + filename) and False:  # the 'and False' disables the cache read
        with open('cache/' + filename, 'rb') as f:
            dprint("Relation graph loaded...")
            return pickle.load(f)
    """ Construct relation graph"""
    dprint("Constructing relation graph...")
    # Get buyers and products
    sets = nx.bipartite.sets(B)

    # Get all combinations between (0 - buyers, 1 - products) set items
    combinations = itertools.combinations(sets[set], 2)

    G = nx.empty_graph(len(sets[set]))

    # Construct edges with weights
    edges = [(i1, i2, weight_fn(i1, i2, list(nx.common_neighbors(B, i1, i2))))
             for (i1, i2) in combinations]

    # Add edges to graph
    G.add_weighted_edges_from([edge for edge in edges if edge[2] is not None])

    dprint("Relation graph constructed")

    # Save to cache
    with open('cache/' + filename, 'wb') as f:
        pickle.dump(G, f)
    return G
def jaca_predict(graph, a, b):
    number_x = len(list(nx.neighbors(graph, a)))
    number_y = len(list(nx.neighbors(graph, b)))
    common = len(list(nx.common_neighbors(graph, a, b)))

    score = common / (number_x + number_y - common)
    return score
def L_P_WCN(network: nx.Graph, num_add):
    nodes_pair = []  # the pairs of nodes with edges and without edges
    probability_add = []  # the probabilities of the pairs of nodes to be added
    score = 0.0  # the score of each pair of nodes in link prediction model
    total_score = 0.0  # the sum of scores of pairs of nodes without edge and with edge

    # calculate the score of each pair of nodes
    for i, elei in enumerate(list(network.nodes()), 1):
        for j, elej in enumerate(list(network.nodes()), 1):
            # initialize score for each edge
            score = 0.0
            if i >= j:
                continue
            try:
                for z in nx.common_neighbors(network, elei, elej):
                    w_elei_z = network.get_edge_data(elei, z).get('weight')
                    w_z_elej = network.get_edge_data(z, elej).get('weight')
                    score += w_elei_z + w_z_elej
            except (AttributeError, TypeError):  # missing edge data or 'weight' attribute
                continue
            total_score += score
            nodes_pair.append((elei, elej, score))

    for a, b, c in nodes_pair:
        probability_add.append(c / total_score)  # calculate the probabilities of edges to be added

    # select edges to be added according to probabilities
    edges_add = calculate_param.prob_select(nodes_pair, probability_add, num_add)
    '''
    for a, b, c in edges_add:
        network.add_edge(a, b)  # add selected edges
    '''
    return edges_add
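For reference, the score accumulated above is the weighted common neighbor (WCN) index. The scoring step extracted as a standalone helper (a sketch; defaulting missing weights to 1 is our assumption):

import networkx as nx

def wcn_score(G, u, v):
    # sum of w(u, z) + w(z, v) over all common neighbors z
    return sum(
        G[u][z].get('weight', 1) + G[z][v].get('weight', 1)
        for z in nx.common_neighbors(G, u, v)
    )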
 def predict(u, v):
     Cu = _community(G, u, community)
     Cv = _community(G, v, community)
     cnbors = list(nx.common_neighbors(G, u, v))
     neighbors = (sum(_community(G, w, community) == Cu
                      for w in cnbors) if Cu == Cv else 0)
     return len(cnbors) + neighbors
 def __init__(self, preparedParameters, filePathResults, filePathAnalyseResult, topRank):
     print("Starting analysing the results", datetime.today())

     absFilePath = filePathResults
     absfilePathAnalyseResult = filePathAnalyseResult  # FormatingDataSets.get_abs_file_path(filePathAnalyseResult)
     fResult = open(absFilePath, 'r')
     with open(absfilePathAnalyseResult, 'w') as fnodes:
         self.success = 0
         element = 0
         for line in fResult:
             element = element + 1
             FormatingDataSets.printProgressofEvents(element, topRank, "Analysing the results: ")
             cols = line.strip().replace('\n', '').split('\t')
             if len(list(networkx.common_neighbors(preparedParameters.testGraph, cols[len(cols) - 2], cols[len(cols) - 1]))) != 0:
                 self.success = self.success + 1
                 fnodes.write(cols[len(cols) - 2] + '\t' + cols[len(cols) - 1] + '\t' + 'SUCCESS \r\n')
             else:
                 fnodes.write(cols[len(cols) - 2] + '\t' + cols[len(cols) - 1] + '\t' + 'FAILED \r\n')

             if element == topRank:
                 break

         result = float(self.success) / float(topRank) * 100
         strResult = 'Final Result: \t' + str(result) + '%'
         fnodes.write(strResult)
         fnodes.write('\n#\t' + str(self.success))
     print("Analysing the results finished", datetime.today())
def common_neighbors(features, G):
    nb_common_neighbors = []
    for i in range(features.shape[0]):
        a = features['From'][i]
        b = features['To'][i]
        nb_common_neighbors.append(len(list(nx.common_neighbors(G, a, b))))  # append the number of common neighbors
    return nb_common_neighbors
def neighborhoodOverlapDistribution(graph):

    Dist = {}

    interval = 0.1

    # iterate over every node in the graph
    for n in graph.nodes():
        all_neighbors = list(nx.all_neighbors(graph, n))
        # visit each node's neighbors to compute its neighborhood overlap
        for neighbor in all_neighbors:
            common_neighbors = list(nx.common_neighbors(graph, n, neighbor))

            # overlap = |common| / (|union of neighborhoods| - 2), guarding against division by zero
            calc = 0.0
            try:
                calc = len(common_neighbors) / (
                    len(set(all_neighbors) |
                        set(nx.all_neighbors(graph, neighbor))) - 2.0)
            except ZeroDivisionError:
                calc = 0.0

            key = groupBy(calc, interval)
            Dist[key] = Dist.get(key, 0) + 1

    return OrderedDict(sorted(Dist.items(), key=lambda t: t[0]))
def L_P_CN(network):
    num_add = 0  # the number of edges to be added
    nodes_pair_without_edge = []  # the pairs of nodes without edges
    probability_add = []  # the probabilities of the pairs of nodes to be added
    score = 0  # the score of each pair of nodes in link prediction model
    total_score_without_edge = 0.0  # the sum of scores of pairs of nodes without edge

    #  calculate the score of each pair of nodes
    for i, elei in enumerate(list(network.nodes()), 1):
        for j, elej in enumerate(list(network.nodes()), 1):

            if i >= j:
                continue
            if not network.has_edge(elei, elej):
                score = len(list(nx.common_neighbors(network, elei, elej)))
                total_score_without_edge += score
                nodes_pair_without_edge.append((elei, elej, score))

    for a, b, c in nodes_pair_without_edge:
        probability_add.append(
            c / total_score_without_edge
        )  # calculate the probabilities of edges to be added
    # select edges to be added according to probabilities
    edges_add = calculate_param.prob_select_distinct(nodes_pair_without_edge,
                                                     probability_add, num_add)
    for a, b, c in edges_add:
        network.add_edge(a, b)  # add selected edges

    return True
Example #29
 def predict(u, v):
     cnbors = list(nx.common_neighbors(G, u, v))
     max_val = max(G.degree(u), G.degree(v))
     if max_val == 0:
         return 0
     else:
         return len(cnbors) / max_val
 def predict(u, v):
     cnbors = list(nx.common_neighbors(G, u, v))
     borsDg = G.degree(u) * G.degree(v)
     if borsDg == 0:
         return 0
     else:
         return len(cnbors) / borsDg
Example #31
 def simpleProximity(self, s, t): #s and t are the mip node IDs, NOT user/obj ids
     proximity = 0.0
     sharedWeight = 0.0
     for node in nx.common_neighbors(self.mip, s, t):
         sharedWeight = sharedWeight + self.mip[s][node]['weight'] + self.mip[t][node]['weight'] #the weight of the path connecting s and t through the current node
     proximity = sharedWeight/(self.mip.degree(s, weight = 'weight')+self.mip.degree(t, weight = 'weight')+0.000000000001)
     return proximity  
Example #32
 def predict(u, v):
     cnbors = list(nx.common_neighbors(G, u, v))
     mult_val = G.degree(u) * G.degree(v)
     if mult_val == 0:
         return 0
     else:
         return len(cnbors) / mult_val
Example #33
 def predict(u, v):
     cnbors_len = len(list(nx.common_neighbors(G, u, v)))
     denomi = G.degree(u) + G.degree(v)
     if denomi == 0:
         return 0
     else:
         return (2*cnbors_len) / denomi
Example #34
 def predict(u, v):
     Cu = _community(G, u, community)
     Cv = _community(G, v, community)
     cnbors = list(nx.common_neighbors(G, u, v))
     neighbors = (sum(_community(G, w, community) == Cu for w in cnbors)
                  if Cu == Cv else 0)
     return len(cnbors) + neighbors
 def get_TimeofLinks(self, graph, node1, node2):
     result = []
     for node in networkx.common_neighbors(graph, node1, node2):
         for n,d in graph.nodes(data=True):
             if n == node:
                 result.append(d['time'])
     result.sort(reverse=True)
     return result
 def predict(u, v):
     cnbors = list(nx.common_neighbors(G, u, v))
     sum_cn = 0
     for w in cnbors:
         if G.degree(w) != 0:
             sum_cn += 1 / G.degree(w)  # resource-allocation term
     return sum_cn
Example #37
 def predict(u, v):
     Cu = _community(G, u, community)
     Cv = _community(G, v, community)
     if Cu != Cv:
         return 0
     cnbors = nx.common_neighbors(G, u, v)
     return sum(1 / G.degree(w) for w in cnbors
                if _community(G, w, community) == Cu)
    def adamicAdarProximity(self, s, t):
        proximity = 0.0
        for node in nx.common_neighbors(self.mip, s, t):
            # weight of the path connecting s and t through the current node
            weights = self.mip[s][node]['weight'] + self.mip[t][node]['weight']
            if weights != 0:  # 0 essentially means no connection
                # gives more weight to "rare" shared neighbors; the tiny constant avoids division by zero
                proximity = proximity + (weights * (1 / (math.log(self.mip.degree(node, weight='weight')) + 0.00000000000000000000000001)))
        return proximity
Example #39
 def predict(u, v):
     Cu = _community(G, u, community)
     Cv = _community(G, v, community)
     if Cu != Cv:
         return 0
     cnbors = set(nx.common_neighbors(G, u, v))
     within = set(w for w in cnbors
                  if _community(G, w, community) == Cu)
     inter = cnbors - within
     return len(within) / (len(inter) + delta)
 def get_BagofWords(self, graph, node1, node2):
     result = set()
     for node in networkx.common_neighbors(graph, node1, node2):
         for n, d in graph.nodes(data=True):
             if n == node:
                 for keyword in ast.literal_eval(d['keywords']):
                     result.add(keyword)

     return result
def get_adamic_adar_score(graph, pairs):
    c = Column(1, 'numerical')
    value = dict()
    for pair in pairs:
        common_nei = nx.common_neighbors(graph, pair[0], pair[1])
        score = 0.0
        for n in common_nei:
            score += 1.0 / math.log(graph.degree(n) + 1)
        value[pair] = score
    c.value = value
    return c
 def generateDataForCalculate(self):
     if self.trainnigGraph is None:
         self.generating_Training_Graph()
     
     _nodes = sorted(self.trainnigGraph.nodes())
     adb = Base(self.filePathTrainingGraph + ".calc.pdl")
     adb.create('pairNodes', 'common', 'time', 'domain' )
     
     for node in sorted(_nodes):
         othernodes = set(n for n in _nodes if n > node)
         for other in othernodes:
             common =  set(networkx.common_neighbors(self.trainnigGraph, node, other))
             arestas = self.trainnigGraph.edges([node, other], True)
def merge(G, edge):
    """Returns a new graph with edge merged, and the new node containing the
            information lost in the merge. The weights from common neighbors
            are added together, a la Stoer-Wagner algorithm."""
    if G.nodes[edge[1]]['type'] == 'login' or G.nodes[edge[1]]['type'] == 'email':
        edge = edge[::-1]  # if login/email is on the right, flip the order so it's always on the left

    nx.set_node_attributes(G, {edge[0]: G.nodes[edge[0]]['list'] + [edge[1]]}, 'list')

    J = nx.contracted_edge(G, (edge[0], edge[1]), self_loops=False)  # contract edge without self-loop
    # weight bookkeeping
    N = nx.common_neighbors(G, edge[0], edge[1])  # find common neighbors
    for i in N:
        J[i][edge[0]]['weight'] = G[edge[0]][i]['weight'] + G[edge[1]][i]['weight']  # modify the weight after contraction
    return J
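A minimal usage sketch for merge (the toy graph, node names, and attribute values are all assumptions, since the surrounding context is not shown):

import networkx as nx

G = nx.Graph()
G.add_edge('alice@example.com', 'alice_login', weight=1)
G.add_edge('alice@example.com', 'device42', weight=2)
G.add_edge('alice_login', 'device42', weight=3)
nx.set_node_attributes(G, {'alice@example.com': 'email', 'alice_login': 'login', 'device42': 'device'}, 'type')
nx.set_node_attributes(G, {n: [] for n in G}, 'list')

J = merge(G, ('device42', 'alice@example.com'))  # email on the right, so merge() flips the pair
print(J['alice@example.com']['alice_login']['weight'])  # 1 + 3 = 4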
def graph_stats(distance_couple, net):
    distances = []
    common_neighbors = []
    jaccard = []
    adamic = []
    edge_bet = []
    edge_betweeness = nx.edge_betweenness_centrality(net)
    for couple in distance_couple:
        distances.append(couple[1])
        common_neighbors.append(len(list(nx.common_neighbors(net, couple[0][0], couple[0][1]))))
        jaccard.append(list(nx.jaccard_coefficient(net, [(couple[0][0], couple[0][1])]))[0][2])
        adamic.append(list(nx.adamic_adar_index(net, [(couple[0][0], couple[0][1])]))[0][2])
        try:
            edge_bet.append(edge_betweeness[couple[0]])
        except KeyError:
            edge_bet.append(edge_betweeness[(couple[0][1], couple[0][0])])

    r_dist = 10.0/max(distances)
    r_n = 10.0/max(common_neighbors)
    r_j = 10.0/max(jaccard)
    r_a = 10.0/max(adamic)
    r_e = 10.0/max(edge_bet)

    distances = [j * r_dist for j in distances]
    common_neighbors = [j * r_n for j in common_neighbors]
    jaccard = [j * r_j for j in jaccard]
    adamic = [j * r_a for j in adamic]
    edge_bet = [j * r_e for j in edge_bet]

    plt.loglog(common_neighbors, color='b', label='common_neighbors')
    plt.loglog(distances, color='r', label='distances')
    plt.savefig('node_similarity/stats_cm.png', format='png')
    plt.close()

    plt.loglog(jaccard, color='b', label='jaccard')
    plt.loglog(distances, color='r', label='distances')
    plt.savefig('node_similarity/stats_j.png', format='png')
    plt.close()

    plt.loglog(adamic, color='b', label='adamic')
    plt.loglog(distances, color='r', label='distances')
    plt.savefig('node_similarity/stats_aa.png', format='png')
    plt.close()

    plt.loglog(edge_bet, color='b', label='edge betweenness')
    plt.loglog(distances, color='r', label='distances')
    plt.savefig('node_similarity/stats_eb.png', format='png')
    plt.close()
 def get_TimeofLinks(self, graph, node1, node2):
     result = []
     for node in networkx.common_neighbors(graph, node1, node2):
         if node in self.times:
             if self.debugar:
                 print("already found the time for paper ", node)
         else:
             if self.debugar:
                 print("rescuing time from paper: ", str(node))

             paper = list(d for n, d in graph.nodes(data=True) if d['node_type'] == 'E' and n == node)
             if self.debugar:
                 print(paper[0]['time'])
             self.times[node] = paper[0]['time']
         result.append(self.times[node])
     result.sort(reverse=True)
     return result
Example #46
    def calculateStability(self):
        balanceTriangle = 0
        totalTriangles = 0
        for edge, sign in self.edgeSignDict.items():
            node1 = int(float(edge.split(",")[0]))
            node2 = int(float(edge.split(",")[1]))
            commonNeigh = sorted(nx.common_neighbors(self.graph, node1, node2))

            for inode in commonNeigh:
                sign1n = self.graph.get_edge_data(node1, inode, default={"weight": 10})["weight"]
                sign2n = self.graph.get_edge_data(node2, inode, default={"weight": 10})["weight"]
                sign12 = self.graph.get_edge_data(node1, node2, default={"weight": 10})["weight"]
                mul = sign1n * sign2n * sign12

                if 0 < mul < 10:
                    balanceTriangle += 1
                totalTriangles += 1

        print("Balance percentage: " + str((1.0 * balanceTriangle) / totalTriangles))
 def get_ObjectsofLinks(self, graph, node1, node2):
     result = []
     for node in networkx.common_neighbors(graph, node1, node2):
         if node in self.parameter.linkObjects:
             if self.debugar:
                 print("already found the time for paper ", node)
         else:
             if self.debugar:
                 print("rescuing time from paper: ", str(node))

             MaxAmplitude = self.parameter.t0_ - 3
             if self.debugar:
                 print('maximum amplitude:', MaxAmplitude)
             paper = list(d for n, d in graph.nodes(data=True) if d['node_type'] == 'E' and n == node)
             if self.debugar:
                 print('information about the paper:', paper)
             if paper[0]['time'] >= MaxAmplitude:
                 self.parameter.linkObjects[node] = [paper[0]['time'], eval(paper[0]['keywords'])]
         if self.debugar:
             print('information about the paper already in memory:', self.parameter.linkObjects[node])
         result.append(self.parameter.linkObjects[node])

     return result
 def get_pair_nodes_not_linked(self, graph, file, min_papers):
     print("Starting getting pairs of nodes that are not linked", datetime.today())
     results = []
     nodesinGraph = set(n for n, d in graph.nodes(data=True) if d['node_type'] == 'N')
     currentNodes = set()
     for n in nodesinGraph:

         papers = set(networkx.all_neighbors(graph, n))
         print(papers)
         if len(papers) >= min_papers:
             currentNodes.add(n)

     print('qty of authors: ', len(currentNodes))
     nodesOrdered = sorted(currentNodes)
     element = 0
     totalnodesOrdered = len(nodesOrdered)
     for node1 in nodesOrdered:
         element = element + 1
         FormatingDataSets.printProgressofEvents(element, totalnodesOrdered, "Checking nodes not linked: ")

         others = set(n for n in nodesOrdered if n > node1)
         notLinked = set()
         for other_node in others:
             if len(set(networkx.common_neighbors(graph, node1, other_node))) == 0:
                 notLinked.add(other_node)
         results.append([node1, notLinked])
         if element % 2000 == 0:
             for item in results:
                 file.write(str(item[0]) + '\t' + repr(item[1]) + '\n')
             results = []

     for item in results:
         file.write(str(item[0]) + '\t' + repr(item[1]) + '\n')
     results = []

     print("Getting pairs of nodes that are not linked finished", datetime.today())
 def predict(u, v):
     cnbors = list(nx.common_neighbors(G, u, v))
     return len(cnbors)
 def predict(u, v):
     return sum(1 / math.log(G.degree(w))
                for w in nx.common_neighbors(G, u, v))
Example #52
G.add_edges_from(edges)

nx.write_gml(G, "actor_to_movie_all_movies.gml")

nodes_to_remove = []
for node in G.nodes():
	if G.degree(node) == 1:
		nodes_to_remove.append(node)

G.remove_nodes_from(nodes_to_remove)

nx.write_gml(G, "actor_to_movie_common_movies.gml")

for node in G.nodes():
	for node2 in G.nodes():
		if node2 == node:
			continue  # skip self-pairs (would create self-loops)
		if len(list(nx.common_neighbors(G, node, node2))) > 0:
			if not G.has_edge(node, node2):
				G.add_edge(node, node2, weight=1)
			else:
				G[node][node2]['weight'] += 1

movies = []
for node in G.nodes():
	if node not in top_100_actors:
		movies.append(node)

G.remove_nodes_from(movies)

nx.draw(G, with_labels=True)
plt.show()
Example #53
def get_dyads():
    offset = int(request.args.get('offset'))
    network_type = str(request.args.get('network_type'))


    cur = g.db.cursor(cursor_factory=psycopg2.extras.DictCursor)

    start = datetime.datetime(2015, 6, 27, 22, 0, 0, 0)
    if offset != 0:
        start = start + datetime.timedelta(minutes=10*offset)

    end = start + datetime.timedelta(minutes=10)

    cur.execute("""
    select pd.user_a, pd.user_b, lat, lon, c_time, dff.distinct_co_occurneces as distinct_grids, dff.same_concerts_jac, dff.same_camp_score from presentation_prediction_dyads pd
    inner join derived_friend_features dff on dff.user_a = pd.user_a and dff.user_b = pd.user_b
        where c_time between %s and %s
    """, (start, end, ))

    G = pickle.load(open("friends_graph.pkl", "rb")).to_undirected()

    nodes = []
    edges = []
    points = []

    degrees = G.degree()

    nodes_added = set()

    for dyads in cur.fetchall():
        points.append({
            'user_a': dyads['user_a'],
            'user_b': dyads['user_b'],
            'lat': float(dyads['lat']),
            'lon': float(dyads['lon'])
        })

        current_nodes = [dyads['user_a'], dyads['user_b']]
        # Add all neighbors

        if network_type == 'common':
            for neighbor in nx.common_neighbors(G, dyads['user_a'], dyads['user_b']):
                if neighbor not in nodes_added:
                    nodes.append((neighbor, 'blue'))
                    nodes_added.add(neighbor)

                for node in current_nodes:
                    edges.append((node, neighbor, 1))
        elif network_type=='no-neighbors':
            pass

        else:
            for node in current_nodes:
                for neighbor in G.neighbors(node):
                    if neighbor not in nodes_added and neighbor not in current_nodes:
                        nodes.append((neighbor, 'blue'))
                        nodes_added.add(neighbor)

                    edges.append((node, neighbor, 1))


        for node in current_nodes:
            if node not in nodes_added:
                nodes.append((node, 'red'))
                nodes_added.add(node)



        edges.append((dyads['user_a'], dyads['user_b'], dyads['distinct_grids']))

    new_nodes = [{'id': x[0], 'label':x[0], 'value': degrees[x[0]], 'color': x[1] } for x in list(set(nodes))]
    new_edges = []
    ids = []
    for edge in edges:
        _id = str(hash(':'.join([str(edge[0]), str(edge[1])])))
        if not _id in ids:
            ids.append(_id)
            new_edges.append({'from': edge[0], 'to': edge[1], 'value': edge[2], 'id': _id})

    print(str(sorted(Counter(ids).items())))

    response = {
            'points': points,
            'network': {
                'nodes': new_nodes,
                'edges': new_edges
            },
            'start': start.strftime("%Y-%m-%d %H:%M:%S"),
            'end': end.strftime("%Y-%m-%d %H:%M:%S")
        }

    return jsonify(response)
Example #54
 def predict(u, v):
     union_size = len(set(G[u]) | set(G[v]))
     if union_size == 0:
         return 0
     return len(list(nx.common_neighbors(G, u, v))) / union_size
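Most of the small predict(u, v) closures above share one pattern: score a candidate pair by a function of its common neighbors, then rank the non-edges by that score. A closing sketch of that workflow (the resource-allocation scorer mirrors several examples above; the ranking loop is illustrative, not taken from any single example):

import networkx as nx

G = nx.karate_club_graph()

def predict(u, v):
    # resource allocation: low-degree common neighbors count for more
    return sum(1 / G.degree(w) for w in nx.common_neighbors(G, u, v))

ranked = sorted(nx.non_edges(G), key=lambda pair: predict(*pair), reverse=True)
print(ranked[:5])  # the most likely future links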