Example #1
def data():

    #Networkx graph configuration
    G = nx.Graph()

    infile = open('redisdb.log')  # open the file for reading

    for line in infile:  # go through the input file, one line at a time
        line = line.strip()  # remove the newline character at the end of each line
        root, follower = line.split(',')  # split the line at the comma
        G.add_edge(root, follower)

    # Initialize graph, add nodes and edges, calculate modularity and centrality.
    groups = community.best_partition(G)
    degree = cn.degree_centrality(G)

    # Add node attributes for name, modularity, and three types of centrality.
    nx.set_node_attributes(G, groups, 'group')
    nx.set_node_attributes(G, degree, 'degree')

    # create json dictionary format for networkx edges
    data1 = json_graph.node_link_data(G)

    #output json file
    with open('static/data.json', 'w') as output:
        json.dump(data1,
                  output,
                  sort_keys=True,
                  indent=4,
                  separators=(',', ':'))

    return ''
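# Editorial sketch: the snippet above relies on aliases that it does not define.
# The imports below are an assumption inferred from how the names are used
# (cn for the networkx centrality module, community for python-louvain); they are
# not taken from the original project.
import json

import networkx as nx
import community  # python-louvain package, provides best_partition
from networkx.algorithms import centrality as cn
from networkx.readwrite import json_graph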
def centrality_analysis(G, is_directed=False):
    '''
    :param G: DiGraph() / Graph()
    :return: several types of centrality for each node
    '''
    nodes = G.nodes()
    if is_directed:
        in_dc = centrality.in_degree_centrality(G)
        out_dc = centrality.out_degree_centrality(G)
        bc = centrality.betweenness_centrality(G)
        ec = centrality.eigenvector_centrality(G)

        cent = {}
        for node in nodes:
            cent[node] = [in_dc[node], out_dc[node], bc[node], ec[node]]
        print(
            "Four types of centrality are calculated \n" +
            "\n\tin_degree_centrality\n\tout_degree_centrality\n\tbetweenness_centrality\n\teigenvector_centrality"
        )
        return cent
    else:
        dc = centrality.degree_centrality(G)
        bc = centrality.betweenness_centrality(G)
        ec = centrality.eigenvector_centrality(G)

        cent = {}
        for node in nodes:
            cent[node] = [dc[node], bc[node], ec[node]]
        print(
            "Three types of centrality are calculated \n" +
            "\n\tdegree_centrality\n\tbetweenness_centrality\n\teigenvector_centrality"
        )
        return cent
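# Editorial usage sketch (not part of the original example); it assumes the
# networkx centrality module is imported as `centrality`, as the body above requires.
import networkx as nx
from networkx.algorithms import centrality

g = nx.karate_club_graph()
cent = centrality_analysis(g)  # {node: [degree, betweenness, eigenvector]}
print(cent[0])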
Example #3
File: graph.py Project: youngflyasd/GSSNN
def get_centrality(x, edge_index, batch):
    num_graphs = batch[-1] + 1
    N = x.shape[0]
    num_nodes = scatter_add(batch.new_ones(x.size(0)), batch, dim=0)
    cum_num_nodes = torch.cat(
        [num_nodes.new_zeros(1),
         num_nodes.cumsum(dim=0)[:-1]], dim=0)
    cum_num_nodes = torch.cat((cum_num_nodes, torch.tensor([N]).cuda()))
    row, col = edge_index
    c_centrality = []
    d_centrality = []
    for i in range(num_graphs):
        '''each graph'''
        s_id = cum_num_nodes[i]
        e_id = cum_num_nodes[i + 1]
        mask = torch.eq(row, s_id)
        for node in range(s_id + 1, e_id):
            mask = mask + torch.eq(row, node)
        g_row = torch.masked_select(row, mask) - s_id
        g_col = torch.masked_select(col, mask) - s_id

        G = to_networkx(torch.stack([g_row, g_col], dim=0))
        c_centrality = c_centrality + list(closeness_centrality(G).values())
        d_centrality = d_centrality + list(degree_centrality(G).values())

    c_centrality = torch.Tensor(c_centrality).cuda()
    d_centrality = torch.Tensor(d_centrality).cuda()
    return c_centrality, d_centrality
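# Editorial note: the batched helper above presumably relies on imports along
# these lines (inferred from usage, not shown in the original file):
#
#     import torch
#     from torch_scatter import scatter_add
#     from networkx.algorithms.centrality import closeness_centrality, degree_centrality
#
# `to_networkx` is called here with a stacked edge_index tensor, so it is likely a
# project-local converter rather than torch_geometric.utils.to_networkx, which
# expects a Data object.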
Example #4
def get_centrality_labels(knn_graph_obj, perc_labeled, type='degree'):
        import random

        if type == 'degree':
                degree_centrality_knn = pd.DataFrame.from_dict(centrality.degree_centrality(knn_graph_obj), orient='index', columns=['value'])

                node_toget_labels = degree_centrality_knn.sort_values(by = 'value', ascending = False).index[0:int(perc_labeled*len(degree_centrality_knn.index))].tolist()
        elif type == 'closeness':
                closeness_centrality_knn = pd.DataFrame.from_dict(centrality.closeness_centrality(knn_graph_obj), orient='index', columns=['value'])

                node_toget_labels = closeness_centrality_knn.sort_values(by = 'value', ascending = False).index[0:int(perc_labeled*len(closeness_centrality_knn.index))].tolist()
        elif type == 'betweenness':
                betweenness_centrality_knn = pd.DataFrame.from_dict(centrality.betweenness_centrality(knn_graph_obj), orient='index', columns=['value'])

                node_toget_labels = betweenness_centrality_knn.sort_values(by = 'value', ascending = False).index[0:int(perc_labeled*len(betweenness_centrality_knn.index))].tolist()
        elif type == 'katz':
                katz_centrality_knn = pd.DataFrame.from_dict(centrality.katz_centrality(knn_graph_obj), orient='index', columns=['value'])

                node_toget_labels = katz_centrality_knn.sort_values(by = 'value', ascending = False).index[0:int(perc_labeled*len(katz_centrality_knn.index))].tolist()
        elif type == 'clustering':
                clustering_knn = pd.DataFrame.from_dict(clustering(knn_graph_obj), orient='index', columns=['value'])

                node_toget_labels = clustering_knn.sort_values(by = 'value', ascending = False).index[0:int(perc_labeled*len(clustering_knn.index))].tolist()
        else:
                indexes = list(knn_graph_obj.nodes)
                #print(indexes)
                node_toget_labels = random.sample(indexes, int(perc_labeled*len(indexes)))
                #print(node_toget_labels)

        return node_toget_labels
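# Editorial usage sketch (illustrative only). The function body above expects
# pandas as `pd`, the networkx centrality module as `centrality`, and `clustering`
# to be in scope; the imports below are an assumption to make the sketch runnable.
import networkx as nx
import pandas as pd
from networkx import clustering
from networkx.algorithms import centrality

knn_graph = nx.random_geometric_graph(50, 0.25, seed=0)
labeled = get_centrality_labels(knn_graph, perc_labeled=0.1, type='degree')
print(labeled)  # ids of the 5 nodes with the highest degree centrality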
def get_topological_features(G, nodes=None):
    N_ = len(G.nodes)
    if nodes is None:
        nodes = G.nodes
    # Degree centrality
    d_c = get_features(degree_centrality(G).values())
    print('a')
    # Betweenness centrality
    b_c = get_features(betweenness_centrality(G).values())
    print('b')

    # Closeness centrality
    c_c = get_features(closeness_centrality(G).values())
    print('c')
    # Clustering
    c = get_features(clustering(G).values())
    print('d')

    d = diameter(G)
    r = radius(G)

    s_p_average = []
    for s in shortest_path_length(G):
        dic = s[1]
        lengths = dic.values()
        s_p_average += [sum(lengths) / float(N_)]

    s_p_average = get_features(s_p_average)

    features = np.concatenate((d_c, b_c, c_c, c, s_p_average, [d], [r]),
                              axis=0)

    return features
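# Editorial note: `get_features` is a project-local helper that is not shown here;
# judging by its use it reduces a distribution of node-level values to a
# fixed-length summary vector. `diameter` and `radius` raise an exception on a
# disconnected graph, so G is assumed to be connected.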
 def __init__(self,numberOfNode,totalPowerForEachNode):
     #self.graph = nx.powerlaw_cluster_graph(numberOfNode,(int)(numberOfNode/10),0.1)
     self.graph = nx.barabasi_albert_graph(numberOfNode,(int)(numberOfNode/20))
     self.currentPower=[0]*numberOfNode
     self.totalPower=[totalPowerForEachNode]*numberOfNode
     self.MeanPower=[0]*numberOfNode
     self.degreeCentralityCoef=list(Centrality.degree_centrality(self.graph).values())
def get_layer_info(subject, journal_volume, edge_list):

    G = nx.Graph()
    G.add_weighted_edges_from(edge_list)

    PATH = "C:/Users/hexie/Documents/APS_result/" + str(
        journal_volume) + "/" + str(subject)

    try:
        os.mkdir(PATH)
        os.chdir(PATH)
    except:
        os.chdir(PATH)

    degree_centrality = nxc.degree_centrality(G)
    try:
        eigen_vector_centrality = nxc.eigenvector_centrality(G)
        np.save("eigen_vector_centrality.npy", eigen_vector_centrality)
    except:
        print("fail to converge within 100 iterations of power")

    closeness_centrality = nxc.closeness_centrality(G)
    betweenness_centrality = nxc.betweenness_centrality(G)

    np.save("degree_centrality.npy", degree_centrality)
    np.save("closeness_centrality.npy", closeness_centrality)
    np.save("betweeness_centrality.npy", betweeness_centrality)

    with open(str(subject) + str(journal_volume) + ".txt", 'w') as f:
        f.write('Number of Edges: ' + str(nx.number_of_edges(G)) + "\n")
        f.write('Number of Nodes: ' + str(nx.number_of_nodes(G)) + "\n")

    nx.draw(G)
    plt.savefig(str(subject) + str(journal_volume) + ".png")
    plt.clf()
Example #8
def get_centrality_labels(knn_graph_obj, type='degree'):
    import random

    if type == 'degree':
        node_toget_labels = pd.DataFrame.from_dict(
            centrality.degree_centrality(knn_graph_obj),
            orient='index',
            columns=['value'])
    elif type == 'closeness':
        node_toget_labels = pd.DataFrame.from_dict(
            centrality.closeness_centrality(knn_graph_obj),
            orient='index',
            columns=['value'])
    elif type == 'betweenness':
        node_toget_labels = pd.DataFrame.from_dict(
            centrality.betweenness_centrality(knn_graph_obj),
            orient='index',
            columns=['value'])
    elif type == 'clustering':
        node_toget_labels = pd.DataFrame.from_dict(clustering(knn_graph_obj),
                                                   orient='index',
                                                   columns=['value'])
    else:
        node_toget_labels = list(knn_graph_obj.nodes)
        #print(node_toget_labels)

    return node_toget_labels
 def findingCentrality(self,numberOfCenralityNode):
     degreeCentralityArray=list(Centrality.degree_centrality(self.graph).values())
     sortedDegreeCentralityArray=sorted(degreeCentralityArray)
     IndexOfSortedDegreeCentralityArray=sorted(range(len(degreeCentralityArray)), key=lambda x: degreeCentralityArray[x])
     output=[]
     for i in range(0,numberOfCenralityNode):
         output.append([IndexOfSortedDegreeCentralityArray[-i-1],sortedDegreeCentralityArray[-i-1]])
     return output
Example #10
 def extract_degree_centrality(self):
     output = open(
         'output/' + self.set_ + '/' + self.set_ + '_degree_centrality.csv',
         'w')
     print('Calculating degree centrality')
     nodes = centrality.degree_centrality(self.G)
     for key in nodes:
         output.write(str(key) + ',' + str(nodes[key]) + '\n')
Example #11
def write_highest_degree_cent(temp, file_degree_centr, stop):
    dc_high = sort_dictionary_by_value_desc(central.degree_centrality(temp))
    dc_high_count = Counter(dc_high)
    writer = csv.writer(file_degree_centr, delimiter=';')
    row = [stop.date()]
    for k, v in dc_high_count.most_common(5):
        row.append('%s: %f' % (k.replace(',', ''), v))
    writer.writerow(row)
    return
Example #12
File: graph.py Project: youngflyasd/GSSNN
def get_degree_centrality(dataset):
    centrality = []
    for data in dataset:
        '''each graph'''
        g_row, g_col = data.edge_index
        G = to_networkx(torch.stack([g_row, g_col], dim=0))
        c_centrality = list(degree_centrality(G).values())
        centrality = centrality + c_centrality

    return centrality
def compute_metrics(graph):

    G = json_graph.node_link_graph(graph, multigraph=False)
    degree_centrality = centrality.degree_centrality(G)
    closeness_centrality = centrality.closeness_centrality(G)
    betweenness_centrality = centrality.betweenness_centrality(G)
    page_rank = link_analysis.pagerank_alg.pagerank(G)
    max_clique = approximation.clique.max_clique(G)
    diameters = [distance_measures.diameter(g) for g in connected_component_subgraphs(G)]

    copy = dict()

    copy['id'] = graph['id']
    copy['name'] = graph['name']
    copy['graph'] = dict()
    copy['graph']['nodes'] = graph['nodes']
    copy['graph']['links'] = graph['links']
    copy['metrics'] = dict()

    # diameters
    copy['metrics']['diameter'] = dict()
    copy['metrics']['diameter']['all'] = diameters
    copy['metrics']['diameter']['max'] = max(diameters)
    copy['metrics']['diameter']['average'] = float(sum(diameters)) / float(len(diameters))

    # clique size
    copy['metrics']['maxClique'] = len(list(max_clique))

    # degree centrality
    copy['metrics']['degreeCentrality'] = dict()
    copy['metrics']['degreeCentrality']['byId'] = degree_centrality
    copy['metrics']['degreeCentrality']['max'] = max(degree_centrality.values())
    copy['metrics']['degreeCentrality']['average'] = float(sum(degree_centrality.values())) / float(len(degree_centrality.values()))

    # closeness centrality
    copy['metrics']['closenessCentrality'] = dict()
    copy['metrics']['closenessCentrality']['byId'] = closeness_centrality
    copy['metrics']['closenessCentrality']['max'] = max(closeness_centrality.values())
    copy['metrics']['closenessCentrality']['average'] = float(sum(closeness_centrality.values())) / float(len(closeness_centrality.values()))

    # betweenness centrality
    copy['metrics']['betweennessCentrality'] = dict()
    copy['metrics']['betweennessCentrality']['byId'] = betweenness_centrality
    copy['metrics']['betweennessCentrality']['max'] = max(betweenness_centrality.values())
    copy['metrics']['betweennessCentrality']['average'] = float(sum(betweenness_centrality.values())) / float(len(betweenness_centrality.values()))

    # page rank
    copy['metrics']['pageRank'] = dict()
    copy['metrics']['pageRank']['byId'] = page_rank
    copy['metrics']['pageRank']['max'] = max(page_rank.values())
    copy['metrics']['pageRank']['average'] = float(sum(page_rank.values())) / float(len(page_rank.values()))

    return copy
Example #14
def embedding_method(digraph, p_id):
    # return graph features such as degree, shortest paths, ...
    # degree centrality (number of connections)
    indegree = centrality.degree_centrality(digraph)
    # eigenvector centrality
    node_centrality = centrality.eigenvector_centrality_numpy(digraph)
    # number of cliques
    # clique = nx.algorithms.clique.number_of_cliques(digraph.to_undirected())
    dict2matrix = lambda x: pd.DataFrame.from_dict(
        x, orient='index').values[p_id]
    concats = list(map(dict2matrix, [indegree, node_centrality]))
    design_matrix = np.concatenate(concats, axis=1)
    return design_matrix
Example #15
File: graph.py Project: ielm/hw3
def analyze_graph(graph: 'TopicGraph'):
    users = {}
    for node in graph.nodes():
        if isinstance(node, User):
            ins = [f"{e}" for e in graph.in_edges(node)]
            outs = [f"{e}" for e in graph.out_edges(node)]
            if node.id not in users.keys():
                user = {"NODE": node,
                        "INS": len(ins),
                        "OUTS": len(outs)}
                users[node.id] = user
            else:
                users[node.id]["INS"] += len(ins)
                users[node.id]["OUTS"] += len(outs)
    centrality = degree_centrality(graph)
    user_stats = []
    for u in [(k, v) for k, v in sorted(users.items(), key=lambda item: item[1]["INS"], reverse=True)]:
        user_posts = get_user_posts(graph, u[1]["NODE"])
        sentiment = average_user_sentiment(user_posts)

        u[1]["SENTIMENT"] = sentiment
        u[1]["NUM_POSTS"] = len(user_posts)
        u[1]["AVG_POST_LEN"] = sum([len(p.text) for p in user_posts]) / len(user_posts)
        u[1]["NUM_TOPICS"] = len(get_user_topics(graph, u[1]["NODE"]))

        for k in centrality.keys():
            if f"{k}" == f"{u[1]['NODE'].id}":
                u[1]["CENTRALITY_SCORE"] = centrality[k]
        user_stats.append(u)

    num_in_edges = float(sum(i[1]["INS"] for i in user_stats))

    """Please don't mind the mess that follows... pay attention to the other, prettier code... 😅"""

    for u in user_stats:
        u[1]["EDGE_SCORE"] = float(u[1]["INS"]) / num_in_edges
        u[1]["TOPIC_SCORE"] = float(u[1]["NUM_TOPICS"]) / len(get_all_posts(graph))
        u[1]["AVG_POST_LEN_SCORE"] = float(u[1]["AVG_POST_LEN"]) / sum([l[1]["AVG_POST_LEN"] for l in user_stats])
    for u in user_stats:
        _u = u[1]
        influence = avg([_u[score] for score in _u.keys() if "SCORE" in score])
        # weight = float(_u["NUM_POSTS"])/len(get_all_posts(graph))
        _u["RAW_INFLUENCE"] = influence
    inorm = sum(i[1]["RAW_INFLUENCE"] for i in user_stats)
    for u in user_stats:
        _u = u[1]
        _u["INFLUENCE"] = float(_u["RAW_INFLUENCE"]) / inorm
    for i, u in enumerate(sorted(user_stats, key=lambda item: item[1]["INFLUENCE"], reverse=True)):
        _u = u[1]
        _u["RANK"] = i
    return user_stats
Example #16
def calc_node_based_centrality(edge_index, centrality='degree'):
    adj_list = edge_index.numpy().T
    G = nx.Graph()
    G.add_edges_from(adj_list)
    if centrality == 'degree':
        nodes_centrality = degree_centrality(G)
    elif centrality == 'eigenvector':
        nodes_centrality = eigenvector_centrality(G)
    elif centrality == "closeness":
        nodes_centrality = closeness_centrality(G)
    else:
        print(centrality, "is not defined")
        exit(1)

    edges_centrality = dict()
    for u, v in adj_list:
        edges_centrality[(u, v)] = nodes_centrality[u] * nodes_centrality[v]
    return edges_centrality
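# Editorial usage sketch (illustrative only). The function body above expects
# networkx as `nx` and the three centrality functions at module level; the
# imports below are an assumption to make the sketch runnable.
import torch
import networkx as nx
from networkx.algorithms.centrality import (closeness_centrality,
                                            degree_centrality,
                                            eigenvector_centrality)

edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]])
edge_scores = calc_node_based_centrality(edge_index, centrality='degree')
# each edge is scored by the product of its endpoints' degree centralities
print(edge_scores)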
Example #17
def top_nodes(G, k=3):
    """
        Returns the top k nodes for various
        centrality measures: degree, 
        betweennes and closeness.
        
        Args:
            G (nx.Graph): graph for which the 
                top nodes must be determined.
            
            k (int): number of top nodes to return.
                if set to -ve, all the nodes will be
                returned.
            
        Returns:
            res_dict (dict): dictionary of each centrality
                measure with list of top k nodes in that 
                measure as values to the dictionary.
    """
    # fraction of nodes in the graph each node is connected to
    node_deg_dict = centrality.degree_centrality(G)
    # fraction of all-pairs shortest paths that pass through each node
    node_btw_dict = centrality.betweenness_centrality(G)
    # reciprocal of the average shortest-path distance from each node
    node_clo_dict = centrality.closeness_centrality(G)

    # sort nodes by each centrality measure in decreasing order
    top_k_deg_nodes = sorted(node_deg_dict.items(), key=lambda x: -x[1])
    top_k_btw_nodes = sorted(node_btw_dict.items(), key=lambda x: -x[1])
    top_k_clo_nodes = sorted(node_clo_dict.items(), key=lambda x: -x[1])

    # pick the top k nodes
    res_dict = dict()
    if k > 0:
        res_dict["degree"] = list(zip(*top_k_deg_nodes[:k]))[0]
        res_dict["betweenness"] = list(zip(*top_k_btw_nodes[:k]))[0]
        res_dict["closeness"] = list(zip(*top_k_clo_nodes[:k]))[0]

    else:
        res_dict["degree"] = list(zip(*top_k_deg_nodes))[0]
        res_dict["betweenness"] = list(zip(*top_k_btw_nodes))[0]
        res_dict["closeness"] = list(zip(*top_k_clo_nodes))[0]

    return res_dict
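# Editorial usage sketch (illustrative only; assumes `centrality` is the networkx
# centrality module, as used in the function body above).
import networkx as nx
from networkx.algorithms import centrality

G = nx.karate_club_graph()
print(top_nodes(G, k=3))  # {'degree': (...), 'betweenness': (...), 'closeness': (...)}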
Example #18
def parse(name):
    print(name)
    pathbase = path.abspath(path.dirname(__file__))
    G = nx.Graph()
    data = json.load(open('{0}/{1}.json'.format(pathbase, name)))
    nodes = data['nodes']
    text = {i: node['text'] for i, node in enumerate(nodes)}
    weight = {i: float(node['weight']) for i, node in enumerate(nodes)}
    for i in range(len(nodes)):
        G.add_node(i)
    for link in data['links']:
        G.add_edge(link['source'], link['target'])

    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweenness = centrality.betweenness_centrality(G)
    #edge_betweenness = centrality.edge_betweenness_centrality(G)
    #current_flow_closeness = centrality.current_flow_closeness_centrality(G)
    #current_flow_betweenness =\
    #    centrality.current_flow_betweenness_centrality(G)
    try:
        eigenvector = centrality.eigenvector_centrality(G, max_iter=1000)
    except:
        eigenvector = {i: 0 for i in range(len(nodes))}
    katz = centrality.katz_centrality(G)

    obj = {'nodes': [], 'links': data['links']}
    for i in range(len(nodes)):
        obj['nodes'].append({
            'text': text[i],
            'weight': weight[i],
            'degree': degree[i],
            'closeness': closeness[i],
            'betweenness': betweenness[i],
            #'edge_betweenness': edge_betweenness[i],
            #'current_flow_closeness': current_flow_closeness[i],
            #'current_flow_betweenness': current_flow_betweenness[i],
            'eigenvector': eigenvector[i],
            'katz': katz[i],
        })
    json.dump(obj,
              open('{0}/../data/{1}.json'.format(pathbase, name), 'w'),
              sort_keys=True)
Example #19
    def get_central_nodes(nodes, parts, full_network, num_nodes, page_rank):
        central_flags = [0]*len(parts)

        for part_id in set(parts):
            part_nodes = []
            pr_centrality = {}
            for i, node in enumerate(nodes):
                if parts[i] == part_id:
                    part_nodes.append(node)
                    if len(page_rank) > 0:
                        pr_centrality[node] = page_rank[node]

            if len(page_rank) == 0:
                sub_graph = full_network.subgraph(part_nodes)
                centrality = degree_centrality(sub_graph)
                centrality_sorted = [x for x in sorted(centrality, key=centrality.get, reverse=True)]
            else:
                centrality_sorted = [x for x in sorted(pr_centrality, key=pr_centrality.get, reverse=True)]

            for central_node in centrality_sorted[0: num_nodes]:
                central_flags[nodes.index(central_node)] = 1

        return central_flags
Example #20
def degree_centrality(graph):
    """degree_centrality"""
    return list(centrality.degree_centrality(graph).values())
Example #21
def get_degree_centrality(G, **kwargs):
    """Returns a dictionary of degree centrality values for all nodes.
    """

    # Compute and return the degree centrality
    return nxc.degree_centrality(G)
Example #22
#Specify node colors
d = list(G.nodes)
g_dict = {}
for i in range(len(c)):
    g_dict[i] = i

c_list = []
for j in range(len(d)):
    for n in range(len(c)):
        if d[j] in c[n]:
            c_list.append(g_dict[n])

#Centrality analysis
#Degree centrality
cent_values = degree_centrality(G).values()
cent_central = degree_centrality(G)
cent_keys = degree_centrality(G).keys()

#Closeness centrality
d_values = closeness_centrality(G).values()
d_central = closeness_centrality(G)
d_keys = closeness_centrality(G).keys()

#Betweenness centrality
bet_values = betweenness_centrality(G).values()
bet_central = betweenness_centrality(G)
bet_keys = betweenness_centrality(G).keys()

#sorted(d_dict.values())
d_values_list = list(d_values)
Example #23
    def compute_features(self):
        # Degree centrality
        degree_centrality = lambda graph: list(
            centrality.degree_centrality(graph).values())
        self.add_feature(
            "degree centrality",
            degree_centrality,
            "The degree centrality distribution",
            InterpretabilityScore(5),
            statistics="centrality",
        )

        # Betweenness Centrality
        betweenness_centrality = lambda graph: list(
            centrality.betweenness_centrality(graph).values())
        self.add_feature(
            "betweenness centrality",
            betweenness_centrality,
            "Betweenness centrality of a node v is the sum of the fraction of \
            all-pairs shortest paths that pass through v",
            InterpretabilityScore(5),
            statistics="centrality",
        )

        # Closeness centrality
        closeness_centrality = lambda graph: list(
            centrality.closeness_centrality(graph).values())
        self.add_feature(
            "closeness centrality",
            closeness_centrality,
            "Closeness is the reciprocal of the average shortest path distance",
            InterpretabilityScore(5),
            statistics="centrality",
        )

        # Edge betweenness centrality
        def edge_betweenness_centrality(graph):
            if graph.edges:
                return list(
                    centrality.edge_betweenness_centrality(graph).values())
            return [np.nan]

        self.add_feature(
            "edge betweenness centrality",
            edge_betweenness_centrality,
            "Betweenness centrality of an edge e is the sum of the fraction of \
            all-pairs shortest paths that pass through e",
            InterpretabilityScore(4),
            statistics="centrality",
        )

        # Harmonic centrality
        harmonic_centrality = lambda graph: list(
            centrality.harmonic_centrality(graph).values())
        self.add_feature(
            "harmonic centrality",
            harmonic_centrality,
            "Harmonic centrality of a node u is the sum of the reciprocal \
            of the shortest path distances from all other nodes to u",
            InterpretabilityScore(4),
            statistics="centrality",
        )

        # Subgraph centrality
        subgraph_centrality = lambda graph: list(
            centrality.subgraph_centrality(graph).values())
        self.add_feature(
            "subgraph centrality",
            subgraph_centrality,
            "The subgraph centrality for a node is the sum of weighted closed walks \
            of all lengths starting and ending at that node.",
            InterpretabilityScore(3),
            statistics="centrality",
        )

        # Second order centrality
        second_order_centrality = lambda graph: list(
            centrality.second_order_centrality(utils.ensure_connected(graph)).
            values())

        self.add_feature(
            "second order centrality",
            second_order_centrality,
            "The second order centrality of a given node is the standard deviation \
            of the return times to that node of a perpetual random walk on G",
            InterpretabilityScore(4),
            statistics="centrality",
        )

        # Eigenvector centrality
        eigenvector_centrality = lambda graph: list(
            centrality.eigenvector_centrality_numpy(
                utils.ensure_connected(graph)).values())
        self.add_feature(
            "eigenvector centrality",
            eigenvector_centrality,
            "Eigenvector centrality computes the centrality for a node based \
            on the centrality of its neighbors",
            InterpretabilityScore(4),
            statistics="centrality",
        )

        # Katz centrality
        katz_centrality = lambda graph: list(
            centrality.katz_centrality_numpy(utils.ensure_connected(graph)).
            values())
        self.add_feature(
            "katz centrality",
            katz_centrality,
            "Generalisation of eigenvector centrality - Katz centrality computes the \
            centrality for a node based on the centrality of its neighbors",
            InterpretabilityScore(4),
            statistics="centrality",
        )

        # Page Rank
        pagerank = lambda graph: list(nx.pagerank_numpy(graph).values())
        self.add_feature(
            "pagerank",
            pagerank,
            "The pagerank computes a ranking of the nodes in the graph based on \
            the structure of the incoming links. ",
            InterpretabilityScore(4),
            statistics="centrality",
        )
Example #24
def update_properties():
    partition = community.best_partition(network)
    p_, nodes_community = zip(*sorted(partition.items()))
    nodes_source.data['community'] = nodes_community
    nodes_source.data['community_color'] = [community_colors[t % len(community_colors)]
                                            for t in nodes_community]
    centrality = centrality_metrics[select_centrality.value](network, weight='weight')
    _, nodes_centrality = zip(*sorted(centrality.items()))
    nodes_source.data['centrality'] = [7 + 10 * t / max(nodes_centrality) for t in nodes_centrality]


update_props_button = Button(label="Update Properties")
update_props_button.on_click(update_properties)

centrality_metrics = {"Degree Centrality":
                          lambda n, weight='_': centrality_algorithms.degree_centrality(n),
                      "Closeness Centrality":
                          lambda n, weight='_': centrality_algorithms.closeness_centrality(n),
                      "Betweenness Centrality":
                          centrality_algorithms.betweenness_centrality}


def update_centrality(attrname, old, new):
    centrality = centrality_metrics[select_centrality.value](network, weight='weight')
    _, nodes_centrality = zip(*sorted(centrality.items()))
    nodes_source.data['centrality'] = [7 + 10 * t / max(nodes_centrality) for t in nodes_centrality]


select_centrality = Select(title="Centrality Metric:", value="Degree Centrality",
                           options=list(centrality_metrics.keys()))
select_centrality.on_change('value', update_centrality)
 print(fname)
 try:
     G = read_dot(os.path.join("output", fname))
     nx.draw(G)
 except:
     print("cannot load graph")
     continue
 if G.number_of_nodes() == 0:
     print("Cannot read binary file")
     continue
 data = []
 data.append(fname)
 data.append(G.number_of_nodes())
 data.append(G.number_of_edges())
 data.append(density(G))
 deg_centrality = degree_centrality(G)
 data.extend(properties_of_array(deg_centrality))
 cln_centrality = closeness_centrality(G)
 data.extend(properties_of_array(cln_centrality))
 btn_centrality = betweenness_centrality(G)
 data.extend(properties_of_array(btn_centrality))
 st_path = shortest_path(G)
 deg = [len(val) for key, val in st_path.items()]
 d = np.array(deg)
 data.extend(
     [np.min(d),
      np.max(d),
      np.median(d),
      np.mean(d),
      np.std(d)])
 try:
Example #26
corpo_pairs_list = open('./data/corpo_pairs_res.txt').readlines()

G = nx.Graph()
name_index_list = {}
index = 0
for _pair in corpo_pairs_list:
    _, pair_a, _, pair_b, _ = _pair.split(',')
    if pair_a not in name_index_list:
        name_index_list[pair_a] = str(index)
        index += 1
    if pair_b not in name_index_list:
        name_index_list[pair_b] = str(index)
        index += 1
    # print(pair_a + ',' + pair_b)
    G.add_edge(pair_a, pair_b)
# nx.draw(G)
# plt.savefig("path.png")
from networkx.algorithms.centrality import degree_centrality, closeness_centrality,betweenness_centrality,communicability_betweenness_centrality
degree_res = degree_centrality(G)
closeness_res = closeness_centrality(G)
betweenness_res = communicability_betweenness_centrality(G)


centrality_out = open('./data/centrality.txt', 'w')
centrality_out.write('pattern,degree,closeness,betweenness')
for key, value in degree_res.items():
    centrality_out.write('\n' + key + '\t %.2f \t %.2f \t %.2f'%(value,closeness_res[key],betweenness_res[key]))
centrality_out.close()
print(degree_res)
print(closeness_res)
print(betweenness_res)
Example #27
class Create_network():

    centrality_metrics = {
        "Degree Centrality":
        lambda n, weight='_': centrality_algorithms.degree_centrality(n),
        "Closeness Centrality":
        lambda n, weight='_': centrality_algorithms.closeness_centrality(n),
        "Betweenness Centrality":
        centrality_algorithms.betweenness_centrality
    }

    community_colors = ['#e41a1c','#377eb8','#4daf4a','#984ea3','#ff7f00','#ffff33','#a65628', \
                            '#b3cde3','#ccebc5','#decbe4','#fed9a6','#ffffcc','#e5d8bd','#fddaec',\
                            '#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02','#a6761d','#666666']

    def __init__(self,
                 network_file,
                 layout_file,
                 count_path,
                 title,
                 width=800,
                 thresh_val=8):
        self.network_file = network_file
        self.layout_file = layout_file
        self.count_path = count_path
        self.network_tuple = self.load_network(network_file, layout_file)
        self.nodes_sources_tab1 = self.column_source(self.network_tuple[1],
                                                     count_path)
        self.network_plots_n_circle_tab1 = self.create_network_plot(
            self.nodes_sources_tab1, title, width)
        self.network_lines_tab1 = self.add_lines(
            self.network_tuple, self.network_plots_n_circle_tab1[0])
        self.get_centrality_n_community(self.network_tuple[0],
                                        self.nodes_sources_tab1,
                                        self.network_plots_n_circle_tab1[1])
        self.drop_button_tab1 = Button(label="Remove Node",
                                       button_type="warning")
        self.drop_button_tab1.on_click(self.remove_node_tab1)
        self.remove_unattached_button = Button(label="Remove unattached nodes",
                                               button_type="success")
        self.remove_unattached_button.on_click(self.remove_unbound_nodes)
        self.update_props_button = Button(label="Update Properties",
                                          button_type="warning")
        self.update_props_button.on_click(self.update_properties)
        self.update_layout_button = Button(label="Update Layout",
                                           button_type="success")
        self.update_layout_button.on_click(self.update_layout)
        self.select_centrality = Select(title="Centrality Metric:",
                                        value="Degree Centrality",
                                        options=list(
                                            self.centrality_metrics.keys()))
        self.select_centrality.on_change('value', self.update_centrality)
        self.slider = Slider(start=0,
                             end=10,
                             value=0,
                             step=1,
                             title="Threshold %")
        self.slider.on_change('value', self.filter_threshold)
        self.slider.value = thresh_val
        #self.filter_threshold('',0,3)

    def reinit(self, network_file, layout_file, count_path, title):
        lines_source = self.network_lines_tab1
        nodes_source = self.nodes_sources_tab1
        self.network_file = network_file
        self.layout_file = layout_file
        self.count_path = count_path
        self.network_plots_n_circle_tab1[0].title.text = title
        self.network_tuple = self.load_network(network_file, layout_file)
        network, layout = self.network_tuple
        print('loaded new network')
        nodes, nodes_coordinates = zip(*sorted(layout.items()))
        count_dict = dict(pickle.load(open(self.count_path, 'rb')))
        nodes_xs, nodes_ys = list(zip(*nodes_coordinates))
        node_occurances = [count_dict[node] for node in nodes]
        nodes_source.data['x'] = nodes_xs
        nodes_source.data['y'] = nodes_ys
        nodes_source.data['name'] = nodes
        nodes_source.data['counts'] = node_occurances
        lines_source.data = self.get_edges_specs(network, layout)
        self.update_properties()
        self.slider.value = 8
        self.filter_threshold('', 0, 8)

    def load_network(self, network_file, layout_file):
        network = pickle.load(open(network_file, 'rb'))
        layout = pickle.load(open(layout_file, 'rb'))
        return (network, layout)

    def column_source(self, layout, count_path):
        nodes, nodes_coordinates = zip(*sorted(layout.items()))
        count_dict = dict(pickle.load(open(count_path, 'rb')))
        nodes_xs, nodes_ys = list(zip(*nodes_coordinates))
        node_occurances = [count_dict[node] for node in nodes]
        nodes_source = ColumnDataSource(
            dict(x=nodes_xs, y=nodes_ys, name=nodes, counts=node_occurances))
        return nodes_source

    def create_network_plot(self, nodes_source, title='', width=800):
        plot = figure(plot_width=width,
                      plot_height=700,
                      tools=['tap', 'box_zoom', 'reset', 'pan', 'wheel_zoom'],
                      title=title)
        plot.title.text_font = "helvica"
        plot.title.text_font_style = "bold"
        plot.title.text_font_size = "20px"
        plot.background_fill_color = "beige"
        plot.background_fill_alpha = 0.2
        g1 = Circle(x='x', y='y', size=2, fill_color='blue')
        g1_r = plot.add_glyph(source_or_glyph=nodes_source, glyph=g1)
        g1_hover = HoverTool(renderers=[g1_r],
                             tooltips=[('name', '@name'),
                                       ('count', '@counts')])
        glyph_text = Text(x="x",
                          y="y",
                          text="name",
                          text_color="#ff4a4a",
                          text_font_size='6pt',
                          text_alpha=0.7)

        plot.add_glyph(nodes_source, glyph_text)
        plot.add_tools(g1_hover)
        plot.grid.grid_line_color = None
        plot.axis.visible = False
        return plot, g1_r, glyph_text

    def get_edges_specs(self, _network, _layout):
        d = dict(xs=[], ys=[], alphas=[])
        weights = [d['weight'] for u, v, d in _network.edges(data=True)]
        max_weight = max(weights)
        calc_alpha = lambda h: 0.1 + 0.6 * (h / max_weight)

        for u, v, data in _network.edges(data=True):
            d['xs'].append([_layout[u][0], _layout[v][0]])
            d['ys'].append([_layout[u][1], _layout[v][1]])
            d['alphas'].append(calc_alpha(data['weight']))
        return d

    def add_lines(self, network_tuple, plot):
        lines_source = ColumnDataSource(self.get_edges_specs(*network_tuple))
        r_lines = plot.multi_line('xs',
                                  'ys',
                                  line_width=2,
                                  alpha='alphas',
                                  color='navy',
                                  source=lines_source)
        return lines_source

    def get_centrality_n_community(self, network, nodes_source, g1_r):
        community_colors = self.community_colors
        centrality = networkx.algorithms.centrality.degree_centrality(network)
        # the first element is the list of nodes again
        _, nodes_centrality = zip(*sorted(centrality.items()))
        nodes_source.add(
            [7 + 10 * t / max(nodes_centrality) for t in nodes_centrality],
            'centrality')

        partition = community.best_partition(network)
        p_, nodes_community = zip(*sorted(partition.items()))
        nodes_source.add(nodes_community, 'community')

        nodes_source.add([community_colors[t % len(community_colors)]\
                      for t in nodes_community], 'community_color')
        g1_r.glyph.size = 'centrality'
        g1_r.glyph.fill_color = 'community_color'

    def remove_node_1_net(self, nodes_source, lines_source, network, layout):
        print('line 92')
        print(type(nodes_source.selected['1d']['indices']))
        print(len(nodes_source.selected['1d']['indices']))
        if (nodes_source.selected['1d']['indices']):
            idx = nodes_source.selected['1d']['indices'][0]
        else:
            return
        # update networkX network object
        node = nodes_source.data['name'][idx]
        network.remove_node(node)
        print('line 97')
        # update layout
        layout.pop(node)

        # update nodes ColumnDataSource
        new_source_data = dict()
        for col in nodes_source.column_names:
            print('line 104')
            new_source_data[col] = [
                e for i, e in enumerate(nodes_source.data[col]) if i != idx
            ]
        nodes_source.data = new_source_data

        # update lines ColumnDataSource
        lines_source.data = self.get_edges_specs(network, layout)

    def remove_node_tab1(self):
        self.remove_node_1_net(self.nodes_sources_tab1,
                               self.network_lines_tab1, *self.network_tuple)

    def remove_unbound_nodes(self):
        network, layout = self.network_tuple
        lines_source = self.network_lines_tab1
        nodes_source = self.nodes_sources_tab1
        unbound_nodes = []
        for node in network.nodes():
            if not network.edges(node):
                unbound_nodes.append(node)
        for node in unbound_nodes:
            network.remove_node(node)
            layout.pop(node)

        nodes, nodes_coordinates = zip(*sorted(layout.items()))
        count_dict = dict(pickle.load(open(self.count_path, 'rb')))
        nodes_xs, nodes_ys = list(zip(*nodes_coordinates))
        node_occurances = [count_dict[node] for node in nodes]
        nodes_source.data['x'] = nodes_xs
        nodes_source.data['y'] = nodes_ys
        nodes_source.data['name'] = nodes
        nodes_source.data['counts'] = node_occurances
        self.update_properties()
        lines_source.data = self.get_edges_specs(network, layout)

    def update_properties(self):
        community_colors = self.community_colors
        network, layout = self.network_tuple
        nodes_source = self.nodes_sources_tab1
        partition = community.best_partition(network)
        p_, nodes_community = zip(*sorted(partition.items()))

        nodes_source.data['community'] = nodes_community
        nodes_source.data['community_color'] = [
            community_colors[t % len(community_colors)]
            for t in nodes_community
        ]
        centrality = self.centrality_metrics[self.select_centrality.value](
            network, weight='weight')
        _, nodes_centrality = zip(*sorted(centrality.items()))
        nodes_source.data['centrality'] = [
            7 + 10 * t / max(nodes_centrality) for t in nodes_centrality
        ]

    def update_centrality(self, attrname, old, new):
        network, _ = self.network_tuple
        nodes_source = self.nodes_sources_tab1
        centrality = self.centrality_metrics[self.select_centrality.value](
            network, weight='weight')
        _, nodes_centrality = zip(*sorted(centrality.items()))
        nodes_source.data['centrality'] = [
            7 + 10 * t / max(nodes_centrality) for t in nodes_centrality
        ]

    def update_layout(self):
        network, layout = self.network_tuple
        lines_source = self.network_lines_tab1
        nodes_source = self.nodes_sources_tab1
        new_layout = networkx.spring_layout(network,
                                            k=1.1 /
                                            sqrt(network.number_of_nodes()),
                                            iterations=100)
        layout = new_layout
        nodes, nodes_coordinates = zip(*sorted(layout.items()))
        nodes_xs, nodes_ys = list(zip(*nodes_coordinates))
        nodes_source.data['x'] = nodes_xs
        nodes_source.data['y'] = nodes_ys
        lines_source.data = self.get_edges_specs(network, layout)

    def filter_threshold(self, attrname, old, new):
        network, layout = self.network_tuple
        if (old == new):
            return
        if (old > new):
            self.network_tuple = self.load_network(self.network_file,
                                                   self.layout_file)
            network, layout = self.network_tuple
        weights = [d['weight'] for u, v, d in network.edges(data=True)]
        max_weight = max(weights)
        min_weight = min(weights)
        threshold = (new * (max_weight - min_weight) / 100.0)
        to_remove_list = []
        sources_in = set()
        for (u, v, d) in network.edges(data='weight'):
            if (d < threshold):
                if (((u, v, d) in sources_in) or ((v, u, d) in sources_in)):
                    continue
                to_remove_list.append((u, v))
                sources_in.add((u, v, d))
        network.remove_edges_from(to_remove_list)
        self.remove_unbound_nodes()
        font_size = 6 + new
        font_size = min(10, font_size)
        self.network_plots_n_circle_tab1[2].text_font_size = '{}pt'.format(
            font_size)
        self.update_layout()

    def return_view(self):
        return column(self.network_plots_n_circle_tab1[0],row(widgetbox(self.slider,self.select_centrality),\
                      widgetbox(self.drop_button_tab1,self.remove_unattached_button),\
                      widgetbox(self.update_props_button, self.update_layout_button,)))
Example #28
def worker(nproc):
    def _print(*args, **kwargs):
        # Avoid printing the same stuff multiple times
        if nproc == 0:
            print(*args, **kwargs)

    def _regular_iterator(ls):
        for l in ls:
            yield l

    iterator = tqdm if nproc == 0 else _regular_iterator

    graph = nx.MultiDiGraph() if DIRECTIONAL_GRAPH else nx.MultiGraph()
    possible_targets = {}
    positive_train_triples = []

    train_lines = count_file_lines(PATH_TRAIN)
    test_lines = count_file_lines(PATH_TEST)

    # Start and end ranges for the triples that this thread will process
    start_range_train = int(nproc * train_lines / N_THREADS)
    end_range_train = int((nproc + 1) * train_lines / N_THREADS)

    start_range_test = int(nproc * test_lines / N_THREADS)
    end_range_test = int((nproc + 1) * test_lines / N_THREADS)

    rels_to_study = None
    rels_study_path = f"datasets/{DATASET}/relations_to_study.txt"
    if isfile(rels_study_path):
        rels_to_study = []
        with open(rels_study_path, "r") as f:
            for line in f:
                if line:
                    rels_to_study.append(line.strip().split("\t")[0])

    # Load the data from the training split
    _print("Loading training data")
    with open(PATH_TRAIN, "r") as f:
        for i, line in enumerate(f):
            spl = line.strip().split("\t")

            # Skip negative examples in the training split, since we generate our own negatives
            if len(spl) >= 4 and spl[3] != "1": continue

            s, r, t = spl[:3]
            if r not in possible_targets:
                possible_targets[r] = []
            possible_targets[r].append(t)

            graph.add_edge(s, t, rel=r, key=r)
            if start_range_train <= i < end_range_train and (
                    rels_to_study is None or r in rels_to_study):
                positive_train_triples.append((s, r, t))

    _print("Removing duplicate targets")
    # Remove duplicates from the possible targets dict
    for r, ls in possible_targets.items():
        possible_targets[r] = list(set(ls))

    with open(PATH_RELS, "r") as f:
        relations = [x.strip().split("\t")[0] for x in f.readlines()]

    # Generate the negatives by replacing the target entity with a random one
    # from the same range
    _print("Generating negatives")
    negative_train_triples = generate_negatives(positive_train_triples,
                                                possible_targets)
    labelled_triples_train = [
        ((s, r, t, 1), None) for s, r, t in positive_train_triples
    ] + negative_train_triples

    _print("Computing features for the training split")
    training_csv = open(f"output/{DATASET}/train.csv.{nproc}", "a")

    centrality_indices = degree_centrality(graph)

    if not rels_to_study:
        rels_to_study = relations

    t1 = time.thread_time()
    for (s, r, t, label), orig in iterator(labelled_triples_train):
        fvec = get_feature_vector(graph, (s, r, t),
                                  relations,
                                  bool(label),
                                  orig,
                                  centrality_indices=centrality_indices,
                                  rels_to_study=rels_to_study)
        training_csv.write(
            f"{s},{r},{t};{label};{';'.join(str(x) for x in fvec)}\n")

    t2 = time.thread_time()
    training_csv.close()

    _print("Loading testing data")
    labelled_triples_test = []
    with open(PATH_TEST, "r") as f:
        for i, line in enumerate(f):
            if start_range_test <= i < end_range_test:
                spl = line.strip().split("\t")
                s, r, t, lbl = spl[:4]
                if rels_to_study is None or r in rels_to_study:
                    labelled_triples_test.append(
                        (s, r, t, 1 if lbl == "1" else 0))

    _print("Computing features for the testing split")
    testing_csv = open(f"output/{DATASET}/test.csv.{nproc}", "a")

    t3 = time.thread_time()
    for s, r, t, label in iterator(labelled_triples_test):
        try:
            fvec = get_feature_vector(graph, (s, r, t),
                                      relations,
                                      centrality_indices=centrality_indices,
                                      rels_to_study=rels_to_study)
        except NodeNotFound:
            # Since the testing data does not appear in the training split,
            # an entity present in the testing split may not appear in the
            # graph generated by the training split.
            continue
        testing_csv.write(
            f"{s},{r},{t};{label};{';'.join(str(x) for x in fvec)}\n")

    t4 = time.thread_time()
    testing_csv.close()

    elapsed_seconds = (t2 - t1) + (t4 - t3)

    with open("compute_times.txt", "a") as f:
        f.write(
            f"{DATASET};c{MAX_CONTEXT_SIZE};thread{nproc};{elapsed_seconds}\n")
Example #29
    def fit(self, X_df, y_array):
        d = {'link': np.array(y_array)}
        y_array = pd.DataFrame(data=d)


        path = os.path.dirname(__file__)
        self.data = pd.read_csv(os.path.join(path, 'nodes_info_new.csv'),low_memory=False)

        def clean_date(s):
            s = re.sub('[^0-9]', '', str(s))
            if len(s)==0:
                return np.nan
            if s == '1':
                return np.nan
            if len(s)<4:
                date = int(s)
            else:
                date = int(s[:4])
                if date>2000:
                    date = int(s[:3])
            return date

        self.data['birth_date'] = self.data['birth_date'].apply(clean_date)
        self.data['death_date'] = self.data['death_date'].apply(clean_date)

        def get_country(s):
            s = re.sub('[^a-zA-Z ]', '', str(s))
            if len(s)==0:
                return np.nan
            return s.split()[-1]

        self.data['birth_place'] = self.data['birth_place'].apply(get_country)
        self.data['death_place'] = self.data['death_place'].apply(get_country)


        #defining a dictionary which contains information for each thinker according to their names
        self.thinker_dictionary = {}
        for i,row in self.data.iterrows():
            self.thinker_dictionary[row['thinker']] = {'thinker_id': row['id'], 'birth_date': row['birth_date'], 'birth_place': row['birth_place'], 'death_place': row['death_place'], 'death_date': row['death_date'], 'summary': row['summary']}



        max_id = self.data['id'].max()
        self.nodes = np.arange(1,max_id+1)

        self.edges = np.array([[self.thinker_dictionary[row['thinker_1']]['thinker_id'],self.thinker_dictionary[row['thinker_2']]['thinker_id']] for i,row in (X_df[(y_array['link']==1).values.flatten()]).iterrows()])

        self.G.add_nodes_from(self.nodes)
        self.G.add_edges_from(self.edges)

        self.graph_features = pd.DataFrame({'thinker_id':self.nodes})
        self.connected_comp = list(nx.connected_components(self.G))

        group_id = {}
        group_len = {}
        for think_id in self.nodes:
            for i,group in enumerate(self.connected_comp):
                if think_id in group:
                    group_id[think_id] = i
                    group_len[think_id] = len(group)
                    break

        self.graph_features['connected_comp'] = [group_id[think_id] for think_id in self.nodes]
        self.graph_features['connected_comp_len'] = [group_len[think_id] for think_id in self.nodes]

        self.graph_features['degree_centrality'] = degree_centrality(self.G).values()
        self.graph_features['degree_centrality']/=self.graph_features['degree_centrality'].max()

        self.graph_features['eigenvector_centrality'] = eigenvector_centrality(self.G).values()
        self.graph_features['eigenvector_centrality']/=self.graph_features['eigenvector_centrality'].max()

#        self.graph_features['closeness_centrality'] = closeness_centrality(self.G).values()
#        self.graph_features['closeness_centrality']/=self.graph_features['closeness_centrality'].max()

#        self.graph_features['betweenness_centrality'] = betweenness_centrality(self.G).values()
#        self.graph_features['betweenness_centrality']/=self.graph_features['betweenness_centrality'].max()

#        self.graph_features['subgraph_centrality'] = subgraph_centrality(self.G).values()
#        self.graph_features['subgraph_centrality']/=self.graph_features['subgraph_centrality'].max()

        self.graph_features['pagerank'] = nx.pagerank(self.G, alpha=0.9).values()
        self.graph_features['pagerank']/=self.graph_features['pagerank'].max()




        return self
Example #30
def data():
    #read requirement file
    try:
        namafile = 'config.ini'
        config = configparser.ConfigParser()
        config.read_file(open(namafile))
        rurl = config.get('redis', 'REDIS_URL')
        rport = config.get('redis', 'REDIS_PORT')
        rpass = config.get('redis', 'REDIS_PASS')
    except KeyError:
        sys.stderr.write("Tidak Bisa Membuka File" + namafile + "\n")
        sys.exit(1)

    #Networkx graph configuration
    G = nx.Graph()

    #Redis graph configuration
    r = redis.Redis(host=rurl, port=rport, db=0, password=rpass)

    #list redis keys
    for k in r.keys('*'):
        #get value from redis keys
        value = str(r.get(k))

        #delete the header format (b') from value
        panjang = len(value)
        value = value[2:(panjang - 1)]

        #split the value
        arrvalue = value.split(',')

        #change data type to string
        root = str(k)

        #delete the header format (b') from root
        panjangkey = len(root)
        root = root[2:(panjangkey - 1)]

        for follower in arrvalue:
            # create edges list from key and value
            G.add_edge(root, follower)

    # Initialize graph, add nodes and edges, calculate modularity and centrality.
    groups = community.best_partition(G)
    degree = cn.degree_centrality(G)

    # Add node attributes for name, modularity, and three types of centrality.
    nx.set_node_attributes(G, groups, 'group')
    nx.set_node_attributes(G, degree, 'degree')

    # create json dictionary format for networkx edges
    data1 = json_graph.node_link_data(G)

    #output json file
    with open('static/data.json', 'w') as output:
        json.dump(data1,
                  output,
                  sort_keys=True,
                  indent=4,
                  separators=(',', ':'))

    return data1
Example #31
def calculate_all_centralities(data):
    """
        Calculates all four centrality metrics for the input graph
        Parameters:
            data: a json object which represents the graph.
            This json is manipulated and the necessary metrics are added to it.
    """
    G = json_graph.node_link_graph(
        data)  #loads the data to a NetworkX graph object
    #Calculates three of the metrics
    degree = centrality.degree_centrality(G)
    closeness = centrality.closeness_centrality(G)
    betweeness = centrality.betweenness_centrality(G)
    eigenvector_fail = False
    try:  #Eigenvector centrality can fail to converge.
        eigenvector = centrality.eigenvector_centrality(DiGraph(G),
                                                        max_iter=100000)
    except NetworkXError:  #Eigenvector values will be None if calculation fails.
        eigenvector = []
        eigenvector_fail = True
        print "Max iterations exceeded"
    degree_max = -1.0
    closeness_max = -1.0
    betweeness_max = -1.0
    eigenvector_max = -1.0
    for author in data['nodes']:  #Adds the unnormalized values in the json
        i = author['id']
        author['degreeCentralityUnnormalized'] = degree[i]
        author['closenessCentralityUnnormalized'] = closeness[i]
        author['betweennessCentralityUnnormalized'] = betweeness[i]
        author['eigenvectorCentralityUnnormalized'] = eigenvector[
            i] if not eigenvector_fail else 1.0

    #Finds the highest values for each centrality type
    for i in degree:
        if degree[i] > degree_max:
            degree_max = degree[i]
    for i in closeness:
        if closeness[i] > closeness_max:
            closeness_max = closeness[i]
    for i in betweeness:
        if betweeness[i] > betweeness_max:
            betweeness_max = betweeness[i]
    for i in eigenvector:
        if eigenvector[i] > eigenvector_max:
            eigenvector_max = eigenvector[i]

    #Normalizes the values
    for i in degree:
        if degree[i] != 0:
            degree[i] = degree[i] / degree_max
    for i in closeness:
        if closeness[i] != 0:
            closeness[i] = closeness[i] / closeness_max
    for i in betweeness:
        if betweeness[i] != 0:
            betweeness[i] = betweeness[i] / betweeness_max
    for i in eigenvector:
        if eigenvector[i] != 0:
            eigenvector[i] = eigenvector[i] / eigenvector_max

    #Adds the normalized values to the json
    for author in data['nodes']:
        i = author['id']
        author['degreeCentrality'] = degree[i]
        author['closenessCentrality'] = closeness[i]
        author['betweennessCentrality'] = betweeness[i]
        author['eigenvectorCentrality'] = eigenvector[
            i] if not eigenvector_fail else 1.0
    return data
Example #32
File: pagerank.py Project: anton386/quark
 def central_it(self):
     self.central = centrality.degree_centrality(pg.graph)
Example #34
    edges = [r.split(',')[:2] for r in rows[1:]]
    weights = [r.split(',')[-1] for r in rows[1:]]
    edge_tuples=[(e[0], e[1], int(weights[i])) for i,e in enumerate(edges)]

# Only get edges for the select nodes in the node csv.
edges = []
for e in edge_tuples:
    if all(x in list(node_ids) for x in e[:2]):
        edges.append(e)

# Initialize graph, add nodes and edges, calculate modularity and centrality.
G = nx.Graph()
G.add_nodes_from(list(node_ids))
G.add_weighted_edges_from(edges)
groups = community.best_partition(G)
degree = cn.degree_centrality(G)
betweenness = cn.betweenness_centrality(G, weight='weight')
eigenvector = cn.eigenvector_centrality(G, weight='weight')

# Add node attributes for name, modularity, and three types of centrality.
nx.set_node_attributes(G, 'name', node_dict)
nx.set_node_attributes(G, 'group', groups)
nx.set_node_attributes(G, 'degree', degree)
nx.set_node_attributes(G, 'betweenness', betweenness)
nx.set_node_attributes(G, 'eigenvector', eigenvector)

# Create json representation of the graph (for d3).
data = json_graph.node_link_data(G)

# You could create the needed json without NetworkX (but you would forfeit network metrics).
#new_data = dict(nodes=[dict(id=n) for n in list(set(nodes))], links=[dict(source=node_dict[e[0]], target=node_dict[e[1]], weight=e[2]) for e in edges])