Example #1
def plot_co_x(cox, start, end, size=(20, 20), title='', weighted=False, weight_threshold=10):

        """ Plotting function for keyword graphs

        Parameters
        --------------------
        cox: the coword networkx graph; assumes that nodes have attribute 'topic'
        start: start year
        end: end year
        size: figure size in inches
        title: plot title prefix
        weighted: if True, lay out with graphviz and style edges by weight
        weight_threshold: weight above which an edge is drawn as a strong edge
        """

        plt.figure(figsize=size)
        plt.title(title +' %s - %s'%(start,end), fontsize=18)
        if weighted:
            elarge=[(u,v) for (u,v,d) in cox.edges(data=True) if d['weight'] >weight_threshold]
            esmall=[(u,v) for (u,v,d) in cox.edges(data=True) if d['weight'] <=weight_threshold]
            pos=nx.graphviz_layout(cox) # positions for all nodes
            nx.draw_networkx_nodes(cox,pos,
                node_color= [s*4500 for s in nx.eigenvector_centrality(cox).values()],
                node_size = [s*6+20  for s in nx.degree(cox).values()],
                alpha=0.7)
            # edges
            nx.draw_networkx_edges(cox,pos,edgelist=elarge,
                                width=1, alpha=0.5, edge_color='black') #, edge_cmap=plt.cm.Blues
            nx.draw_networkx_edges(cox,pos,edgelist=esmall,
                                width=0.3,alpha=0.5,edge_color='yellow',style='dotted')
            # labels
            nx.draw_networkx_labels(cox,pos,font_size=10,font_family='sans-serif')
            plt.axis('off')
        else:
            nx.draw_graphviz(cox, with_labels=True,
                         alpha = 0.8, width=0.1,
                         fontsize=9,
                         node_color = [s*4 for s in nx.eigenvector_centrality(cox).values()],
                         node_size = [s*6+20 for s in nx.degree(cox).values()])
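A minimal driver for plot_co_x, assuming a NetworkX 1.x environment with pygraphviz (graphviz_layout and draw_graphviz were removed in later releases); the toy coword graph here is hypothetical:

import networkx as nx
import matplotlib.pyplot as plt

cox = nx.Graph()  # hypothetical keyword co-occurrence counts
cox.add_edge('network', 'graph', weight=15)
cox.add_edge('graph', 'topology', weight=4)
cox.add_edge('network', 'topology', weight=12)

plot_co_x(cox, 2000, 2010, title='Keyword co-occurrence', weighted=True)
plt.show()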
Example #2
def print_degree_distributions(dataset, context):
    """
    Extracts degree distribution values from networks, and prints them to
    a CSV file.

    **warning** overwrites if file exists.
    """
    print '> Reading data..', dataset
    corpus_path = '../data/'+dataset+'_text'
    (documents, labels) = data.read_files(corpus_path)

    degsfile = open('output/properties/cooccurrence/degrees_docs_'+dataset.replace('/','.'), 'w')

    giant = nx.DiGraph()
    print '> Building networks..'
    for i, text in enumerate(documents):
        if i%10==0: print '   ',str(i)+'/'+str(len(documents))
        g = graph_representation.construct_cooccurrence_network(text,context=context)
        giant.add_edges_from(g.edges())
        degs = nx.degree(g).values()
        degs = [str(d) for d in degs]
        degsfile.write(','.join(degs)+'\n')
    degsfile.close()

    print '> Writing giant\'s distribution'
    with open('output/properties/cooccurrence/degrees_giant_'+dataset.replace('/','.'), 'w') as f:
        ds = nx.degree(giant).values()
        ds = [str(d) for d in ds]
        f.write(','.join(ds))
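print_degree_distributions (like many examples on this page) relies on the NetworkX 1.x API, where nx.degree returned a dict; on 2.x it returns a view of (node, degree) pairs, so the .values() calls need one extra step. A compatibility sketch:

import networkx as nx

g = nx.path_graph(5)
# NetworkX 1.x:           degs = nx.degree(g).values()
# NetworkX 2.x and later: wrap the view in a dict first
degs = list(dict(nx.degree(g)).values())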
Example #3
def draw_degree_rank_plot(orig_g, mG):
    ori_degree_seq = sorted(nx.degree(orig_g).values(), reverse=True)  # degree sequence
    deg_seqs = []
    for newg in mG:
        deg_seqs.append(sorted(nx.degree(newg).values(), reverse=True))  # degree sequence
    df = pd.DataFrame(deg_seqs)

    plt.xscale("log")
    plt.yscale("log")
    plt.fill_between(df.columns, df.mean() - df.sem(), df.mean() + df.sem(), color="blue", alpha=0.2, label="se")
    h, = plt.plot(df.mean(), color="blue", aa=True, linewidth=4, ls="--", label="H*")
    orig, = plt.plot(ori_degree_seq, color="black", linewidth=4, ls="-", label="H")

    plt.title("Degree Distribution")
    plt.ylabel("Degree")
    plt.ylabel("Ordered Vertices")

    plt.tick_params(
        axis="x",  # changes apply to the x-axis
        which="both",  # both major and minor ticks are affected
        bottom="off",  # ticks along the bottom edge are off
        top="off",  # ticks along the top edge are off
        labelbottom="off",
    )  # labels along the bottom edge are off

    plt.legend([orig, h], ["$H$", "HRG $H^*$"], loc=3)
    # fig = plt.gcf()
    # fig.set_size_inches(5, 4, forward=True)
    plt.show()
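A usage sketch for draw_degree_rank_plot; the Erdos-Renyi graphs are hypothetical stand-ins for the HRG reconstructions that mG is meant to hold:

import networkx as nx

orig_g = nx.barabasi_albert_graph(200, 3)
mG = [nx.erdos_renyi_graph(200, 0.03) for _ in range(5)]  # stand-ins for H* samples
draw_degree_rank_plot(orig_g, mG)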
Example #4
def PartitionGraph(graph,kc_nodes,anchor=EMPTY_SET,k=2):
    G = graph.copy()
    R_cand = set()
    S_cand = set()
    
    G_ccs = nx.connected_component_subgraphs(G)
    
    for g_cc in G_ccs:
        cc_nodes = set(g_cc.nodes())
        kc_overlap = cc_nodes.intersection(kc_nodes)
        if len(kc_overlap) > 0:
            root = kc_overlap.pop()
            R_nodes = set()
            for n in cc_nodes:
                d = nx.degree(G,n)
                if n not in anchor and n not in kc_nodes and d > 0 and d < k:
                    R_nodes.add(n)
            R_cand = R_cand.union(set((u,root) for u in R_nodes))
        else:
            S_nodes = set()
            for n in cc_nodes:
                d = nx.degree(G,n) 
                if n not in anchor and d > 0 and d < k:
                    S_nodes.add(n)
            S_cand = S_cand.union(set((u,v) for u,v in combinations(S_nodes,2)))    

    return R_cand,S_cand
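nx.connected_component_subgraphs was removed in NetworkX 2.4; on newer releases the corresponding line in PartitionGraph would read, roughly:

G_ccs = (G.subgraph(c).copy() for c in nx.connected_components(G))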
Example #5
def RW_Size(G,r = 1000,m=100):
    sampled = []
    now_node = random.choice(G.nodes())
    sampled.append(now_node)
    while True:
        next_node = random.choice(nx.neighbors(G,now_node))
        now_node = next_node
        sampled.append(now_node)
        if len(sampled) >= r:
            break
    lst = []
    for i in range(0,r-m):
        if i+m <= r-1:
            for j in range(i+m,r):
               # l1 = set(nx.neighbors(G,sampled[i]))
               # l2 = set(nx.neighbors(G,sampled[j]))
               # if len(list(l1 & l2)) >= 1:
                lst.append((sampled[i],sampled[j]))
                lst.append((sampled[j],sampled[i]))
    sumA = 0.0
    sumB = 0.0
    print(len(lst))
    for nodes in lst:
        sumA += float(nx.degree(G,nodes[0]))/nx.degree(G,nodes[1])
        l1 = set(nx.neighbors(G,nodes[0]))
        l2 = set(nx.neighbors(G,nodes[1]))
        count = len(list(l1&l2))
        sumB += count/(float(nx.degree(G,nodes[0]))*nx.degree(G,nodes[1]))
    return sumA/sumB
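RW_Size is a random-walk size estimator in the spirit of Katzir et al.: over sampled pairs at least m steps apart, sumA accumulates degree ratios and sumB shared-neighbour collision terms, and their ratio estimates |V|. A sanity-check sketch, assuming the NetworkX 1.x API the function uses (on 2.x, random.choice needs list(G.nodes())):

import networkx as nx

G = nx.barabasi_albert_graph(2000, 3)
print(G.number_of_nodes(), RW_Size(G, r=1000, m=100))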
Example #6
def compute_eps(G2, k, u, S1, S2):
    max_deg_G2 = max(nx.degree(G2).itervalues())
    print "max_deg_G2 =", max_deg_G2
    deg_list = nx.degree(G2)
    
#    start = time.clock()
    X = compute_X(G2, max_deg_G2)
#    print "compute_X: done"
#    print "Elapsed ", (time.clock() - start
    Y = compute_Y(X, G2.number_of_nodes(), max_deg_G2)
#    print "compute_Y: done"
    ent = compute_entropy(Y, G2.number_of_nodes(), max_deg_G2)
#    print "len(ent) =", len(ent)
    
    num_violated = 0
    LOG2K = math.log(k,2)
    print "LOG2K =", LOG2K
    for (v, deg) in deg_list.iteritems():   # check the original graph
        if (v == u or v in S1) and ent[deg] < LOG2K:
            num_violated += 1
    # check and update eps_min (if ok)
    eps2 = float(num_violated)/G2.number_of_nodes()
    
    #
    return eps2
Example #7
def compute_eps_deterministic_multi(G, G2, k_arr):
    max_deg_G2 = max(nx.degree(G2).itervalues())
    print "max_deg =", max_deg_G2
    deg_list = nx.degree(G) 
    deg_list_G2 = nx.degree(G2)  
    
    deg_count_G2 = [0 for j in range(max_deg_G2+1)]
    for deg in deg_list_G2.itervalues():
        deg_count_G2[deg] += 1
    
    ent = [0.0 for j in range(max_deg_G2+1)]
    for j in range(max_deg_G2+1):
        if deg_count_G2[j] > 0:
            ent[j] = math.log(deg_count_G2[j],2)           # simple
    
    print "len(ent) =", len(ent)
#    print "entropy =", ent
    
    eps_arr = []
    for k in k_arr:
        num_violated = 0
        LOG2K = math.log(k,2)
        for (v, deg) in deg_list.iteritems():   # check the original graph
            if deg <= max_deg_G2:               # in the case of max_deg_G2 < max_deg_G
                if ent[deg] > 0.0 and ent[deg] < LOG2K:   # do not check zero-column of ent
                    num_violated += 1
        # check and update eps_min (if ok)
        eps2 = float(num_violated)/G2.number_of_nodes()
    
        eps_arr.append(eps2)
    #
    return eps_arr
Example #8
def RW_Size_col(G,r = 30000):
    sampled = []
    now_node = random.choice(G.nodes())
    sampled.append(now_node)
    sumA = 0.0
    sumB = 0.0
    sumA += nx.degree(G,now_node)
    sumB += 1.0/nx.degree(G,now_node)
    count = 0
    while True:
        next_node = random.choice(nx.neighbors(G,now_node))
        now_node = next_node
        sumA += nx.degree(G,now_node)
        sumB += 1.0/nx.degree(G,now_node)
        sampled.append(now_node)
        count += 1
        if count >= r:
            break
    count2 = 0
    for i in range(0,len(sampled)-1):
        for j in range(i+1,len(sampled)):
            if(sampled[i] == sampled[j]):
                count2 += 1

    return sumA*sumB/(2*count2)
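The closing double loop counts colliding sample pairs in O(r^2); the same count comes from a Counter in O(r), a sketch:

from collections import Counter

# unordered pairs (i, j) with sampled[i] == sampled[j]
count2 = sum(c * (c - 1) // 2 for c in Counter(sampled).values())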
Example #9
def compute_eps_multi(G, G2, k_arr):
    max_deg_G2 = max(nx.degree(G2).itervalues())
    print "max_deg_G2 =", max_deg_G2
    deg_list = nx.degree(G)     # G not G2
    deg_list_G2 = nx.degree(G2)     
    
    X = compute_X(G2, G2.number_of_nodes(), max_deg_G2, deg_list_G2)
    Y = compute_Y(X, G2.number_of_nodes(), max_deg_G2, deg_list_G2)
    # check X, Y
    check_X_and_Y(X, Y, G2.number_of_nodes(), max_deg_G2, deg_list_G2)
    
    ent = compute_entropy(Y, G2.number_of_nodes(), max_deg_G2, deg_list_G2)
    print "len(ent) =", len(ent)
#    print "entropy =", ent
    
    eps_arr = []
    for k in k_arr:
        num_violated = 0
        LOG2K = math.log(k,2)
#        print "LOG2K =", LOG2K
        for (v, deg) in deg_list.iteritems():   # check the original graph
            if deg <= max_deg_G2:               # in the case of max_deg_G2 < max_deg_G
                if ent[deg] > 0.0 and ent[deg] < LOG2K:   # do not check zero-column of ent
                    num_violated += 1
        # check and update eps_min (if ok)
        eps2 = float(num_violated)/G2.number_of_nodes()
    
        eps_arr.append(eps2)
    #
    return eps_arr
Example #10
def cand_gen(G, k, a, c, cores=None):
    '''
    generate all branches of the search given a k-plex a
    a: current k-plex
    c: the block
    '''
    b = neighbors(G, a)
    b.difference_update(c)

    # the strict nodes
    subg = G.subgraph(a)
    strict_nodes = {node for node in a if nx.degree(subg, node) == len(a)-k }
    for node in strict_nodes:
        b.intersection_update(G.neighbors(node))


    # always reshape by the current best: `optimal` is not defined here, so it
    # must be a module-level global holding the best k-plex found so far
    if cores is None:
        b = {node for node in list(b) if nx.degree(G, node) >= len(optimal)-k}
    else:
        b = {node for node in list(b) if cores[node]>=len(optimal)-k}

    # calculate the valid candidates
    b = {node for node in b if len(set(G.neighbors(node)).intersection(a)) >= len(a)+1-k }

    # sort the candidate list
    b = list(b)
    # b.sort(key = lambda x: len(set(G.neighbors(x)).intersection(a)), reverse=True)

    return b
Example #11
def leaf_removal(g, verbose=False):
    G = g.copy()
    stop = 0
    potential_mis = []
    isolated = [x for x in g.nodes() if nx.degree(g, x) == 0]
    potential_mis.extend(isolated)
    G.remove_nodes_from(isolated)
    while stop == 0:
        deg = G.degree()
        if 1 in deg.values():
            for n in G.nodes_iter():
                if deg[n] == 1:
                    L = n
                    break
            nn = nx.neighbors(G, L)[0]
            G.remove_node(L)
            G.remove_node(nn)
            potential_mis.append(L)
            isolated = [x for x in G.nodes() if nx.degree(G, x) == 0]
            potential_mis.extend(isolated)
            G.remove_nodes_from(isolated)
        else:
            stop = 1
    core_mis = []
    if G.number_of_nodes() >= 1:
        core_mis = nx.maximal_independent_set(G)
        if verbose == True:
            print len(potential_mis), len(core_mis), g.number_of_nodes()  # `N` was undefined in the original
    else:
        if verbose == True:
            print len(potential_mis), len(core_mis), g.number_of_nodes()  # `N` was undefined in the original
    return potential_mis, core_mis
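A usage sketch on a toy graph (the function assumes the NetworkX 1.x API: dict-valued degree(), nodes_iter, list-returning nx.neighbors):

import networkx as nx

g = nx.path_graph(7)  # trees and paths are solved by leaf removal alone
potential_mis, core_mis = leaf_removal(g)
print(sorted(potential_mis), core_mis)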
Example #12
    def __generate_paths(self):
        self.__paths = []
        graph = copy.deepcopy(self.__graph)
        paths = []
        if not graph.nodes():
            return
        start_nodes = [n for n in graph.nodes() if (n.style() != "curve" or (nx.degree(graph, n) == 1))]
        start_nodes = sorted(start_nodes, key=lambda n: nx.degree(graph, n))
        if start_nodes:
            path = [start_nodes[0]]
        else:
            path = [graph.nodes()[0]]

        while path:
            neighbors = nx.neighbors(graph, path[-1])
            if neighbors:
                node = neighbors[0]
                graph.remove_edge(path[-1], node)
                path.append(node)
            else:
                paths.append(copy.copy(path))
                while path and not graph.neighbors(path[-1]):
                    path.pop()

        for path in paths:
            self.__paths.append(reduce_path(path))

        return
Example #13
def nodeMaxDegree(G):
    degree=0
    for n in G.nodes():
        if nx.degree(G,n)> degree :
            degree=nx.degree(G,n)
            node=n
    return node
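An equivalent one-liner, which works with both the old dict-based and the newer view-based degree API:

def nodeMaxDegree(G):
    # node attaining the maximum degree
    return max(G.nodes(), key=lambda n: nx.degree(G, n))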
Example #14
def _tester():
    # think of this like a networkx scratchpad
    G = nx.Graph()  # this is an undirected graph
    G.add_edge(1, 2)
    G.add_edge(2, 3)
    G.add_node(4)
    print nx.degree(G)
    print nx.info(G)
Example #15
def mindeg_GSK(BG, variables_index=0, verbose=False):
    Vprime1 = []
    Vprime2 = []

    layer = nx.get_node_attributes(BG, 'bipartite')
    var = [x for x in BG.nodes() if layer[x] == variables_index]
    fac = [x for x in BG.nodes() if layer[x] != variables_index]

    if verbose == True:
        print 'Initial variable nodes:', var
        print 'Initial factor nodes:', fac

    isolated_variables = [x for x in BG.nodes() if nx.degree(BG, x) == 0 and layer[x] == variables_index]
    [var.remove(x) for x in isolated_variables]

    G = BG.copy()
    Vprime1.extend(isolated_variables)
    G.remove_nodes_from(isolated_variables)

    isolated_factors = [x for x in G.nodes() if nx.degree(BG, x) == 0 and layer[x] != variables_index]
    [fac.remove(x) for x in isolated_factors]
    G.remove_nodes_from(isolated_factors)

    while len(var) > 0:
        if verbose == True:
            print '#var:', len(var), '#fac:', len(fac), '#nodes in depleted graph:', G.number_of_nodes(), '#original BG:', BG.number_of_nodes()

        pendant = return_mindeg_pendant(G, layer, variables_index)
        if len(pendant) == 0:
            ## no pendant: pick the edge with minimum degree sum and play the game on it
            if verbose == True:
                print var
            m = G.number_of_nodes() * 2
            degs = G.degree()
            min_edge = None
            for e in G.edges():
                if degs[e[0]] + degs[e[1]] < m:
                    m = degs[e[0]] + degs[e[1]]
                    min_edge = e
            # the original tested `e` here, i.e. the last edge iterated, not the minimiser
            if min_edge[0] in var:
                v = min_edge[0]
            else:
                v = min_edge[1]
            pendant = []
            pendant.append(v)
            pendant.extend(nx.neighbors(G, v))
            Vprime2.append(pendant[0])
        else:
            Vprime1.append(pendant[0])
        augmented_pendant = []
        augmented_pendant.extend(pendant)
        for n in pendant[1:]:
            augmented_pendant.extend(nx.neighbors(G, n))
        augmented_pendant = list(set(augmented_pendant))
        G.remove_nodes_from(augmented_pendant)
        [var.remove(x) for x in augmented_pendant if x in var]
        [fac.remove(x) for x in augmented_pendant if x in fac]

    return Vprime1, Vprime2
Example #16
    def k_dependency_feats(self):
        wordindex = self.index + 1
        headindex = dep_head_of(self.deptree, wordindex)
        D = {}
        D["k_dist_to_root"] = len(dep_pathtoroot(self.deptree, wordindex))
        D["k_deprel"] = self.deptree[headindex][wordindex]["deprel"]
        D["k_headdist"] = abs(headindex - wordindex)  # maybe do 0 for root?
        D["k_head_degree"] = nx.degree(self.deptree, headindex)
        D["k_child_degree"] = nx.degree(self.deptree, wordindex)
        return D
Example #17
def sub_by_degree(min_degree,net):
    # return the subgraph of net keeping only nodes with degree >= min_degree
    degrees = nx.degree(net)
    remove_node_list = [item for item in degrees if degrees[item] < min_degree]
    new_net = nx.Graph()
    new_net.add_nodes_from(net.nodes())
    new_net.add_edges_from(net.edges())
    for name in remove_node_list:
        new_net.remove_node(name)
    return new_net
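A shorter equivalent using subgraph; like the original, this is a single pass (degrees come from the original net and are not re-checked after removal):

def sub_by_degree(min_degree, net):
    keep = [n for n in net.nodes() if nx.degree(net, n) >= min_degree]
    return net.subgraph(keep).copy()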
Example #18
def main():
    LOG = True

    #if (len(sys.argv) != 3):
    #        print "ERROR: genRandomGeorml <nodes> <raio>"
    #        sys.exit(1)

    NMAX = int(sys.argv[1])
    RAIO = float(sys.argv[2])
    #NMAX=40
    #RAIO=0.1
    ALCANCE=250

    G=nx.random_geometric_graph(NMAX,RAIO,2)

    while not nx.is_connected(G):
         RAIO = RAIO + .005
         G = nx.random_geometric_graph(NMAX, RAIO, 2)
         if LOG: print "Graph is not fully connected"

    pos=nx.get_node_attributes(G,'pos')
    network(G,pos,1)

    #Remove neighbours that are too close together
    while nodeNear(G) < 1000:
        G.remove_node(nodeNear(G))

    if nx.is_connected(G):
        pos=nx.get_node_attributes(G,'pos')
        network(G,pos,2)

        #Remove the node with the most neighbours
        T = G.copy()  # work on a copy: `T = G` aliased the graph, so G was always mutated
        if not nodeSolo(T, nodeMaxDegree(T)): T.remove_node(nodeMaxDegree(T))
        if nx.is_connected(T):
                G = T

        pos=nx.get_node_attributes(G,'pos')
        network(G,pos,3)



        for n in G.neighbors(nodeMaxDegree(G)):
            if nx.degree(G,n)== 2 :
                degree=nx.degree(G,n)
                node=n
                print "node=",n
                if not nodeSolo(G,n): G.remove_node(n)
                break
        
        pos=nx.get_node_attributes(G,'pos')
        network(G,pos,4)
    else:
        if LOG: print "SubGraph is not fully connected"
Example #19
def remove_by_degree(G, d):
    '''
    remove nodes by degree
    '''
    nodes_to_be_delete = [i for i in G.nodes() if nx.degree(G, i) < d]
    while len(nodes_to_be_delete) > 0:
        print(nodes_to_be_delete)
        if 164373 in nodes_to_be_delete:
            a = input()  # leftover debugging pause for one specific node id
        G.remove_nodes_from(nodes_to_be_delete)
        nodes_to_be_delete = [i for i in G.nodes() if nx.degree(G, i) < d]
    return
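Because the pruning repeats until every remaining node has degree >= d, remove_by_degree leaves exactly the d-core; NetworkX computes the same node set directly (as a new subgraph, rather than by mutating G):

core = nx.k_core(G, k=d)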
Example #20
def compute_measures(bigDict):
    """ Computes the measures for each network
    
    Measures to compute:
    
    nr_of_nodes
    nr_of_edges
    
    max_edge_value
    min_edge_value
    
    is_connected
    number_connected_components
    
    average_unweighted_node_degree
    average_weighted_node_degree
    
    average_clustering_coefficient
    average_weighted_shortest_path_length
    average_unweighted_shortest_path_length
    
    To be added:
     single node values, e.g. node degree of brainstem etc.
    
    Non-scalar return values: (not used yet)
     degree_distribution
     edge_weight_distribution
    
    """
    
    returnMeasures = {}

    for key, netw in bigDict.items():

        outm = {}
        
        outm['nr_of_nodes'] = netw.number_of_nodes()
        outm['nr_of_edges'] = netw.number_of_edges()
        
        outm['max_edge_value'] = np.max([d['weight'] for f, t, d in netw.edges(data=True)])
        outm['min_edge_value'] = np.min([d['weight'] for f, t, d in netw.edges(data=True)])
        
        outm['is_connected'] = nx.is_connected(netw)
        outm['number_connected_components'] = nx.number_connected_components(netw)
        outm['average_unweighted_node_degree'] =  np.mean(nx.degree(netw, weighted = False).values())
        outm['average_weighted_node_degree'] = np.mean(nx.degree(netw, weighted = True).values())
        outm['average_clustering_coefficient'] = nx.average_clustering(netw)
        outm['average_weighted_shortest_path_length'] = nx.average_shortest_path_length(netw, weighted = True)
        outm['average_unweighted_shortest_path_length'] = nx.average_shortest_path_length(netw, weighted = False)
        
        returnMeasures[key] = outm
        
    return returnMeasures
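The weighted= keyword used above is from the NetworkX 1.x era; on current releases the weighted variants are spelled with weight='weight'. Rough drop-in replacements for those lines:

outm['average_unweighted_node_degree'] = np.mean([d for _, d in netw.degree()])
outm['average_weighted_node_degree'] = np.mean([d for _, d in netw.degree(weight='weight')])
outm['average_weighted_shortest_path_length'] = nx.average_shortest_path_length(netw, weight='weight')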
Example #21
	def apply_sifi_surcharge(self):
		degree_sum = 0
		for bank in self.network.contracts:
			degree_sum += float(nx.degree(self.network.contracts)[bank])
		average_degree = float(degree_sum / len(self.network.contracts.nodes()))
		
		for bank in self.network.contracts:
			# the sifi surcharge is the product of the sifiSurchargeFactor and the connectedness as measured
			# by degree/average_degree 
			# the maximum ensures that no bank has to hold less than 1.0 times their banking capital
			sifiSurcharge = max(self.get_state(0).sifiSurchargeFactor*( float(nx.degree(self.network.contracts)[bank]) / average_degree), 1.0)
			bank.apply_sifi_surcharge(sifiSurcharge)
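A worked instance of the surcharge rule: with sifiSurchargeFactor = 1.2 and average_degree = 4, a bank of degree 6 gets max(1.2 * 6/4, 1.0) = 1.8 times its capital, while a bank of degree 1 gets max(1.2 * 1/4, 1.0) = 1.0, the floor.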
Example #22
def construct_graph_list_und(graphs_to_const):
    """Construct and return a list of graphs so graph construction is easily
    repeatable.

    Can handle: Random, Small-world, Scale-free, SGPA, SGPA-random"""

    graph_list = []

    # Always construct and add Allen Institute mouse brain to list
    G_brain = brain_graph.binary_undirected()[0]
    graph_list.append(G_brain)

    # Calculate degree & clustering coefficient distribution
    n_nodes = G_brain.order()

    brain_degree = nx.degree(G_brain).values()
    brain_degree_mean = np.mean(brain_degree)

    # Construct degree controlled random
    if 'Random' in graphs_to_const:
        G_RAND = und_graphs.random_simple_deg_seq(
            sequence=brain_degree, brain_size=BRAIN_SIZE, tries=1000)[0]
        graph_list.append(G_RAND)

    # Construct small-world graph
    if 'Small-world' in graphs_to_const:
        graph_list.append(nx.watts_strogatz_graph(
            n_nodes, int(round(brain_degree_mean)), SW_REWIRE_PROB))

    # Construct scale-free graph
    if 'Scale-free' in graphs_to_const:
        graph_list.append(nx.barabasi_albert_graph(
            n_nodes, int(round(brain_degree_mean / 2.))))

    # Construct SGPA graph
    if 'SGPA' in graphs_to_const:
        G_SGPA = source_growth(bc.num_brain_nodes, bc.num_brain_edges_directed,
                               L=LENGTH_SCALE)[0]
        graph_list.append(G_SGPA.to_undirected())

        # Construct degree-controlled SGPA graph
        if 'SGPA-random' in graphs_to_const:
            SGPA_degree = nx.degree(G_SGPA).values()
            G_SGPA_RAND = und_graphs.random_simple_deg_seq(
                sequence=SGPA_degree, brain_size=BRAIN_SIZE, tries=1000)[0]
            graph_list.append(G_SGPA_RAND)

    # Error check that we created correct number of graphs
    if len(graph_list) != len(graphs_to_const):
        raise RuntimeError('Graph list/names don\'t match')

    return graph_list
Example #23
def make_hist(gnm, sw, rw):
  degree_sequence1=sorted(nx.degree(gnm).values(),reverse=True)
  degree_sequence2=sorted(nx.degree(sw).values(),reverse=True)
  degree_sequence3=sorted(nx.degree(rw).values(),reverse=True)

  # The original built "counts" dicts keyed on the loop index rather than the
  # degree value, so every entry came out as 1 and the curves were flat; for a
  # degree rank plot it suffices to loglog the sorted sequences directly.
  p1, = plt.loglog(degree_sequence1, 'b-', marker='o')
  p2, = plt.loglog(degree_sequence2, 'r-', marker='o')
  p3, = plt.loglog(degree_sequence3, 'y-', marker='o')

  plt.title("Degree rank plot")
  plt.ylabel("degree")
  plt.xlabel("rank")
  plt.legend([p1, p2, p3], ["Gnm", "Small-World", "Real-World"])
  plt.savefig("test3.png")
  plt.close()
Example #24
    def findEndpointsBifurcations(self, verbose=False):
        """For the current graph, identify all points that are either
        endpoints (1 neighbor) or bifurcation points (3 or more neighbors)"""
        endpoints = []
        bifurcations = []
        for n in self.cg.nodes_iter():
            if nx.degree(self.cg, n) == 1:
                endpoints.append(n)
            elif nx.degree(self.cg, n) >= 3:
                bifurcations.append(n)
        self.endpoints[self.currentGraphKey] = endpoints
        self.bifurcations[self.currentGraphKey] = bifurcations
Example #25
def gp(G,edge_pro_dict,node_degree_dict):

    Ggp=nx.Graph()

    for each_node in G.nodes():
        Ggp.add_node(each_node)
    edge_pro_order=sorted(edge_pro_dict.iteritems(), key=lambda d:d[1],reverse=True)
    for each_edge_order in edge_pro_order:
        u=each_edge_order[0][0]
        v=each_edge_order[0][1]
        discrepancy_u = nx.degree(Ggp)[u] - node_degree_dict[u]
        discrepancy_v = nx.degree(Ggp)[v] - node_degree_dict[v]
        if abs(discrepancy_u+1) + abs(discrepancy_v+1) < abs(discrepancy_u) + abs(discrepancy_v):
            Ggp.add_edge(u,v)
    return Ggp
Example #26
def RemoveCore(graph,k=2,anchor=EMPTY_SET):
    G = graph.copy()
    Gn = FindKCore(G,k,anchor)
    for u,v in Gn.edges_iter():
        if u != v:
            G.remove_edge(u,v)

    KC_nodes = set(filter(lambda n: nx.degree(G,n) > 0,Gn.nodes()))

    G_small = G.copy()
    for n in G.nodes_iter():
        if nx.degree(G,n) == 0:
            G_small.remove_node(n)
            
    return G, G_small,KC_nodes
Example #27
def participation_coefficient(graph, partition):
    '''
    Computes the participation coefficient for each node.

    ------
    Inputs
    ------
    graph = networkx graph
    partition = modularity partition of graph

    ------
    Output
    ------
    List of the participation coefficient for each node.

    '''
    
    pc_dict = {}
    all_nodes = set(graph.nodes())
    paths = nx.shortest_path_length(G=graph)
    for m in partition.keys():
        mod_list = set(partition[m])
        between_mod_list = list(set.difference(all_nodes, mod_list))
        for source in mod_list:
            degree = float(nx.degree(G=graph, nbunch=source))
            count = 0
            for target in between_mod_list:
                if paths[source][target] == 1:
                    count += 1
            bm_degree = count
            pc = 1 - (bm_degree / degree)**2
            pc_dict[source] = pc
    return pc_dict
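Note this is a variant built from the between-module degree only, P_i = 1 - (k_i_out / k_i)^2. The standard Guimera-Amaral participation coefficient sums over all modules, P_i = 1 - sum_s (k_is / k_i)^2; a sketch, assuming partition maps each module id to a set of nodes:

def participation_coefficient_standard(graph, partition):
    pc = {}
    for node in graph.nodes():
        k = float(nx.degree(graph, node))
        if k == 0:
            pc[node] = 0.0
            continue
        acc = 0.0
        for mod_nodes in partition.values():
            # degree of `node` into this module
            k_s = sum(1 for nb in graph.neighbors(node) if nb in mod_nodes)
            acc += (k_s / k) ** 2
        pc[node] = 1 - acc
    return pc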
Example #28
	def give_output_list(self, game):
		""" This returns a list of the selected nodes. The twin attack player
		finds the highest degree nodes and, for each, selects two neighbors
		of that node, until the seed quota is filled. """

		nodes = nx.nodes(game.network)

		nodes.sort(key=lambda x : nx.degree(game.network, x), reverse=True)

		selections = set()

		for node in nodes:

			adjacents = list(nx.all_neighbors(game.network, node))

			for adj_node in adjacents[:2]:

				selections.add(adj_node)
				if len(selections) == game.num_seeds:
					break

			if len(selections) == game.num_seeds:
				break

		assert len(selections) == game.num_seeds
		return list(selections)
Example #29
def draw_graph(graph2):
    plt.clf()
    nodes = set([n1 for n1, n2 in graph2] + [n2 for n1, n2 in graph2])  # Extract nodes from graph

    G = nx.Graph()   # Graph - No Edges

    for node in nodes:    #Nodes
        G.add_node(node)

    for edge in graph2:    #Edges
        G.add_edge(edge[0], edge[1])

    pos = nx.spring_layout(G)    # Layout settings
    nx.draw_networkx_nodes(G,pos,node_size=1500, node_color='w', font_size=6)
    nx.draw_networkx_edges(G,pos,alpha=0.75,width=3)
    nx.draw_networkx_labels(G,pos, font_color='b')

    plt.title('Twitter Hashtag Graph')
    plt.axis('off')     # Show graph
    plt.savefig(".\\images\\graph.png")
    
    # Calculate average degree
    average_degree = np.mean(nx.degree(G).values())
    ft2 = open(sys.argv[2], 'a')    # Write to ft2.txt
    
    if np.isnan(average_degree): # NaN for no hashtags
        ft2.write('0.00'+'\n')
    else:
        aver_deg = format(average_degree, '.2f')
        ft2.write(str(aver_deg)+'\n')
    ft2.close()
    
    return
Example #30
def draw_graph(username, password, filename='graph.txt', label_flag=True, remove_isolated=True, different_size=True, iso_level=10, node_size=40):
    """Reading data from file and draw the graph.If not exists, create the file and re-scratch data from net"""
    print "Generating graph..."
    try:
        with open(filename, 'r') as f:
            G = p.load(f)
    except:
        G = getgraph(username, password)
        with open(filename, 'w') as f:
            p.dump(G, f)
    #nx.draw(G)
    # Judge whether remove the isolated point from graph
    if remove_isolated is True:
        H = nx.empty_graph()
        for SG in nx.connected_component_subgraphs(G):
            if SG.number_of_nodes() > iso_level:
                H = nx.union(SG, H)
        G = H
    # Adjust graph for better presentation
    if different_size is True:
        L = nx.degree(G)
        G.dot_size = {}
        for k, v in L.items():
            G.dot_size[k] = v
        node_size = [G.dot_size[v] * 10 for v in G]
    pos = nx.spring_layout(G, iterations=50)
    nx.draw_networkx_edges(G, pos, alpha=0.2)
    nx.draw_networkx_nodes(G, pos, node_size=node_size, node_color='r', alpha=0.3)
    # Judge whether shows label
    if label_flag is True:
        nx.draw_networkx_labels(G, pos, alpha=0.5)
    #nx.draw_graphviz(G)
    plt.show()

    return G
Example #31
import networkx as nx
import network_attack as na

graph = nx.erdos_renyi_graph(n=300, p=0.1)
print("the total number of edges:")
n_edges = nx.number_of_edges(graph)
print(n_edges)
print("the total number of nodes:")
n_nodes = nx.number_of_nodes(graph)
print(n_nodes)
n_cc = nx.number_connected_components(graph)
print("the total number of connected components:")
print(n_cc)
print("the density of the graph:")
print(nx.diameter(graph))
avg_deg = sum([d for (n, d) in nx.degree(graph)]) / float(graph.number_of_nodes())
print("the average degree is " + str(avg_deg))

closeness_centrality = nx.closeness_centrality
pagerank_centrality = nx.pagerank
betweenness_centrality = nx.betweenness_centrality

#   GCC ATTACK
clo_gcc_att = na.gcc_attack(graph, closeness_centrality)
pgr_gcc_att = na.gcc_attack(graph, pagerank_centrality)
bet_gcc_att = na.gcc_attack(graph, betweenness_centrality)
rnd_gcc = na.rnd_gcc_attack(graph, 1)
na.attack_measures_plot("THe Giant Component Component Size", clo_gcc_att, pgr_gcc_att, bet_gcc_att, rnd_gcc)

#   Diameter ATTACK
clo_dia_att = na.diameter_attack(graph, closeness_centrality)
Example #32 (graph.py)
import networkx as nx
import matplotlib.pyplot as plt
from random import random

g = nx.read_edgelist('GraphData.txt',create_using=nx.DiGraph(),nodetype=int)

print nx.info(g)

d = nx.degree(g) #for node sizes

colors = [(random(), random(), random()) for _i in range(10)] #for different node colors

nx.draw(g,nx.random_layout(g),with_labels=True,node_size=[v * 300 for v in d.values()], node_color=colors,alpha=0.7)

plt.show()

Example #33
nx.draw_networkx_nodes(G, pos, alpha = 0.6, node_size = 350)
# edges
nx.draw_networkx_edges(G, pos, edgelist = elarge,
                      width = 2, alpha = 0.9, edge_color = 'g')
nx.draw_networkx_edges(G, pos, edgelist = emidle,
                      width = 1.5, alpha = 0.6, edge_color = 'y')
nx.draw_networkx_edges(G, pos, edgelist = esmall,
                      width = 1, alpha = 0.3, edge_color = 'b', style = 'dashed')
# labels
nx.draw_networkx_labels(G, pos, font_size = 10)
plt.axis('off')
plt.title("《红楼梦》社交网络")
plt.show()

# compute each node's degree
Gdegree = nx.degree(G)
Gdegree = dict(Gdegree)
Gdegree = pd.DataFrame({"name": list(Gdegree.keys()),
                       "degree": list(Gdegree.values())})
Gdegree.sort_values(by = "degree", ascending = False).plot(x = "name",
                                                          y = "degree",
                                                          kind = "bar",
                                                          figsize = (12, 6),
                                                          legend = False)
plt.xticks(fontproperties = font, size = 5)
plt.ylabel("degree")
plt.show()

plt.figure(figsize = (12, 12))
# build the social network graph
G = nx.Graph()
Example #34
def Find_CNP(G, mixed_label = False):
    #Find all component of G
    G_components = list(nx.connected_components(G))
    G_temp = G.copy()
    edge_cut = []
    while len(G_components) != 0:
        min_ratioN1 = float('inf')
        min_ratioN2 = float('inf')
        componentOfG = G.subgraph(G_components[0]).copy() #we get the first component and find cnp
        if len(componentOfG.nodes()) <= 3:
            del G_components[0]
            continue
        for v in G_components[0]:
            neighbor1 = list(componentOfG.neighbors(v))
            neighbor1.extend([v])
            induced_N1 = componentOfG.subgraph(neighbor1)
            #----------- Clustering Coefficient
            length_n1 = len(neighbor1)
            length_e1 = len(list(induced_N1.edges()))
            if length_n1==1 or length_n1==0:
                cc=0
            else:
                cc = 2*length_e1/(length_n1*(length_n1-1))
            #-----Calculating clustering coefficient is finished here!-------
            cutRatioN1_V4 = coherentCutRatio_V4(nx.degree(induced_N1),nx.degree(componentOfG,induced_N1.nodes()),cc)
            cutRatioN1_V1 = coherentCutRatio_V1(nx.degree(induced_N1),nx.degree(componentOfG,induced_N1.nodes()))
            cutRatioN1 = stat.mean([cutRatioN1_V4,cutRatioN1_V1])
            if cutRatioN1 < min_ratioN1:
                min_ratioN1 = cutRatioN1
                #calculate edge cut for this minimum cut ratio
                cnp_nodes1 = induced_N1.nodes()
                edge_cutN1 = edgeCutSet_V2(induced_N1,componentOfG)
                minN1_v4 = cutRatioN1_V4
                minN1_v1 = cutRatioN1_V1
            #Get neighbor2 for node v
            neighbor2 = neighbor1[:]
            [neighbor2.extend(list(componentOfG.neighbors(n))) for n in neighbor1]
            neighbor2 = list(set(neighbor2))
            induced_N2 = componentOfG.subgraph(neighbor2).copy()
            Complement_indN2 = nx.complement(induced_N2)
            if(not nx.is_connected(Complement_indN2)):#we find the CNP
                #----------- Clustering Coefficient
                length_n2 = len(neighbor2)
                length_e2 = len(list(induced_N2.edges()))
                if length_n2==1 or length_n2==0:
                    cc=0
                else:
                    cc = 2*length_e2/(length_n2*(length_n2-1))
                #-----Calculating clustering coefficient is finished here!-------
                cutRatioN2_V4 = coherentCutRatio_V4(nx.degree(induced_N2),nx.degree(componentOfG,induced_N2.nodes()),cc)
                cutRatioN2_V1 = coherentCutRatio_V1(nx.degree(induced_N2),nx.degree(componentOfG,induced_N2.nodes()))
                cutRatioN2 = stat.mean([cutRatioN2_V4,cutRatioN2_V1])
                if cutRatioN2 < min_ratioN2:
                    min_ratioN2 = cutRatioN2
                    #calculate edge cut for this minimum cut ratio
                    cnp_nodes2 = induced_N2.nodes()
                    edge_cutN2 = edgeCutSet_V2(induced_N2,componentOfG)
                    minN2_v4 = cutRatioN2_V4
                    minN2_v1 = cutRatioN2_V1
            else:
                cut_node = my_cut_nodes(Complement_indN2, mixed_label)
                if (len(cut_node)==0):
                    print("for node ",v,"removing ",cut_node," is not enough and we need to remove more than K nodes")
                    return
                """"we may find differnt cut_node_set 
                    if we have more than one choose a set with minimum weight"""
                if (len(cut_node)>1): 
                    minweight = float('inf')
                    for n in cut_node:
                        """we calculate the score for each set seperately and then choose one with the minimum score"""
                        temp_N2 = induced_N2.copy()
                        temp_N2.remove_nodes_from(n)
                        #----------- Clustering Coefficient
                        length_n2 = len(temp_N2.nodes())
                        length_e2 = len(list(temp_N2.edges()))
                        if length_n2==1 or length_n2==0:
                            cc=0
                        else:
                            cc = 2*length_e2/(length_n2*(length_n2-1))
                        #-----Calculating clustering coefficient is finished here!-------
                        cutRatioN2_V4 = coherentCutRatio_V4(nx.degree(temp_N2),nx.degree(componentOfG,temp_N2.nodes()),cc)
                        cutRatioN2_V1 = coherentCutRatio_V1(nx.degree(temp_N2),nx.degree(componentOfG,temp_N2.nodes()))
                        temp_score = stat.mean([cutRatioN2_V4,cutRatioN2_V1])
                        if temp_score < minweight:
                            minweight = temp_score
                            minWeightNode = n
                    cut_node = minWeightNode
                else:
                    #flatten the cut node
                    cut_node = [node for sublist in cut_node for node in sublist]
                induced_N2.remove_nodes_from(cut_node)
                #----------- Clustering Coefficient
                length_n2 = len(induced_N2.nodes())
                length_e2 = len(list(induced_N2.edges()))
                if length_n2==1 or length_n2==0:
                    cc=0
                else:
                    cc = 2*length_e2/(length_n2*(length_n2-1))
                #-----Calculating clustering coefficient is finished here!-------
                cutRatioN2_V4 = coherentCutRatio_V4(nx.degree(induced_N2),nx.degree(componentOfG,induced_N2.nodes()),cc)
                cutRatioN2_V1 = coherentCutRatio_V1(nx.degree(induced_N2),nx.degree(componentOfG,induced_N2.nodes()))
                cutRatioN2 = stat.mean([cutRatioN2_V4,cutRatioN2_V1])
                if cutRatioN2 < min_ratioN2:
                    min_ratioN2 = cutRatioN2
                    #calculate edge cut for this minimum cut ratio
                    cnp_nodes2 = induced_N2.nodes()
                    edge_cutN2 = edgeCutSet_V2(induced_N2,componentOfG)
                    minN2_v4 = cutRatioN2_V4
                    minN2_v1 = cutRatioN2_V1
        if min_ratioN1 < min_ratioN2:
            edge_cut.append(edge_cutN1)
            cnp_nodes = cnp_nodes1
        else:
            edge_cut.append(edge_cutN2)
            cnp_nodes = cnp_nodes2
        G_temp.remove_nodes_from(cnp_nodes)
        G_components = list(nx.connected_components(G_temp))
    edge_cut = [edge for sublist in edge_cut for edge in sublist]
    if not mixed_label:
        edge_cut = list(set(tuple(sorted(x)) for x in edge_cut))
    else:
        sorted_x = []
        for i in range(len(edge_cut)):
            intList=sorted([i for i in edge_cut[i] if type(i) is int])
            strList=sorted([i for i in edge_cut[i] if type(i) is str])
            sorted_x.append(intList + strList) 
        edge_cut =  list(set(tuple(i) for i in (sorted_x)))
    return(edge_cut)
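The clustering-coefficient blocks repeated throughout Find_CNP all compute 2*E/(n*(n-1)), which for a simple graph is just the edge density of the induced subgraph; each block could collapse to a call like:

cc = nx.density(induced_N2)  # 2*E/(n*(n-1)); NetworkX returns 0 for n <= 1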
Example #35
for asin, metadata in amazonBooks.items():
    copurchaseGraph.add_node(asin)
    for a in metadata['Copurchased'].split():
        copurchaseGraph.add_node(a.strip())
        similarity = 0
        n1 = set((amazonBooks[asin]['Categories']).split())
        n2 = set((amazonBooks[a]['Categories']).split())
        n1In2 = n1 & n2
        n1Un2 = n1 | n2
        if (len(n1Un2)) > 0:
            similarity = round(len(n1In2) / len(n1Un2), 2)
        copurchaseGraph.add_edge(asin, a.strip(), weight=similarity)

# get degree centrality and clustering coefficients
# of each ASIN and add it to amazonBooks metadata
dc = networkx.degree(copurchaseGraph)
for asin in networkx.nodes(copurchaseGraph):
    metadata = amazonBooks[asin]
    metadata['DegreeCentrality'] = int(dc[asin])
    ego = networkx.ego_graph(copurchaseGraph, asin, radius=1)
    metadata['ClusteringCoeff'] = round(networkx.average_clustering(ego), 2)
    amazonBooks[asin] = metadata

# write amazonBooks data to file
# (all except copurchase data - because that data is now in the graph)
fhw = open('./amazon-books.txt', 'w', encoding='utf-8', errors='ignore')
fhw.write("Id\t" + "ASIN\t" + "Title\t" + "Categories\t" +
          "Group\t"  #+ "Copurchased\t" + 
          "SalesRank\t" + "TotalReviews\t" + "AvgRating\t"
          "DegreeCentrality\t" + "ClusteringCoeff\n")
for asin, metadata in amazonBooks.items():
Example #36
def cal_nei_sim(disease, edges, save_nei_sim=False):

    print("begin to calculate similarity based on neighbours...")

    G = nx.Graph()
    G.add_edges_from(edges)  # assemble a heterogeneous graph from several kinds of biological links

    print(
        "step 1: epsilon -> 2, calculate first degree sequence and second degree sequence..."
    )
    DegreeSequence1 = []
    DegreeSequence2 = []

    for di in disease:

        neighboursOne = G.neighbors(di)  # first-layer neighbours of the node
        degreeOfOne = []
        neighboursTwo = []
        for indexOfNeighbours in neighboursOne:
            degreeOfOne.append(nx.degree(G, indexOfNeighbours))  # record the degree of each first-layer neighbour
            neighboursTwo.extend(G.neighbors(indexOfNeighbours))  # gather the neighbours of the first-layer neighbours
        sortedDegreeOfOne = sorted(degreeOfOne)  # sort the first-layer degree sequence
        DegreeSequence1.append(sortedDegreeOfOne)

        neighboursTwo = set(neighboursTwo)
        neighboursTwo.remove(di)  # drop the node itself from its second-layer neighbours

        degreeOfTwo = []
        for indexOfNeighbours in neighboursTwo:
            degreeOfTwo.append(nx.degree(G, indexOfNeighbours))  # record the degree of each second-layer neighbour
        sortedDegreeOfTwo = sorted(degreeOfTwo)  # sort the second-layer degree sequence
        DegreeSequence2.append(sortedDegreeOfTwo)

    cores = multiprocessing.cpu_count()  # number of CPUs on this machine
    pool = multiprocessing.Pool(cores)  # build a worker pool
    print("step 2: compute neighbour_sim in parallel with {} cpus...".format(
        cores))

    # dispatch the pairwise first-layer comparisons to the pool
    resultsOne = [
        pool.apply_async(dtw_distance_fast,
                         (DegreeSequence1[i], DegreeSequence1[j]))
        for i in range(0, len(DegreeSequence1))
        for j in range(i + 1, len(DegreeSequence1))
    ]

    # store the pairwise first-layer degree-sequence distances in an array
    arrOne = np.zeros((len(DegreeSequence1), len(DegreeSequence1)))
    i = 0
    j = 1
    for r in resultsOne:
        if j == len(DegreeSequence1):
            i += 1
            j = i + 1
        arrOne[i][j] = float(r.get())
        j += 1

    # dispatch the pairwise second-layer comparisons to the pool
    resultsTwo = [
        pool.apply_async(dtw_distance_fast,
                         (DegreeSequence2[i], DegreeSequence2[j]))
        for i in range(0, len(DegreeSequence2))
        for j in range(i + 1, len(DegreeSequence2))
    ]

    # store the pairwise second-layer degree-sequence distances in an array
    arrTwo = np.zeros((len(DegreeSequence2), len(DegreeSequence2)))
    i = 0
    j = 1
    for r in resultsTwo:
        if j == len(DegreeSequence2):
            i += 1
            j = i + 1
        arrTwo[i][j] = float(r.get())
        j += 1

    # ----------------------------------------------------------------------------
    print("step 3: construct similarity matrix...")
    alpha = 0.5  # a decaying weight factor α in the range between 0 and 1
    NeiSim = {}
    sim_matrix = np.zeros((len(disease), len(disease)))
    for i in range(0, len(disease)):
        for j in range(i + 1, len(disease)):
            distance = math.pow(alpha, 1) * arrOne[i][j] + math.pow(
                alpha, 2) * arrTwo[i][j]
            NeiSim["{}\t{}".format(disease[i],
                                   disease[j])] = math.exp(-distance)
            sim_matrix[i][j] = math.exp(-distance)
            sim_matrix[j][i] = math.exp(-distance)

    if save_nei_sim:
        print("sort the path similarity and save...")
        res = sorted(NeiSim.items(), key=lambda x: x[1], reverse=True)
        FileUtil.writeSortedDic2File(res, "./nei_Sim.txt")

    return sim_matrix
Example #37
jet_grad = np.linspace(0, 1, 256)  # Jet gradient for Old->New
cbar_ax.imshow(np.vstack((jet_grad, jet_grad)), aspect='auto', cmap=cm.jet)

fig.savefig(SAVE_FILE_NAME_IN_OUT, dpi=300)
fig.savefig(SAVE_FILE_NAME_IN_OUT_PNG, dpi=300)

# plot clustering vs degree and nodal efficiency
fig, axs = plt.subplots(1,
                        2,
                        figsize=(8, 3.75),
                        tight_layout=True,
                        facecolor='white')

cc_full = nx.clustering(G.to_undirected())
deg_full = nx.degree(G.to_undirected())
cc = [cc_full[node] for node in nodes]
deg = [deg_full[node] for node in nodes]

# calculate nodal efficiency
G.efficiency_matrix = metrics_bd.efficiency_matrix(G)
nodal_efficiency = np.sum(G.efficiency_matrix, axis=1) / (len(G.nodes()) - 1)

labels = ('a', 'b')

axs[0].scatter(deg, cc, c=node_ages, cmap=cm.jet, lw=0)
axs[0].set_xlim(0, 150)
axs[0].set_ylim(-0.025, 1.025)
axs[0].set_xlabel('Degree')
axs[0].set_ylabel('Clustering coefficient')
axs[0].locator_params(axis='x', nbins=6)
Example #38
print(random_node)

print("Executing MHRW...")
sample = MHRW()
sample.mhrw(G, random_node, size)

print("Writing sample network...")
nx.write_edgelist(sample.G1,
                  "data/JS_sample_network_75.csv",
                  delimiter=",",
                  data=False)

G.clear()
G = sample.G1

DG = nx.degree(G)

num_nodes = 0
sum_degree = 0

for i in DG:
    num_nodes += 1
    sum_degree += i[1]

print("Grau da rede:", sum_degree)
print("Grau médio:", (sum_degree / num_nodes))
print("Nodes:", nx.number_of_nodes(G))
print("Edges:", nx.number_of_edges(G))
print("Density:", nx.density(G))

AC = nx.average_clustering(G)
Example #39
def _dissolve_adjacent(_target_graph: nx.Graph,
                       _parent_node_name: str,
                       _node_group: Union[set, list, tuple],
                       highest_degree=False) -> nx.Graph:
    # set the new centroid from the centroid of the node group's Multipoint:
    node_geoms = []
    if not highest_degree:
        for n_uid in _node_group:
            x = _target_graph.nodes[n_uid]['x']
            y = _target_graph.nodes[n_uid]['y']
            node_geoms.append(geometry.Point(x, y))
    # if by highest_degree, then find the centroid of the highest degree nodes
    else:
        highest_degree = 0
        for n_uid in _node_group:
            if n_uid in _target_graph:
                if nx.degree(_target_graph, n_uid) > highest_degree:
                    highest_degree = nx.degree(_target_graph, n_uid)

        # aggregate the highest degree nodes
        node_geoms = []
        for n_uid in _node_group:
            if n_uid not in _target_graph:
                continue
            if nx.degree(_target_graph, n_uid) != highest_degree:
                continue
            x = _target_graph.nodes[n_uid]['x']
            y = _target_graph.nodes[n_uid]['y']
            # append geom
            node_geoms.append(geometry.Point(x, y))

    # find the new centroid
    c = geometry.MultiPoint(node_geoms).centroid
    _target_graph.add_node(_parent_node_name, x=c.x, y=c.y)

    # remove old nodes and reassign to new parent node
    # first determine new edges
    new_edges = []
    for uid in _node_group:
        for nb_uid in nx.neighbors(_target_graph, uid):
            # drop geoms between merged nodes
            # watch for self-loop edge cases
            if uid in _node_group and nb_uid in _node_group and uid != nb_uid:
                continue
            else:
                if 'geom' not in _target_graph[uid][nb_uid]:
                    raise KeyError(
                        f'Missing "geom" attribute for edge {uid}-{nb_uid}')
                line_geom = _target_graph[uid][nb_uid]['geom']
                if line_geom.type != 'LineString':
                    raise TypeError(
                        f'Expecting LineString geometry but found {line_geom.type} geometry for edge {uid}-{nb_uid}.'
                    )
                # first orient geom in correct direction
                s_x = _target_graph.nodes[uid]['x']
                s_y = _target_graph.nodes[uid]['y']
                # check geom coordinates directionality - flip if facing backwards direction
                if not np.allclose(
                    (s_x, s_y), line_geom.coords[0][:2], atol=0.001, rtol=0):
                    line_geom = geometry.LineString(line_geom.coords[::-1])
                # double check that coordinates now face the forwards direction
                if not np.allclose(
                    (s_x, s_y), line_geom.coords[0][:2], atol=0.001, rtol=0):
                    raise ValueError(
                        f'Edge geometry endpoint coordinate mismatch for edge {uid}-{nb_uid}'
                    )
                # update geom starting point to new parent node's coordinates
                coords = list(line_geom.coords)
                coords[0] = (c.x, c.y)
                # if self-loop, then the end also needs updating
                if uid == nb_uid:
                    coords[-1] = (c.x, c.y)
                    target_uid = _parent_node_name
                else:
                    target_uid = nb_uid
                new_line_geom = geometry.LineString(coords)
                new_edges.append(
                    (_parent_node_name, target_uid, new_line_geom))
    # remove the nodes from the target graph, this will also implicitly drop related edges
    _target_graph.remove_nodes_from(_node_group)
    # add the edges
    for s, e, geom in new_edges:
        # when dealing with a collapsed linestring, this should be a rare occurrence
        if geom.length == 0:
            logger.warning(
                f'Encountered a geom of length 0m: check edge {s}-{e}.')
            continue
        # don't add edge duplicates from respectively merged nodes
        if (s, e) not in _target_graph.edges():
            _target_graph.add_edge(s, e, geom=geom)
        # however, do add if substantially different geom...
        else:
            diff = _target_graph[s][e]['geom'].length / geom.length
            if abs(diff) > 1.25:
                _target_graph.add_edge(s, e, geom=geom)

    return _target_graph
Example #40
def de_clip(filename, n_nodes, hinge_list, gt_file):

    n_iter = 5

    f = open(filename)
    line1 = f.readline()
    print line1
    f.close()

    extension = filename.split('.')[-1]

    if extension == 'graphml':
        g = input3(filename)
    elif len(line1.split()) != 2:
        g = input1(filename)
    else:
        g = input2(filename)

    print nx.info(g)
    degree_sequence = sorted(g.degree().values(), reverse=True)
    print Counter(degree_sequence)

    degree_sequence = sorted(nx.degree(g).values(), reverse=True)
    print Counter(degree_sequence)

    try:
        import ujson
        mapping = ujson.load(open(gt_file))

        print 'getting mapping'
        mapped_nodes = 0
        print str(len(mapping))
        print str(len(g.nodes()))
        for node in g.nodes():
            # print node
            node_base = node.split("_")[0]
            # print node_base

            #print node
            if mapping.has_key(node_base):
                g.node[node]['aln_start'] = min(mapping[node_base][0][0],
                                                mapping[node_base][0][1])
                g.node[node]['aln_end'] = max(mapping[node_base][0][1],
                                              mapping[node_base][0][0])
                g.node[node]['chr'] = mapping[node_base][0][2]
                mapped_nodes += 1
            else:
                # pass
                g.node[node]['aln_start'] = 0
                g.node[node]['aln_end'] = 0
                g.node[node]['aln_strand'] = 0

        for edge in g.edges_iter():
            in_node = edge[0]
            out_node = edge[1]
            if ((g.node[in_node]['aln_start'] < g.node[out_node]['aln_start']
                 and
                 g.node[out_node]['aln_start'] < g.node[in_node]['aln_end']) or
                (g.node[in_node]['aln_start'] < g.node[out_node]['aln_end'] and
                 g.node[out_node]['aln_end'] < g.node[in_node]['aln_end'])):
                g.edge[in_node][out_node]['false_positive'] = 0
            else:
                g.edge[in_node][out_node]['false_positive'] = 1

    except:
        raise
        # print "json "+filename.split('.')[0]+'.mapping.json'+" not found. exiting."

    print hinge_list

    print str(mapped_nodes) + " out of " + str(len(
        g.nodes())) + " nodes mapped."

    # for i in range(5):
    #     merge_simple_path(g)
    #     degree_sequence=sorted(nx.degree(g).values(),reverse=True)
    #     print Counter(degree_sequence)

    in_hinges = set()
    out_hinges = set()
    num_iter = 10000
    iter_done = 0
    if hinge_list != None:
        print "Found hinge list."
        with open(hinge_list, 'r') as f:
            for lines in f:
                lines1 = lines.split()

                if lines1[2] == '1':
                    in_hinges.add(lines1[0] + '_0')
                    out_hinges.add(lines1[0] + '_1')
                elif lines1[2] == '-1':
                    in_hinges.add(lines1[0] + '_1')
                    out_hinges.add(lines1[0] + '_0')

        print str(len(in_hinges)) + ' hinges found.'

        for node in g.nodes():
            if node in in_hinges and node in out_hinges:
                g.node[node]['hinge'] = 100
            elif node in in_hinges:
                g.node[node]['hinge'] = 10
            elif node in out_hinges:
                g.node[node]['hinge'] = -10
            else:
                g.node[node]['hinge'] = 0

        while len(g.nodes()) > n_nodes and iter_done < num_iter:
            node = g.nodes()[random.randrange(len(g.nodes()))]
            iter_done += 1
            # print iter_done
            if g.in_degree(node) == 1 and g.out_degree(node) == 1:

                base_node = node.split("_")[0]
                orientation = node.split("_")[1]
                # if orientation=='1':
                #     node2=base_node+'_0'
                # else:
                #     node2=base_node+'_1'

                # print node,node2

                in_node = g.in_edges(node)[0][0]
                out_node = g.out_edges(node)[0][1]

                if g.node[node]['hinge'] == 0 and g.node[in_node][
                        'hinge'] == 0 and g.node[out_node]['hinge'] == 0:
                    if g.out_degree(in_node) == 1 and g.in_degree(
                            out_node) == 1:
                        if in_node != node and out_node != node and in_node != out_node:
                            bad_node = False
                            # print g.in_edges(node)
                            # print g.edge[g.in_edges(node)[0][0]][g.in_edges(node)[0][1]]
                            # print g.out_edges(node)
                            for in_edge in g.in_edges(node):
                                if g.edge[in_edge[0]][
                                        in_edge[1]]['false_positive'] == 1:
                                    bad_node = True
                            for out_edge in g.out_edges(node):
                                if g.edge[out_edge[0]][
                                        out_edge[1]]['false_positive'] == 1:
                                    bad_node = True
                            if not bad_node:
                                #print in_node, node, out_node
                                merge_path(g, in_node, node, out_node)

                # print g.edge[edge1[0]][edge1[1]]['hinge_edge']

                for nd in g.nodes():
                    if len(nd.split("_")) == 1:
                        print nd + " in trouble"

    else:
        while len(g.nodes()) > n_nodes:

            node = g.nodes()[random.randrange(len(g.nodes()))]

            if g.in_degree(node) == 1 and g.out_degree(node) == 1:

                edge1 = g.out_edges(node)[0]
                edge2 = g.in_edges(node)[0]

                if (g.edge[edge1[0]][edge1[1]]['hinge_edge'] == -1
                        and g.edge[edge2[0]][edge2[1]]['hinge_edge'] == -1):

                    in_node = g.in_edges(node)[0][0]
                    out_node = g.out_edges(node)[0][1]
                    if g.out_degree(in_node) == 1 and g.in_degree(
                            out_node) == 1:
                        if in_node != node and out_node != node and in_node != out_node:
                            #print in_node, node, out_node
                            merge_path(g, in_node, node, out_node)

    degree_sequence = sorted(nx.degree(g).values(), reverse=True)
    print Counter(degree_sequence)

    nx.write_graphml(g, filename.split('.')[0] + '.sparse3.graphml')

    print nx.number_weakly_connected_components(g)
    print nx.number_strongly_connected_components(g)
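
# merge_path is defined elsewhere in this codebase; a plausible sketch
# (an assumption, not the author's implementation) of what it must do:
# contract a 1-in/1-out node into a single edge so the path is preserved.
def merge_path_sketch(g, in_node, node, out_node):
    # carry over the edge attributes the surrounding code inspects
    g.add_edge(in_node, out_node, hinge_edge=-1, false_positive=0)
    g.remove_node(node)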
Ejemplo n.º 41
def calculate_preferential_bias_on_node(network, node):
    # Laplace-smoothed preferential-attachment probability of `node`
    return float(nx.degree(network, node) + 1) / (
        sum(dict(nx.degree(network)).values()) + nx.number_of_nodes(network))
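

# A minimal sanity-check sketch (not from the original; assumes the module's
# networkx import): since sum over nodes of (d_i + 1) = sum(d_i) + N, the
# biases returned above always sum to one.
def _preferential_bias_sanity_check():
    G = nx.path_graph(4)  # degrees 1, 2, 2, 1
    biases = [calculate_preferential_bias_on_node(G, n) for n in G.nodes()]
    assert abs(sum(biases) - 1.0) < 1e-9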
def main():
    event_data = np.genfromtxt('data/events_US_air_traffic_GMT.txt',
                               names=True,
                               dtype=int)
    event_data.sort(order=['StartTime'])
    network = nx.read_weighted_edgelist(
        'data/aggregated_US_air_traffic_network_undir.edg')
    n_nodes = network.number_of_nodes()

    # creation of bins for the plots
    min_timestamp = min(event_data, key=lambda item: item["StartTime"])[2]
    max_timestamp = max(event_data, key=lambda item: item["EndTime"])[3]
    n_bins = 50
    bins = create_bins(min_timestamp, max_timestamp, n_bins)
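    # create_bins is defined elsewhere in this assignment; a plausible
    # implementation (an assumption) would return evenly spaced timestamp edges:
    #   def create_bins(t_min, t_max, n):
    #       return np.linspace(t_min, t_max, n + 1)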

    ######################################
    #               task 1               #
    ######################################
    print("-------------- TASK 1 --------------")
    infection_times, infection_list = infection_time(event_data, 1, 0)
    print("Node 41 infection time: " + str(infection_times['41']) + " (" +
          str(datetime.fromtimestamp(infection_times['41'])) + ")")

    # animation of the infection
    # visualize_si(np.array(infection_list), save_fname="./simulations/infection_simulation_prob1_seed0.mp4")

    ######################################
    #               task 2               #
    ######################################
    print("-------------- TASK 2 --------------")
    seed_node = 0
    infection_prob = [0.01, 0.05, 0.1, 0.5, 1.0]
    infection_times_list_avg = []
    infection_times_list_probs = []
    for prob in infection_prob:
        for i in range(10):
            _, infection_list = infection_time(event_data, prob, seed_node)
            infection_times_list_avg.append(infection_list)
        infection_times_list_probs.append(infection_times_list_avg)
        infection_times_list_avg = []

    plot_avg_prevalence_probs(infection_times_list_probs, infection_prob,
                              n_nodes, bins)

    ######################################
    #               task 3               #
    ######################################
    print("-------------- TASK 3 --------------")
    infection_prob = 0.1
    seed_nodes = [0, 4, 41, 100, 200]
    seed_nodes_labels = ['ABE', 'ATL', 'ACN', 'HSV', 'DBQ']

    infection_times_list_avg = []
    infection_times_list_nodes = []
    for seed_node in seed_nodes:
        for i in range(10):
            _, infection_list = infection_time(event_data, infection_prob,
                                               seed_node)
            infection_times_list_avg.append(infection_list)
        infection_times_list_nodes.append(infection_times_list_avg)
        infection_times_list_avg = []

    plot_avg_prevalence_nodes(infection_times_list_nodes, seed_nodes_labels,
                              n_nodes, bins)

    ######################################
    #               task 4               #
    ######################################
    print("-------------- TASK 4 --------------")
    # ----- task 4 and 5 ----- #
    clustering_coefficient_net = nx.clustering(network)
    degree_net = nx.degree(network)
    strength_net = nx.degree(network, weight="weight")
    betweenness_centrality_net = nx.betweenness_centrality(network)
    # ------------------------ #

    infection_prob = 0.5
    infection_times_list = []
    for i in range(50):
        seed_node = random.randrange(n_nodes)  # node ids run 0..n_nodes-1
        infection_times, _ = infection_time(event_data, infection_prob,
                                            seed_node)
        infection_times_list.append(infection_times)

    infection_times_df = pd.DataFrame(infection_times_list)
    infection_times_median = dict(infection_times_df.median())

    plot_and_spearman_task4(infection_times_median, clustering_coefficient_net,
                            degree_net, strength_net,
                            betweenness_centrality_net, n_nodes)

    ######################################
    #               task 5               #
    ######################################
    print("-------------- TASK 5 --------------")
    # nodes immunized
    imm_neighbour = []
    range_nodes = set(range(0, n_nodes))
    while len(imm_neighbour) < 10:
        rand_node = random.choice(list(range_nodes))
        rand_neighbour = random.choice(list(network.neighbors(str(rand_node))))
        if (int(rand_neighbour) not in imm_neighbour):
            imm_neighbour.append(int(rand_neighbour))

    imm_random_node = []
    range_nodes = set(range(0, n_nodes))
    for i in range(10):
        rand_node = random.choice(list(range_nodes))
        imm_random_node.append(rand_node)
        range_nodes.remove(rand_node)

    imm_clustering_coefficient = []
    d = Counter(clustering_coefficient_net)
    for k, _ in d.most_common(10):
        imm_clustering_coefficient.append(int(k))

    imm_degree = []
    highest_degree = sorted(degree_net, key=lambda x: x[1], reverse=True)[:10]
    for k, _ in highest_degree:
        imm_degree.append(int(k))

    imm_strength = []
    highest_strength = sorted(strength_net, key=lambda x: x[1],
                              reverse=True)[:10]
    for k, _ in highest_strength:
        imm_strength.append(int(k))

    imm_betweenness_centrality = []
    d = Counter(betweenness_centrality_net)
    for k, _ in d.most_common(10):
        imm_betweenness_centrality.append(int(k))

    # create a set of all the immunized nodes
    imm_nodes = set(imm_neighbour) | set(imm_random_node) | set(
        imm_clustering_coefficient) | set(imm_degree) | set(
            imm_strength) | set(imm_betweenness_centrality)
    range_seed = set(range(0, n_nodes)) - imm_nodes

    # extract the seed nodes from a set of nodes not part of the immunized ones
    seed_nodes = []

    for i in range(20):
        rand_seed = random.choice(list(range_seed))
        seed_nodes.append(rand_seed)
        range_seed.remove(rand_seed)

    immunized_nodes_list = []
    immunized_nodes_list.append(imm_neighbour)
    immunized_nodes_list.append(imm_random_node)
    immunized_nodes_list.append(imm_clustering_coefficient)
    immunized_nodes_list.append(imm_degree)
    immunized_nodes_list.append(imm_strength)
    immunized_nodes_list.append(imm_betweenness_centrality)
    immunization_strategy_labels = [
        'random neighbour', 'random node', 'clustering coefficient', 'degree',
        'strength', 'betweenness centrality'
    ]
    infection_prob = 0.5

    infection_times_list_avg = []
    infection_times_list_immunization = []
    for immunized_nodes, imm_strategy in zip(immunized_nodes_list,
                                             immunization_strategy_labels):
        print("Calculating " + imm_strategy)
        for seed_node in seed_nodes:
            _, infection_list = infection_time(event_data, infection_prob,
                                               seed_node, immunized_nodes)
            infection_times_list_avg.append(infection_list)
        infection_times_list_immunization.append(infection_times_list_avg)
        infection_times_list_avg = []

    plot_avg_prevalence_immunization(infection_times_list_immunization,
                                     immunization_strategy_labels, n_nodes,
                                     bins)

    ######################################
    #               task 6               #
    ######################################
    print("-------------- TASK 6 --------------")
    id_data = np.genfromtxt('data/US_airport_id_info.csv',
                            delimiter=',',
                            dtype=None,
                            names=True,
                            encoding=None)
    xycoords = {}
    for row in id_data:
        xycoords[str(row['id'])] = (row['xcoordviz'], row['ycoordviz'])

    edge_list = []
    for edge in network.edges():
        if int(edge[0]) > int(edge[1]):
            edge = (edge[1], edge[0])
        edge_list.append(edge)  # edge_list created to maintain the right order

    infection_prob = 0.5
    infecting_edges_fraction = []
    for i in range(20):
        seed_node = random.randrange(n_nodes)  # node ids run 0..n_nodes-1
        infecting_edges = infection_edges(event_data, infection_prob,
                                          seed_node, edge_list)
        infecting_edges_fraction.append(infecting_edges)

    # calculation of the fraction of times that each link is used for infecting the disease from the results of 20 runs
    infecting_edges_fraction = (np.sum(np.array(infecting_edges_fraction), 0) /
                                20).tolist()

    # print Transmission links - fraction
    fig, ax = plot_network_usa(network,
                               xycoords,
                               edges=edge_list,
                               linewidths=infecting_edges_fraction)

    plt.suptitle(r'Transmission links ($f_{ij}$)')

    fig.savefig("./plots/t6_map_fraction.pdf")

    # print Transmission links - mst
    maximum_spanning_tree = nx.maximum_spanning_tree(network)
    fig, ax = plot_network_usa(maximum_spanning_tree,
                               xycoords,
                               edges=list(maximum_spanning_tree.edges))

    plt.suptitle(r'Transmission links (maximal spanning tree)')

    fig.savefig("./plots/t6_map_mst.pdf")

    link_weights = nx.get_edge_attributes(network, 'weight')
    link_betweenness_centrality = nx.edge_betweenness_centrality(network)

    # ordered lists (following the order of edge_list)
    link_weights_list = []
    link_betweenness_centrality_list = []
    for edge in edge_list:
        if edge in link_weights:
            link_weights_list.append(link_weights[edge])
        else:
            link_weights_list.append(link_weights[(edge[1], edge[0])])
        if edge in link_betweenness_centrality:
            link_betweenness_centrality_list.append(
                link_betweenness_centrality[edge])
        else:
            link_betweenness_centrality_list.append(
                link_betweenness_centrality[(edge[1], edge[0])])

    # scatter plot of the transmission fraction as a function of the link weight
    fig, ax = plot_scatterplot(link_weights_list, infecting_edges_fraction)
    plt.suptitle(r'Transmission fraction as a function of the link weight')
    ax.set_xlabel(r'link weight $w_{ij}$')
    ax.set_ylabel(r'transmission fraction $f_{ij}$')
    fig.savefig("./plots/t6_scatter_weight.pdf")

    # scatter plot of the transmission fraction as a function of the link betweenness centrality
    fig, ax = plot_scatterplot(link_betweenness_centrality_list,
                               infecting_edges_fraction)
    plt.suptitle(
        r'Transmission fraction as a function of the link betweenness centrality'
    )
    ax.set_xlabel(r'unweighted link betweenness centrality $eb_{ij}$')
    ax.set_ylabel(r'transmission fraction $f_{ij}$')
    fig.savefig("./plots/t6_scatter_bet_centr.pdf")

    # Spearman rank-correlation coefficient
    print(
        "Spearman rank-correlation coefficient between transmission fraction and: "
    )
    print("- link weight: " + str(
        spearmanr(link_weights_list, infecting_edges_fraction).correlation))
    print("- betweenness centrality: " + str(
        spearmanr(link_betweenness_centrality_list,
                  infecting_edges_fraction).correlation))
Ejemplo n.º 43
                #print(most_corelated_taxon[i][0], most_corelated_taxon[j][0])
                if not G.has_edge(i + 1, j + 1):
                    G.add_edge(i + 1, j + 1)
                #print(scipy.stats.spearmanr(predicted_data.loc[:, most_corelated_taxon[i][0]], predicted_data.loc[:,most_corelated_taxon[j][0]])[1],
                #     scipy.stats.spearmanr(predicted_data.loc[:, most_corelated_taxon[i][0]],
                #                          predicted_data.loc[:, most_corelated_taxon[j][0]])[0])

#nx.draw(G,  with_labels = True)
#print(nx.connected_components(G))
#print(nx.degree(G))
#print(sorted(i[1] for i in nx.degree(G)))
#print(nx.clustering(G))
#plt.show()
#print(G)
rel_dict = []  # labels of the taxa that appear as nodes in the network
for i in nx.degree(G):
    #if i[1] != 0:
    print(i)
    print(labeldict[i[0]])
    rel_dict.append(labeldict[i[0]])

new_data = preproccessed_data[rel_dict]
#visualize_pca(new_data)
otu_after_pca, _ = apply_pca(new_data, n_components=2)
merged_data = otu_after_pca.join(OtuMf.mapping_file['DiagnosisGroup'])

merged_data = merged_data.fillna(0)

mapping_disease_for_labels = {
    'Control': 0,
    'Cirrhosis/HCC': 1,
for node in nx.nodes(G):
    for tup in G.node[node]['conferences']:
        try:
            if tup[0] != conf:
                G2a.remove_node(node)
        except:
            continue
print("Done!")

# In[28]:

nx.info(G2a)

# In[41]:
'''Compute the degree for each node, i.e. the number of edges incident to it.'''
dg_values = [d for _, d in nx.degree(G2a)]  # keep just the degree values

# In[55]:

print(min(dg_values))
print(max(dg_values))

# In[44]:

#Degree histogram
sns.set_style("darkgrid")
sns.set_context({"figure.figsize": (6, 4)})
fig, ax = plt.subplots()
sns.distplot(dg_values, color="dodgerblue", bins=8, hist=True, kde=False)
plt.xlabel("Degree", fontsize=12)
plt.ylabel("Node frequences", fontsize=12)
Ejemplo n.º 45
def process(graph):
    if (not isinstance(graph, networkx.Graph)):
        raise ValueError(
            "invalid object; must be a NetworkX Graph class or subclass")

    if (isinstance(graph, networkx.DiGraph)):
        node_to_neighbors = lambda node: list(graph.successors(node)) + list(
            graph.predecessors(node))
    else:
        node_to_neighbors = lambda node: list(graph.neighbors(node))

    node_to_degree = networkx.degree(graph)

    # phase 1: node assignment
    node_to_row, row_to_node = {}, []

    def node_to_row_(node):
        if (node in node_to_row):
            return node_to_row[node]
        row_idx = len(node_to_row)
        node_to_row[node] = row_idx
        row_to_node.append(node)
        return row_idx

    while True:
        # find the node with highest degree that has not been assigned a row
        hub, hub_degree = _key_with_max_value(node_to_degree, node_to_row)
        if (hub is None):
            break

        # add it as a new row
        node_to_row_(hub)

        # list its direct neighbors by decreasing degree
        hub_neighbors = reversed(
            sorted(node_to_neighbors(hub),
                   key=lambda node: node_to_degree[node]))

        # add them as new rows, if not assigned already
        for node in hub_neighbors:
            node_to_row_(node)

    assert len(node_to_row) == graph.number_of_nodes()  ###

    # phase 2: edge assignment
    edge_to_col, col_to_edge = {}, []

    def edge_to_col_(edge):
        if (edge in edge_to_col):
            return edge_to_col[edge]
        if (_order(*edge) in edge_to_col):
            return edge_to_col[_order(*edge)]
        edge_idx = len(edge_to_col)
        edge_to_col[edge] = edge_idx
        col_to_edge.append(edge)
        return edge_idx

    min_col, max_col = {}, {}
    for i, node_a in enumerate(row_to_node):
        for node_b in row_to_node[i + 1:]:
            if graph.has_edge(node_a, node_b):
                edge_idx = edge_to_col_((node_a, node_b))
                for node in (node_a, node_b):
                    min_col[node] = min(min_col.get(node, POS_INF), edge_idx)
                    max_col[node] = max(max_col.get(node, NEG_INF), edge_idx)

    assert len(edge_to_col) == graph.number_of_edges()  ###

    return (row_to_node, node_to_row), (col_to_edge,
                                        edge_to_col), min_col, max_col
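
# The snippet above relies on module-level helpers that are not shown.
# Plausible implementations (assumptions inferred from the calls) might be:
POS_INF, NEG_INF = float("inf"), float("-inf")

def _order(node_a, node_b):
    # canonical (sorted) key for an undirected edge
    return (node_a, node_b) if node_a <= node_b else (node_b, node_a)

def _key_with_max_value(mapping, excluded):
    # highest-degree node not yet assigned a row; (None, None) when exhausted
    best_key, best_value = None, None
    for key, value in dict(mapping).items():
        if key in excluded:
            continue
        if best_value is None or value > best_value:
            best_key, best_value = key, value
    return best_key, best_value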
Ejemplo n.º 46
def select_observers(network, strategy, proportion=0.1, trade_off=0.1):
    """
    :param network: a NetworkX graph
    :param strategy: one of
        "max_degree":
        "min_degree":
        "max_k_shell":
        "min_k_shell":
        "max_betweenness":
        "min_betweenness":
        "max_closeness":
        "min_closeness":
        "random":
    :param proportion: fraction of nodes to select as observers
    :param trade_off: mixing ratio between max_degree and min_degree
        (currently unused)
    :return: list of observer nodes
    """
    observer_nodes_size = int(nx.number_of_nodes(G=network) * proportion)
    observers = []
    if strategy == "max_degree":
        degree_dict = dict(nx.degree(network))
        degree_sorted_by_value = sorted(degree_dict.items(),
                                        key=lambda x: x[1],
                                        reverse=True)
        observers = [x[0]
                     for x in degree_sorted_by_value][:observer_nodes_size]
    elif strategy == "min_degree":
        degree_dict = dict(nx.degree(network))
        degree_sorted_by_value = sorted(degree_dict.items(),
                                        key=lambda x: x[1])
        observers = [x[0]
                     for x in degree_sorted_by_value][:observer_nodes_size]
    elif strategy == "max_k_shell":
        k_core_dict = dict(nx.core_number(G=network))
        k_core_sorted = sorted(k_core_dict.items(),
                               key=lambda x: x[1],
                               reverse=True)
        observers = [x[0] for x in k_core_sorted][:observer_nodes_size]
    elif strategy == "min_k_shell":
        k_core_dict = dict(nx.core_number(G=network))
        k_core_sorted = sorted(k_core_dict.items(), key=lambda x: x[1])
        observers = [x[0] for x in k_core_sorted][:observer_nodes_size]
    elif strategy == "max_betweenness":
        between_centrality = dict(nx.betweenness_centrality(G=network))
        between_centrality_sorted = sorted(between_centrality.items(),
                                           key=lambda x: x[1],
                                           reverse=True)
        observers = [x[0]
                     for x in between_centrality_sorted][:observer_nodes_size]
    elif strategy == "min_betweenness":
        between_centrality = dict(nx.betweenness_centrality(G=network))
        between_centrality_sorted = sorted(between_centrality.items(),
                                           key=lambda x: x[1])
        observers = [x[0]
                     for x in between_centrality_sorted][:observer_nodes_size]
    elif strategy == "max_closeness":
        closeness_centrality = dict(nx.closeness_centrality(G=network))
        closeness_centrality_sorted = sorted(closeness_centrality.items(),
                                             key=lambda x: x[1],
                                             reverse=True)
        observers = [x[0] for x in closeness_centrality_sorted
                     ][:observer_nodes_size]
    elif strategy == "min_closeness":
        closeness_centrality = dict(nx.closeness_centrality(G=network))
        closeness_centrality_sorted = sorted(closeness_centrality.items(),
                                             key=lambda x: x[1])
        observers = [x[0] for x in closeness_centrality_sorted
                     ][:observer_nodes_size]
    elif strategy == "random":
        observers = np.random.randint(0, nx.number_of_nodes(network),
                                      observer_nodes_size)

    return observers
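
# A minimal usage sketch (the karate-club graph and the 20% proportion are
# hypothetical; assumes this module's own networkx import):
if __name__ == "__main__":
    demo_graph = nx.karate_club_graph()
    print(select_observers(demo_graph, "max_degree", proportion=0.2))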
Ejemplo n.º 47
def fullSizeGraph(request):
    import pandas as pd
    import networkx
    import matplotlib.pyplot as plt
    import numpy as np

    df_enron = filterDataByTime(pd.read_csv(request.FILES['csv_data']))

    #from bokeh.io import output_notebook, show, save
    from bokeh.models import Range1d, Circle, ColumnDataSource, MultiLine
    from bokeh.plotting import figure
    from bokeh.models.graphs import from_networkx
    from bokeh.palettes import Category10
    from bokeh.transform import linear_cmap
    from bokeh.embed import json_item

    #output_notebook() #remove this when not using notebook

    G = networkx.from_pandas_edgelist(df_enron,
                                      'fromId',
                                      'toId',
                                      edge_attr=True)

    di = {
        'CEO': 1,
        'Director': 2,
        'Employee': 3,
        'In House Lawyer': 4,
        'Manager': 5,
        'Managing Director': 6,
        'President': 7,
        'Trader': 8,
        'Unknown': 9,
        'Vice President': 10
    }
    df_rejob = df_enron.replace({"fromJobtitle": di})
    df_attributes = df_enron[['fromId', 'fromJobtitle']].drop_duplicates()
    df_attributes.columns = ['fromId', 'job']
    df_attributesx = df_rejob[['fromId', 'fromJobtitle']].drop_duplicates()
    job = df_attributes.set_index('fromId').to_dict('index')
    jobx = df_attributesx.set_index('fromId').to_dict('index')
    networkx.set_node_attributes(G, job)
    networkx.set_node_attributes(G, jobx)
    #jobs = ['Employee','Vice President','Unknown','Manager','CEO','Trader','Director','President','Managing Director','In House Lawyer']

    degrees = dict(networkx.degree(G))
    networkx.set_node_attributes(G, name='degree', values=degrees)
    adjusted_node_size = dict([(node, (degree + 5) - ((degree + 5) * 0.3))
                               for node, degree in networkx.degree(G)])
    networkx.set_node_attributes(G,
                                 name='adjusted_node_size',
                                 values=adjusted_node_size)

    size_by_this_attribute = 'adjusted_node_size'
    color_by_this_attribute = 'fromJobtitle'

    color_palette = Category10[10]

    TOOLTIPS = [
        ("Person ID", "@index"),
        ("people communicated with", "@degree"),
        ("Jobtitle", "@job"),
    ]

    plot = figure(tooltips=TOOLTIPS,
                  tools="pan,zoom_in,wheel_zoom,save,reset,box_select,undo",
                  active_scroll='wheel_zoom',
                  x_range=Range1d(-20, 20),
                  y_range=Range1d(-20, 20),
                  title='Enron Emails',
                  plot_width=950,
                  plot_height=950)
    plot.axis.visible = False

    N_graph = from_networkx(G, networkx.spring_layout, scale=100)

    N_graph.node_renderer.glyph = Circle(size=size_by_this_attribute,
                                         fill_color=linear_cmap(
                                             color_by_this_attribute,
                                             color_palette, 1, 10))

    N_graph.edge_renderer.glyph = MultiLine(line_alpha=1.0, line_width=1)  # alpha must lie in [0, 1]

    plot.renderers.append(N_graph)

    item_text = json.dumps(json_item(plot))

    return django.http.JsonResponse(item_text, safe=False)
Ejemplo n.º 48
def make_interactive_network(G,
                             labels=False,
                             title='My Network',
                             color_palette=Blues8,
                             node_size='degree',
                             node_color='modularity_class'):

    from networkx.algorithms import community

    # Get network info
    degrees = dict(networkx.degree(G))
    networkx.set_node_attributes(G, name='degree', values=degrees)
    betweenness_centrality = networkx.betweenness_centrality(G)
    networkx.set_node_attributes(G,
                                 name='betweenness',
                                 values=betweenness_centrality)
    communities = community.greedy_modularity_communities(G)

    # Create empty dictionaries
    modularity_color = {}
    modularity_class = {}
    #Loop through each community in the network
    for community_number, community in enumerate(communities):
        #For each member of the community, add their community number and a distinct color
        for name in community:
            modularity_color[name] = Spectral8[community_number]  # assumes at most 8 communities
            modularity_class[name] = community_number

    networkx.set_node_attributes(G, modularity_color, 'modularity_color')
    networkx.set_node_attributes(G, modularity_class, 'modularity_class')

    #Choose colors for node and edge highlighting
    node_highlight_color = node_color
    edge_highlight_color = 'black'

    #Choose attributes from G network to size and color by — setting manual size (e.g. 10) or color (e.g. 'skyblue') also allowed

    #Pick a color palette — Blues8, Reds8, Purples8, Oranges8, Viridis8
    #color_palette = Blues8


    #Establish which categories will appear when hovering over each node
    HOVER_TOOLTIPS = [
        ("Character", "@index"),
        ("Degree", "@degree"),
        ("Modularity Class", "@modularity_class"),
        ("Modularity Color", "$color[swatch]:modularity_color"),
    ]

    #Create a plot — set dimensions, toolbar, and title
    plot = figure(tooltips=HOVER_TOOLTIPS,
                  tools="pan,wheel_zoom,save,reset",
                  active_scroll='wheel_zoom',
                  x_range=Range1d(-10.1, 10.1),
                  y_range=Range1d(-10.1, 10.1),
                  title=title)

    #Create a network graph object
    # https://networkx.github.io/documentation/networkx-1.9/reference/generated/networkx.drawing.layout.spring_layout.html
    network_graph = from_networkx(G,
                                  networkx.spring_layout,
                                  scale=10,
                                  center=(0, 0))

    #Set node sizes and colors according to node degree (color as category from attribute)
    if node_color == 'degree':
        #Set node sizes and colors according to node degree (color as spectrum of color palette)
        minimum_value_color = min(
            network_graph.node_renderer.data_source.data[node_color])
        maximum_value_color = max(
            network_graph.node_renderer.data_source.data[node_color])
        network_graph.node_renderer.glyph = Circle(
            size=node_size,
            fill_color=linear_cmap(node_color, color_palette,
                                   minimum_value_color, maximum_value_color))
    elif node_color == 'modularity_color':
        #node_color='modularity_color'
        network_graph.node_renderer.glyph = Circle(size=node_size,
                                                   fill_color=node_color)
        #Set node highlight colors
        network_graph.node_renderer.hover_glyph = Circle(
            size=node_size, fill_color=node_highlight_color, line_width=2)
        network_graph.node_renderer.selection_glyph = Circle(
            size=node_size, fill_color=node_highlight_color, line_width=2)

    #Set edge opacity and width
    network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.3, line_width=1)
    #Set edge highlight colors
    network_graph.edge_renderer.selection_glyph = MultiLine(
        line_color=edge_highlight_color, line_width=2)
    network_graph.edge_renderer.hover_glyph = MultiLine(
        line_color=edge_highlight_color, line_width=2)

    #Highlight nodes and edges
    network_graph.selection_policy = NodesAndLinkedEdges()
    network_graph.inspection_policy = NodesAndLinkedEdges()

    plot.renderers.append(network_graph)

    if labels:
        #Add Labels
        x, y = zip(*network_graph.layout_provider.graph_layout.values())
        node_labels = list(G.nodes())
        source = ColumnDataSource({
            'x':
            x,
            'y':
            y,
            'name': [node_labels[i] for i in range(len(x))]
        })
        labels = LabelSet(x='x',
                          y='y',
                          text='name',
                          source=source,
                          background_fill_color='white',
                          text_font_size='10px',
                          background_fill_alpha=.7)
        plot.renderers.append(labels)

    show(plot)


#save(plot, filename=f"{title}.html")
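
# A hedged usage sketch (palette and Bokeh objects are assumed to be imported
# at module level, as in the original notebook):
#   import networkx
#   G = networkx.karate_club_graph()
#   make_interactive_network(G, labels=True, title='Karate Club')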
Ejemplo n.º 49
def build(Time, dis, window, in_case):

    location = '/Users/Abduljaleel/Desktop/project/USA/' + Time + '/' + dis + '/'
    location_Time = '/Users/Abduljaleel/Desktop/project/USA/' + Time + '/'
    st = open(location_Time + 'log_where.txt', 'a')
    st.write(str(dis + '_' + Time) + '\n')
    db = '/Users/Abduljaleel/Desktop/project/USA/SQLite/USA_' + dis
    ddb = sqlite3.connect(db)
    cc = ddb.cursor()

    cc.execute("SELECT * FROM NODES order by date")
    ddb.commit()
    results = cc.fetchall()
    total = len(results)

    limit_1 = firstdate(ddb, cc)
    window_time = add_time(limit_1, window)
    limit_2 = add_time(limit_1, in_case)
    last_tweet_date = lastdate(ddb, cc)

    start = time.time()
    w = 1

    while True:
        cc.execute("select * from nodes where date between " + str(limit_1) +
                   " and " + str(limit_2) + " order by date")
        ddb.commit()
        rs = cc.fetchall()
        aa = []
        for i in range(0, len(rs)):
            aa.append(rs[i][0])

        # w = loop(Net1,aa,w)
        w = fib3.loop(Net1, aa, w)

        limit_2 = add_time(limit_2, in_case)

        if limit_2 > window_time:
            break
    limit_1 = next_event(limit_1, ddb, cc)

    w = 1
    while True:
        cc.execute("select * from nodes where date between " + str(limit_1) +
                   " and " + str(limit_2) + " order by date")
        ddb.commit()
        rs = cc.fetchall()
        aa = []

        try:
            # rough progress report: first row id as a percentage of all rows
            d = int(rs[0][0])
            e = float(d * 100) / total
            print int(e)
        except (IndexError, ValueError):
            pass  # empty window, nothing to report

        for i in range(0, len(rs)):
            aa.append(rs[i][0])

        # w = loop(Net1,aa,w)
        w = fib3.loop(Net1, aa, w)

        limit_1 = add_time(limit_1, in_case)
        limit_2 = add_time(limit_1, window)

        if limit_1 >= last_tweet_date:
            break

    print 'finished creating'

    threshold = w * 0.79
    H = nx.Graph([(u, v, d) for (u, v, d) in Net1.edges_iter(data=True)
                  if d['weight'] > threshold])

    print '------degree start------'
    deg = open(location + 'degree.txt', 'w')
    DD = open(location + 'DD.txt', 'w')

    data = []
    DDD = H.degree()
    for s in DDD:
        deg.write(str(nx.degree(H, s)) + "\n")
        data.append(nx.degree(H, s))

    for i in range(0, max(data)):
        j = i + 1
        count = 0
        for k in range(0, len(data)):
            if data[k] == j:
                count += 1
        DD.write(str(j) + '\t' + str(count) + '\n')
    print '------degree finished------'

    # st = open(location+'statistics.txt', 'a')
    # print 'cluster start'
    # cluster = nx.average_clustering(Net2)
    # print 'pl start'
    # pl = Net22.average_path_length()
    # n=0
    # summ=0
    # for g in nx.connected_component_subgraphs(Net2):
    #     summ+=float(nx.average_shortest_path_length(g))
    #     n+=1
    # summ = float(summ)/n
    # st.write('cluster  :'+str(cluster)+'\n')
    # st.write('path_Len :'+str(summ)+'\n')

    print '------Writing Graphml start------'
    nx.write_graphml(H, location + dis + "_graph.graphml")

    sts = open(location_Time + 'log_sec.txt', 'a')
    end = time.time() - start
    sts.write(str(dis + '_' + Time) + '\t' + str(end) + '\n')
Ejemplo n.º 50
def read_a_graph(path):
    print path
    graph_type = str(path[-4:])
    onlyfiles = [f for f in listdir(path) if isfile(join(path, f))]
    # print onlyfiles
    all_graphs = {}
    for f in onlyfiles:
        G = nx.read_edgelist(path + '/' + f, delimiter=" ")
        print "---------" + f + "----------"
        print "BEFORE"
        print "nodes:", len(G)
        print "edges:", G.number_of_edges()
        if G.has_node('-1'):
            G.remove_node('-1')
        print "AFTER"
        print "nodes:", len(G)
        print "edges:", G.number_of_edges()
        if G.number_of_edges() >= (len(G) / 4):
            print "TAKE THIS ONE: ", "-", type, "-", f
        ###########CHECK HOW MANY ACCOUNTS OF OUR LIST IS IN THE TWEET CHAIN##########
        involved = []
        if type == "fake":
            accounts = fake_accounts
        else:
            accounts = real_accounts
        for node in G.nodes():
            if int(node) in accounts:
                involved.append(int(node))
        print "INVOLVED USERS:", len(involved)
        print involved
        #######################################################
        density = nx.density(G)
        degree = nx.degree(G)
        bc = nx.betweenness_centrality(G)
        cc = nx.closeness_centrality(G)
        graph = {"density": density, "degree": degree, "bc": bc, "cc": cc}
        all_graphs[f] = graph
        print "--------------------------"
        # plt.title(f)
        # nx.draw(G)
        # plt.show()
    # print all_graphs
    for g in all_graphs:
        print g
        cc_low = []
        cc_mid = []
        cc_high = []

        bc_low = []
        bc_mid = []
        bc_high = []

        degree_low = []
        degree_mid = []
        degree_high = []

        list_of_nodes = []
        for n in all_graphs[g]['cc']:
            list_of_nodes.append(n.encode("utf-8"))
        # for every characteristic split in 4
        for char in all_graphs[g]:
            if char != 'density':
                low, mid, high = get_four_split(char, all_graphs[g][char])
                if char == 'cc':
                    cc_low = low
                    cc_mid = mid
                    cc_high = high
                elif char == 'bc':
                    bc_low = low
                    bc_mid = mid
                    bc_high = high
                elif char == 'degree':
                    degree_low = low
                    degree_mid = mid
                    degree_high = high
        classified = []
        for n in list_of_nodes:
            if n in get_a_list_of_the_first_of_a_tuple(
                    cc_low) and n in get_a_list_of_the_first_of_a_tuple(
                        degree_high):
                classified.append((n, "A"))
            elif n in get_a_list_of_the_first_of_a_tuple(
                    degree_high) and n in get_a_list_of_the_first_of_a_tuple(
                        bc_low):
                classified.append((n, "B"))
            elif n in get_a_list_of_the_first_of_a_tuple(
                    cc_high) and n in get_a_list_of_the_first_of_a_tuple(
                        degree_low):
                classified.append((n, "C"))
            elif n in get_a_list_of_the_first_of_a_tuple(
                    cc_high) and n in get_a_list_of_the_first_of_a_tuple(
                        bc_low):
                classified.append((n, "D"))
            elif n in get_a_list_of_the_first_of_a_tuple(
                    bc_high) and n in get_a_list_of_the_first_of_a_tuple(
                        degree_low):
                classified.append((n, "E"))
            elif n in get_a_list_of_the_first_of_a_tuple(
                    bc_high) and n in get_a_list_of_the_first_of_a_tuple(
                        cc_low):
                classified.append((n, "F"))
            elif n in get_a_list_of_the_first_of_a_tuple(bc_high):
                classified.append((n, "G"))
            else:
                classified.append((n, "-"))
        with open(g + ".txt", "w") as text_file:
            for n in classified:
                print n
                text_file.write(str(n) + '\n')
        print "______________________________"
    counter = 0
    total = 0
    for g in all_graphs:
        total += all_graphs[g]['density']  # accumulate densities across graphs
        counter = counter + 1
    avg = total / counter
    print "average density is", avg
Ejemplo n.º 51
def get_link_measures(net):
    """
    Compute weights and edges betweenness centralities

    :param net:  network
    :return: w: list of weights
             eb: list of edge betweeenness centralities
    """
    w, eb, eb_w, eb_w2, eb_pr, eb_cl, eb_ev, eb_s = [], [], [], [], [], [], [], []

    # Edge weight and unweighted betweenness centrality
    edges = net.edges(data=True)
    betweenness_centr = nx.edge_betweenness_centrality(net, normalized=True)
    for e in edges:
        w.append(e[2]['weight'])
        eb.append(betweenness_centr[(e[0], e[1])])

    # Create a copy of the graph with inverse weights; the cube root is used to reduce the impact of high weights
    net1 = net.copy()
    edges1 = net1.edges(data=True)

    for e in edges1:
        w_e = e[2]['weight']
        net1[e[0]][e[1]]['weight'] = 1 / (w_e**(1 / 3))

    # Weighted betweenness centrality on net1
    betweenness_centr_w = nx.edge_betweenness_centrality(net1,
                                                         normalized=True,
                                                         weight='weight')

    for e in edges1:
        eb_w.append(betweenness_centr_w[(e[0], e[1])])

    # Node dictionary for k-shells
    dict_k_shell = {}
    max_degree = max([net.degree(n) for n in net.nodes])

    for k in reversed(range(max_degree + 1)):
        k_shell = nx.k_shell(net1, k=k)
        k_shell_nodes = k_shell.nodes()
        for i in k_shell_nodes:
            if i not in dict_k_shell:
                dict_k_shell[i] = k

    # node dict for pagerank
    dict_page_rank = nx.pagerank(net1, weight='weight')

    # closeness centrality (re-ordered to follow the global `nodes` ordering,
    # which is assumed to be defined at module level)
    closeness_centr = nx.closeness_centrality(net, distance='weight')
    closeness_centr = dict(
        sorted(closeness_centr.items(),
               key=lambda pair: list(nodes).index(pair[0])))
    # eigenvector centrality
    eigenvector_centr = nx.eigenvector_centrality(net,
                                                  tol=10**-1,
                                                  weight='weight')
    eigenvector_centr = dict(
        sorted(eigenvector_centr.items(),
               key=lambda pair: list(nodes).index(pair[0])))
    # strengths of nodes
    strengths = dict(nx.degree(net1, weight='weight'))

    # For each edge, take lower value of centrality measureof the two nodes and use it to normalize previously
    # computed weighted betweenness
    j = 0
    for e in edges:
        eb_w2.append(eb_w[j] / min(dict_k_shell[e[0]], dict_k_shell[e[1]]))
        eb_pr.append(eb_w[j] / min(dict_page_rank[e[0]], dict_page_rank[e[1]]))
        eb_cl.append(eb_w[j] /
                     min(closeness_centr[e[0]], closeness_centr[e[1]]))
        eb_ev.append(eb_w[j] /
                     min(eigenvector_centr[e[0]], eigenvector_centr[e[1]]))
        eb_s.append(eb_w[j] / min(strengths[e[0]], strengths[e[1]]))
        j = j + 1

    return w, eb, eb_w, eb_w2, eb_pr, eb_cl, eb_ev, eb_s
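
# Sketch of why weights are inverted above: betweenness treats weights as
# distances, so strong ties must be mapped to short distances first (the tiny
# graph is hypothetical; the cube root damps the influence of heavy weights;
# assumes this module's networkx import):
def _inverse_weight_demo():
    net = nx.Graph()
    net.add_edge('a', 'b', weight=8.0)
    net.add_edge('b', 'c', weight=1.0)
    inv = net.copy()
    for u, v, d in inv.edges(data=True):
        inv[u][v]['weight'] = 1 / (d['weight'] ** (1 / 3))
    return nx.edge_betweenness_centrality(inv, normalized=True, weight='weight')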
Ejemplo n.º 52
import networkx as nx
import matplotlib.pyplot as plt

n = 10  # 10 nodes
m = 20  # 20 edges

G = nx.gnm_random_graph(n, m)

# some properties
print("node degree clustering")
for v in nx.nodes(G):
    print('%s %d %f' % (v, nx.degree(G, v), nx.clustering(G, v)))

# print the adjacency list
print("print the adjacency list")
for line in nx.generate_adjlist(G):
    print(line)

nx.draw(G, node_size=250, with_labels=True)
plt.show()
Ejemplo n.º 53
# %%
with open('direct_conections_df.pkl', 'rb') as file:
    data = pickle.load(file)
Conexiones = pd.DataFrame(data)
Conex_1 = Conexiones.iloc[0:200, 0:6]
# %%
G = nx.from_pandas_edgelist(Conex_1,
                            source='p_cty_code',
                            target='d_cty_code',
                            create_using=nx.DiGraph())
from matplotlib.pyplot import figure
figure(figsize=(12, 9))
#nx.draw_shell(G,with_labels=True)
#nx.draw_circular(G)

nx.degree(G)

# if we wanted to create another df showing the nodes and their number of connections, we use:
conexion = {}
for x in G.nodes:
    conexion[x] = len(G[x])
s = pd.Series(conexion, name='Conexiones')
df2 = s.to_frame().sort_values('Conexiones', ascending=False)
#%%
# Density
nx.density(G)

# Clustering
nx.clustering(G)
# Similar to the previous command
for i in nx.clustering(G).items():
    print(i)
Ejemplo n.º 54
    def most_similar_result_with_network(self, data_size, topn):
        print('   -> creating most similar job matrix with network')
        df = pd.DataFrame()
        G = nx.Graph()  # empty graph; similar-job nodes and edges are added below

        i = 0
        keys_list = list(self.doc2idx.keys())
        nodes = []  ## node list
        edges = []  ## edge list (stored as tuples)

        for job_id in keys_list:
            node_id = str(job_id).split('_')[2]
            job_id = 'Job_ID_' + str(job_id).split('_')[2]

            title = self.get_job_title(job_id)[0]
            title = f'{title}({str(job_id)})'

            similar_jobs = self.model.docvecs.most_similar(job_id,
                                                           topn=len(keys_list))

            sim_list = []
            for sim_job_id, score in similar_jobs:
                if score >= 0.8:
                    nodes.append(node_id)  ## node list
                    sim_job_titles = self.get_job_title(sim_job_id)[0]
                    sim_job_id = sim_job_id.split('_')[2]
                    entry = f'{sim_job_titles}({sim_job_id})'
                    sim_list.append(entry)
                    temp_tuple = (node_id, sim_job_id, score)
                    edges.append(temp_tuple)

                else:
                    sim_list.append('')
            i = i + 1
            df.loc[:, title] = pd.Series(sim_list)

        df.to_csv(self.model_path + self.data_name + '_sim_title_result.csv',
                  mode='w',
                  encoding='utf-8')
        nodes = list(set(nodes))  # de-duplicate node ids
        print(len(nodes))
        print(nodes[:])
        print(edges[:])
        G.add_nodes_from(nodes)
        G.add_weighted_edges_from(edges)

        degree = nx.degree(G)
        print(degree)
        plt.figure(figsize=(20, 10))
        graph_pos = nx.spring_layout(G, k=0.42, iterations=17)
        nx.draw_networkx_labels(G,
                                graph_pos,
                                font_size=10,
                                font_family='sans-serif')
        # nx.draw_networkx_nodes(G, graph_pos, node_size=[ var * 50 for var in degree], cmap='jet')
        nx.draw_networkx_edges(G, graph_pos, edge_color='gray')
        nx.draw(G,
                node_size=[100 + v[1] * 100 for v in degree],
                with_labels=True)
        plt.show()
        return df
Ejemplo n.º 55
@author: samic
"""

import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#block = pd.read_csv('block1.csv')
#block = pd.read_csv('block2.csv')
#block = pd.read_csv('block3.csv')
block = pd.read_csv('block4.csv')

block.drop(block.columns[[1]], axis=1, inplace=True)

toGraph = block.stack().reset_index()

del block

toGraph.columns = ['var1', 'var2', 'value']

toGraph_filtered = toGraph.loc[(toGraph['value'] > .8)
                               & (toGraph['var1'] != toGraph['var2'])]
G = nx.from_pandas_edgelist(toGraph_filtered, 'var1', 'var2')

#g = nx.draw(G, node_size=10)

# centrality
dictNodes = nx.eigenvector_centrality(G, max_iter=1000)
deg = nx.degree(G)
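
# The snippet ends after computing the measures; a plausible follow-up
# (an assumption, not in the original) would rank variables by centrality:
top10 = sorted(dictNodes.items(), key=lambda kv: kv[1], reverse=True)[:10]
for var, centrality in top10:
    print(var, round(centrality, 3))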
Ejemplo n.º 56
            #sys.exit(1)

        # basic graphs checks
        small_graph = Gp.networkx_graph(data_vertices, data_adjacency)
        big_graph = Gp.networkx_graph(struct_vertices, struct_adjacency)

        Gp.check_graphs_size(small_graph, big_graph)
        Gp.check_graphs_labels(small_graph, big_graph)
        print "Basic NOE graph analysis:\n"
        print "N (nodes, NOE graph) = ", len(small_graph.nodes())
        print "N (edges, NOE graph) = ", len(small_graph.edges())
        print "N (nodes, PDB graph) = ", len(big_graph.nodes())
        print "N (edge, PDB graph) = ", len(big_graph.edges())
        print "NOE sparsity >> ", len(small_graph.edges()) / float(
            len(big_graph.edges()))
        print "average degree >> ", np.mean(nx.degree(small_graph).values())
        print "median degree >>", np.median(nx.degree(small_graph).values())

        return data_vertices, data_adjacency, struct_vertices, struct_adjacency

    def subgraph_isomorphism(self, noe_vertices, noe_adjacency,
                             structure_vertices, structure_adjacency, Gp, tag):

        graph_noe, graph_noe_indexing = Gp.igraph_graph(
            noe_vertices, noe_adjacency)
        graph_structure, graph_structure_indexing = Gp.igraph_graph(
            structure_vertices, structure_adjacency)
        # instance of a class for subgraph isomorphism check and extraction
        EP = IgraphSubIso()
        # measure how long it takes for subgraph isomorphism to be evaluated
        start_vf2 = time.time()
Ejemplo n.º 57
def bethe_hessian_matrix(G, r=None, nodelist=None):
    r"""Returns the Bethe Hessian matrix of G.

    The Bethe Hessian is a family of matrices parametrized by r, defined as
    H(r) = (r^2 - 1) I - r A + D where A is the adjacency matrix, D is the
    diagonal matrix of node degrees, and I is the identify matrix. It is equal
    to the graph laplacian when the regularizer r = 1.

    The default choice of regularizer should be the ratio [2]

    .. math::
      r_m = \left(\sum k_i \right)^{-1}\left(\sum k_i^2 \right) - 1

    Parameters
    ----------
    G : Graph
       A NetworkX graph

    r : float
       Regularizer parameter

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().


    Returns
    -------
    H : scipy.sparse.csr_matrix
      The Bethe Hessian matrix of G, with parameter r.

    Examples
    --------
    >>> k = [3, 2, 2, 1, 0]
    >>> G = nx.havel_hakimi_graph(k)
    >>> H = nx.bethe_hessian_matrix(G)


    See Also
    --------
    bethe_hessian_spectrum
    adjacency_matrix
    laplacian_matrix

    References
    ----------
    .. [1] A. Saade, F. Krzakala and L. Zdeborová
       "Spectral clustering of graphs with the bethe hessian",
       Advances in Neural Information Processing Systems. 2014.
    .. [2] C. M. Lee, E. Levina
       "Estimating the number of communities in networks by spectral methods"
       arXiv:1507.00827, 2015.
    """
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    if nodelist is None:
        nodelist = list(G)
    if r is None:
        r = sum(d**2
                for v, d in nx.degree(G)) / sum(d for v, d in nx.degree(G)) - 1
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, format="csr")
    n, m = A.shape
    # TODO: Rm csr_array wrapper when spdiags array creation becomes available
    D = sp.sparse.csr_array(
        sp.sparse.spdiags(A.sum(axis=1), 0, m, n, format="csr"))
    # TODO: Rm csr_array wrapper when eye array creation becomes available
    I = sp.sparse.csr_array(sp.sparse.eye(m, n, format="csr"))
    import warnings

    warnings.warn(
        "bethe_hessian_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0",
        FutureWarning,
        stacklevel=2,
    )
    # TODO: Remove the csr_matrix wrapper in NetworkX 3.0
    return sp.sparse.csr_matrix((r**2 - 1) * I - r * A + D)
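
# A small self-check sketch (not from the original; assumes this module's
# networkx import): the default regularizer equals the docstring's ratio r_m.
# The Havel-Hakimi graph below is just an illustration.
if __name__ == "__main__":
    G = nx.havel_hakimi_graph([3, 2, 2, 1, 0])
    degs = [d for _, d in nx.degree(G)]
    r_m = sum(d ** 2 for d in degs) / sum(degs) - 1
    H = bethe_hessian_matrix(G)  # uses r = r_m by default
    print(H.shape, r_m)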
Ejemplo n.º 58
#     ------ Network mining ------
# Usually the data we analyze is stored in a network structure; we can use nodes and edges to describe the relationships between items
# In this chapter we introduce the basic steps for analyzing this kind of data using graph theory, with a library that helps us create, manipulate and study networks
# In particular we will show how to use specific methods to build meaningful data visualizations, and how to find groups of densely connected nodes
# Graph-theory tooling lets us easily import the structures most commonly used to describe relational data
import networkx as nx
G = nx.read_gml("/Users/liding/E/Bdata/ptemp/liding/lesmiserables.gml")      # networkx 1.9.1 must be installed for this to work
# In the code above we imported the network of words that co-occur in Les Miserables, downloadable for free from
# https://gephi.org/datasets/lesmiserables.gml.zip; the data is stored in GML format. We can also import and visualize the network with the following command:
nx.draw(G, node_size=0, edge_color="b", alpha=.2, font_size=7)


# degree
deg = nx.degree(G)
from numpy import percentile, mean, median
print min(deg.values())
print percentile(deg.values(), 25) # computes the 1st quartile
print median(deg.values())
print percentile(deg.values(), 75) # computes the 3rd quartile
print max(deg.values())

# keep only the nodes with degree greater than 10
Gt = G.copy()
dn = nx.degree(Gt)
for n in Gt.nodes():
    if dn[n] <= 10:
        Gt.remove_node(n)
nx.draw(Gt, node_size=0, edge_color='b', alpha=.2, font_size=12)
Ejemplo n.º 59
def nX_remove_filler_nodes(networkX_graph: nx.Graph) -> nx.Graph:
    if not isinstance(networkX_graph, nx.Graph):
        raise TypeError('This method requires an undirected networkX graph.')

    logger.info('Removing filler nodes.')
    g_copy = networkX_graph.copy()
    removed_nodes = set()

    def manual_weld(_G, _start_node, _geom_a, _geom_b):
        s_x = _G.nodes[_start_node]['x']
        s_y = _G.nodes[_start_node]['y']
        # check geom coordinates directionality - flip to wind in same direction
        # i.e. _geom_a should start at _start_node whereas _geom_b should end at _start_node
        if not np.allclose(
            (s_x, s_y), _geom_a.coords[0][:2], atol=0.001, rtol=0):
            _geom_a = geometry.LineString(_geom_a.coords[::-1])
        if not np.allclose(
            (s_x, s_y), _geom_b.coords[-1][:2], atol=0.001, rtol=0):
            _geom_b = geometry.LineString(_geom_b.coords[::-1])
        # now concatenate
        _new_agg_geom = geometry.LineString(
            list(_geom_a.coords) + list(_geom_b.coords))
        # check
        assert np.allclose(_new_agg_geom.coords[0], (s_x, s_y),
                           atol=0.001,
                           rtol=0)
        assert np.allclose(_new_agg_geom.coords[-1], (s_x, s_y),
                           atol=0.001,
                           rtol=0)
        return _new_agg_geom

    def recursive_weld(_G, start_node, agg_geom, agg_del_nodes, curr_node,
                       next_node):

        # if the next node has a degree of 2, then follow the chain
        # for disconnected components, check that the next node is not back at the start node...
        if nx.degree(_G, next_node) == 2 and next_node != start_node:
            # next node becomes new current
            _new_curr = next_node
            # add next node to delete list
            agg_del_nodes.append(next_node)
            # get its neighbours
            _a, _b = list(nx.neighbors(networkX_graph, next_node))
            # proceed to the new_next node
            if _a == curr_node:
                _new_next = _b
            else:
                _new_next = _a
            # get the geom and weld
            if 'geom' not in _G[_new_curr][_new_next]:
                raise KeyError(
                    f'Missing "geom" attribute for edge {_new_curr}-{_new_next}'
                )
            new_geom = _G[_new_curr][_new_next]['geom']
            if new_geom.type != 'LineString':
                raise TypeError(
                    f'Expecting LineString geometry but found {new_geom.type} geometry.'
                )
            # when welding an isolated circular component, the ops linemerge will potentially weld onto the wrong end
            # i.e. start-side instead of end-side... so orient and merge manually
            if _new_next == start_node:
                _new_agg_geom = manual_weld(_G, start_node, new_geom, agg_geom)
            else:
                _new_agg_geom = ops.linemerge([agg_geom, new_geom])
            if _new_agg_geom.type != 'LineString':
                raise TypeError(
                    f'Found {_new_agg_geom.type} geometry instead of "LineString" for new geom {_new_agg_geom.wkt}.'
                    f'Check that the adjacent LineStrings in the vicinity of {curr_node}-{next_node} are not corrupted.'
                )
            return recursive_weld(_G, start_node, _new_agg_geom, agg_del_nodes,
                                  _new_curr, _new_next)
        else:
            end_node = next_node
            return agg_geom, agg_del_nodes, end_node

    # iterate the nodes and weld edges where encountering simple intersections
    # use the original graph so as to write changes to new graph
    for n in tqdm(networkX_graph.nodes(), disable=checks.quiet_mode):

        # some nodes will already have been removed via recursive function
        if n in removed_nodes:
            continue

        if nx.degree(networkX_graph, n) == 2:

            # get neighbours and geoms either side
            nb_a, nb_b = list(nx.neighbors(networkX_graph, n))

            # geom A
            if 'geom' not in networkX_graph[n][nb_a]:
                raise KeyError(f'Missing "geom" attribute for edge {n}-{nb_a}')
            geom_a = networkX_graph[n][nb_a]['geom']
            if geom_a.type != 'LineString':
                raise TypeError(
                    f'Expecting LineString geometry but found {geom_a.type} geometry.'
                )
            # start the A direction recursive weld
            agg_geom_a, agg_del_nodes_a, end_node_a = recursive_weld(
                networkX_graph, n, geom_a, [], n, nb_a)

            # only follow geom B if geom A doesn't return an isolated (disconnected) looping component
            # e.g. circular disconnected walkway
            if end_node_a == n:
                logger.warning(
                    f'Disconnected looping component encountered around {n}')
                # in this case, do not remove the starting node because it suspends the loop
                g_copy.remove_nodes_from(agg_del_nodes_a)
                removed_nodes.update(agg_del_nodes_a)
                g_copy.add_edge(n, n, geom=agg_geom_a)
                continue

            # geom B
            if 'geom' not in networkX_graph[n][nb_b]:
                raise KeyError(f'Missing "geom" attribute for edge {n}-{nb_b}')
            geom_b = networkX_graph[n][nb_b]['geom']
            if geom_b.type != 'LineString':
                raise TypeError(
                    f'Expecting LineString geometry but found {geom_b.type} geometry.'
                )
            # start the B direction recursive weld
            agg_geom_b, agg_del_nodes_b, end_node_b = recursive_weld(
                networkX_graph, n, geom_b, [], n, nb_b)

            # remove old nodes - edges are removed implicitly
            agg_del_nodes = agg_del_nodes_a + agg_del_nodes_b
            # also remove origin node n
            agg_del_nodes.append(n)
            g_copy.remove_nodes_from(agg_del_nodes)
            removed_nodes.update(agg_del_nodes)

            # merge the lines
            # disconnected self-loops are caught above per geom a, i.e. where the whole loop is degree == 2
            # however, lollipop scenarios are not, so weld manually
            # lollipop scenarios are where a looping component (all degrees == 2) suspends off a node with degree > 2
            if end_node_a == end_node_b:
                merged_line = manual_weld(networkX_graph, end_node_a,
                                          agg_geom_a, agg_geom_b)
            else:
                merged_line = ops.linemerge([agg_geom_a, agg_geom_b])

            # run checks
            if merged_line.type != 'LineString':
                raise TypeError(
                    f'Found {merged_line.type} geometry instead of "LineString" for new geom {merged_line.wkt}. '
                    f'Check that the adjacent LineStrings for {nb_a}-{n} and {n}-{nb_b} actually touch.'
                )

            # add new edge
            g_copy.add_edge(end_node_a, end_node_b, geom=merged_line)

    return g_copy
Ejemplo n.º 60
            try:
                E[rA, rB] += 1
                E[rB, rA] += 1
            except KeyError:
                E[rA, rB] = 1
                E[rB, rA] = 1
        if rA and rB:
            try:
                N.edge[rA][rB]['fweight'] += data['occurrence']
                N.edge[rA][rB]['weight'] += 1
            except KeyError:
                N.add_edge(rA, rB, fweight=data['occurrence'], weight=1)

    write_gml(N, 'networks/rimes.gml')

    deg = nx.degree(N, weight='weight')
    with open('stats/rime_degree.tsv', 'w') as f:
        f.write('Rime\tDegree\n')
        for n,w in sorted(deg.items(), key=lambda x: x[1], reverse=True):
            f.write(n+'\t'+str(w)+'\n')

    with open('stats/edges_rimes.tsv', 'w') as f:
        f.write('RimeA\tRimeB\tOccurrence\n')
        for (a,b),c in sorted(E.items(), key=lambda x: x[1], reverse=True):
            f.write(a+'\t'+b+'\t'+str(c)+'\n')
    
if 'triples' in argv:
    triples = []
    visited = []
    # make subgraph consisting only of nrj-cases
    for nA,dA in G.nodes(data=True):