def _nearest_head(i, candidates):
    """Return the member of *candidates* closest to *i*; the smallest one wins ties."""
    ordered = sorted(candidates)
    return ordered[np.argmin([abs(i - j) for j in ordered])]


def tree_decoding_algorithm_content_and_function(s, headrules, reverse=True, ablation="pagerank"):
    """Replace the edges of *s* with a dependency tree decoded from a node ranking.

    This is the algorithm in Fig 3 in Søgaard (2012), applied first to the
    content words and then to the function words.

    :param s: sentence graph; every node must carry a ``"cpostag"`` attribute.
        The graph is modified in place (edges replaced, node 0 added as ROOT,
        ``"lemma"`` overwritten with the rank position) and also returned.
    :param headrules: table with ``"dep"`` and ``"head"`` columns listing the
        head POS tags allowed for each dependent POS tag (presumably a pandas
        DataFrame -- it is indexed with a boolean mask and ``.values``).
    :param reverse: if true, PageRank is computed on the reversed graph.
    :param ablation: when equal to ``"pagerank"`` the PageRank step is skipped
        and nodes are ranked in the order ``s.nodes()`` yields them.
    :returns: the modified graph *s*.
    """
    if ablation == "pagerank":
        # PageRank is ablated: keep the graph's own node order.
        rankedindices = list(s.nodes())
    else:
        # Every node starts with weight 1 (content and function words alike);
        # the first verb, or failing that the first content word, is boosted.
        personalization = {x: 1 for x in s.nodes()}
        verbs = sorted(n for n in s.nodes() if s.node[n]["cpostag"] == "VERB")
        content_words = sorted(n for n in s.nodes() if s.node[n]["cpostag"] in CONTENT)

        if verbs:
            personalization[verbs[0]] = 5
        elif content_words:
            personalization[content_words[0]] = 5

        rev_s = nx.reverse(nx.DiGraph(s)) if reverse else nx.DiGraph(s)
        rankdict = nx.pagerank_numpy(rev_s, alpha=0.95, personalization=personalization)
        rankedindices = [k for k, v in Counter(rankdict).most_common()]

    H = set()  # nodes already attached, i.e. available as heads
    D = set()  # decoded (head, dependent) edges

    contentindices = [x for x in rankedindices if s.node[x]["cpostag"] in CONTENT]
    functionindices = [x for x in rankedindices if x not in contentindices]
    print(contentindices)  # NOTE(review): looks like leftover debug output

    # Content words are attached from highest to lowest rank, i.e. the word
    # with the highest rank becomes the dependent of root.
    for i in contentindices:
        if not H:
            n_j_prime = 0
        else:
            POS_i = s.node[i]["cpostag"]
            possible_headsin_table = list(headrules[headrules["dep"] == POS_i]["head"].values)
            H_headrules = [h for h in H if s.node[h]["cpostag"] in possible_headsin_table]
            # Prefer the closest head licensed by the head rules and fall back
            # to the closest attached node only when no licensed head exists.
            # (The previous truthiness test on the argmin index discarded the
            # licensed head whenever that index happened to be 0.)
            n_j_prime = _nearest_head(i, H_headrules if H_headrules else H)
        D.add((n_j_prime, i))
        H.add(i)
        s.node[i]["lemma"] = str(rankedindices.index(i))

    # Function words attach to content words only: H is not extended here.
    for i in functionindices:
        if not H:
            n_j_prime = 0
        else:
            POS_i = s.node[i]["cpostag"]
            possible_headsin_table = list(headrules[headrules["dep"] == POS_i]["head"].values)

            if POS_i in RIGHTATTACHING:  # ["ADP","DET","AUX","SCONJ"]
                H_headrules = [h for h in H if s.node[h]["cpostag"] in possible_headsin_table and h > i]
            elif POS_i in LEFTATTACHING:
                H_headrules = [h for h in H if s.node[h]["cpostag"] in possible_headsin_table and h < i]
            else:
                H_headrules = [h for h in H if s.node[h]["cpostag"] in possible_headsin_table]

            n_j_prime = _nearest_head(i, H_headrules if H_headrules else H)
        D.add((n_j_prime, i))
        s.node[i]["lemma"] = str(rankedindices.index(i))

    s.add_node(0, attr_dict={"form": "ROOT", "lemma": "ROOT", "cpostag": "ROOT", "postag": "ROOT"})
    s.remove_edges_from(s.edges())
    s.add_edges_from(D)

    # Make sure there are no fully 0-attached sentences: only the first
    # dependent of ROOT stays there, the others are re-attached to it.
    roots = sorted(s.successors(0))
    mainpred = roots[0]
    for other in roots[1:]:
        s.remove_edge(0, other)
        s.add_edge(mainpred, other)

    # Sentence-final and sentence-initial punctuation hang off the main predicate.
    lastperiod = max(s.nodes())
    if s.node[lastperiod]["cpostag"] == "PUNCT":
        s.remove_edge(s.head_of(lastperiod), lastperiod)
        s.add_edge(mainpred, lastperiod)

    if s.node[1]["cpostag"] == "PUNCT":
        s.remove_edge(s.head_of(1), 1)
        s.add_edge(mainpred, 1)

    # Every edge currently receives the same label.
    for h, d in s.edges():
        s[h][d]["deprel"] = "root"  # if h == 0 else 'dep'
    return s
Ejemplo n.º 2
0
def create_tree(r, h=None, num_nodes=None):
    """Build a balanced ``semGODiGraph`` tree whose edges point towards the root.

    Exactly one of *h* and *num_nodes* must be given (and be truthy).

    :param r: branching factor.
    :param h: height of the tree.
    :param num_nodes: desired number of nodes; the smallest balanced tree that
        is large enough is generated and its highest-numbered nodes are removed.
    :returns: the graph after ``semMakeGraph()`` has been called on it.
    :raises AssertionError: if both or neither of *h* / *num_nodes* are given.
    """
    # need to have either height or num_nodes
    assert xor(bool(h), bool(num_nodes))
    to_remove = 0
    if num_nodes is not None:
        if r == 1:
            # NOTE(review): the chain below has h edges, hence h + 1 nodes --
            # confirm that num_nodes + 1 nodes is intended here.
            h = num_nodes
        else:
            # int(): math.ceil returns a float on Python 2.
            h = int(ceil(log(num_nodes * (r - 1) + 1, r) - 1))
            init_size = (r ** (h + 1) - 1) // (r - 1)  # nodes in the full tree
            to_remove = int(init_size - num_nodes)

    # branching factor of 1 does not seem to work for nx.balanced_tree
    result_graph = semGODiGraph(None, Aspects.BP)
    if r == 1:
        # Build the chain by hand, each node pointing at its predecessor.
        # NOTE(review): result_graph.root is not assigned in this branch.
        for u in range(0, h):
            v = u + 1
            result_graph.add_edge(GN(nodetype=GN.TERM_TYPE, dbid=v), GN(nodetype=GN.TERM_TYPE, dbid=u))
    else:
        internal_graph = nx.balanced_tree(r=r, h=h, create_using=nx.DiGraph())
        current = internal_graph.number_of_nodes()
        # Drop the surplus nodes, highest labels first in range order.
        # (The loop variable no longer shadows the parameter ``r``.)
        for surplus in range(current - to_remove, current):
            internal_graph.remove_node(surplus)
        if num_nodes is not None:
            assert num_nodes == internal_graph.number_of_nodes()
        for u, v in internal_graph.edges_iter():
            result_graph.add_edge(GN(nodetype=GN.TERM_TYPE, dbid=u), GN(nodetype=GN.TERM_TYPE, dbid=v))
        nx.reverse(result_graph, copy=False)  # make the edges point up not down
        root_list = [n for n, d in result_graph.out_degree().items() if d == 0]
        result_graph.root = root_list[0]
    result_graph.semMakeGraph()
    return result_graph
Ejemplo n.º 3
0
    def error_flow(self, G, src_node, ud=None):
        """Flag edges that run against the flow direction implied by a source node.

        Works on a reversed copy of *G*. An edge is flagged when its end node
        is reachable from *src_node* in that reversed copy while its start
        node is not. Only edges touching nodes connected to *src_node* in the
        undirected graph are examined.

        :param G: target digraph
        :param src_node: source node
        :param ud: optional undirected version of the reversed graph; it is
            derived from the reversed copy when missing or empty
        :returns: composition of the reversed copy (attribute set to 0) and a
            graph holding only the flagged edges (attribute set to 1)
        """
        RG = nx.reverse(G, copy=True)
        reachable = list(nx.dfs_preorder_nodes(RG, src_node))
        if not ud:
            ud = RG.to_undirected()
        component_nodes = nx.dfs_tree(ud, src_node).nodes()
        candidates = RG.edges(component_nodes, data=True, keys=True)

        # Keep the edges entering the reachable set from outside of it.
        upstream_list = [
            edge for edge in candidates
            if edge[1] in reachable and edge[0] not in reachable
        ]

        # add new "error_flow" attribute to graph, starting from the default
        self.add_attribute(RG, errorflow, 0)

        flipped_G = nx.MultiDiGraph()
        for u, v, key, d in RG.edges_iter(keys=True, data=True):
            if (u, v, key, d) in upstream_list:
                flipped_G.add_edge(u, v, key, d)
        if flipped_G is not None:
            self.update_attribute(flipped_G, errorflow, 1)

        # NOTE(review): the result of this call is discarded, so RG stays
        # reversed -- confirm whether an in-place reversal was intended.
        nx.reverse(RG)
        return nx.compose(RG, flipped_G)
Ejemplo n.º 4
0
def main():
    """Generate the state-transition graph and its reverse as gpickle files.

    Expects exactly one command-line argument, ``forward`` or ``backward``,
    which selects the round function (``roundf`` or ``inv_roundf``). Writes
    ``<direction>.gpickle`` and ``rev_<direction>.gpickle`` to the current
    directory; prints a usage message and exits with status 1 otherwise.
    """
    # Validate the argument before it is used. The old check read
    # ``direction`` before assigning it and compared against the single
    # string "forward, backward".
    if not (len(sys.argv) == 2 and sys.argv[1] in ("forward", "backward")):
        print("usage: ./gen_graph.py [forward/backward]", file=sys.stderr)
        sys.exit(1)

    direction = sys.argv[1]
    f = roundf if direction == "forward" else inv_roundf
    n = 65536

    g = nx.DiGraph()
    for x in range(n):
        for ns, w in f(convert_int(x)):
            y = convert_states(ns)
            g.add_edge(x, y, weight=w)
        print(x)  # progress output
    nx.write_gpickle(g, "{}.gpickle".format(direction))

    print("Generated {}.gpickle.".format(direction))

    # Use the returned graph: with copy=False recent networkx versions return
    # a view and leave ``g`` untouched, so writing ``g`` again would just
    # duplicate the forward graph.
    rev_g = nx.reverse(g, copy=True)
    nx.write_gpickle(rev_g, "rev_{}.gpickle".format(direction))

    print("Generated rev_{}.gpickle.".format(direction))
Ejemplo n.º 5
0
 def get_headwater_edges(self, G, attrb_field, attrb_name):
     """
     Build a graph holding one attributed edge per headwater node.

     A headwater node is one without outgoing edges in the reversed copy of
     G. For each of them the last in-edge reported by the reversed copy is
     collected, and the collection is reversed again before being attributed.

     :param G: networkx graph
     :param attrb_field: name of the attribute field
     :param attrb_name: attribute value
     :return: graph with the new headwater edge type attribute; an empty
         MultiDiGraph when G is undirected or has at most two nodes
     """
     if not nx.is_directed(G) or G.number_of_nodes() <= 2:
         return nx.MultiDiGraph()

     flipped = nx.reverse(G, copy=True)
     out_counts = [(node, len(flipped.edges(node)))
                   for node in flipped.nodes_iter()]

     collected = nx.MultiDiGraph()
     for node, n_out in out_counts:
         if n_out != 0:
             continue
         incoming = flipped.in_edges(node, data=True, keys=True)
         collected.add_edge(*incoming.pop())

     attributed = nx.reverse(collected, copy=True)
     self.update_attribute(attributed, attrb_field, attrb_name)
     return attributed
Ejemplo n.º 6
0
 def testTree(self, r=2, h=2):
     """Compare the uncertainty of a balanced tree with its closed-form value.

     A balanced tree with branching factor r and height h is copied into
     self.graph with its edges pointing up, and the computed uncertainty is
     checked against fixed bounds and against a three-term formula.
     """
     tree = nx.balanced_tree(r=r, h=h, create_using=nx.DiGraph())
     for parent, child in tree.edges_iter():
         head = GN(nodetype=GN.TERM_TYPE, dbid=parent)
         tail = GN(nodetype=GN.TERM_TYPE, dbid=child)
         self.graph.add_edge(head, tail)
     nx.reverse(self.graph, copy=False)  # make the edges point up not down

     # The root is the only node left without outgoing edges.
     sinks = [node for node, degree in self.graph.out_degree().items()
              if degree == 0]
     self.failUnless(len(sinks) == 1)
     self.graph.root = sinks[0]
     self.graph.semMakeGraph()

     graph_entropy = self.graph.calc_graph_uncert()
     nn = float(self.graph.num_nodes)
     graph_max = -2 * log(1 / nn, 2)
     graph_min = -1 * log(1 / nn, 2)
     print("num nodes:" + str(self.graph.num_nodes) + " entropy:" +
           str(graph_entropy))

     # Closed-form value, assembled term by term.
     component1 = -1 * log(1 / nn, 2)
     component2 = -((r**h) / nn) * log(1 / float(nn - h - 1), 2)
     component3 = -sum(
         ((r**i) / nn) * log(1 / float(nn - i - r**(h - i) - 1), 2)
         for i in range(1, h))
     test_entropy = component1 + component2 + component3

     self.failUnless(graph_min < graph_entropy < graph_max)
     self.failUnless(graph_entropy == test_entropy)
Ejemplo n.º 7
0
def solve_entry_tips(graph, starting_nodes):
    """Remove all entry tips from the given graph.

    Entry tips are handled by reversing the graph, running the out-tip
    solver on the reversed graph and reversing its result again.

      :Parameters:
          graph: The graph from which we want to remove tips.
          starting_nodes: The entry nodes from the paths.
      :Returns:
          The graph produced by reversing the solver's output.
    """
    flipped = nx.reverse(graph)
    return nx.reverse(solve_out_tips(flipped, starting_nodes))
Ejemplo n.º 8
0
    def testMax(self, n=7):
        """Check the uncertainty of a star-shaped graph against its closed form.

        The edges of ``nx.star_graph(n - 1)`` are copied into self.graph and
        reversed in place; the single node without outgoing edges becomes
        the root.
        """
        # create star graph for maximum case
        star = nx.star_graph(n - 1)
        for first, second in star.edges_iter():
            self.graph.add_edge(GN(nodetype=GN.TERM_TYPE, dbid=first),
                                GN(nodetype=GN.TERM_TYPE, dbid=second))
        nx.reverse(self.graph, copy=False)

        sinks = [node for node, degree in self.graph.out_degree().items()
                 if degree == 0]
        self.failUnless(len(sinks) == 1)
        self.graph.root = sinks[0]
        self.graph.semMakeGraph()

        graph_entropy = self.graph.calc_graph_uncert()
        nn = float(self.graph.num_nodes)
        upper_bound = -2 * log(1 / nn, 2)
        self.failUnless(graph_entropy < upper_bound)
        expected = -1 * log(1 / nn, 2) - ((nn - 1) / nn) * (log(1 / (nn - 2), 2))
        self.failUnless(graph_entropy == expected)
Ejemplo n.º 9
0
def get_bowtie_components(G):
    """Partition the nodes of a directed graph into bow-tie components.

    :param G: networkx directed graph.
    :returns: tuple ``(S, IN, OUT, TUBES, INTENDRILS, OUTTENDRILS, OTHER)`` of
        node sets, where

        * ``S`` is the largest strongly connected component,
        * ``OUT`` holds the nodes reachable from ``S`` and ``IN`` the nodes
          that reach ``S``,
        * ``TUBES`` are reachable from ``IN`` and reach ``OUT``,
        * ``INTENDRILS`` are reachable from ``IN`` but do not reach ``OUT``,
        * ``OUTTENDRILS`` reach ``OUT`` but are not reachable from ``IN``,
        * ``OTHER`` holds everything else.
    :raises ValueError: if the graph has no nodes (``max`` of an empty sequence).
    """
    GT = nx.reverse(G, copy=True)

    # The core is the largest strongly connected component.
    S = max(nx.strongly_connected_components(G), key=len)

    # Any node of S reaches, and is reached by, the same set of nodes.
    v_any = next(iter(S))
    DFS_G = set(nx.dfs_tree(G, v_any).nodes())
    DFS_GT = set(nx.dfs_tree(GT, v_any).nodes())
    OUT = DFS_G - S
    IN = DFS_GT - S
    V_rest = set(G.nodes()) - S - OUT - IN

    TUBES = set()
    INTENDRILS = set()
    OUTTENDRILS = set()
    OTHER = set()
    for v in V_rest:
        # Plain emptiness tests replace the former ``len(...) is not 0``,
        # which compared object identity with an int literal.
        irv = not IN.isdisjoint(nx.dfs_tree(GT, v).nodes())  # IN reaches v
        vro = not OUT.isdisjoint(nx.dfs_tree(G, v).nodes())  # v reaches OUT
        if irv and vro:
            TUBES.add(v)
        elif irv:
            INTENDRILS.add(v)
        elif vro:
            OUTTENDRILS.add(v)
        else:
            OTHER.add(v)

    return S, IN, OUT, TUBES, INTENDRILS, OUTTENDRILS, OTHER
def findAugmentingPath(L, initialNode, finalNode):
    """Search backwards from *finalNode* for a path starting at *initialNode*.

    The walk always follows the first predecessor of the current node. When a
    node without predecessors is hit, the last edge taken is undone and that
    dead-end node is removed from the graph before the walk resumes.

    :param L: networkx directed graph; dead-end nodes met during the search
        are removed from it (the graph is modified in place).
    :param initialNode: node the path has to start from.
    :param finalNode: node the path has to end at.
    :returns: list of ``(u, v)`` edges ordered from *initialNode* to
        *finalNode*, or an empty list when no path is found.
    """
    # Edges are collected from finalNode backwards and flipped once at the
    # end, which avoids the O(n) front insertions/removals on a list.
    backward_edges = []
    currentNode = finalNode

    lReversed = nx.reverse(L)

    # Compare nodes by equality: an identity test (``is not``) fails for
    # equal but distinct objects such as large integers or built strings.
    while currentNode != initialNode:
        predecessors = list(lReversed.neighbors(currentNode))

        if predecessors:
            predecessor = predecessors[0]
            backward_edges.append((predecessor, currentNode))
            currentNode = predecessor
            continue

        if not backward_edges:
            # finalNode itself cannot be reached from anywhere.
            return []

        # Dead end: undo the last step and drop the dead-end node from both
        # the graph and its reversed copy so it is not visited again.
        dead_end, resume_at = backward_edges.pop()
        L.remove_node(dead_end)
        lReversed.remove_node(dead_end)
        currentNode = resume_at

    backward_edges.reverse()
    return backward_edges
def build_gene_goterm_graph(go_dag, goid_prots):
    """
    Combine the GO DAG with protein annotations.

    *go_dag*: networkx DiGraph DAG containing the is_a edges in the GO DAG
    *goid_prots*: contains the set of proteins annotated to each GO term ID

    *returns*: two networkx DiGraphs. The first holds the GO DAG edges plus
        an edge from every protein to each GO term ID it is annotated to. The
        second holds the GO DAG edges reversed plus the same protein -> GO
        term edges (those are not reversed).
    """
    G = nx.DiGraph()
    G.add_edges_from(go_dag.edges())

    # Reverse only the GO DAG part; the annotation edges are added afterwards.
    revG = nx.reverse(G, copy=True)

    # One edge per (protein, GO term) annotation. In G this makes the proteins
    # annotated to descendants (more specific terms) of a term reachable.
    annotation_edges = [
        (prot, goid)
        for goid in go_dag.nodes()
        for prot in goid_prots[goid]
    ]
    G.add_edges_from(annotation_edges)
    revG.add_edges_from(annotation_edges)

    print("\t%d nodes, %d edges" % (G.number_of_nodes(),G.number_of_edges()))

    return G, revG
Ejemplo n.º 12
0
def get_leaf_nodes(cfg: DiGraph):
    """Return the predecessors of node 0 that have exactly one outgoing edge.

    Predecessors are read as the neighbours of node 0 in the reversed graph;
    the out-degree is taken from ``cfg`` itself.
    """
    predecessors_of_zero = reverse(cfg, False).neighbors(0)
    return [node for node in predecessors_of_zero if cfg.out_degree(node) == 1]
Ejemplo n.º 13
0
    def _get_all_merge_points(self, cfg, graph_with_loops):
        """
        Return all possible merge points in this CFG.

        A merge point is a node with more than one incoming edge (ignoring
        "Ijk_FakeRet" edges) and ``looping_times == 0``. Candidates are taken
        in topological order and then reordered by post-dominance.

        :param cfg: The control flow graph, which must be acyclic.
        :param graph_with_loops: The graph that still contains loops; its
                    reversed form is used for the post-dominance checks.
                    NOTE(review): with ``copy=False`` some networkx versions
                    reverse and edit the caller's graph in place -- confirm
                    that this is acceptable.
        :returns:   A list of ``(addr, looping_times)`` tuples, one per merge point.
        """
        # Local import keeps this method self-contained.
        from functools import cmp_to_key

        graph = networkx.DiGraph(cfg.graph)
        reversed_cyclic_graph = networkx.reverse(graph_with_loops, copy=False)

        # Remove all "FakeRet" edges
        fakeret_edges = [(src, dst) for src, dst, data in graph.edges_iter(data=True)
                         if data['jumpkind'] == 'Ijk_FakeRet']
        graph.remove_edges_from(fakeret_edges)

        # Remove all "FakeRet" edges from cyclic_graph as well
        fakeret_edges = [(src, dst) for src, dst, data in reversed_cyclic_graph.edges_iter(data=True)
                         if data['jumpkind'] == 'Ijk_FakeRet']
        reversed_cyclic_graph.remove_edges_from(fakeret_edges)

        # Perform a topological sort
        sorted_nodes = networkx.topological_sort(graph)

        nodes = [n for n in sorted_nodes if graph.in_degree(n) > 1 and n.looping_times == 0]

        def _by_post_dominance(n1, n2):
            # Same three-way result as the former ``cmp=`` lambda.
            if self._post_dominate(reversed_cyclic_graph, n1, n2):
                return 1
            if self._post_dominate(reversed_cyclic_graph, n2, n1):
                return -1
            return 0

        # Reorder nodes based on post-dominance relations. ``sorted(cmp=...)``
        # only exists on Python 2; cmp_to_key gives the same ordering everywhere.
        nodes = sorted(nodes, key=cmp_to_key(_by_post_dominance))

        return [(n.addr, n.looping_times) for n in nodes]
Ejemplo n.º 14
0
 def _traverse(self, origin, method, include_origin, reverse):
     """Lazily yield the nodes *method* visits when started from *origin*.

     A falsy *origin* is replaced by ``self.ROOT_NODE_LABEL``. When *reverse*
     is true the traversal runs on the reversed graph. The start node itself
     is dropped from the result unless *include_origin* is true.
     """
     start = origin if origin else self.ROOT_NODE_LABEL
     graph = nx.reverse(self._graph) if reverse else self._graph
     visited = method(graph, start)
     return (node for node in visited if include_origin or node != start)
Ejemplo n.º 15
0
def main() -> None:
    """Parse the input graph and print both puzzle answers."""
    G: DiGraph = parse_input()

    # solution 1: number of nodes reachable from "shiny gold" once every
    # edge is reversed
    reachable_in_reverse = nx.descendants(nx.reverse(G), "shiny gold")
    print(len(reachable_in_reverse))

    # solution 2: the count from containing_bags, minus one
    print(containing_bags(G, "shiny gold") - 1)
Ejemplo n.º 16
0
def fair_proportion(net, u, weight_fn=default_weight_fn, kappa_fn=None):
    """Sum ``weight / kappa`` over the edges entering the nodes above *u*.

    The nodes above *u* are those ``nodes_below`` reports on the reversed
    network. Every edge ``(w, v)`` of *net* that enters such a node ``v``
    contributes ``weight_fn(net, w, v) / kappa_fn(v)``. When *kappa_fn* is
    omitted, ``cache_kappa(net)`` supplies it.
    """
    kappa = cache_kappa(net) if kappa_fn is None else kappa_fn

    reversed_net = nx.reverse(net)
    total = 0
    for v in nodes_below(reversed_net, u):
        # Successors of v in the reversed network are its parents in net.
        for w in reversed_net[v]:
            total = total + weight_fn(net, w, v) / kappa(v)
    return total
Ejemplo n.º 17
0
    def get_parameterized_intercitation_dag(self, old_node, new_node, dag):
        """Compute weighting terms for the nodes lying between two nodes of a DAG.

        The nodes considered are the intersection of the descendants of
        old_node (plus old_node) and the ancestors of new_node (plus
        new_node). For each of them an importance, a network relevance and a
        network robustness score is computed; all three exponents are
        currently fixed at 1.0.

        NOTE(review): in the visible code the computed dictionaries are
        neither returned nor stored, so the function returns None -- it may
        be truncated; confirm against the full source.

        :param old_node: start node of the lineage
        :param new_node: end node of the lineage
        :param dag: networkx directed graph
        """
        desc = nx.descendants(dag, old_node)
        desc.add(old_node)
        anc = nx.ancestors(dag, new_node)
        anc.add(new_node)

        # Intersect lineages: nodes that are both below old_node and above new_node
        intersect = desc.intersection(anc)

        if (len(intersect) == 0):
            print "No common intercitations between ", old_node, " and ", new_node
        else:
            rev_dag = nx.reverse(dag, copy=True)
            # Strength of weighting due to impact (# citations)
            impact_param = 1.0

            # Strength of weighting due to network relevance of paper's citations
            network_relevance_param = 1.0

            # Strength of weighting due to redundancy in citation network
            network_robustness_param = 1.0

            # Total of the per-node importance scores (not used further below)
            sum_citations = sum(
                [pow(dag.in_degree(w), impact_param) for w in intersect])

            # Store importance score: in-degree raised to impact_param
            importance_dict = {}
            for w in intersect:
                importance_dict[w] = pow(dag.in_degree(w), impact_param)

            # Calculate network relevance: share of w's successors that have
            # a path to old_node (divides by the out-degree of w)
            net_relevance = {}
            for w in intersect:
                cited_reach_cnt = 0
                for cited in dag.neighbors(w):
                    # If we can reach old node through cited node add to count
                    if (nx.has_path(dag, cited, old_node)):
                        cited_reach_cnt += 1
                net_relevance[w] = pow(
                    float(cited_reach_cnt) / dag.out_degree(w),
                    network_relevance_param)

            # Calculate network robustness: share of w's neighbours (in both
            # directions) that offer an alternative path
            net_robustness = {}
            for w in intersect:
                citer_alt_path = 0
                cited_alt_path = 0
                # Neighbours in the reversed graph are the predecessors of w
                for citer in rev_dag.neighbors(w):
                    # If we can reach old node through citer node (without using that citation as a link)
                    if (nx.has_path(dag, citer, old_node)):
                        citer_alt_path += 1
                for cited in dag.neighbors(w):
                    if (nx.has_path(rev_dag, cited, new_node)):
                        cited_alt_path += 1
                net_robustness[w] = pow(
                    float(cited_alt_path + citer_alt_path) /
                    (dag.out_degree(w) + dag.in_degree(w)),
                    network_robustness_param)
Ejemplo n.º 18
0
 def testMax(self, n=7):
     """Check the uncertainty of a star-shaped graph against its closed form.

     The edges of ``nx.star_graph(n - 1)`` are copied into self.graph and
     reversed in place; the single node without outgoing edges becomes the
     root.
     """
     # create star graph for maximum case
     star = nx.star_graph(n - 1)
     for first, second in star.edges_iter():
         source = GN(nodetype=GN.TERM_TYPE, dbid=first)
         target = GN(nodetype=GN.TERM_TYPE, dbid=second)
         self.graph.add_edge(source, target)
     nx.reverse(self.graph, copy=False)

     sinks = [node for node, degree in self.graph.out_degree().items()
              if degree == 0]
     self.failUnless(len(sinks) == 1)
     self.graph.root = sinks[0]
     self.graph.semMakeGraph()

     graph_entropy = self.graph.calc_graph_uncert()
     nn = float(self.graph.num_nodes)
     upper_bound = -2 * log(1 / nn, 2)
     self.failUnless(graph_entropy < upper_bound)
     expected = (-1 * log(1 / nn, 2) -
                 ((nn - 1) / nn) * (log(1 / (nn - 2), 2)))
     self.failUnless(graph_entropy == expected)
Ejemplo n.º 19
0
def nonzero(c, s, t, exact, d):
    """Collect what ``rg`` yields for every selected edge with a positive ``d`` entry.

    The edges examined are those in ``rc(c, s, t)`` that also occur in
    *exact*; ``rg`` is evaluated on the reversed form of ``gc(c)``.
    """
    reversed_graph = nx.reverse(gc(c))
    collected = set()
    for u, v in rc(c, s, t).intersection(exact):
        if not d[u][v] <= 0:
            collected.update(rg(reversed_graph, u, v))
    return collected
Ejemplo n.º 20
0
def directed_random_tree(n, arrows_from_root, seed):
    """Create a random tree on *n* nodes and orient it relative to a root.

    The root is the node with the highest unnormalised betweenness
    centrality. Edges point away from the root when *arrows_from_root* is
    true and towards it otherwise.
    """
    undirected = nx.random_tree(n, seed)
    centrality = nx.betweenness_centrality(undirected, normalized=False, seed=0)
    root = max(centrality, key=centrality.get)
    outward = nx.bfs_tree(undirected, root)  # Arrows point away from root

    if not arrows_from_root:
        # Cast from ReverseView
        return nx.DiGraph(nx.reverse(outward, copy=False))
    return outward
Ejemplo n.º 21
0
def cophenetic_value(net, us, weight_fn=default_weight_fn):
    """Sum the weights of the edges entering nodes that lie above every node in *us*.

    "Above" is evaluated with ``nodes_below`` on the reversed network.
    Returns 0 when *us* is empty.
    """
    if not us:
        return 0

    rev_net = nx.reverse(net)
    above_each = [frozenset(nodes_below(rev_net, u)) for u in us]
    above_all = frozenset.intersection(*above_each)

    total = 0
    for v in above_all:
        # Successors of v in the reversed network are its parents in net.
        for w in rev_net[v]:
            total = total + weight_fn(net, w, v)
    return total
Ejemplo n.º 22
0
    def testTree(self, r=2, h=2):
        """Compare the uncertainty of a balanced tree with its closed-form value.

        A balanced tree with branching factor r and height h is copied into
        self.graph with its edges pointing up, and the computed uncertainty
        is checked against fixed bounds and against a three-term formula.
        """
        tree = nx.balanced_tree(r=r, h=h, create_using=nx.DiGraph())
        for parent, child in tree.edges_iter():
            self.graph.add_edge(GN(nodetype=GN.TERM_TYPE, dbid=parent),
                                GN(nodetype=GN.TERM_TYPE, dbid=child))
        nx.reverse(self.graph, copy=False)  # make the edges point up not down

        # The root is the only node left without outgoing edges.
        sinks = [node for node, degree in self.graph.out_degree().items()
                 if degree == 0]
        self.failUnless(len(sinks) == 1)
        self.graph.root = sinks[0]
        self.graph.semMakeGraph()

        graph_entropy = self.graph.calc_graph_uncert()
        nn = float(self.graph.num_nodes)
        graph_max = -2 * log(1 / nn, 2)
        graph_min = -1 * log(1 / nn, 2)
        print("num nodes:"+str(self.graph.num_nodes)+" entropy:"+str(graph_entropy))

        # Closed-form value, assembled term by term.
        component1 = -1 * log(1 / nn, 2)
        component2 = -((r**h) / nn) * log(1 / float(nn - h - 1), 2)
        component3 = -sum(
            ((r**i) / nn) * log(1 / float(nn - i - r**(h - i) - 1), 2)
            for i in range(1, h))
        test_entropy = component1 + component2 + component3

        self.failUnless(graph_min < graph_entropy < graph_max)
        self.failUnless(graph_entropy == test_entropy)
Ejemplo n.º 23
0
def Test_Backend1():
    """Return, as a string, the distinct cell ids reachable from 'FN_in_3'.

    Reachability is computed on the reversed graph. Nodes come back in FN
    format and are translated to cell ids; each id is kept once, in order of
    first appearance.
    """
    reversed_graph = nx.reverse(Generate_The_G_Object_Of_Graph_For_Networkx())

    unique_cell_ids = []
    # we get the nodes in FN format, not cell id
    for fn_node in nx.descendants(reversed_graph, 'FN_in_3'):
        cell_id = Translate_FN_To_Cell_ID(fn_node)
        if cell_id in unique_cell_ids:
            continue
        unique_cell_ids.append(cell_id)
    return str(unique_cell_ids)
Ejemplo n.º 24
0
def angle_reverse(G):
    """List the edges of the reversed graph with their direction data flipped.

    For every edge of ``nx.reverse(G)`` the ``'angle_deg'`` value is negated
    and the two-point ``'geometry'`` line is rebuilt with its end points
    swapped. The attribute dictionaries of the reversed graph are updated in
    place and reused in the returned ``(u, v, data)`` tuples.
    """
    flipped_edges = []
    for u, v, attrs in nx.reverse(G).edges(data=True):
        attrs['angle_deg'] = -attrs['angle_deg']
        start, end = attrs['geometry'].coords[:]
        attrs['geometry'] = LineString([Point(end), Point(start)])
        flipped_edges.append((u, v, attrs))
    return flipped_edges
def get_bowtie_components(graph):
    '''Classifying the nodes of a network into a bow-tie structure.
    Here we follow the definition of a bow-tie as in:
    "Bow-tie Decomposition in Directed Graphs" - Yang et al. IEEE (2011)

    input:  NetworkX directed graph or numpy adjacency matrix
    output: sets of nodes in the specified partitions (following the
            NetworkX input graph node labelling or labelled according to
            the order of the adjacency matrix [0, n-1]), returned as the
            tuple (S, IN, OUT, TUBES, INTENDRILS, OUTTENDRILS, OTHER):
            S           largest strongly connected component
            IN / OUT    nodes that reach S / are reachable from S
            TUBES       reachable from IN and reaching OUT
            INTENDRILS  reachable from IN, not reaching OUT
            OUTTENDRILS reaching OUT, not reachable from IN
            OTHER       all remaining nodes
    raises: AssertionError for any other input type; ValueError if the
            graph has no nodes (max of an empty sequence)
    '''
    import networkx as nx

    # Verify graph input format. isinstance also accepts subclasses
    # (np.matrix is itself a subclass of np.ndarray).
    assert isinstance(graph, (nx.DiGraph, np.ndarray)), \
        'Input should be a NetworkX directed graph or numpy adjacency matrix'
    if isinstance(graph, nx.DiGraph):
        G = graph.copy()  # work on a copy of the caller's graph
    else:
        G = nx.from_numpy_matrix(np.matrix(graph), create_using=nx.DiGraph())

    GT = nx.reverse(G, copy=True)

    # The core is the largest strongly connected component.
    S = max(nx.strongly_connected_components(G), key=len)

    # Any node of S reaches, and is reached by, the same set of nodes.
    v_any = next(iter(S))
    DFS_G = set(nx.dfs_tree(G, v_any).nodes())
    DFS_GT = set(nx.dfs_tree(GT, v_any).nodes())
    OUT = DFS_G - S
    IN = DFS_GT - S
    V_rest = set(G.nodes()) - S - OUT - IN

    TUBES = set()
    INTENDRILS = set()
    OUTTENDRILS = set()
    OTHER = set()
    for v in V_rest:
        # Plain emptiness tests replace the former ``len(...) is not 0``,
        # which compared object identity with an int literal.
        irv = not IN.isdisjoint(nx.dfs_tree(GT, v).nodes())  # IN reaches v
        vro = not OUT.isdisjoint(nx.dfs_tree(G, v).nodes())  # v reaches OUT
        if irv and vro:
            TUBES.add(v)
        elif irv:
            INTENDRILS.add(v)
        elif vro:
            OUTTENDRILS.add(v)
        else:
            OTHER.add(v)

    return S, IN, OUT, TUBES, INTENDRILS, OUTTENDRILS, OTHER
    def get_parameterized_intercitation_dag(self,old_node,new_node,dag):
        """Compute weighting terms for the nodes lying between two nodes of a DAG.

        The nodes considered are the intersection of the descendants of
        old_node (plus old_node) and the ancestors of new_node (plus
        new_node). For each of them an importance, a network relevance and a
        network robustness score is computed; all three exponents are
        currently fixed at 1.0.

        NOTE(review): in the visible code the computed dictionaries are
        neither returned nor stored, so the function returns None -- it may
        be truncated; confirm against the full source.

        :param old_node: start node of the lineage
        :param new_node: end node of the lineage
        :param dag: networkx directed graph
        """
        desc = nx.descendants(dag,old_node)
        desc.add(old_node)
        anc = nx.ancestors(dag,new_node)
        anc.add(new_node)

        # Intersect lineages: nodes that are both below old_node and above new_node
        intersect = desc.intersection(anc)

        if (len(intersect) == 0):
            print "No common intercitations between ",old_node," and ",new_node
        else:
          rev_dag = nx.reverse(dag,copy=True)
          # Strength of weighting due to impact (# citations)
          impact_param = 1.0

          # Strength of weighting due to network relevance of paper's citations
          network_relevance_param = 1.0

          # Strength of weighting due to redundancy in citation network
          network_robustness_param = 1.0

          # Total of the per-node importance scores (not used further below)
          sum_citations = sum([pow(dag.in_degree(w),impact_param) for w in intersect])

          # Store importance score: in-degree raised to impact_param
          importance_dict = {}
          for w in intersect:
            importance_dict[w] = pow(dag.in_degree(w),impact_param)

          # Calculate network relevance: share of w's successors that have a
          # path to old_node (divides by the out-degree of w)
          net_relevance = {}
          for w in intersect:
            cited_reach_cnt = 0
            for cited in dag.neighbors(w):
              # If we can reach old node through cited node add to count
              if (nx.has_path(dag,cited,old_node)):
                cited_reach_cnt += 1
            net_relevance[w] = pow(float(cited_reach_cnt)/dag.out_degree(w),network_relevance_param)


          # Calculate network robustness: share of w's neighbours (in both
          # directions) that offer an alternative path
          net_robustness = {}
          for w in intersect:
            citer_alt_path = 0
            cited_alt_path = 0
            # Neighbours in the reversed graph are the predecessors of w
            for citer in rev_dag.neighbors(w):
              # If we can reach old node through citer node (without using that citation as a link)
              if (nx.has_path(dag,citer,old_node)):
                citer_alt_path += 1
            for cited in dag.neighbors(w):
              if (nx.has_path(rev_dag,cited,new_node)):
                cited_alt_path += 1
            net_robustness[w] = pow(float(cited_alt_path + citer_alt_path)/(dag.out_degree(w) + dag.in_degree(w)),network_robustness_param)
Ejemplo n.º 27
0
    def plot_for_variable(self, variable):
        """Plot *variable* together with everything reachable from it in reverse.

        Edges are read from "connections.csv"; rows lacking either
        'Variable_set' or 'Variable_get' are ignored. Each remaining row adds
        an edge from its fifth to its first column (positions 4 and 0).
        """
        edges_table = pd.read_csv("connections.csv")
        has_both_ends = (edges_table['Variable_set'].notnull()
                         & edges_table['Variable_get'].notnull())

        graph = nx.DiGraph()
        for _, row in edges_table[has_both_ends].iterrows():
            graph.add_edge(row[4], row[0])

        flipped = nx.reverse(graph)
        to_plot = list(nx.descendants(flipped, variable))
        to_plot.append(variable)
        self.plot(to_plot)
Ejemplo n.º 28
0
    def add_cited_citing_nodes(self, dag, citegraph):
        """Expand a subgraph of the cite graph with all in- and out-neighbours.

        For every node of *dag*, each edge of *citegraph* leaving or entering
        that node is copied into a new graph.

        :returns: the expanded DiGraph
        :raises AssertionError: if the expanded graph contains a cycle
        """
        expanded = nx.DiGraph()
        rev_cite = nx.reverse(citegraph, copy=True)
        for node in dag.nodes():
            # Outgoing edges first, then incoming ones (via the reversed copy).
            expanded.add_edges_from(
                (node, successor) for successor in citegraph.neighbors(node))
            expanded.add_edges_from(
                (predecessor, node) for predecessor in rev_cite.neighbors(node))

        assert not list(nx.simple_cycles(expanded))

        return expanded
    def add_cited_citing_nodes(self,dag, citegraph):
        """Return *dag* widened by every cite-graph edge touching one of its nodes.

        Both the outgoing and the incoming cite-graph edges of each node in
        *dag* are added to a fresh DiGraph. An AssertionError is raised when
        the result is not acyclic.
        """
        widened = nx.DiGraph()
        incoming_view = nx.reverse(citegraph,copy=True)
        for node in dag.nodes():
            outgoing = [(node, target) for target in citegraph.neighbors(node)]
            widened.add_edges_from(outgoing)
            # Neighbours in the reversed copy are the nodes pointing at `node`.
            incoming = [(source, node) for source in incoming_view.neighbors(node)]
            widened.add_edges_from(incoming)

        cycles = list(nx.simple_cycles(widened))
        assert len(cycles) == 0

        return widened
Ejemplo n.º 30
0
def convert_location_type(location, source_type, desired_type):
    """ Translate a location of one type into related locations of another.

    Nodes of the location graph are named ``<location>_<type>``.  Starting
    at the node for ``location``/``source_type`` a depth-first search
    collects everything reachable, and the hits are narrowed down to nodes
    of ``desired_type``.  When ``compare_types(desired_type, source_type)``
    is negative (converting "up") the search runs on the reversed graph so
    that it walks from a location towards its containers.

    Examples:
    Assume available_location_types() is ['ecosystem', 'region', 'habitat'],
    and the location graph is:
     - prod
       - uswest1-prod
         - uswest1aprod
         - uswest1bprod

    # habitat -> containing ecosystem
    convert_location_type('uswest1aprod', 'habitat', 'ecosystem') -> ['prod']
    # region -> member habitats
    convert_location_type('uswest1-prod', 'region', 'habitat') ->
        ['uswest1aprod', 'uswest1bprod']

    :param location: Name of the location to convert, e.g. "devc"
    :param source_type: Type of ``location``; one of the values returned by
        available_location_types.  Names may repeat across levels of the
        graph, so the type pins down which "devc" is meant (ecosystem or
        habitat).
    :param desired_type: Type the caller wants back; also one of the values
        returned by available_location_types.
    :returns: Names (type suffix stripped) of the reachable locations of
        ``desired_type``, sorted so repeated calls give the same order.
    :rtype: list of strings
    """
    start = '{0}_{1}'.format(location, source_type)
    wanted_suffix = '_' + desired_type

    walking_up = compare_types(desired_type, source_type) < 0
    graph = location_graph()
    if walking_up:
        # Containers are found by following the edges backwards
        graph = nx.reverse(graph)

    reachable = set(nx.dfs_preorder_nodes(graph, start))

    # Keep the nodes of the requested type and drop their type suffix
    return sorted(
        [name[:name.rfind('_')]
         for name in reachable if name.endswith(wanted_suffix)])
Ejemplo n.º 31
0
    def preprocess(self):
        '''
        Label every node with its cheapest remaining cost to the target.

        For each resource name in ``self.R`` the node attribute
        ``"s_" + name`` is set to the shortest-path length from that node to
        ``self.target`` using the resource as edge weight; nodes missing
        from the shortest-path result get ``inf``.  A zero entry in
        ``self.R_max`` is replaced by the source node's value scaled by
        ``1 + self.tightness``.

        When ``self.n`` is ``None`` the attribute ``"s_cost"`` is filled the
        same way from the ``"Cost"`` edge weight (Bellman-Ford from the
        target on the reversed graph); otherwise ``self.bounding_scheme()``
        is called.  Finally every node receives a fresh empty ``"labels"``
        list.
        '''
        infinity = float("inf")

        for idx, resource in enumerate(self.R):
            key = "s_" + resource
            to_target = nx.shortest_path_length(
                self.G, weight=resource, target=self.target)
            nx.set_node_attributes(self.G, {
                node: {key: to_target[node] if node in to_target else infinity}
                for node in self.G.nodes()
            })

            if self.R_max[idx] == 0:
                try:
                    # Node attributes live in G.nodes[...]; G[...] is the
                    # adjacency mapping and never holds the "s_*" labels, so
                    # indexing it here always failed.
                    source_bound = self.G.nodes[self.source][key]
                except KeyError:
                    print("Infeasible")
                else:
                    self.R_max[idx] = source_bound * (1 + self.tightness)

        if self.n is None:
            # Distances *to* the target equal distances *from* it in the
            # reversed graph.
            _, cost_to_target = nx.bellman_ford_predecessor_and_distance(
                nx.reverse(self.G, copy=True),
                weight="Cost",
                source=self.target)
            nx.set_node_attributes(self.G, {
                node: {
                    "s_cost": (cost_to_target[node]
                               if node in cost_to_target else infinity)
                }
                for node in self.G.nodes()
            })
        else:
            self.bounding_scheme()

        # One separate list per node, never a shared object.
        for node in self.G.nodes:
            self.G.nodes[node]["labels"] = []
Ejemplo n.º 32
0
def partition_strings_2set(X, C, X_file, C_file, params):
    """Greedily assign read nodes to candidate centers.

    The bipartite graph comes from
    ``graphs.construct_exact_2set_nearest_neighbor_bipartite_graph``; all
    work happens on a reversed copy of it.  Nodes whose ``bipartite``
    attribute equals 0 are treated as reads, every other node as a
    candidate.  Repeatedly, the candidate with the highest degree is chosen
    as a center (ties go to the candidate that sorts first), it claims all
    of its neighbours in the reversed graph, and the center together with
    those neighbours is removed.  The loop ends when no candidate is left.

    The five arguments are passed through unchanged to the graph
    constructor.

    :returns: ``(G_star, partition)`` where ``G_star`` is the constructed
        graph and ``partition`` maps each chosen center to the set of nodes
        it claimed.
    """
    G_star = graphs.construct_exact_2set_nearest_neighbor_bipartite_graph(
        X, C, X_file, C_file, params)
    # nx.reverse copies by default, so G_star itself is returned untouched.
    remaining = nx.reverse(G_star)

    def candidates_left():
        """Return the nodes of ``remaining`` that are not reads."""
        reads = set(n for n, d in remaining.nodes(data=True)
                    if d['bipartite'] == 0)
        return set(remaining) - reads

    partition = {}
    candidate_nodes = candidates_left()
    while candidate_nodes:
        # bipartite.degrees returns (degrees of the other set, degrees of
        # the given nodes); only the candidates' degrees are needed.
        cand_deg = dict(bipartite.degrees(remaining, candidate_nodes)[1])
        # max() keeps the first maximum, so sorting fixes the tie-break.
        center = max(sorted(cand_deg), key=lambda key: cand_deg[key])

        supporters = list(remaining.neighbors(center))
        partition[center] = set(supporters)
        remaining.remove_node(center)
        remaining.remove_nodes_from(supporters)

        candidate_nodes = candidates_left()

    return G_star, partition
Ejemplo n.º 33
0
def convert_location_type(location, source_type, desired_type):
    """ Map a location onto the connected locations of another type.

    The location graph names its nodes ``<location>_<type>``.  A depth-first
    search from the node of ``location``/``source_type`` gathers every
    reachable node; only those of ``desired_type`` are reported.  If
    ``compare_types(desired_type, source_type)`` is negative the conversion
    goes "up", so the search is done on the reversed graph.

    Examples:
    Assume available_location_types() is ['ecosystem', 'region', 'habitat'],
    and the location graph is:
     - prod
       - uswest1-prod
         - uswest1aprod
         - uswest1bprod

    # a habitat and the ecosystem that contains it
    convert_location_type('uswest1aprod', 'habitat', 'ecosystem') -> ['prod']
    # a region and the habitats that belong to it
    convert_location_type('uswest1-prod', 'region', 'habitat') ->
        ['uswest1aprod', 'uswest1bprod']

    :param location: The location name to start from, e.g. "devc"
    :param source_type: The type of ``location`` (a value from
        available_location_types).  Needed because the same name can exist
        at several levels, e.g. "devc" as ecosystem and as habitat.
    :param desired_type: The type of the locations to return (a value from
        available_location_types).
    :returns: Sorted names, without the type suffix, of all reachable
        locations of ``desired_type``.
    :rtype: list of strings
    """
    search_node = '{0}_{1}'.format(location, source_type)

    if compare_types(desired_type, source_type) < 0:
        # Upward conversion: traverse against the edge direction
        search_graph = nx.reverse(location_graph())
    else:
        search_graph = location_graph()

    matches = []
    for node in set(nx.dfs_preorder_nodes(search_graph, search_node)):
        # Only nodes of the requested type are of interest
        if node.endswith('_' + desired_type):
            matches.append(node[:node.rfind('_')])
    return sorted(matches)
Ejemplo n.º 34
0
def difastmap_average_limitstorage(G,
                                   K,
                                   epsilon,
                                   dis_store,
                                   PNumber,
                                   alg='L1'):
    """Compute up to ``K`` embedding coordinates for every node of ``G``.

    Each round picks a random start node, searches (for at most ``C``
    steps, ``C`` being a module-level name) for a far-apart pivot pair
    ``(node_a, node_b)``, and appends one coordinate per node derived from
    the combined distances to the two pivots.  Distances are Dijkstra path
    lengths on a copy of ``G`` and on its reverse, merged by
    ``combine_length``.  For the first ``PNumber`` rounds the raw pivot
    distances are handed to ``store_distances``.  The rounds stop early
    once the pivot distance drops below ``epsilon``.

    :param alg: ``'L1'`` or ``'L2'``; selects the coordinate formula.
    :returns: dict mapping each node to its list of coordinates.
    """
    NG = G.copy()
    RG = nx.reverse(NG)
    # one (initially empty) coordinate list per node
    embedding = {node: [] for node in list(NG.nodes())}

    def distances_from(pivot, dim):
        """Return (outgoing, incoming, combined) distances for ``pivot``."""
        outgoing = nx.single_source_dijkstra_path_length(NG, pivot)
        incoming = nx.single_source_dijkstra_path_length(RG, pivot)
        combined = combine_length(outgoing, incoming, embedding, pivot, dim,
                                  alg)
        return outgoing, incoming, combined

    for r in range(K):
        node_a = choice(list(NG.nodes()))
        node_b = node_a
        # Hop to the farthest node until the pair stops changing
        for _ in range(C):
            _, _, length = distances_from(node_a, r)
            node_c = max(length.items(), key=operator.itemgetter(1))[0]
            if node_c == node_b:
                break
            node_b, node_a = node_a, node_c

        length_oa, length_ia, length_a = distances_from(node_a, r)
        length_ob, length_ib, length_b = distances_from(node_b, r)

        if r < PNumber:
            store_distances(G, dis_store, node_a, length_oa, length_ia)
            store_distances(G, dis_store, node_b, length_ob, length_ib)

        dis_ab = length_a[node_b]
        if dis_ab < epsilon:
            break

        # Coordinate of every node along the axis spanned by the pivots
        for node in list(NG.nodes()):
            if alg == 'L1':
                p_ir = float(length_a[node] + dis_ab - length_b[node]) / 2
            elif alg == 'L2':
                p_ir = float(length_a[node] + dis_ab -
                             length_b[node]) / (2 * math.sqrt(dis_ab))
            embedding[node].append(p_ir)
    return embedding
Ejemplo n.º 35
0
def model_dag(model):
    """Return the layer graph of ``model`` as an ``nx.DiGraph``.

    The adjacency information is gathered by ``_collect_layer_dag_dict``
    starting at every input layer.  The resulting graph is passed through
    ``restrict`` with the input layers, then reversed, passed through
    ``restrict`` with the output layers, and reversed back.

    :raises AssertionError: if an output layer of the model is missing from
        the graph, or a node without successors is not an output layer
        (output layers that are not leaves are allowed).
    """
    adjacency = {}
    for layer in model.input_layers:
        _collect_layer_dag_dict(layer, adjacency)

    # Restrict from the inputs, then from the outputs on the flipped graph
    from_inputs = restrict(
        nx.from_dict_of_lists(adjacency, create_using=nx.DiGraph()),
        model.input_layers)
    from_outputs = restrict(nx.reverse(from_inputs), model.output_layers)
    dag = nx.reverse(from_outputs)

    expected_outputs = set(model.output_layers)
    # every declared output must have survived the restriction ...
    assert all(layer in dag.nodes for layer in expected_outputs)
    # ... and nothing but declared outputs may end the graph
    leaves = set(n for n in dag.nodes if not list(dag.successors(n)))
    assert all(leaf in expected_outputs for leaf in leaves)

    return dag
Ejemplo n.º 36
0
    def set_routing(self, R, sink):
        """Store the routing graph and number its nodes outward from ``sink``.

        ``sink`` gets index 0.  The remaining nodes are numbered 1, 2, ... in
        the order a breadth-first search over the reversed routing graph
        first reaches them.  Both directions of the mapping are kept, in
        ``self.index_for_label`` and ``self.label_for_index``.
        """
        self.R = R
        self.sink = sink

        self.index_for_label = {sink: 0}
        self.label_for_index = {0: sink}

        next_index = 1
        for _, label in nx.bfs_edges(nx.reverse(self.R), sink):
            if label in self.index_for_label:
                continue
            self.index_for_label[label] = next_index
            self.label_for_index[next_index] = label
            next_index += 1
Ejemplo n.º 37
0
def create_tree(r, h=None, num_nodes=None):
    """Build a ``semGODiGraph`` tree with branching factor ``r``.

    Exactly one of ``h`` (height) and ``num_nodes`` has to be truthy.  When
    ``num_nodes`` is given, the height is derived from it and surplus nodes
    with the highest ids are removed from the balanced tree so that the node
    count matches.  A branching factor of 1 is built by hand as a chain.

    NOTE(review): the reversal step discards the return value of
    ``nx.reverse(..., copy=False)``, i.e. it relies on that call flipping the
    graph in place - confirm against the networkx version in use.

    :returns: the populated graph after ``semMakeGraph()`` has been called.
    """
    # one of height / node count is required, but not both
    assert xor(bool(h), bool(num_nodes))

    def term(dbid):
        return GN(nodetype=GN.TERM_TYPE, dbid=dbid)

    surplus = 0
    if num_nodes is not None:
        if r == 1:
            h = num_nodes
        else:
            h = ceil(log(num_nodes * (r - 1) + 1, r) - 1)
            full_size = (r**(h + 1) - 1) / (r - 1)
            surplus = int(full_size - num_nodes)

    result_graph = semGODiGraph(None, Aspects.BP)
    if r == 1:
        # nx.balanced_tree does not seem to cope with a branching factor of 1
        for child in range(1, h + 1):
            result_graph.add_edge(term(child), term(child - 1))
    else:
        balanced = nx.balanced_tree(
            r=r, h=h,
            create_using=nx.DiGraph())
        total = balanced.number_of_nodes()
        for extra in range(total - surplus, total):
            balanced.remove_node(extra)
        if num_nodes is not None:
            assert num_nodes == balanced.number_of_nodes()
        for parent, child in balanced.edges_iter():
            result_graph.add_edge(term(parent), term(child))
        nx.reverse(result_graph, copy=False)  # edges should point up, not down
        roots = [
            node for node, degree in result_graph.out_degree().items()
            if degree == 0
        ]
        result_graph.root = roots[0]
    result_graph.semMakeGraph()
    return result_graph
Ejemplo n.º 38
0
def page_rank(G, m, n=100):
    """Rank the nodes of ``G`` with an iterative PageRank computation.

        Every node starts with the score ``1 / len(G)``.  In each of the
        ``n`` sweeps the nodes are visited in graph order and a node's score
        is replaced by ``(1 - m) * (sum over backlinks of score / out-degree)
        + dangling share + m / len(G)``.  Scores are updated in place, so
        nodes later in a sweep already see the new values of earlier ones.

        Parameters
        -----------
        G : nx.DiGraph Object
            The directed graph to rank.  It is not modified.
        m : float
            Damping factor (= probability of moving to a random node).
        n : int (default: 100)
            Number of sweeps over all nodes.

        Returns
        -----------
        dict
            Maps each node to its importance score, ordered by descending
            score.  Empty when ``G`` has no nodes.
    """
    size = len(G)  # number of nodes
    if size == 0:
        # Nothing to rank; also avoids dividing by zero below.
        return {}

    # Reversed edges give the backlinks of each node.
    G_reverse = nx.reverse(G)
    dangling_nodes = find_dangling_nodes(G)
    x = {node: 1 / size for node in G.nodes()}  # x_0: equal importance

    # Constant teleportation term m * 1/size.
    S = m * 1 / size

    # The graph does not change between sweeps, so the backlinks and the
    # 1/out-degree weight of each one are computed once up front.
    incoming = {
        node: [(backlink, 1 / len(list(G.neighbors(backlink))))
               for backlink in G_reverse.neighbors(node)]
        for node in x
    }

    for _ in range(n):
        D = (1 - m) * sum(x[node] / size for node in dangling_nodes)

        for node, links in incoming.items():
            A = 0
            for backlink, weight in links:
                A += weight * x[backlink]
            x[node] = (1 - m) * A + D + S

    return dict(sorted(x.items(), key=lambda item: item[1], reverse=True))
Ejemplo n.º 39
0
def _write_graph_and_reverse(step, forward_path, reverse_path):
    """Pickle the 4096-state transition graph of ``step`` and its reverse."""
    g = nx.DiGraph()
    for x in range(4096):
        for ns, w in step(convert_int(x)):
            y = convert_states(ns)
            g.add_edge(x, y, weight=w)
    nx.write_gpickle(g, forward_path)
    print("Generated %s." % forward_path)

    # Use the graph that reverse() returns: depending on the networkx
    # version, copy=False yields a view instead of flipping ``g`` in place,
    # so writing ``g`` again could store the forward graph a second time.
    nx.write_gpickle(nx.reverse(g, copy=True), reverse_path)
    print("Generated %s." % reverse_path)


def main():
    """Generate the forward/backward transition graphs and their reverses.

    Writes ``forward.gpickle`` and ``rev_forward.gpickle`` from ``roundf``,
    then ``backward.gpickle`` and ``rev_backward.gpickle`` from
    ``inv_roundf``, all into the current working directory.
    """
    _write_graph_and_reverse(roundf, "forward.gpickle", "rev_forward.gpickle")
    _write_graph_and_reverse(inv_roundf, "backward.gpickle",
                             "rev_backward.gpickle")
def plot_cascade_patterns_shapes():
    """Plot the most frequent cascade shapes in a 4x4 grid.

    Reads ``../data/logs/cascade_shapes.tsv`` (tab separated: column 0 is an
    edge list such as ``['a->b', 'b->c']``, column 1 an integer frequency),
    merges the frequencies of isomorphic patterns, draws the reversed graph
    of up to 16 of the most frequent shapes and saves the figure to
    ``../plots/cascade_shapes.pdf``.
    """
    cascade_type_frequency = {}

    # NOTE(review): binary mode is what the Python 2 csv module expects;
    # under Python 3 the reader needs text mode (newline='') - confirm the
    # target interpreter before changing it.
    with open('../data/logs/cascade_shapes.tsv', 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter='\t')
        for row in reader:
            pattern = row[0].replace("[", "").replace("]", "").replace("'", "")
            # build graph from pattern
            local_di_graph = nx.DiGraph()
            for edge in pattern.split(","):
                endpoints = edge.strip().split("->")
                local_di_graph.add_edge(endpoints[0], endpoints[1])

            frequency = int(row[1])
            # Reuse the key of an isomorphic shape seen earlier, if any.
            # This compares against every stored shape, so it is O(n^2).
            known = next(
                (prior for prior in cascade_type_frequency
                 if nx.is_isomorphic(local_di_graph, prior)),
                None)
            if known is None:
                cascade_type_frequency[local_di_graph] = frequency
            else:
                cascade_type_frequency[known] += frequency

    # Plot the k most frequent patterns; k matches the 4x4 subplot grid.
    k = 16
    ranked = sorted(cascade_type_frequency.items(),
                    key=lambda item: item[1], reverse=True)
    plt.figure(1)
    # Slicing keeps this safe when fewer than k distinct shapes exist.
    for i, (shape, _) in enumerate(ranked[:k], 1):
        plt.subplot(4, 4, i)
        nx.draw(nx.reverse(shape), with_labels=False, node_size=50)
        plt.title("$r_{" + str(i) + "}$")
    plt.tight_layout()
    plt.savefig('../plots/cascade_shapes.pdf', bbox_inches='tight')
    plt.clf()
    def add_relevant_citing_nodes(self,dag,citegraph, percentage_rank):
        """Add edges between ``dag`` and the most related nodes of ``citegraph``.

        Every node of ``citegraph`` is scored twice: by the fraction of the
        nodes it cites that lie in ``dag``, and by the fraction of its citers
        that lie in ``dag``.  For the best-scoring ``percentage_rank`` share
        of each ranking, the node's edges to (respectively from) nodes
        currently in ``dag`` are added to ``dag``.

        ``dag`` is modified in place and also returned.  If ``dag`` is
        ``None`` a message is printed and ``None`` is returned.

        NOTE(review): the ``x < len(...) - 1`` guard means the last entry of
        each ranking is never used, even for ``percentage_rank == 1`` -
        confirm whether that is intended.
        """
        rev_citegraph = nx.reverse(citegraph, copy=True)
        if dag is None:
          print("No itersection dag")
          return dag

        # Snapshot once; the scores refer to dag as it was on entry.
        dag_nodes = set(dag.nodes())

        def rank_by_overlap(graph):
            """Sort citegraph nodes by the share of their neighbours in dag."""
            relevance = {}
            for node in citegraph:
                linked = set(graph.neighbors(node))
                if len(linked) == 0:
                    continue
                # Convert before dividing: under Python 2 ``int / int``
                # truncates, which collapsed every score to 0 or -1.
                # Negated so that an ascending sort puts the best first.
                relevance[node] = -float(len(linked & dag_nodes)) / len(linked)
            return sorted(relevance.items(), key=operator.itemgetter(1))

        sorted_cite_relevance = rank_by_overlap(citegraph)
        sorted_citer_relevance = rank_by_overlap(rev_citegraph)

        print("%s %s" % (sorted_cite_relevance, sorted_citer_relevance))
        for x in range(int(percentage_rank*len(sorted_cite_relevance))):
            if x < len(sorted_cite_relevance) -1:
                node_to_add = sorted_cite_relevance[x]
                for neigh in citegraph.neighbors(node_to_add[0]):
                    if neigh in dag:
                        dag.add_edge(node_to_add[0],neigh)
                        print("adding edge %s" % ((node_to_add[0],neigh),))

        for x in range(int(percentage_rank*len(sorted_citer_relevance))):
            if x < len(sorted_citer_relevance) -1:
                node_to_add = sorted_citer_relevance[x]
                for neigh in rev_citegraph.neighbors(node_to_add[0]):
                    if neigh in dag:
                        dag.add_edge(neigh,node_to_add[0])
                        print("adding edge %s" % ((neigh,node_to_add[0]),))
        return dag
    def greedy_coherence_graph_create(self,dag):
        """Greedily grow chains of nodes and link overlapping chains.

        Chains are kept in a heap ordered by their score (smallest first).
        A popped chain that has reached ``self.m_chain_length`` nodes is
        kept; a shorter one is extended by every neighbour of its last node
        in the reversed ``dag`` that is not yet part of the chain, and each
        extension scoring below ``-self.min_coherence`` goes back onto the
        heap.  The kept chains become the nodes (as tuples) of a new
        ``nx.DiGraph``; two chains are connected when one without its first
        element equals the other without its last element.

        The result is stored in ``self.cohGraph``; nothing is returned.
        Progress information is printed to stdout.

        :raises AssertionError: if the chain graph contains a cycle.
        """
        q = []
        cohGraph = nx.DiGraph()
        rev_dag = nx.reverse(dag,copy=True)
        chain_list = []
        num_chains = 0
        #Add all nodes to priority queue
        for node in dag.nodes():
            #Every single-node chain starts with the same score of -10
            #print "pushing node",node," to heap"
            heappush(q,(-10,[node]))
            # NOTE(review): the counter is also bumped for every seed node,
            # so it starts at the node count of dag - confirm this is the
            # intended meaning of threshold_num_chains.
            num_chains += 1


        while num_chains < self.threshold_num_chains:
            if (len(q)) == 0:
                #print "Out of choices to pop from heap"
                break
            # Smallest score first; ties fall back to comparing the chains
            best_choice = heappop(q)

            #print best_choice, "is popped val"
            best_chain = best_choice[1]
            #print type(best_chain),"is type of chain of length ", len(best_chain)

            # If chain is long enough, turn it into a vertex of G
            if len(best_chain) >= self.m_chain_length:
                chain_list.append(best_chain)
                num_chains += 1
            else:
                #Generate all extensions of best_Choice and add to queue
                #print "Generating all extensions of ",best_chain
                #x=raw_input()
                #Get last element of chain
                last_ele = best_chain[-1]

                #Counts the extensions pushed; only used by the disabled check below
                num_ext = 0

                # Neighbours in the reversed dag are the predecessors of
                # last_ele in dag
                for i,ext in enumerate(rev_dag.neighbors(last_ele)):
                    # Never visit a node twice within one chain
                    if (ext in best_chain):
                        continue
                    #print "Adding to chain",best_chain, " with ",ext

                    # Copy, so the popped chain itself is left unchanged
                    new_chain = best_chain[:]
                    new_chain.append(ext)
                    #print "Adding new chain as extension",new_chain
                    chain_coherence= self.calc_coherence(new_chain)
                    # presumably calc_coherence returns a negated score
                    # (lower is better) - verify against its definition
                    if (chain_coherence < -self.min_coherence):
                        heappush(q,(chain_coherence,new_chain))
                        num_ext += 1
                #if (num_ext == 0):
                
                    #print "No more extensions possible"
                    #break
        print "Done computing chains"
            #Now convert nodes in coherence graph into paths, linking paths at all intersecting nodes
        for chain in chain_list:
            print str(chain), "coherence - ",self.calc_coherence(chain)
            # Lists are not hashable, so chains are stored as tuples
            cohGraph.add_node(tuple(chain))

        # Compare every ordered pair of distinct chains
        for chain in chain_list:
            for other_chain in chain_list:
                if (chain != other_chain):
                    # chain shifted by one element continues as other_chain
                    if chain[1:] == other_chain[:-1]:
                        #print "Overlap",str(chain), " and ",other_chain
                        cohGraph.add_edge(tuple(chain),tuple(other_chain))
                    # ... and the same test in the opposite direction
                    if other_chain[1:] == chain[:-1]:
                        #print "Overlap",chain, " and ",other_chain
                        cohGraph.add_edge(tuple(other_chain),tuple(chain))
        print "Num overlaps (Edges)",len(cohGraph.edges()), "num chains :",len(cohGraph.nodes())

        #Make sure construction made no loops
        assert(len(list(nx.simple_cycles(cohGraph))) == 0)
        #Keep the finished graph on the instance
        self.cohGraph = cohGraph
        #Initialise the 'walk-tally' attribute of every node to 0
        # NOTE(review): (graph, name, value) is the argument order of older
        # networkx releases - confirm against the installed version.
        nx.set_node_attributes(self.cohGraph, 'walk-tally',0)
def create_plot(G, sritys, layout='spring', K=0.2, 
                        dedirect=False, sritdraw=True, reverse=False):
    """
    Draw the graph ``G`` with matplotlib.

    Arguments:
    G - a networkx graph whose nodes carry the attributes 'srtkds' and
        'tipas'
    sritys - a string consisting of one or any number of letters
        representing different fields, e.g. 'hmptba'; compared in upper
        case against the 'srtkds' node attribute
    layout - 'spring' or 'spectral'
    K - the ``k`` value passed to the spring layout
    dedirect - convert the graph to an undirected one before drawing
    sritdraw - keep only the nodes whose 'srtkds' is in ``sritys`` plus
        their neighbours
    reverse - reverse the edge directions before drawing

    Returns:
    ``plt.figure(1)``.

    Raises:
    ValueError if ``layout`` is not one of the supported names.
    """
    # Fail early: an unknown layout used to surface later as a NameError
    # on ``pos``.
    if layout not in ('spring', 'spectral'):
        raise ValueError(
            "unsupported layout %r; expected 'spring' or 'spectral'"
            % (layout,))

    plt.figure(figsize=(10, 10))

    SRITYS = [x.upper() for x in sritys]

    if sritdraw:
        selected = [node for node, data in G.nodes(data=True)
                    if data['srtkds'] in SRITYS]
        neighbours = [nb for node in selected for nb in G.neighbors(node)]
        G = G.subgraph(selected + neighbours)

    if layout == 'spring':
        pos = nx.spring_layout(G, k=K, iterations=1000)
    else:
        pos = nx.spectral_layout(G, dim=2, weight='weight', scale=1)

    COLORS = ['r', 'b', 'g', 'c', 'm', 'y', 'k']
    SKSLIST = [z[0] for z in G.nodes(data=True) if z[1]['tipas'] == u'saka']
    KRPLIST = [z[0] for z in G.nodes(data=True) if z[1]['tipas'] == u'kryptis']

    # Positions were computed above; these conversions keep the node set,
    # so ``pos`` stays valid.
    if dedirect:
        G = nx.Graph(G)
    if reverse:
        G = nx.reverse(G)

    nx.draw_networkx_nodes(G,
                           pos,
                           nodelist=SKSLIST,
                           node_color='k',
                           node_shape='s',
                           node_size=200,
                           alpha=0.1)

    nx.draw_networkx_nodes(G,
                           pos,
                           nodelist=KRPLIST,
                           node_color=COLORS[1],
                           node_shape='s',
                           node_size=500,
                           alpha=0.25)

    nx.draw_networkx_edges(G,
                           pos,
                           edgelist=G.edges(),
                           width=1,
                           alpha=0.1,
                           edge_color='k')

    nx.draw_networkx_labels(G,
                            pos,
                            labels=None,
                            font_size=12,
                            font_color='k',
                            font_family='sans-serif',
                            font_weight='normal',
                            alpha=0.6,
                            ax=None)

    plt.title(u'2011 m Šmm Klasifikacijos tarpdiscipliniškumas')
    plt.grid(False)
    # NOTE(review): this returns figure number 1, which is the figure drawn
    # above only when no other figure was open before the call - confirm.
    return plt.figure(1) 
Ejemplo n.º 44
0
def prune_graph(graph,in_hinges,out_hinges,reverse=False):
    """Reduce ``graph`` to a breadth-first forest plus all hinge edges.

    The search is seeded with the nodes of in-degree zero, one kind of
    hinge, and the successors of the other kind.  It then expands level by
    level; each newly reached vertex is attached to a single, already
    reached predecessor.  When the frontier dries up while vertices remain,
    a message is printed and the search restarts from one of the remaining
    vertices.  Afterwards every edge leaving or entering a hinge (depending
    on the hinge kind) is added back.

    With ``reverse=True`` the whole procedure runs on the reversed graph,
    with the roles of the two hinge kinds swapped, and the result is
    reversed back before it is returned.

    :param graph: directed networkx graph; not modified.
    :param in_hinges: set of nodes; entries missing from the graph are
        ignored.
    :param out_hinges: set of nodes; entries missing from the graph are
        ignored.
    :param reverse: work on the reversed graph.
    :returns: the pruned ``nx.DiGraph``.
    """
    H=nx.DiGraph()
    G = nx.reverse(graph,copy=True) if reverse else graph
    start_nodes = [x for x in G.nodes() if G.in_degree(x) ==0]

    graph_nodes = set(G.nodes())
    in_hinges = list(in_hinges.intersection(graph_nodes))
    out_hinges = list(out_hinges.intersection(graph_nodes))

    # In the reversed graph the two hinge kinds swap roles: "forward"
    # hinges contribute their successors, "backward" hinges themselves.
    if reverse:
        forward_hinges, backward_hinges = in_hinges, out_hinges
    else:
        forward_hinges, backward_hinges = out_hinges, in_hinges

    for node in forward_hinges:
        for successor in G.successors(node):
            H.add_node(successor)
    for node in backward_hinges:
        H.add_node(node)
    # Explicit loop: map() is lazy on Python 3 and would add nothing.
    for node in start_nodes:
        H.add_node(node)

    all_vertices=set(G.nodes())
    current_vertices=set(H.nodes())
    last_discovered_vertices=current_vertices
    while all_vertices-current_vertices:
        discovered_vertices_set=set([x for node in last_discovered_vertices
                                  for x in G.successors(node)
                                  if x not in current_vertices])
        for vertex in discovered_vertices_set:
            # One parent is enough; that keeps the search part a forest.
            for v_predecessor in G.predecessors(vertex):
                if v_predecessor in current_vertices:
                    H.add_edge(v_predecessor,vertex)
                    break

        if len(discovered_vertices_set)==0:
            print(last_discovered_vertices)
            print('did not reach all nodes')
            print('size of G: '+str(len(G.nodes())))
            print('size of H: '+str(len(H.nodes())))

            # Restart from an arbitrary vertex that is still missing.  It
            # has to be recorded as reached (it joins current_vertices
            # below); otherwise a restart vertex without unreached
            # successors would be picked again and again, forever.
            rand_node = next(iter(all_vertices-current_vertices))
            H.add_node(rand_node)
            discovered_vertices_set.add(rand_node)

        current_vertices=current_vertices.union(discovered_vertices_set)
        last_discovered_vertices=discovered_vertices_set

    # Put back every edge that touches a hinge.
    for node in forward_hinges:
        for successor in G.successors(node):
            H.add_edge(node,successor)
    for node in backward_hinges:
        for predecessor in G.predecessors(node):
            H.add_edge(predecessor,node)

    if reverse:
        return nx.reverse(H)
    return H
Ejemplo n.º 45
0
def weighted_bridge_augmentation(G, avail, weight=None):
    """Finds an approximate min-weight 2-edge-augmentation of G.

    This is an implementation of the approximation algorithm detailed in [1]_.
    It chooses a set of edges from avail to add to G that renders it
    2-edge-connected if such a subset exists.  This is done by finding a
    minimum spanning arborescence of a specially constructed metagraph.

    Parameters
    ----------
    G : NetworkX graph
       An undirected graph.

    avail : set of 2 or 3 tuples.
        candidate edges (with optional weights) to choose from

    weight : string
        key to use to find weights if avail is a set of 3-tuples where the
        third item in each tuple is a dictionary.

    Yields
    ------
    edge : tuple
        Edges in the subset of avail chosen to bridge augment G.

    Raises
    ------
    NetworkXUnfeasible
        If ``avail`` is empty while the (connected) graph still has bridges,
        or if no rooted branching exists in the constructed metagraph.
        Because this is a generator, the exception is raised while iterating.

    Notes
    -----
    Finding a weighted 2-edge-augmentation is NP-hard.
    Any edge not in ``avail`` is considered to have a weight of infinity.
    The approximation factor is 2 if ``G`` is connected and 3 if it is not.
    Runs in :math:`O(m + n log(n))` time

    If the collapsed metagraph has no node of degree 1, there is no leaf to
    root the tree at and no bridge-augmenting edges are yielded.

    References
    ----------
    .. [1] Khuller, Samir, and Ramakrishna Thurimella. (1993) Approximation
        algorithms for graph augmentation.
        http://www.sciencedirect.com/science/article/pii/S0196677483710102

    See Also
    --------
    :func:`bridge_augmentation`
    :func:`k_edge_augmentation`

    Example
    -------
    >>> G = nx.path_graph((1, 2, 3, 4))
    >>> # When the weights are equal, (1, 4) is the best
    >>> avail = [(1, 4, 1), (1, 3, 1), (2, 4, 1)]
    >>> sorted(weighted_bridge_augmentation(G, avail))
    [(1, 4)]
    >>> # Giving (1, 4) a high weight makes the two edge solution the best.
    >>> avail = [(1, 4, 1000), (1, 3, 1), (2, 4, 1)]
    >>> sorted(weighted_bridge_augmentation(G, avail))
    [(1, 3), (2, 4)]
    >>> #------
    >>> G = nx.path_graph((1, 2, 3, 4))
    >>> G.add_node(5)
    >>> avail = [(1, 5, 11), (2, 5, 10), (4, 3, 1), (4, 5, 1)]
    >>> sorted(weighted_bridge_augmentation(G, avail=avail))
    [(1, 5), (4, 5)]
    >>> avail = [(1, 5, 11), (2, 5, 10), (4, 3, 1), (4, 5, 51)]
    >>> sorted(weighted_bridge_augmentation(G, avail=avail))
    [(1, 5), (2, 5), (4, 5)]
    """

    if weight is None:
        weight = 'weight'

    # If input G is not connected the approximation factor increases to 3
    if not nx.is_connected(G):
        # Work on a copy so the caller's graph is never mutated.
        H = G.copy()
        connectors = list(one_edge_augmentation(H, avail=avail, weight=weight))
        H.add_edges_from(connectors)

        # The connecting edges are part of the answer as well.
        for edge in connectors:
            yield edge
    else:
        H = G

    if len(avail) == 0 and nx.has_bridges(H):
        raise nx.NetworkXUnfeasible('no augmentation possible')

    avail_uv, avail_w = _unpack_available_edges(avail, weight=weight, G=H)

    # Collapse input into a metagraph. Meta nodes are bridge-ccs
    bridge_ccs = nx.connectivity.bridge_components(H)
    C = collapse(H, bridge_ccs)

    # Use the meta graph to shrink avail to a small feasible subset
    mapping = C.graph['mapping']
    # Choose the minimum weight feasible edge in each group
    meta_to_wuv = {
        (mu, mv): (w, uv)
        for (mu, mv), uv, w in _lightest_meta_edges(mapping, avail_uv, avail_w)
    }

    # Mapping of terms from (Khuller and Thurimella):
    #     C         : G_0 = (V, E^0)
    #        This is the metagraph where each node is a 2-edge-cc in G.
    #        The edges in C represent bridges in the original graph.
    #     (mu, mv)  : E - E^0  # they group both avail and given edges in E
    #     T         : \Gamma
    #     D         : G^D = (V, E_D)

    #     The paper uses ancestor because children point to parents, which is
    #     contrary to networkx standards.  So, we actually need to run
    #     nx.least_common_ancestor on the reversed Tree.

    # Pick an arbitrary leaf from C as the root
    try:
        root = next(n for n in C.nodes() if C.degree(n) == 1)
    except StopIteration:
        # C has no node of degree 1, so there is no leaf to root the tree at
        # (presumably H has no bridges left and C is a single meta node).
        # A StopIteration escaping a generator body becomes a RuntimeError
        # under PEP 479, so finish the generator explicitly instead.
        return
    # Root C into a tree TR by directing all edges away from the root
    # Note in their paper T directs edges towards the root
    TR = nx.dfs_tree(C, root)

    # Add to D the directed edges of T and set their weight to zero
    # This indicates that it costs nothing to use edges that were given.
    D = nx.reverse(TR).copy()

    nx.set_edge_attributes(D, name='weight', values=0)

    # The LCA of mu and mv in T is the shared ancestor of mu and mv that is
    # located farthest from the root.
    lca_gen = nx.tree_all_pairs_lowest_common_ancestor(
        TR, root=root, pairs=meta_to_wuv.keys())

    for (mu, mv), lca in lca_gen:
        w, uv = meta_to_wuv[(mu, mv)]
        if lca == mu:
            # If u is an ancestor of v in TR, then add edge u->v to D
            D.add_edge(lca, mv, weight=w, generator=uv)
        elif lca == mv:
            # If v is an ancestor of u in TR, then add edge v->u to D
            D.add_edge(lca, mu, weight=w, generator=uv)
        else:
            # If neither u nor v is an ancestor of the other in TR
            # let t = lca(TR, u, v) and add edges t->u and t->v
            # Track the original edge that GENERATED these edges.
            D.add_edge(lca, mu, weight=w, generator=uv)
            D.add_edge(lca, mv, weight=w, generator=uv)

    # Then compute a minimum rooted branching
    try:
        # Note the original edges must be directed towards the root for the
        # branching to give us a bridge-augmentation.
        A = _minimum_rooted_branching(D, root)
    except nx.NetworkXException:
        # If there is no branching then augmentation is not possible
        raise nx.NetworkXUnfeasible('no 2-edge-augmentation possible')

    # For each edge e, in the branching that did not belong to the directed
    # tree T, add the corresponding edge that **GENERATED** it (this is not
    # necessarily e itself!)

    # Collect into a set so the third case above (two D edges sharing one
    # generator) does not yield the same avail edge twice.
    bridge_connectors = set()
    for mu, mv in A.edges():
        data = D.get_edge_data(mu, mv)
        if 'generator' in data:
            # Add the avail edge that generated the branching edge.
            bridge_connectors.add(data['generator'])

    for edge in bridge_connectors:
        yield edge
Ejemplo n.º 46
0
# Optionally filter the graph by alpha_exogenous. A threshold above 0.6
# removes all sites without observations (includes uncertain observations).

if graph_filter:
    G = NX.DiGraph()
    # Keep only the nodes whose 'alpha_exogenous' attribute exceeds 0.6,
    # carrying their attribute dict over to the new graph.
    for n,d in H.nodes_iter(data= True):
        if d['alpha_exogenous'] > 0.6:
            G.add_node(n,d)
    # Keep only the edges whose two endpoints both survived the node filter.
    # `u in G` is a direct membership test on the graph; with the networkx
    # 1.x API used here, `u in G.nodes()` would build a fresh node list and
    # scan it linearly for every edge.
    for u,v,d in H.out_edges_iter(data = True):
        if u in G and v in G:
            G.add_edge(u,v,d)
    # From here on H refers to the filtered graph.
    H = G

# Alpha centrality is computed on the reversed graph, using each node's
# 'alpha_exogenous' attribute as the exogenous value.
betweenness = alpha_centrality(NX.reverse(H),0.36,evalue = 'alpha_exogenous')

print(betweenness)

# Store the scores back on the nodes under the 'alpha_centrality' key.
NX.set_node_attributes(H, 'alpha_centrality', betweenness)

# Running totals over the nodes of H: the node count, and the sums of the
# 'alpha_centrality' and 'alpha_exogenous' node attributes.
inodes = 0
alpha = 0.0
exo = 0.0
for n,d in H.nodes_iter(data= True):
    inodes = inodes + 1
    alpha = alpha + d['alpha_centrality']
    exo = exo + d['alpha_exogenous']