Ejemplo n.º 1
0
 def _dfs_cycle_forest(G, root=None):
     """Build a child-to-parent DFS forest of ``G`` together with its nontree edges.

     Parameters
     ----------
     G : graph
         Graph handed to ``dfs_labeled_edges``.
     root : node, optional
         Forwarded as the ``source`` argument of ``dfs_labeled_edges``.

     Returns
     -------
     forest : eg.DiGraph
         Every node reached through a "forward" edge is stored with a
         ``parent`` attribute (``None`` for a search root).  A tree edge is
         stored as ``child -> parent`` with ``nontree=False``; a nontree
         edge ``(u, v)`` is stored as ``v -> u`` with ``nontree=True``.
     visit_order : list
         The nodes in the order their "forward" edge was reported.
     """
     forest = eg.DiGraph()
     visit_order = []
     for tail, head, label in dfs_labeled_edges(G, source=root):
         if label == "forward":
             # A (root, root, "forward") triple marks the start of the
             # search on a new connected component, so that node has no
             # parent and no tree edge.
             starts_component = tail == head
             forest.add_node(head, parent=None if starts_component else tail)
             if not starts_component:
                 forest.add_edge(head, tail, nontree=False)
             visit_order.append(head)
             continue
         # Nontree edges are reported in both orientations; skip one whose
         # opposite orientation is already present in the forest.
         if label == "nontree" and head not in forest[tail]:
             forest.add_edge(head, tail, nontree=True)
         # "reverse" edges (and duplicate nontree edges) are ignored.
     return forest, visit_order
Ejemplo n.º 2
0
def Transition(LG):
    """Build a non-backtracking transition matrix over the directed edges of ``LG``.

    Every edge ``(u, v)`` of ``LG`` is inserted into a directed graph in
    both orientations.  For directed edges ``k = (u, v)`` and
    ``l = (x, y)``, ``P[k][l]`` is ``1 / (degree[v] - 1)`` when ``l`` starts
    where ``k`` ends (``v == x``) and does not go straight back
    (``u != y``); every other entry is 0.

    Parameters
    ----------
    LG : graph
        Must provide ``size()`` and an ``edges`` iterable of
        ``(u, v, data)`` triples.

    Returns
    -------
    P : numpy.ndarray
        Array of shape ``(2 * LG.size(), 2 * LG.size())``.
    pair : list of [u, v]
        The directed edges in the same order as the rows/columns of ``P``.
    """
    M = LG.size()
    LLG = eg.DiGraph()
    for u, v, _ in LG.edges:
        LLG.add_edge(u, v)
        LLG.add_edge(v, u)
    # NOTE(review): relies on ``degree()`` returning a node -> degree
    # mapping for the symmetrised directed graph -- confirm its convention.
    degree = LLG.degree()

    # Materialise the edge list once: it is walked quadratically below and
    # must keep one fixed order for both the matrix indices and ``pair``.
    edges = list(LLG.edges)

    P = np.zeros([2 * M, 2 * M])
    for k, (u, v, _) in enumerate(edges):
        for l, (x, y, _) in enumerate(edges):
            if v == x and u != y:
                P[k][l] = 1 / (degree[v] - 1)

    pair = [[u, v] for u, v, _ in edges]
    return P, pair
Ejemplo n.º 3
0
def _find_ancestors_of_node(G, node_t):
    """Return the nodes of ``G`` from which ``node_t`` can be reached.

    The edges of ``G`` are reversed into a fresh ``eg.DiGraph`` and
    ``eg.Dijkstra`` is run from ``node_t`` on it; every node other than
    ``node_t`` with a finite distance is reported.

    Parameters
    ----------
    G : graph
        Source graph; only ``G.nodes`` and ``G.edges`` are read.
    node_t : node
        Target node whose ancestors are wanted.

    Returns
    -------
    list
        Ancestors of ``node_t`` in the iteration order of ``G.nodes``.
    """
    reversed_graph = eg.DiGraph()
    for vertex in G.nodes:
        reversed_graph.add_node(vertex)
    for tail, head, *_ in G.edges:
        reversed_graph.add_edge(head, tail)

    distance_from_target = eg.Dijkstra(reversed_graph, node=node_t)
    unreachable = float("inf")
    return [
        vertex
        for vertex in G.nodes
        if distance_from_target[vertex] < unreachable and vertex != node_t
    ]
Ejemplo n.º 4
0
    def make_graph(self, graph_xml, graphml_keys, defaults, G=None):
        """Turn one GraphML ``<graph>`` element into an EasyGraph graph.

        Parameters
        ----------
        graph_xml : element
            The ``<graph>`` element; queried with ``get``/``find``/``findall``.
        graphml_keys : dict
            Maps a key id to a dict holding ``"for"``, ``"name"`` and
            ``"type"`` (a callable used to convert the raw default).
        defaults : dict
            Maps a key id to its raw default value.
        G : graph, optional
            Graph to fill.  When omitted, a ``MultiDiGraph`` is created if
            ``edgedefault`` is ``"directed"``, otherwise a ``MultiGraph``.

        Returns
        -------
        graph
            ``G`` itself when ``self.multigraph`` is true; otherwise a
            ``DiGraph``/``Graph`` copy with the ids from ``self.edge_ids``
            stored under the edge attribute ``"id"``.

        Raises
        ------
        eg.EasyGraphError
            If the element contains a ``<hyperedge>`` child.
        """
        edgedefault = graph_xml.get("edgedefault", None)
        if G is None:
            G = eg.MultiDiGraph() if edgedefault == "directed" else eg.MultiGraph()

        # Collect the declared attribute defaults per element kind.
        G.graph["node_default"] = {}
        G.graph["edge_default"] = {}
        for key_id, raw_value in defaults.items():
            key_spec = graphml_keys[key_id]
            scope = key_spec["for"]
            attr_name = key_spec["name"]
            convert = key_spec["type"]
            if scope == "node":
                G.graph["node_default"][attr_name] = convert(raw_value)
            if scope == "edge":
                G.graph["edge_default"][attr_name] = convert(raw_value)

        namespace = f"{{{self.NS_GRAPHML}}}"

        # Hyperedges cannot be represented, so refuse them up front.
        if graph_xml.find(namespace + "hyperedge") is not None:
            raise eg.EasyGraphError("GraphML reader doesn't support hyperedges")

        for node_xml in graph_xml.findall(namespace + "node"):
            self.add_node(G, node_xml, graphml_keys, defaults)
        for edge_xml in graph_xml.findall(namespace + "edge"):
            self.add_edge(G, edge_xml, graphml_keys)

        # Graph-level <data> elements become graph attributes.
        G.graph.update(self.decode_data_elements(graphml_keys, graph_xml))

        if not self.multigraph:
            # No multigraph needed: downgrade to the simple graph class and
            # keep the explicit edge ids from the file as an attribute.
            G = eg.DiGraph(G) if G.is_directed() else eg.Graph(G)
            eg.set_edge_attributes(G, values=self.edge_ids, name="id")
        return G
Ejemplo n.º 5
0
def parse_gml_lines(lines, label, destringizer):
    """Parse GML `lines` into a graph.

    Parameters
    ----------
    lines : iterable of str
        The GML text, one string per line.
    label : str or None
        Name of the node attribute used to relabel nodes.  When it is
        ``None`` or ``"id"`` the nodes keep their GML ``id``.
    destringizer : callable or None
        Optional converter applied to string values; a ``ValueError`` from
        it is ignored and the plain string is kept.

    Returns
    -------
    G : graph
        ``Graph``/``DiGraph``, or ``MultiGraph``/``MultiDiGraph`` when the
        input sets ``multigraph``, chosen by the ``directed`` key.

    Raises
    ------
    EasyGraphError
        On text that cannot be tokenized, unexpected tokens, a missing or
        repeated ``graph`` key, missing required attributes, duplicated
        node ids / labels / edges, or edges that reference unknown nodes.
    """
    def tokenize():
        """Yield a ``Token`` per lexical element, followed by one EOF token."""
        # The position of each pattern in this list is its category index
        # (passed to ``Pattern(i)`` below).
        patterns = [
            r"[A-Za-z][0-9A-Za-z_]*\b",  # keys
            # reals
            r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*|INF)(?:[Ee][+-]?[0-9]+)?",
            r"[+-]?[0-9]+",  # ints
            r'".*?"',  # strings
            r"\[",  # dict start
            r"\]",  # dict end
            r"#.*$|\s+",  # comments and whitespaces
        ]
        # One capturing group per pattern, so the index of the group that
        # matched identifies the token category.
        tokens = re.compile("|".join(f"({pattern})" for pattern in patterns))
        lineno = 0
        for line in lines:
            length = len(line)
            pos = 0
            while pos < length:
                match = tokens.match(line, pos)
                if match is None:
                    m = f"cannot tokenize {line[pos:]} at ({lineno + 1}, {pos + 1})"
                    raise EasyGraphError(m)
                for i in range(len(patterns)):
                    group = match.group(i + 1)
                    if group is not None:
                        if i == 0:  # keys
                            value = group.rstrip()
                        elif i == 1:  # reals
                            value = float(group)
                        elif i == 2:  # ints
                            value = int(group)
                        else:
                            value = group
                        # Comments and whitespace are consumed but never
                        # emitted as tokens.
                        if i != 6:  # comments and whitespaces
                            yield Token(Pattern(i), value, lineno + 1, pos + 1)
                        pos += len(group)
                        break
            lineno += 1
        yield Token(None, None, lineno + 1, 1)  # EOF

    def unexpected(curr_token, expected):
        """Raise ``EasyGraphError`` describing what was expected vs. found."""
        category, value, lineno, pos = curr_token
        value = repr(value) if value is not None else "EOF"
        raise EasyGraphError(
            f"expected {expected}, found {value} at ({lineno}, {pos})")

    def consume(curr_token, category, expected):
        """Return the next token if `curr_token` has `category`, else raise."""
        if curr_token.category == category:
            return next(tokens)
        unexpected(curr_token, expected)

    def parse_dict(curr_token):
        """Parse ``[ key value ... ]`` and return ``(next_token, dict)``."""
        # dict start
        curr_token = consume(curr_token, Pattern.DICT_START, "'['")
        # dict contents
        curr_token, dct = parse_kv(curr_token)
        # dict end
        curr_token = consume(curr_token, Pattern.DICT_END, "']'")
        return curr_token, dct

    def parse_kv(curr_token):
        """Parse consecutive ``key value`` pairs; return ``(next_token, dict)``.

        Values of a repeated key are collected in a list.
        """
        dct = defaultdict(list)
        while curr_token.category == Pattern.KEYS:
            key = curr_token.value
            curr_token = next(tokens)
            category = curr_token.category
            if category == Pattern.REALS or category == Pattern.INTS:
                value = curr_token.value
                curr_token = next(tokens)
            elif category == Pattern.STRINGS:
                # Drop the surrounding quotes before unescaping.
                value = unescape(curr_token.value[1:-1])
                if destringizer:
                    try:
                        value = destringizer(value)
                    except ValueError:
                        pass
                curr_token = next(tokens)
            elif category == Pattern.DICT_START:
                curr_token, value = parse_dict(curr_token)
            else:
                # The value is not a number, string or dict.  For the
                # identifying keys, fall back to the token's string form.
                if key in ("id", "label", "source", "target"):
                    try:
                        # String convert the token value
                        value = unescape(str(curr_token.value))
                        if destringizer:
                            try:
                                value = destringizer(value)
                            except ValueError:
                                pass
                        curr_token = next(tokens)
                    except Exception:
                        msg = ("an int, float, string, '[' or string" +
                               " convertable ASCII value for node id or label")
                        unexpected(curr_token, msg)
                # Unquoted NAN / INF start with a letter, so the tokenizer
                # reports them as keys; turn them into floats here.
                elif curr_token.value in {"NAN", "INF"}:
                    value = float(curr_token.value)
                    curr_token = next(tokens)
                else:  # Otherwise error out
                    unexpected(curr_token, "an int, float, string or '['")
            dct[key].append(value)

        def clean_dict_value(value):
            """Unwrap single-item lists and strip a leading list marker."""
            if not isinstance(value, list):
                return value
            if len(value) == 1:
                return value[0]
            # NOTE(review): LIST_START_VALUE is defined outside this
            # function; presumably a sentinel marking a real list -- confirm.
            if value[0] == LIST_START_VALUE:
                return value[1:]
            return value

        dct = {key: clean_dict_value(value) for key, value in dct.items()}
        return curr_token, dct

    def parse_graph():
        """Parse the whole input and return the dict stored under ``graph``."""
        curr_token, dct = parse_kv(next(tokens))
        if curr_token.category is not None:  # EOF
            unexpected(curr_token, "EOF")
        if "graph" not in dct:
            raise EasyGraphError("input contains no graph")
        graph = dct["graph"]
        # A repeated "graph" key is left as a list by parse_kv.
        if isinstance(graph, list):
            raise EasyGraphError("input contains more than one graph")
        return graph

    # `tokens` is the single shared token stream that the nested parsers
    # above advance through `next(tokens)`.
    tokens = tokenize()
    graph = parse_graph()
    directed = graph.pop("directed", False)
    multigraph = graph.pop("multigraph", False)
    if not multigraph:
        G = eg.DiGraph() if directed else eg.Graph()
    else:
        G = eg.MultiDiGraph() if directed else eg.MultiGraph()
    # Everything except the node/edge records becomes a graph attribute.
    graph_attr = {k: v for k, v in graph.items() if k not in ("node", "edge")}
    G.graph.update(graph_attr)

    def pop_attr(dct, category, attr, i):
        """Pop required `attr` from `dct`; raise ``EasyGraphError`` if absent."""
        try:
            return dct.pop(attr)
        except KeyError as err:
            raise EasyGraphError(
                f"{category} #{i} has no {attr!r} attribute") from err

    nodes = graph.get("node", [])
    mapping = {}
    node_labels = set()
    # A single node record is a dict rather than a list; wrap it.
    for i, node in enumerate(nodes if isinstance(nodes, list) else [nodes]):
        id = pop_attr(node, "node", "id", i)
        if id in G:
            raise EasyGraphError(f"node id {id!r} is duplicated")
        if label is not None and label != "id":
            node_label = pop_attr(node, "node", label, i)
            if node_label in node_labels:
                raise EasyGraphError(
                    f"node label {node_label!r} is duplicated")
            node_labels.add(node_label)
            mapping[id] = node_label
        G.add_node(id, **node)

    edges = graph.get("edge", [])
    # A single edge record is a dict rather than a list; wrap it.
    for i, edge in enumerate(edges if isinstance(edges, list) else [edges]):
        source = pop_attr(edge, "edge", "source", i)
        target = pop_attr(edge, "edge", "target", i)
        if source not in G:
            raise EasyGraphError(f"edge #{i} has undefined source {source!r}")
        if target not in G:
            raise EasyGraphError(f"edge #{i} has undefined target {target!r}")
        if not multigraph:
            if not G.has_edge(source, target):
                G.add_edge(source, target, **edge)
            else:
                arrow = "->" if directed else "--"
                msg = f"edge #{i} ({source!r}{arrow}{target!r}) is duplicated"
                raise EasyGraphError(msg)
        else:
            # In a multigraph only a repeated explicit key is a duplicate.
            key = edge.pop("key", None)
            if key is not None and G.has_edge(source, target, key):
                arrow = "->" if directed else "--"
                msg = f"edge #{i} ({source!r}{arrow}{target!r}, {key!r})"
                msg2 = 'Hint: If multigraph add "multigraph 1" to file header.'
                raise EasyGraphError(msg + " is duplicated\n" + msg2)
            G.add_edge(source, target, key, **edge)

    # Nodes were added under their GML ids; rename them to the requested
    # label attribute if one was given.
    if label is not None and label != "id":
        G = eg.relabel_nodes(G, mapping)
    return G
Ejemplo n.º 6
0
def maxBlockFast(G, k, f_set=None, L=None, flag_weight=False):
    """Structural hole spanners detection via maxBlockFast method.

    Parameters
    ----------
    G: easygraph.DiGraph

    k: int
        top - k structural hole spanners.

    f_set: dict, optional
        user vi shares his/her information on network G at a rate fi.
        default is a random float in [0, 1) for each node

    L: int, optional (default: ceil(log2(n)))
        Simulation time L for maxBlockFast.

    flag_weight: bool, optional (default: False)
        Denotes whether each edge has attribute 'weight'.
        When False, a random 'weight' in [0, 1) is written onto every edge
        of ``G`` itself (the input graph is modified in place).

    Returns
    -------
    S_list : list
        The k nodes with the largest averaged score
        ``h_set[node] * n / L``, in decreasing order of score.

    See Also
    --------
    maxBlock

    Examples
    --------
    # >>> maxBlockFast(G, 100)

    References
    ----------
    .. [1] https://doi.org/10.1016/j.ins.2019.07.072

    """
    # h_set accumulates, per node, its score over all L simulations.
    h_set = {}
    n = G.number_of_nodes()
    if L is None:
        L = math.ceil(math.log(n, 2))
    # print("L:", L)
    if f_set is None:
        f_set = {}
        for node in G.nodes:
            f_set[node] = random.random()
    for node in G.nodes:
        h_set[node] = 0
    if not flag_weight:
        # No weights supplied: assign random ones directly on G.
        for edge in G.edges:
            G[edge[0]][edge[1]]["weight"] = random.random()
    for l in range(L):
        if l % 10000 == 0:
            print(l, "/", L, "...")
        # Generate a live-edge graph G' = (V, E') from G: an edge is dropped
        # when toss (uniform in [0.1, 1.1)) is at least its weight.
        G_live = G.copy()
        for edge in G_live.edges:
            wij = G_live[edge[0]][edge[1]]["weight"]
            toss = random.random() + 0.1
            if toss >= wij:
                G_live.remove_edge(edge[0], edge[1])

        G0 = G_live.copy()
        # d_dict holds a per-node weight, initialised to 1 and enlarged for
        # each chosen separation node below.
        d_dict = {}
        ns = number_strongly_connected_components(G0)
        non_considered_nodes = set()
        for node in G0.nodes:
            d_dict[node] = 1
            non_considered_nodes.add(node)
        # G_p_1 is the working graph; outgoing edges of processed nodes are
        # removed from it round by round.
        G_p_1 = G0.copy()
        for i in range(ns):
            # NOTE(review): _find_separation_nodes is defined elsewhere;
            # presumably it returns the separation nodes, a mapping from
            # SCC to its nodes, and per-node incoming information -- confirm.
            separation_nodes, SCC_mapping, incoming_info = _find_separation_nodes(
                G_p_1)
            # print("separation_nodes:", separation_nodes)
            if len(separation_nodes) > 0:
                # Pick the first separation node that cannot reach any
                # other separation node; -1 is kept if none qualifies.
                chosen_node = -1
                for node in separation_nodes:
                    node_dict = eg.Dijkstra(G_p_1, node=node)
                    flag = True
                    for other_sep in separation_nodes:
                        if other_sep != node:
                            if node_dict[other_sep] < float("inf"):
                                flag = False
                                break
                    if flag:
                        chosen_node = node
                        break
                # print("chosen_node:", chosen_node)
                # NOTE(review): _get_idom is defined elsewhere; presumably
                # it fills G_tr with the dominator tree rooted at
                # chosen_node and desc_set with each node's descendants
                # -- confirm.
                G_tr = eg.DiGraph()
                n_set = {}
                desc_set = {}
                _get_idom(G_p_1, G_tr, chosen_node, n_set, desc_set)
                ancestors = _find_ancestors_of_node(G_p_1, chosen_node)
                # Total sharing rate of every ancestor of the chosen node.
                sum_fi = 0
                for node_av in ancestors:
                    sum_fi += f_set[node_av]
                for node_u in G_tr.nodes:
                    # D_u: summed d_dict weight over node_u's descendants.
                    D_u = 0
                    for desc in desc_set[node_u]:
                        if desc not in d_dict.keys():
                            # Diagnostic output only; the lookup below
                            # still raises KeyError for a missing desc.
                            print(
                                "Error: desc:",
                                desc,
                                "node_u",
                                node_u,
                                "d_dict:",
                                d_dict,
                            )
                            print(desc_set[node_u])
                        D_u += d_dict[desc]
                    if node_u != chosen_node:
                        h_set[node_u] += (f_set[chosen_node] + sum_fi) * D_u
                    elif node_u == chosen_node:
                        # The chosen node is not credited with its own rate.
                        h_set[node_u] += sum_fi * D_u
                # Replace the chosen node's weight by the summed weight of
                # all nodes in G_tr.
                d_dict[chosen_node] = 0
                for node_vj in G_tr.nodes:
                    d_dict[chosen_node] += d_dict[node_vj]
                # Remove the chosen node's outgoing edges, editing a copy
                # while iterating over the original's neighbors.
                G_p = G_p_1.copy()
                for neighbor in G_p_1.neighbors(node=chosen_node):
                    G_p.remove_edge(chosen_node, neighbor)
                G_p_1 = G_p.copy()
                non_considered_nodes.remove(chosen_node)
            else:
                # No separation node: take the unprocessed nodes without
                # incoming information from the first SCC that has any.
                V_set = set()
                for key in SCC_mapping.keys():
                    for node in SCC_mapping[key]:
                        if (node in non_considered_nodes) and (
                                node not in incoming_info.keys()):
                            V_set.add(node)
                    if len(V_set) > 0:
                        break
                # print("V_set:", V_set)
                for node_v in V_set:
                    G_tr = eg.DiGraph()
                    n_set = {}
                    desc_set = {}
                    _get_idom(G_p_1, G_tr, node_v, n_set, desc_set)
                    for node_u in G_tr.nodes:
                        D_u = 0
                        for desc in desc_set[node_u]:
                            if desc not in d_dict.keys():
                                # Diagnostic output only; the lookup below
                                # still raises KeyError for a missing desc.
                                print(
                                    "Error: desc:",
                                    desc,
                                    "node_u",
                                    node_u,
                                    "d_dict:",
                                    d_dict,
                                )
                                print(desc_set[node_u])
                            D_u += d_dict[desc]
                        h_set[node_u] += f_set[node_v] * D_u
                # Mark every node of V_set as processed and drop its
                # outgoing edges from the working graph.
                G_p = G_p_1.copy()
                for node_v in V_set:
                    non_considered_nodes.remove(node_v)
                    for neighbor in G_p_1.neighbors(node=node_v):
                        G_p.remove_edge(node_v, neighbor)
                G_p_1 = G_p.copy()
    # Average the accumulated scores over the L simulations, scaled by n.
    ave_H_set = {}
    for node in G.nodes:
        ave_H_set[node] = h_set[node] * n / L
    ordered_set = sorted(ave_H_set.items(), key=lambda x: x[1], reverse=True)
    # Keep the k best nodes (raises IndexError when k exceeds the node count).
    S_list = []
    for i in range(k):
        S_list.append((ordered_set[i])[0])
    return S_list
Ejemplo n.º 7
0
def _find_topk_shs_under_l(G, f_set, k, L):
    """Rank nodes by blocked propagations estimated from ``L`` simulations.

    Parameters
    ----------
    G: easygraph.DiGraph
        Every edge must carry a ``"weight"`` attribute.

    f_set: dict
        Maps each node to the rate at which it shares information.

    k: int
        Number of top-ranked nodes to return.

    L: int
        Number of simulations.

    Returns
    -------
    S_list : list
        The ``k`` nodes with the highest averaged score, best first.

    ave_H_Lt_S: float
        Sum of the averaged scores of the nodes in ``S_list``.

    """
    n = G.number_of_nodes()
    h_set = {vertex: 0 for vertex in G.nodes}

    for l in range(L):
        if l % 100000 == 0:
            print("[", l, "/", L, "] find topk shs under L")

        # Pick the information source uniformly at random.
        node_s = random.choice(list(G.nodes))

        # Live-edge sample: an edge survives only when its weight exceeds
        # a toss drawn uniformly from [0.1, 1.1).
        G_live = G.copy()
        for edge in G_live.edges:
            src, dst = edge[0], edge[1]
            weight = G_live[src][dst]["weight"]
            if random.random() + 0.1 >= weight:
                G_live.remove_edge(src, dst)

        # Restrict the live graph to the component containing the source.
        G_subgraph = eg.DiGraph()
        for vertex in eg.connected_component_of_node(G_live, node_s):
            G_subgraph.add_node(vertex)
        for edge in G_live.edges:
            src, dst = edge[0], edge[1]
            if src in G_subgraph.nodes and dst in G_subgraph.nodes:
                G_subgraph.add_edge(src, dst)

        # _get_idom fills G_tr and n_set for the tree rooted at the source;
        # n_set[u] is weighted by the source's sharing rate.
        G_tr = eg.DiGraph()
        n_set = {}
        desc_set = {}
        _get_idom(G_subgraph, G_tr, node_s, n_set, desc_set)
        for node_u in G_tr.nodes:
            if node_u == node_s:
                continue
            h_set[node_u] += n_set[node_u] * f_set[node_s]

    ave_H_set = {vertex: h_set[vertex] * n / L for vertex in G.nodes}
    ranking = sorted(ave_H_set.items(), key=lambda item: item[1], reverse=True)

    S_list = []
    ave_H_Lt_S = 0
    for rank in range(k):
        vertex, score = ranking[rank][0], ranking[rank][1]
        S_list.append(vertex)
        ave_H_Lt_S += score
    return S_list, ave_H_Lt_S
Ejemplo n.º 8
0
def structural_hole_influence_index(
    G_original,
    S,
    C,
    model,
    variant=False,
    seedRatio=0.05,
    randSeedIter=10,
    countIterations=100,
    Directed=True,
):
    """Returns the SHII metric of each seed.

    Parameters
    ----------
    G_original: easygraph.Graph or easygraph.DiGraph

    S: list of int
        A list of nodes which are structural hole spanners.

    C: list of list
        Each list includes the nodes in one community.

    model: string
        Propagation Model. Should be IC or LT.

    variant: bool, default is False
        Whether returns variant SHII metrics or not.
        variant SHII = # of the influenced outsider / # of the influenced insiders
        SHII = # of the influenced outsiders / # of the total influenced nodes

    seedRatio: float, default is 0.05
        # of sampled seeds / # of nodes of the community that the given SHS belongs to.

    randSeedIter: int, default is 10
        How many iterations to sample seeds.

    countIterations: int default is 100
        Number of monte carlo simulations to be used.

    Directed: bool, default is True
        Whether the graph is directed or not.

    Returns
    -------
    seed_shii_pair : dict
        the SHII metric of each seed

    Raises
    ------
    ValueError
        If a simulation is about to run and ``model`` is neither ``"IC"``
        nor ``"LT"``.

    Examples
    --------
    # >>> structural_hole_influence_index(G, [3, 20, 9], Com, 'LT', seedRatio=0.1, Directed=False)

    References
    ----------
    .. [1] https://dl.acm.org/doi/pdf/10.1145/2939672.2939807
    .. [2] https://github.com/LifangHe/KDD16_HAM/tree/master/SHII_metric

    """
    if not Directed:
        # Represent every undirected edge by the two opposite directed edges.
        G = eg.DiGraph()
        for edge in G_original.edges:
            G.add_edge(edge[0], edge[1])
            G.add_edge(edge[1], edge[0])
    else:
        G = G_original.copy()

    # node -> community label; a node listed in several communities keeps
    # the label of the last one.
    node_label_pair = {}
    for community_label, members in enumerate(C):
        for member in members:
            node_label_pair[member] = community_label

    seed_shii_pair = {}
    for community_label in range(len(C)):
        nodesInCommunity = [
            node
            for node, label in node_label_pair.items()
            if label == community_label
        ]
        seedSetInCommunity = [node for node in nodesInCommunity if node in S]
        if not seedSetInCommunity:
            continue

        seedSetSize = int(math.ceil(len(nodesInCommunity) * seedRatio))

        for seed in seedSetInCommunity:
            print(">>>>>> processing seed ", seed, " now.")
            # The seed set always starts with the spanner itself.  (The
            # previous code guarded this with a membership test on a stale
            # loop variable, which was always true on the empty list.)
            oneSeedSet = [seed]
            seedNeighborSet = []
            # Breadth-first expansion around the spanner: whole neighbor
            # layers are promoted into oneSeedSet while the seed set plus
            # the current candidates is still smaller than seedSetSize.
            # NOTE(review): if fewer than seedSetSize nodes are reachable
            # from the seed, this loop appears to re-enqueue nodes
            # indefinitely -- confirm and bound it if so.
            queue = [seed]
            while queue:
                cur_node = queue[0]
                count_neighbor = 0
                for neighbor in G.neighbors(node=cur_node):
                    if neighbor not in seedNeighborSet:
                        seedNeighborSet.append(neighbor)
                    count_neighbor += 1
                if (
                    count_neighbor > 0
                    and len(queue) == 1
                    and len(oneSeedSet) + len(seedNeighborSet) < seedSetSize
                ):
                    for candidate in seedNeighborSet:
                        if candidate not in oneSeedSet:
                            oneSeedSet.append(candidate)
                        queue.append(candidate)
                    seedNeighborSet.clear()
                queue.pop(0)

            avg_censor_score_1 = 0.0
            avg_censor_score_2 = 0.0

            for randIter in range(randSeedIter):
                if randIter % 5 == 0:
                    print("seed ", seed, ": ", randIter, " in ", randSeedIter)
                # Top the fixed seeds up with randomly drawn candidates
                # until seedSetSize is reached or candidates run out.
                randSeedSet = list(oneSeedSet)
                seedNeighbors = list(seedNeighborSet)
                while seedNeighbors and len(randSeedSet) < seedSetSize:
                    r = random.randint(0, len(seedNeighbors) - 1)
                    if seedNeighbors[r] not in randSeedSet:
                        randSeedSet.append(seedNeighbors[r])
                    seedNeighbors.pop(r)

                if model == "IC":
                    censor_score_1, censor_score_2 = _independent_cascade(
                        G,
                        randSeedSet,
                        community_label,
                        countIterations,
                        node_label_pair,
                    )
                elif model == "LT":
                    censor_score_1, censor_score_2 = _linear_threshold(
                        G,
                        randSeedSet,
                        community_label,
                        countIterations,
                        node_label_pair,
                    )
                else:
                    # Without this, an unknown model either hit an unbound
                    # local or silently reused the previous scores.
                    raise ValueError(
                        f"model should be 'IC' or 'LT', got {model!r}"
                    )
                avg_censor_score_1 += censor_score_1 / randSeedIter
                avg_censor_score_2 += censor_score_2 / randSeedIter

            seed_shii_pair[seed] = (
                avg_censor_score_2 if variant else avg_censor_score_1
            )
    return seed_shii_pair
Ejemplo n.º 9
0
    def make_graph(self, graph_xml):
        """Turn one GEXF ``<graph>`` element into an EasyGraph graph.

        Parameters
        ----------
        graph_xml : element
            The ``<graph>`` element; queried with ``get``/``find``/``findall``.

        Returns
        -------
        graph
            A ``MultiDiGraph`` (``defaultedgetype == "directed"``) or
            ``MultiGraph``; converted to ``DiGraph``/``Graph`` when
            ``self.simple_graph`` is true after all edges were added.

        Raises
        ------
        eg.EasyGraphError
            If an ``<attributes>`` element has a ``class`` other than
            ``"node"`` or ``"edge"``.

        Notes
        -----
        Sets ``self.timeformat`` from the element's ``timeformat``
        attribute (``"date"`` is stored as ``"string"``).
        """
        edgedefault = graph_xml.get("defaultedgetype", None)
        G = eg.MultiDiGraph() if edgedefault == "directed" else eg.MultiGraph()

        # graph attributes
        graph_name = graph_xml.get("name", "")
        if graph_name != "":
            G.graph["name"] = graph_name
        graph_start = graph_xml.get("start")
        if graph_start is not None:
            G.graph["start"] = graph_start
        graph_end = graph_xml.get("end")
        if graph_end is not None:
            G.graph["end"] = graph_end
        # Anything other than an explicit "dynamic" is treated as static.
        graph_mode = graph_xml.get("mode", "")
        G.graph["mode"] = "dynamic" if graph_mode == "dynamic" else "static"

        # timeformat
        self.timeformat = graph_xml.get("timeformat")
        if self.timeformat == "date":
            self.timeformat = "string"

        # node and edge attributes
        attributes_elements = graph_xml.findall(
            f"{{{self.NS_GEXF}}}attributes")
        # dictionaries to hold attributes and attribute defaults
        node_attr = {}
        node_default = {}
        edge_attr = {}
        edge_default = {}
        for a in attributes_elements:
            attr_class = a.get("class")
            if attr_class == "node":
                na, nd = self.find_gexf_attributes(a)
                node_attr.update(na)
                node_default.update(nd)
                G.graph["node_default"] = node_default
            elif attr_class == "edge":
                ea, ed = self.find_gexf_attributes(a)
                edge_attr.update(ea)
                edge_default.update(ed)
                G.graph["edge_default"] = edge_default
            else:
                # A bare ``raise`` here had no active exception to re-raise
                # and surfaced as an unrelated RuntimeError.
                raise eg.EasyGraphError(
                    f"unknown GEXF attribute class {attr_class!r}")

        # Hack to handle Gephi0.7beta bug: always declare the edge
        # "weight" attribute, and always publish the edge defaults.
        edge_attr.update({
            "weight": {
                "type": "double",
                "mode": "static",
                "title": "weight"
            }
        })
        G.graph["edge_default"] = edge_default

        # add nodes
        nodes_element = graph_xml.find(f"{{{self.NS_GEXF}}}nodes")
        if nodes_element is not None:
            for node_xml in nodes_element.findall(f"{{{self.NS_GEXF}}}node"):
                self.add_node(G, node_xml, node_attr)

        # add edges
        edges_element = graph_xml.find(f"{{{self.NS_GEXF}}}edges")
        if edges_element is not None:
            for edge_xml in edges_element.findall(f"{{{self.NS_GEXF}}}edge"):
                self.add_edge(G, edge_xml, edge_attr)

        # switch to Graph or DiGraph if no parallel edges were found.
        if self.simple_graph:
            G = eg.DiGraph(G) if G.is_directed() else eg.Graph(G)
        return G
Ejemplo n.º 10
0
def fast_erdos_renyi_P(n, p, directed=False, FilePath=None):
    """Given the number of nodes and the probability of edge creation, return an Erdős-Rényi random graph, and store the graph in a document. Use this function for generating a huge scale graph.

    Candidate edges are skipped in geometrically distributed jumps, so
    only the edges that are actually created cost any work.

    Parameters
    ----------
    n : int
        The number of nodes.
    p : float
        Probability for edge creation.  ``p <= 0`` produces no edges and
        ``p >= 1`` produces every possible edge (no self loops).
    directed : bool, optional (default=False)
        If True, this function returns a directed graph.
    FilePath : string
        The file path of storing the graph G.

    Returns
    -------
    G : graph
        an Erdős-Rényi random graph.  Only nodes that received at least
        one edge are present in ``G``.

    Examples
    --------
    Returns an Erdős-Rényi random graph G

    >>> fast_erdos_renyi_P(100,0.5,directed=False,FilePath="/users/fudanmsn/downloads/RandomNetwork.txt")

    References
    ----------
    .. [1] P. Erdős and A. Rényi, On Random Graphs, Publ. Math. 6, 290 (1959).
    .. [2] E. N. Gilbert, Random Graphs, Ann. Math. Stat., 30, 1141 (1959).
    """
    G = eg.DiGraph() if directed else eg.Graph()
    adjacent = {}

    def connect(v, w):
        # Add the edge and mirror it in the adjacency lists written to
        # file; an undirected edge is listed under both endpoints.
        G.add_edge(v, w)
        adjacent.setdefault(v, []).append(w)
        if not directed:
            adjacent.setdefault(w, []).append(v)

    if p >= 1:
        # log(1 - p) is undefined here; every candidate edge is kept.
        for v in range(n):
            for w in (range(n) if directed else range(v)):
                if v != w:
                    connect(v, w)
    elif p > 0:
        lp = math.log(1.0 - p)
        w = -1
        if directed:
            # Candidates are all ordered pairs (v, w) with v != w.
            v = 0
            while v < n:
                lr = math.log(1.0 - random.random())
                w = w + 1 + int(lr / lp)
                if v == w:  # avoid self loops
                    w = w + 1
                while v < n <= w:
                    w = w - n
                    v = v + 1
                    if v == w:  # avoid self loops
                        w = w + 1
                if v < n:
                    connect(v, w)
        else:
            # Candidates are the pairs (v, w) with w < v.
            v = 1
            while v < n:
                lr = math.log(1.0 - random.random())
                w = w + 1 + int(lr / lp)
                while w >= v and v < n:
                    w = w - v
                    v = v + 1
                if v < n:
                    connect(v, w)
    # p <= 0: no edge is ever created (previously a ZeroDivisionError for
    # p == 0 and an ill-defined skip length for p < 0).

    writeRandomNetworkToFile(n, adjacent, FilePath)
    return G
Ejemplo n.º 11
0
def erdos_renyi_M(n, edge, directed=False, FilePath=None):
    """Given the number of nodes and the number of edges, return an Erdős-Rényi random graph, and store the graph in a document.

    Parameters
    ----------
    n : int
        The number of nodes. Candidate node labels are ``0 .. n - 1``.
    edge : int
        The number of edges. When it is at least the number of possible
        edges (``n * (n - 1)`` directed, ``n * (n - 1) / 2`` undirected)
        the complete graph is built instead of sampling.
    directed : bool, optional (default=False)
        If True, this function returns a directed graph.
    FilePath : string
        The file path of storing the graph G. It is forwarded unchanged to
        ``writeRandomNetworkToFile``.

    Returns
    -------
    G : graph
        an Erdős-Rényi random graph.

    Notes
    -----
    ``G`` is populated exclusively through ``add_edge`` calls; no node is
    added on its own.

    The adjacency lists are written to the file in every case, including
    the complete-graph shortcut.

    Examples
    --------
    Returns an Erdős-Rényi random graph G.

    >>> erdos_renyi_M(100,180,directed=False,FilePath="/users/fudanmsn/downloads/RandomNetwork.txt")

    References
    ----------
    .. [1] P. Erdős and A. Rényi, On Random Graphs, Publ. Math. 6, 290 (1959).
    .. [2] E. N. Gilbert, Random Graphs, Ann. Math. Stat., 30, 1141 (1959).
    """
    G = eg.DiGraph() if directed else eg.Graph()
    # node -> list of neighbours; this is what gets written to the file.
    adjacent = {}

    def _link(u, v):
        # Record the edge in the graph and in the adjacency lists; an
        # undirected edge is listed under both of its endpoints.
        G.add_edge(u, v)
        adjacent.setdefault(u, []).append(v)
        if not directed:
            adjacent.setdefault(v, []).append(u)

    mmax = n * (n - 1) if directed else n * (n - 1) / 2
    if edge >= mmax:
        # Requested at least as many edges as exist: build the complete graph.
        for i in range(n):
            # Directed graphs need every ordered pair; undirected graphs
            # need each unordered pair once, otherwise every neighbour
            # would be listed twice in `adjacent`.
            targets = range(n) if directed else range(i + 1, n)
            for j in targets:
                if i != j:
                    _link(i, j)
    else:
        count = 0
        while count < edge:
            i = random.randint(0, n - 1)
            j = random.randint(0, n - 1)
            # Reject self loops and already-present edges, then redraw.
            if i == j or G.has_edge(i, j):
                continue
            _link(i, j)
            count += 1

    writeRandomNetworkToFile(n, adjacent, FilePath)
    return G
Ejemplo n.º 12
0
def erdos_renyi_P(n, p, directed=False, FilePath=None):
    """Given the number of nodes and the probability of edge creation, return an Erdős-Rényi random graph, and store the graph in a document.

    Parameters
    ----------
    n : int
        The number of nodes. Candidate node labels are ``0 .. n - 1``.
    p : float
        Probability for edge creation. Each candidate edge is kept when an
        independent ``random.random()`` draw is strictly below ``p``.
    directed : bool, optional (default=False)
        If True, this function returns a directed graph. Every ordered
        pair ``(i, j)`` with ``i != j`` is then a candidate edge, so both
        ``i -> j`` and ``j -> i`` can occur.
    FilePath : string
        The file path of storing the graph G. It is forwarded unchanged to
        ``writeRandomNetworkToFile``.

    Returns
    -------
    G : graph
        an Erdős-Rényi random graph.

    Notes
    -----
    ``G`` is populated exclusively through ``add_edge`` calls; no node is
    added on its own.

    Examples
    --------
    Returns an Erdős-Rényi random graph G

    >>> erdos_renyi_P(100,0.5,directed=False,FilePath="/users/fudanmsn/downloads/RandomNetwork.txt")

    References
    ----------
    .. [1] P. Erdős and A. Rényi, On Random Graphs, Publ. Math. 6, 290 (1959).
    .. [2] E. N. Gilbert, Random Graphs, Ann. Math. Stat., 30, 1141 (1959).
    """
    G = eg.DiGraph() if directed else eg.Graph()
    # node -> list of neighbours; this is what gets written to the file.
    adjacent = {}
    for i in range(n):
        # Undirected: each unordered pair is tried once (j > i).
        # Directed: every ordered pair is tried; restricting to j > i would
        # only ever create edges from a lower to a higher index.
        targets = range(n) if directed else range(i + 1, n)
        for j in targets:
            if i == j:  # no self loops
                continue
            if random.random() < p:
                G.add_edge(i, j)
                adjacent.setdefault(i, []).append(j)
                if not directed:
                    adjacent.setdefault(j, []).append(i)

    writeRandomNetworkToFile(n, adjacent, FilePath)
    return G
Ejemplo n.º 13
0
def condensation(G, scc=None):
    """Returns the condensation of G.

    The condensation of G is the graph with each of the strongly connected
    components contracted into a single node.

    Parameters
    ----------
    G : easygraph.DiGraph
       A directed graph.

    scc:  list or generator (optional, default=None)
       Strongly connected components. If provided, the elements in
       `scc` must partition the nodes in `G`. If not provided, it will be
       calculated as scc=strongly_connected_components(G).

    Returns
    -------
    C : easygraph.DiGraph
       The condensation graph C of G.  The node labels are integers
       corresponding to the index of the component in the list of
       strongly connected components of G.

       Graph attributes of C:

       * ``'mapping'`` : dictionary mapping the original nodes to the
         nodes in C to which they belong.
       * ``'incoming_info'`` : dictionary keyed by an original node of G
         that is the target of at least one edge coming from a different
         component; the value is the set of original source nodes of
         those edges.

       Node attributes of each node in C:

       * ``'member'`` : the component (as yielded by `scc`) that the node
         in C represents.
       * ``'incoming'`` : an empty set created for every node; this
         function does not fill it.

    Examples
    --------
    # >>> condensation(G)

    Notes
    -----
    After contracting all strongly connected components to a single node,
    the resulting graph is a directed acyclic graph.

    """
    if scc is None:
        scc = strongly_connected_components(G)
    mapping = {}
    incoming_info = {}
    members = {}
    C = eg.DiGraph()
    # Attach both dicts up front so that the empty-graph result carries the
    # same graph attributes as the general case; they are filled in place.
    C.graph["mapping"] = mapping
    C.graph["incoming_info"] = incoming_info
    if len(G) == 0:
        return C
    for index, component in enumerate(scc):
        members[index] = component
        mapping.update((node, index) for node in component)
    # Count the components directly instead of relying on the loop variable
    # leaking out of the `for`, which is undefined when `scc` is empty.
    number_of_components = len(members)
    for index in range(number_of_components):
        C.add_node(index, member=members[index], incoming=set())
    # NOTE(review): every index was already added just above, so this call
    # looks redundant; kept unchanged - confirm before removing.
    C.add_nodes(range(number_of_components))
    for edge in G.edges:
        source, target = edge[0], edge[1]
        # Only edges that cross between two components survive contraction.
        if mapping[source] != mapping[target]:
            C.add_edge(mapping[source], mapping[target])
            incoming_info.setdefault(target, set()).add(source)
    return C
Ejemplo n.º 14
0
        _computeTieStrength(G, edge[0], edge[1])
        _commonUpdate(G, edge[0], edge[1], threshold, score_dict)
    for edge in edges_delete:
        G.remove_edge(edge[0], edge[1])
        _commonUpdate(G, edge[0], edge[1], threshold, score_dict)
    ordered_set = sorted(score_dict.items(), key=lambda x: x[1], reverse=True)
    SHS_list = []
    for i in range(k):
        SHS_list.append((ordered_set[i])[0])
    print("updated score:", score_dict)
    print("top-k nodes:", SHS_list)
    return SHS_list


if __name__ == "__main__":
    # Demo input: a small directed graph assembled from an explicit edge
    # list, inserted in the listed order.
    G = eg.DiGraph()
    for tail, head in (
        (1, 5),
        (1, 4),
        (2, 1),
        (2, 6),
        (2, 9),
        (3, 4),
        (3, 1),
        (4, 3),
        (4, 1),
        (4, 5),
        (5, 4),
        (5, 8),
        (6, 1),
        (6, 2),
        (7, 2),
    ):
        G.add_edge(tail, head)
Ejemplo n.º 15
0
    def setup_class(cls):
        """Create the GEXF fixtures stored on the test class.

        For each of the cases visible here (simple directed, attributed
        directed, simple undirected) three class attributes are set:

        * ``*_data``  - a GEXF 1.2draft XML document as a string,
        * ``*_graph`` - a hand-built easygraph graph with the corresponding
          nodes and edges,
        * ``*_fh``    - an ``io.BytesIO`` holding the UTF-8 encoded XML.
        """
        # --- Simple directed graph: two nodes, one edge -------------------
        cls.simple_directed_data = """<?xml version="1.0" encoding="UTF-8"?>
<gexf xmlns="http://www.gexf.net/1.2draft" version="1.2">
    <graph mode="static" defaultedgetype="directed">
        <nodes>
            <node id="0" label="Hello" />
            <node id="1" label="Word" />
        </nodes>
        <edges>
            <edge id="0" source="0" target="1" />
        </edges>
    </graph>
</gexf>
"""
        # NOTE(review): the XML above labels node "1" as "Word" while the
        # graph below uses "World" - confirm whether this is intentional.
        cls.simple_directed_graph = eg.DiGraph()
        cls.simple_directed_graph.add_node("0", label="Hello")
        cls.simple_directed_graph.add_node("1", label="World")
        cls.simple_directed_graph.add_edge("0", "1", id="0")

        cls.simple_directed_fh = io.BytesIO(
            cls.simple_directed_data.encode("UTF-8"))

        # --- Directed graph with typed node attributes --------------------
        # The trailing backslashes inside the literal are line continuations,
        # so the XML declaration and the opening <gexf ...> tag form a single
        # line in the resulting string.
        cls.attribute_data = """<?xml version="1.0" encoding="UTF-8"?>\
<gexf xmlns="http://www.gexf.net/1.2draft" xmlns:xsi="http://www.w3.\
org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.gexf.net/\
1.2draft http://www.gexf.net/1.2draft/gexf.xsd" version="1.2">
  <meta lastmodifieddate="2009-03-20">
    <creator>Gephi.org</creator>
    <description>A Web network</description>
  </meta>
  <graph defaultedgetype="directed">
    <attributes class="node">
      <attribute id="0" title="url" type="string"/>
      <attribute id="1" title="indegree" type="integer"/>
      <attribute id="2" title="frog" type="boolean">
        <default>true</default>
      </attribute>
    </attributes>
    <nodes>
      <node id="0" label="Gephi">
        <attvalues>
          <attvalue for="0" value="https://gephi.org"/>
          <attvalue for="1" value="1"/>
          <attvalue for="2" value="false"/>
        </attvalues>
      </node>
      <node id="1" label="Webatlas">
        <attvalues>
          <attvalue for="0" value="http://webatlas.fr"/>
          <attvalue for="1" value="2"/>
          <attvalue for="2" value="false"/>
        </attvalues>
      </node>
      <node id="2" label="RTGI">
        <attvalues>
          <attvalue for="0" value="http://rtgi.fr"/>
          <attvalue for="1" value="1"/>
          <attvalue for="2" value="true"/>
        </attvalues>
      </node>
      <node id="3" label="BarabasiLab">
        <attvalues>
          <attvalue for="0" value="http://barabasilab.com"/>
          <attvalue for="1" value="1"/>
          <attvalue for="2" value="true"/>
        </attvalues>
      </node>
    </nodes>
    <edges>
      <edge id="0" source="0" target="1" label="foo"/>
      <edge id="1" source="0" target="2"/>
      <edge id="2" source="1" target="0"/>
      <edge id="3" source="2" target="1"/>
      <edge id="4" source="0" target="3"/>
    </edges>
  </graph>
</gexf>
"""
        cls.attribute_graph = eg.DiGraph()
        # Mirrors the <default>true</default> declared for the "frog"
        # attribute in the XML above.
        cls.attribute_graph.graph["node_default"] = {"frog": True}
        cls.attribute_graph.add_node("0",
                                     label="Gephi",
                                     url="https://gephi.org",
                                     indegree=1,
                                     frog=False)
        cls.attribute_graph.add_node("1",
                                     label="Webatlas",
                                     url="http://webatlas.fr",
                                     indegree=2,
                                     frog=False)
        cls.attribute_graph.add_node("2",
                                     label="RTGI",
                                     url="http://rtgi.fr",
                                     indegree=1,
                                     frog=True)
        cls.attribute_graph.add_node(
            "3",
            label="BarabasiLab",
            url="http://barabasilab.com",
            indegree=1,
            frog=True,
        )
        # Edges carry the XML edge id (and label, where present) as attributes.
        cls.attribute_graph.add_edge("0", "1", id="0", label="foo")
        cls.attribute_graph.add_edge("0", "2", id="1")
        cls.attribute_graph.add_edge("1", "0", id="2")
        cls.attribute_graph.add_edge("2", "1", id="3")
        cls.attribute_graph.add_edge("0", "3", id="4")
        cls.attribute_fh = io.BytesIO(cls.attribute_data.encode("UTF-8"))

        # --- Simple undirected graph: two nodes, one edge -----------------
        cls.simple_undirected_data = """<?xml version="1.0" encoding="UTF-8"?>
<gexf xmlns="http://www.gexf.net/1.2draft" version="1.2">
    <graph mode="static" defaultedgetype="undirected">
        <nodes>
            <node id="0" label="Hello" />
            <node id="1" label="Word" />
        </nodes>
        <edges>
            <edge id="0" source="0" target="1" />
        </edges>
    </graph>
</gexf>
"""
        # NOTE(review): same "Word" (XML) vs "World" (graph) label mismatch
        # as in the directed fixture - confirm.
        cls.simple_undirected_graph = eg.Graph()
        cls.simple_undirected_graph.add_node("0", label="Hello")
        cls.simple_undirected_graph.add_node("1", label="World")
        cls.simple_undirected_graph.add_edge("0", "1", id="0")

        cls.simple_undirected_fh = io.BytesIO(
            cls.simple_undirected_data.encode("UTF-8"))