def read_graph(graph_file, dataset_name, directed=False, weighted=False):
    """
    Reads the input network in networkx, with a "weight" attribute on every edge.

    The input is first tried as an EPGM graph; if that fails, ``graph_file`` is
    read as a plain edgelist with integer node ids instead. If the resulting
    graph is not connected, only its largest connected component (weakly
    connected component for directed graphs) is returned.

    :param graph_file: The directory where graph in EPGM format is stored, or the path of an edgelist file
    :param dataset_name: The name of the graph selected out of all the graph heads in EPGM file;
        if None, the label of the 1st graph head is used
    :param directed: If True, load a directed graph, otherwise an undirected graph
    :param weighted: If True, read the edge weights from the edgelist file (not supported
        for EPGM input); otherwise every edge gets weight 1
    :return: The graph in networkx format
    :raises NotImplementedError: If ``weighted`` is True and the input is an EPGM graph
    """
    try:  # assume args.input points to an EPGM graph
        G_epgm = EPGM(graph_file)
        graphs = G_epgm.G["graphs"]
        if dataset_name is None:
            # if dataset_name is not given, use the name of the 1st graph head
            dataset_name = graphs[0]["meta"]["label"]
            print(
                "WARNING: dataset name not specified, using dataset '{}' in the 1st graph head"
                .format(dataset_name))

        # If several graph heads share the label, the last one wins.
        graph_id = None
        for head in graphs:
            if head["meta"]["label"] == dataset_name:
                graph_id = head["id"]

        g = G_epgm.to_nx(graph_id, directed)
        if weighted:
            raise NotImplementedError(
                "weighted graphs are not supported for EPGM input")

        # This is the correct way to set the edge weight in a MultiGraph.
        edge_weights = {e: 1 for e in g.edges(keys=True)}
        nx.set_edge_attributes(g, name="weight", values=edge_weights)

    except NotImplementedError:
        # The EPGM graph loaded fine but weights were requested: report that
        # instead of trying to parse the EPGM directory as an edgelist.
        raise
    except Exception:  # otherwise, assume arg.input points to an edgelist file
        if weighted:
            g = nx.read_edgelist(
                graph_file,
                nodetype=int,
                data=(("weight", float), ),
                create_using=nx.DiGraph(),
            )
        else:
            g = nx.read_edgelist(graph_file,
                                 nodetype=int,
                                 create_using=nx.DiGraph())
            for u, v in g.edges():
                g[u][v]["weight"] = 1

        if not directed:
            g = g.to_undirected()

    # nx.is_connected / nx.connected_components are not implemented for
    # directed graphs, so directed graphs are checked for weak connectivity.
    if g.is_directed():
        is_connected = nx.is_weakly_connected
        connected_components = nx.weakly_connected_components
    else:
        is_connected = nx.is_connected
        connected_components = nx.connected_components

    if not is_connected(g):
        print("Graph is not connected")
        # take the largest connected component as the data
        g_ccs = (g.subgraph(c).copy() for c in connected_components(g))
        g = max(g_ccs, key=len)
        print("Largest subgraph statistics: {} nodes, {} edges".format(
            g.number_of_nodes(), g.number_of_edges()))

    print("Graph statistics: {} nodes, {} edges".format(
        g.number_of_nodes(), g.number_of_edges()))
    return g
# Example #2
# 0
def from_epgm(epgm_location, dataset_name=None, directed=False):
    """
    Imports a graph stored in EPGM format to a NetworkX object

    Args:
        epgm_location (str): The directory containing the EPGM data
        dataset_name (str), optional: The name of the dataset to import.
            If not given (or empty), the label of the 1st graph head is used
            and a RuntimeWarning is issued.
        directed (bool): If True, load as a directed graph, otherwise
            load as an undirected graph

    Returns:
        A NetworkX graph containing the data for the EPGM-stored graph.

    Raises:
        ValueError: If no graph head in the EPGM data is labelled
            ``dataset_name``.
    """
    G_epgm = EPGM(epgm_location)
    graphs = G_epgm.G["graphs"]

    # if dataset_name is not given, use the name of the 1st graph head
    if not dataset_name:
        dataset_name = graphs[0]["meta"]["label"]
        warnings.warn(
            "dataset name not specified, using dataset '{}' in the 1st graph head".format(
                dataset_name
            ),
            RuntimeWarning,
            stacklevel=2,
        )

    # Select graph using dataset_name
    matching_ids = [g["id"] for g in graphs if g["meta"]["label"] == dataset_name]
    if not matching_ids:
        # Without this check graph_id would be unbound below and the caller
        # would only see an opaque UnboundLocalError.
        raise ValueError(
            "dataset '{}' not found in the EPGM graph heads at '{}'".format(
                dataset_name, epgm_location
            )
        )
    # If several graph heads share the label, the last one wins.
    graph_id = matching_ids[-1]

    # Convert the selected EPGM graph to a NetworkX graph
    Gnx = G_epgm.to_nx(graph_id, directed=directed)

    print(
        "Graph statistics: {} nodes, {} edges".format(
            Gnx.number_of_nodes(), Gnx.number_of_edges()
        )
    )
    return Gnx
# Example #3
# 0
def read_graph(graph_file, dataset_name, is_directed=False, is_weighted=False):
    """
    Reads the input network in networkx, with a "weight" attribute on every edge.

    Three input kinds are handled, in this order: a file whose extension is
    ``gpickle``, an EPGM graph, and (if loading as EPGM fails) a plain
    edgelist file with integer node ids.

    Args:
        graph_file: The directory where graph in EPGM format is stored, or the
            path of a gpickle or edgelist file.
        dataset_name: The name of the graph selected out of all the graph heads in EPGM file.
            If None, the label of the 1st graph head is used.
        is_directed: If True, load a directed graph, otherwise an undirected graph.
        is_weighted: If True, read the edge weights from the edgelist file (not
            supported for EPGM input, ignored for gpickle input); otherwise
            every edge gets weight 1.

    Returns:
        The graph in networkx format

    Raises:
        NotImplementedError: If ``is_weighted`` is True and the input is an EPGM graph.
    """

    if graph_file.split('.')[-1] == 'gpickle':
        # NOTE(security): read_gpickle unpickles the file, which can execute
        # arbitrary code; only load gpickle files from trusted sources.
        g = nx.read_gpickle(graph_file)
        # NOTE: is_weighted is ignored here; every edge weight is reset to 1.
        # A constant passed to set_edge_attributes is applied to every edge
        # and, unlike g[u][v]["weight"] = 1, also works for multigraphs.
        nx.set_edge_attributes(g, name="weight", values=1)

        if not is_directed:
            g = g.to_undirected()

        return g

    try:  # assume args.input points to an EPGM graph
        G_epgm = EPGM(graph_file)
        graphs = G_epgm.G["graphs"]
        if dataset_name is None:
            # if dataset_name is not given, use the name of the 1st graph head
            dataset_name = graphs[0]["meta"]["label"]
            print(
                "WARNING: dataset name not specified, using dataset '{}' in the 1st graph head"
                .format(dataset_name))

        # If several graph heads share the label, the last one wins.
        graph_id = None
        for head in graphs:
            if head["meta"]["label"] == dataset_name:
                graph_id = head["id"]

        g = G_epgm.to_nx(graph_id, is_directed)
        if is_weighted:
            raise NotImplementedError(
                "weighted graphs are not supported for EPGM input")

        # This is the correct way to set the edge weight in a MultiGraph.
        edge_weights = {e: 1 for e in g.edges(keys=True)}
        nx.set_edge_attributes(g, name="weight", values=edge_weights)
    except NotImplementedError:
        # The EPGM graph loaded fine but weights were requested: report that
        # instead of trying to parse the EPGM directory as an edgelist.
        raise
    except Exception:  # otherwise, assume arg.input points to an edgelist file
        if is_weighted:
            g = nx.read_edgelist(
                graph_file,
                nodetype=int,
                data=(("weight", float), ),
                create_using=nx.DiGraph(),
            )
        else:
            g = nx.read_edgelist(graph_file,
                                 nodetype=int,
                                 create_using=nx.DiGraph())
            for u, v in g.edges():
                g[u][v]["weight"] = 1  # {'weight': 1}

        if not is_directed:
            g = g.to_undirected()

    print("Graph statistics: {} nodes, {} edges".format(
        g.number_of_nodes(), g.number_of_edges()))
    return g