def get_trivial_graph(n_node_features=0):
    """Generate a trivial graph for internal purposes."""
    nodes = pd.DataFrame([0, 1, 2])
    if n_node_features > 0:
        nodes["features"] = 3 * [n_node_features * [0.0]]
    edges = pd.DataFrame()
    edges["start_node"] = [0, 1, 2]
    edges["end_node"] = [1, 2, 0]
    return Graph(nodes, edges, 0)
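# Usage sketch (illustrative only, not part of the original module): build the trivial
# graph with and without node features. The `_example_trivial_graph` name is hypothetical.
def _example_trivial_graph():
    bare = get_trivial_graph()
    featured = get_trivial_graph(n_node_features=4)  # each node carries a 4-dim zero feature vector
    return bare, featured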
def make(folder="./datasets", add_features=False, write_to_file=True, n_graphs=5):
    """Make a pickled dataset of small edge-case graphs that test the robustness of hcga."""
    graphs = []

    # one, two and three node graphs
    for _ in range(n_graphs):
        graphs.append(_add_graph_desc(nx.grid_graph([1]).copy(), "one-node graph"))
        graphs.append(_add_graph_desc(nx.grid_graph([2]).copy(), "two-node graph"))
        graphs.append(_add_graph_desc(nx.grid_graph([3]).copy(), "three-node graph"))

    # graph with node weights but no edges
    G = nx.Graph()
    G.add_node(0)
    G.add_node(1, weight=2)
    G.add_node(2, weight=3)
    for _ in range(n_graphs):
        graphs.append(_add_graph_desc(G.copy(), "graph without edges"))

    # directed graph with no weights (isolated nodes only)
    G = nx.DiGraph()
    G.add_nodes_from(range(100, 110))
    for _ in range(n_graphs):
        graphs.append(_add_graph_desc(G.copy(), "directed graph with no weights"))

    # directed graph weighted (edges copied from an undirected path graph)
    G = nx.DiGraph()
    H = nx.path_graph(10)
    G.add_nodes_from(H)
    G.add_edges_from(H.edges)
    for _ in range(n_graphs):
        graphs.append(_add_graph_desc(G.copy(), "directed graph weighted"))

    # optionally attach dummy node features to every graph
    if add_features:
        graphs = [add_dummy_node_features(graph) for graph in graphs]

    # convert the networkx graphs into a GraphCollection with random binary labels
    graphs_coll = GraphCollection()
    for graph in graphs:
        nodes = pd.DataFrame(list(graph.nodes))
        edges = pd.DataFrame(columns=["start_node", "end_node"])
        for i, edge in enumerate(graph.edges):
            edges.loc[i, "start_node"] = edge[0]
            edges.loc[i, "end_node"] = edge[1]
        graphs_coll.add_graph(Graph(nodes, edges, np.random.randint(0, 2)))

    if write_to_file:
        save_dataset(graphs_coll, "TESTDATA", folder=folder)

    return graphs_coll
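# Usage sketch (illustrative only): build the robustness test set in memory, then save it
# explicitly with the same call make() issues internally. The `_example_make_test_dataset`
# name and the folder path are hypothetical examples.
def _example_make_test_dataset():
    collection = make(add_features=True, write_to_file=False, n_graphs=2)
    save_dataset(collection, "TESTDATA", folder="./datasets")
    return collection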
def extract_benchmark_graphs(datadir, dataname):  # pylint: disable=too-many-locals
    """Read a benchmark dataset in the format of
    https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets
    (node and graph indices start at 1 in the files).

    Returns:
        GraphCollection with graph labels and, if available, node labels/attributes.
    """
    prefix = str(Path(datadir) / dataname)

    # node -> graph assignment (shifted to 0-based indexing)
    with open(prefix + "_graph_indicator.txt") as f:
        nodes_df = pd.read_csv(f, dtype=int, header=None) - 1
    nodes_df.columns = ["graph_id"]

    # one label per graph
    with open(prefix + "_graph_labels.txt") as f:
        graph_labels = pd.read_csv(f, header=None)

    # edge list, read in chunks and shifted to 0-based indexing
    edge_chunks = []
    with open(prefix + "_A.txt") as f:
        for edges_df_next in pd.read_csv(
            f, sep=",", dtype=int, header=None, chunksize=1_000_000
        ):
            edge_chunks.append(edges_df_next - 1)
    edges_df = pd.concat(edge_chunks, ignore_index=True)
    edges_df.columns = ["start_node", "end_node"]
    edges_df["graph_id"] = nodes_df["graph_id"][
        edges_df["start_node"].to_list()
    ].to_list()

    # optional node labels (one-hot encoded) and node attributes
    columns = []
    if Path(prefix + "_node_labels.txt").exists():
        with open(prefix + "_node_labels.txt") as f:
            nodes_df["labels_value"] = pd.read_csv(f, header=None)
        nodes_df["labels"] = list(
            pd.get_dummies(nodes_df["labels_value"]).to_numpy(dtype=float)
        )
        columns.append("labels")
    if Path(prefix + "_node_attributes.txt").exists():
        with open(prefix + "_node_attributes.txt") as f:
            nodes_df["attributes"] = list(pd.read_csv(f, header=None).to_numpy())
        columns.append("attributes")

    # split nodes and edges per graph and assemble the collection
    graph_ids = list(set(nodes_df["graph_id"]))
    graphs = GraphCollection()
    for graph_id in graph_ids:
        nodes = nodes_df.loc[nodes_df["graph_id"] == graph_id][columns]
        edges = edges_df.loc[edges_df["graph_id"] == graph_id][
            ["start_node", "end_node"]
        ]
        graphs.add_graph(Graph(nodes, edges, int(graph_labels.loc[graph_id][0])))
    return graphs
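# Usage sketch (illustrative only): load a benchmark dataset that has been downloaded and
# unpacked under `datadir`. The "ENZYMES" dataset name and the local path are examples,
# not requirements of extract_benchmark_graphs.
def _example_load_benchmark():
    return extract_benchmark_graphs("./datasets/ENZYMES", "ENZYMES")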
def _add_graph(label):
    # relies on `sizes`, `probs`, `n_graphs` and `graphs` being defined outside this function
    for _ in range(n_graphs):
        graph = nx.stochastic_block_model(sizes, probs)
        graphs.add_graph(Graph(list(graph.nodes), list(graph.edges), label))
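# Context sketch (assumption): _add_graph reads `sizes`, `probs`, `n_graphs` and `graphs`
# from an enclosing scope, so it is most naturally used as a nested helper. The enclosing
# function below, including its block sizes and probabilities, is purely illustrative.
def _example_sbm_dataset(n_graphs=5):
    graphs = GraphCollection()
    sizes = [20, 20, 20]  # three blocks of 20 nodes each
    probs = [
        [0.5, 0.05, 0.05],
        [0.05, 0.5, 0.05],
        [0.05, 0.05, 0.5],
    ]  # dense within blocks, sparse between blocks

    def _add_graph(label):
        for _ in range(n_graphs):
            graph = nx.stochastic_block_model(sizes, probs)
            graphs.add_graph(Graph(list(graph.nodes), list(graph.edges), label))

    _add_graph(0)  # n_graphs samples with label 0
    _add_graph(1)  # n_graphs samples with label 1
    return graphs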