def build_graph(num_nodes, edge_path):
    """Build an undirected graph from whitespace-separated edge-list text.

    ``edge_path`` may be a single file or a directory; a directory is walked
    recursively and every file found in it is read.  Each non-blank line must
    look like ``src dst [weight]``.  Every edge is stored in both directions,
    and a weight, when present, is attached to both copies.

    Args:
        num_nodes: Number of nodes; every node id must be smaller than this.
        edge_path: Path of an edge file or of a directory of edge files.

    Returns:
        The ``Graph`` built from the edges.  When every edge line carries a
        weight, ``edge_feat["weight"]`` is set and the ``"alias"`` and
        ``"events"`` node features are filled from
        ``graph_alias_sample_table``.

    Raises:
        ValueError: ``edge_path`` is neither a file nor a directory, or the
            input contains no edges.
        AssertionError: a node id is not smaller than ``num_nodes``.
    """
    if os.path.isfile(edge_path):
        filelist = [edge_path]
    elif os.path.isdir(edge_path):
        filelist = [
            os.path.join(dirpath, fname)
            for dirpath, _, fnames in os.walk(edge_path)
            for fname in fnames
        ]
    else:
        raise ValueError(edge_path + " not supported")

    edges, edge_weight = [], []
    for name in filelist:
        with open(name) as inf:
            for line in inf:
                slots = line.split()
                if not slots:
                    # Blank or whitespace-only line (e.g. a trailing newline):
                    # nothing to parse, and indexing slots would fail.
                    continue
                src, dst = slots[0], slots[1]
                # Store both directions so the graph is undirected.
                edges.append([src, dst])
                edges.append([dst, src])
                if len(slots) > 2:
                    weight = float(slots[2])
                    edge_weight.extend([weight, weight])

    if not edges:
        # Fail with a clear message instead of numpy's "zero-size array"
        # error from the max() below.
        raise ValueError(edge_path + " contains no edges")

    edges = np.array(edges, dtype="int64")
    # Kept as an assert so callers still see AssertionError; note that the
    # check disappears when Python runs with -O.
    assert num_nodes > edges.max(
    ), "Node id in any edges should be smaller then num_nodes!"

    edge_feat = {}
    # Weights are only used when every edge line supplied one.
    if len(edge_weight) == len(edges):
        edge_feat["weight"] = np.array(edge_weight)

    graph = Graph(num_nodes, edges, edge_feat=edge_feat)
    log.info("Build graph done")

    # Return value is unused; presumably called to build the graph's index
    # eagerly, as the log message below suggests.
    graph.outdegree()

    # Drop the local references to the raw arrays.
    del edges, edge_feat

    log.info("Build graph index done")
    if "weight" in graph.edge_feat:
        graph.node_feat["alias"], graph.node_feat[
            "events"] = graph_alias_sample_table(graph, "weight")
        log.info("Build graph alias sample table done")
    return graph
def build_graph(num_nodes, edge_path, output_path, undigraph=True):
    """Build a graph from edge-list text, reusing ``.npy`` caches if present.

    If ``<output_path>/edge.npy`` exists, edges are loaded from it, together
    with any of ``edge_weight.npy``, ``alias.npy`` and ``events.npy`` found
    next to it; ``edge_path`` and ``undigraph`` are not consulted in that
    case.  Otherwise the files returned by ``get_file_list(edge_path)`` are
    parsed; each non-blank line must look like ``src dst [weight]``.

    This function only writes ``alias.npy`` and ``events.npy``; the edge and
    weight caches are read here but never written.

    Args:
        num_nodes: Number of nodes; every parsed node id must be smaller than
            this (not re-checked for cached edges).
        edge_path: Passed to ``get_file_list`` to locate the edge files.
        output_path: Directory holding the cache files.
        undigraph: When exactly ``True``, every parsed edge is also added in
            the reverse direction (weights are duplicated accordingly).

    Returns:
        The ``Graph`` built from the edges.  When edge weights are available,
        the ``"alias"`` and ``"events"`` node features are present as well.

    Raises:
        ValueError: the parsed input contains no edges.
        AssertionError: a parsed node id is not smaller than ``num_nodes``.
    """
    edge_file = os.path.join(output_path, "edge.npy")
    edge_weight_file = os.path.join(output_path, "edge_weight.npy")
    alias_file = os.path.join(output_path, "alias.npy")
    events_file = os.path.join(output_path, "events.npy")

    if os.path.isfile(edge_file):
        # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary
        # objects; only point output_path at cache directories you trust.
        edges = np.load(edge_file)
        edge_feat = {}
        if os.path.isfile(edge_weight_file):
            log.info("Loading weight from cache")
            edge_feat["weight"] = np.load(edge_weight_file, allow_pickle=True)
        node_feat = {}
        if os.path.isfile(alias_file):
            log.info("Loading alias from cache")
            node_feat["alias"] = np.load(alias_file, allow_pickle=True)
        if os.path.isfile(events_file):
            log.info("Loading events from cache")
            node_feat["events"] = np.load(events_file, allow_pickle=True)
    else:
        filelist = get_file_list(edge_path)
        edges, edge_weight = [], []
        log.info("Reading edge files")
        for name in filelist:
            with open(name) as inf:
                for line in inf:
                    slots = line.split()
                    if not slots:
                        # Blank or whitespace-only line (e.g. a trailing
                        # newline): indexing slots would fail.
                        continue
                    edges.append([slots[0], slots[1]])
                    if len(slots) > 2:
                        edge_weight.append(slots[2])

        if not edges:
            # Fail with a clear message instead of numpy's "zero-size array"
            # error from the max() below.
            raise ValueError("{} contains no edges".format(edge_path))

        edges = np.array(edges, dtype="int64")
        # Kept as an assert so callers still see AssertionError; note that
        # the check disappears when Python runs with -O.
        assert num_nodes > edges.max(
        ), "Node id in any edges should be smaller then num_nodes!"

        log.info("Read edge files done.")
        edge_feat = {}
        node_feat = {}

        # Weights are only used when every edge line supplied one.
        if len(edge_weight) == len(edges):
            edge_feat["weight"] = np.array(edge_weight, dtype="float32")

        # Identity check kept on purpose: truthy non-bool values do not
        # enable mirroring, exactly as before.
        if undigraph is True:
            edges = np.concatenate([edges, edges[:, [1, 0]]], 0)
            if "weight" in edge_feat:
                # NOTE(review): weights end up float64 here but stay float32
                # when undigraph is not True -- confirm which dtype the
                # alias-table builder expects.
                edge_feat["weight"] = np.concatenate(
                    [edge_feat["weight"], edge_feat["weight"]],
                    0).astype("float64")

    graph = Graph(num_nodes, edges, node_feat, edge_feat=edge_feat)
    log.info("Build graph done")
    # Return value is unused; presumably called to build the graph's index
    # eagerly, as the log message below suggests.
    graph.outdegree()
    log.info("Build graph index done")

    # Rebuild when EITHER table is missing: a partial cache (only one of the
    # two files present) must not leave the graph with half of the tables.
    tables_missing = ("alias" not in graph.node_feat or
                      "events" not in graph.node_feat)
    if "weight" in graph.edge_feat and tables_missing:
        graph.node_feat["alias"], graph.node_feat[
            "events"] = graph_alias_sample_table(graph, "weight")
        log.info(
            "Build graph alias sample table done, and saving alias & evnets cache"
        )
        np.save(alias_file, graph.node_feat["alias"])
        np.save(events_file, graph.node_feat["events"])
    return graph