Ejemplo n.º 1
0
def build_graph(num_nodes, edge_path):
    """Build an undirected graph from whitespace-separated edge-list files.

    Every non-blank line must read ``src dst [weight]``. Each edge is stored
    in both directions (``src -> dst`` and ``dst -> src``), and a weight, when
    present, is duplicated for the reverse edge.

    Args:
        num_nodes: Number of nodes; every node id must be smaller than this.
        edge_path: Path to a single edge file, or a directory whose files are
            all read (the directory is walked recursively).

    Returns:
        The constructed ``Graph``. If every edge carried a weight, the graph
        has ``edge_feat["weight"]`` and its ``node_feat["alias"]`` /
        ``node_feat["events"]`` are filled from ``graph_alias_sample_table``.

    Raises:
        ValueError: If ``edge_path`` is neither a file nor a directory, if a
            non-blank line has fewer than two fields, or if no edge is found.
        AssertionError: If some node id is not smaller than ``num_nodes``.
    """
    if os.path.isfile(edge_path):
        filelist = [edge_path]
    elif os.path.isdir(edge_path):
        filelist = [
            os.path.join(dp, f)
            for dp, _, filenames in os.walk(edge_path) for f in filenames
        ]
    else:
        raise ValueError("{} not supported".format(edge_path))

    edges, edge_weight = [], []
    for name in filelist:
        with open(name) as inf:
            for lineno, line in enumerate(inf, 1):
                slots = line.split()
                if not slots:
                    # Blank lines (e.g. a trailing newline) carry no edge.
                    continue
                if len(slots) < 2:
                    raise ValueError(
                        "{}:{}: expected 'src dst [weight]', got {!r}".format(
                            name, lineno, line))
                src, dst = slots[0], slots[1]
                edges.append([src, dst])
                edges.append([dst, src])
                if len(slots) > 2:
                    weight = float(slots[2])
                    edge_weight.extend([weight, weight])

    if not edges:
        # Fail with a clear message instead of numpy's "zero-size array"
        # error from the max() reduction below.
        raise ValueError("No edges found in {}".format(edge_path))

    # The ids are still strings here; numpy parses them into int64.
    edges = np.array(edges, dtype="int64")
    assert num_nodes > edges.max(
    ), "Node id in any edges should be smaller then num_nodes!"

    edge_feat = dict()
    # Weights are attached only when every edge has one; a partially
    # weighted input is treated as unweighted.
    if len(edge_weight) == len(edges):
        edge_feat["weight"] = np.array(edge_weight)

    graph = Graph(num_nodes, edges, edge_feat=edge_feat)
    log.info("Build graph done")

    # Return value is discarded; presumably called to make the graph build
    # its index eagerly (see the log message below) -- TODO confirm.
    graph.outdegree()

    # Drop the local references to the raw arrays.
    del edges, edge_feat

    log.info("Build graph index done")
    if "weight" in graph.edge_feat:
        graph.node_feat["alias"], graph.node_feat[
            "events"] = graph_alias_sample_table(graph, "weight")
        log.info("Build graph alias sample table done")
    return graph
Ejemplo n.º 2
0
def build_graph(num_nodes, edge_path, output_path, undigraph=True):
    """Build a graph from edge-list files, using ``.npy`` caches when present.

    If ``<output_path>/edge.npy`` exists, edges (and optionally
    ``edge_weight.npy``, ``alias.npy``, ``events.npy``) are loaded from
    ``output_path``. Otherwise the files returned by
    ``get_file_list(edge_path)`` are parsed; every non-blank line must read
    ``src dst [weight]``.

    Args:
        num_nodes: Number of nodes; when edges are parsed from text, every
            node id must be smaller than this.
        edge_path: Location of the edge files, passed to ``get_file_list``.
        output_path: Directory holding the ``.npy`` cache files. The alias
            and events tables are written here after they are computed.
        undigraph: When exactly ``True``, each edge (and its weight) is also
            added in the reverse direction.

    Returns:
        The constructed ``Graph``. If the graph has a ``"weight"`` edge
        feature, its ``node_feat`` contains ``"alias"`` and ``"events"``.

    Raises:
        ValueError: If a non-blank line has fewer than two fields, or if no
            edge is found in the parsed files.
        AssertionError: If a parsed node id is not smaller than ``num_nodes``.
    """
    edge_file = os.path.join(output_path, "edge.npy")
    edge_weight_file = os.path.join(output_path, "edge_weight.npy")
    alias_file = os.path.join(output_path, "alias.npy")
    events_file = os.path.join(output_path, "events.npy")

    edge_feat = dict()
    node_feat = dict()
    if os.path.isfile(edge_file):
        edges = np.load(edge_file)
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects; only safe while output_path holds trusted files.
        if os.path.isfile(edge_weight_file):
            log.info("Loading weight from cache")
            edge_feat["weight"] = np.load(edge_weight_file, allow_pickle=True)
        if os.path.isfile(alias_file):
            log.info("Loading alias from cache")
            node_feat["alias"] = np.load(alias_file, allow_pickle=True)
        if os.path.isfile(events_file):
            log.info("Loading events from cache")
            node_feat["events"] = np.load(events_file, allow_pickle=True)
    else:
        filelist = get_file_list(edge_path)
        edges, edge_weight = [], []
        log.info("Reading edge files")
        for name in filelist:
            with open(name) as inf:
                for lineno, line in enumerate(inf, 1):
                    slots = line.split()
                    if not slots:
                        # Blank lines (e.g. a trailing newline) carry no edge.
                        continue
                    if len(slots) < 2:
                        raise ValueError(
                            "{}:{}: expected 'src dst [weight]', got {!r}".
                            format(name, lineno, line))
                    edges.append([slots[0], slots[1]])
                    if len(slots) > 2:
                        edge_weight.append(slots[2])

        if not edges:
            # Fail with a clear message instead of numpy's "zero-size array"
            # error from the max() reduction below.
            raise ValueError("No edges found in {}".format(edge_path))

        # Ids and weights are still strings here; numpy parses them.
        edges = np.array(edges, dtype="int64")
        assert num_nodes > edges.max(
        ), "Node id in any edges should be smaller then num_nodes!"

        log.info("Read edge files done.")
        # Weights are attached only when every edge has one; a partially
        # weighted input is treated as unweighted.
        if len(edge_weight) == len(edges):
            edge_feat["weight"] = np.array(edge_weight, dtype="float32")

    # Identity check kept on purpose: only the literal True enables the
    # reverse edges, so truthy non-bool values keep their current meaning.
    if undigraph is True:
        edges = np.concatenate([edges, edges[:, [1, 0]]], 0)
        if "weight" in edge_feat:
            # NOTE(review): weights become float64 only on this branch; the
            # directed case keeps the dtype produced above.
            edge_feat["weight"] = np.concatenate(
                [edge_feat["weight"], edge_feat["weight"]],
                0).astype("float64")

    graph = Graph(num_nodes, edges, node_feat, edge_feat=edge_feat)
    log.info("Build graph done")
    # Return value is discarded; presumably called to make the graph build
    # its index eagerly (see the log message below) -- TODO confirm.
    graph.outdegree()
    log.info("Build graph index done")

    # Rebuild when EITHER table is missing. A half-written cache (only one of
    # alias.npy / events.npy on disk) must not leave the graph without the
    # other table.
    tables_missing = ("alias" not in graph.node_feat or
                      "events" not in graph.node_feat)
    if "weight" in graph.edge_feat and tables_missing:
        graph.node_feat["alias"], graph.node_feat[
            "events"] = graph_alias_sample_table(graph, "weight")
        log.info(
            "Build graph alias sample table done, and saving alias & evnets cache"
        )
        # Make sure the cache directory exists so the freshly computed tables
        # are not lost to a missing-directory error.
        if output_path:
            os.makedirs(output_path, exist_ok=True)
        np.save(alias_file, graph.node_feat["alias"])
        np.save(events_file, graph.node_feat["events"])
    return graph