Ejemplo n.º 1
0
def split_graph(G, output_dir, radio=0.8):
    """Split the edges of ``G`` into train and test edge lists saved on disk.

    The edges are visited in random order. An edge goes to the training graph
    whenever at least one of its endpoints has not been seen yet, so every
    node touched by an edge ends up in the training graph; the remaining edges
    become test candidates. Surplus test candidates are then moved back to the
    training graph until the test graph holds at most ``(1 - radio)`` of all
    edges. If there are fewer candidates than that, the test graph is simply
    smaller than requested.

    Two files are written under ``output_dir``: ``graph.edgelist`` (training
    edges) and ``graph_test.edgelist`` (test edges), one ``u v`` pair per
    line. No label file is written by this function.

    Args:
        G: Graph exposing ``edges()``, ``number_of_nodes()`` and
            ``number_of_edges()``.
        output_dir: Directory the two edge lists are written into.
        radio: Fraction of edges to keep for training (the name is kept for
            backward compatibility).

    Raises:
        AssertionError: If some node of ``G`` is not an endpoint of any edge,
            because such a node can never reach the training graph.
    """
    train_path = os.path.join(output_dir, 'graph.edgelist')
    test_path = os.path.join(output_dir, 'graph_test.edgelist')
    G_train = nx.Graph()
    G_test = nx.Graph()
    # The list of (u, v) pairs becomes a 2-D array and only its rows are
    # shuffled, i.e. the edge order is randomised while pairs stay intact.
    edges = np.random.permutation(list(G.edges()))
    nodes = set()
    for a, b in edges:
        if a not in nodes or b not in nodes:
            G_train.add_edge(a, b)
            nodes.add(a)
            nodes.add(b)
        else:
            G_test.add_edge(a, b)
    print(len(nodes), G.number_of_nodes())
    assert len(nodes) == G.number_of_nodes(), \
        "some nodes of G have no edges and cannot be covered by the split"
    assert len(nodes) == G_train.number_of_nodes(), \
        "training graph does not contain every node seen while splitting"
    # Round away floating-point noise before truncating: (1 - 0.8) * 10 is
    # 1.9999999999999996, which a bare int() would turn into 1 instead of 2.
    num_test_edges = int(round((1 - radio) * G.number_of_edges(), 6))
    surplus = G_test.number_of_edges() - num_test_edges
    if surplus > 0:
        # Too many test candidates: hand the first ``surplus`` back to train.
        moved = list(G_test.edges())[:surplus]
        G_train.add_edges_from(moved)
        G_test.remove_edges_from(moved)
    print("sample graph,origin: {} {}, train: {} {}, test: {} {}".format(
        G.number_of_nodes(), G.number_of_edges(), G_train.number_of_nodes(),
        G_train.number_of_edges(), G_test.number_of_nodes(),
        G_test.number_of_edges()))
    for path, graph in ((train_path, G_train), (test_path, G_test)):
        with utils.write_with_create(path) as f:
            for i, j in graph.edges():
                print(i, j, file=f)
Ejemplo n.º 2
0
def get_wne(dataset_name, sampled_dir='', cache=True):
    """Return the NetLSD heat signature of a dataset graph, cached on disk.

    The graph is read from ``data/<dataset_name>/<sampled_dir>/graph.edgelist``
    and the signature is stored as one whitespace-separated line in
    ``embeddings/<dataset_name>/<sampled_dir>/wme.embeddings`` (both relative
    to the current working directory).

    The signature is recomputed when ``cache`` is false, when the cached file
    does not exist, or when it is older than the edge list. Otherwise the
    cached file is reused. In every case the returned value is read back from
    the cached file.

    Args:
        dataset_name: Name of the dataset directory under ``data/``.
        sampled_dir: Optional sub-directory of a sampled graph.
        cache: Whether an up-to-date cached signature may be reused.

    Returns:
        The array produced by ``np.loadtxt`` on the cached signature file.
    """
    dataset_filename = os.path.abspath(
        os.path.join('data/{}'.format(dataset_name), sampled_dir,
                     'graph.edgelist'))
    save_path = os.path.abspath(
        os.path.join('embeddings/{}'.format(dataset_name), sampled_dir,
                     'wme.embeddings'))
    needs_refresh = (
        not cache
        or not os.path.exists(save_path)
        or os.path.getmtime(save_path) < os.path.getmtime(dataset_filename))
    if needs_refresh:
        G = utils.load_graph(dataset_filename, label_name=None)
        # Graphs below 10000 nodes use the 'full' eigenvalue mode, larger
        # ones fall back to 'auto'.
        eigenvalues = 'full' if G.number_of_nodes() < 10000 else 'auto'
        wne = netlsd.heat(G,
                          timescales=np.logspace(-2, 2, 10),
                          eigenvalues=eigenvalues)
        with utils.write_with_create(save_path) as f:
            print(" ".join(map(str, wne)), file=f)
    return np.loadtxt(save_path)
Ejemplo n.º 3
0
def sample_graph(G,
                 output_dir,
                 s_n,
                 times=10,
                 with_test=False,
                 radio=0.8,
                 feature_path=None):
    """Draw ``times`` random-walk induced subgraphs of ``G`` and save each one.

    Sample ``t`` is written into ``<output_dir>/s<t>``. Its size is drawn
    uniformly from ``[int(s_n / 2), 2 * s_n]`` and its nodes are relabelled to
    ``0 .. n-1`` in the order returned by ``Gs.nodes()``.

    Files written per sample:

    * ``features.npz`` -- only when ``feature_path`` is given: the rows of the
      source feature matrix that belong to sampled nodes, re-indexed with the
      new node ids.
    * ``with_test`` false: ``graph.edgelist`` with all edges and ``label.txt``
      with one ``node label`` line per entry of each node's ``'label'``
      attribute.
    * ``with_test`` true: ``graph.edgelist`` (training edges) and
      ``graph_test.edgelist`` (test edges). An edge is a training edge when it
      is the first to reach one of its endpoints in a random edge order; test
      candidates beyond ``(1 - radio)`` of all edges are moved back to train.
      No ``label.txt`` is written in this mode.

    Args:
        G: Source graph.
        output_dir: Directory that receives the ``s<t>`` sub-directories.
        s_n: Base sample size; ``None`` means ``int(sqrt(number of nodes))``.
        times: Number of samples to draw.
        with_test: Whether to split every sample into train/test edge lists.
        radio: Fraction of edges kept for training when ``with_test`` is set
            (the name is kept for backward compatibility).
        feature_path: Optional path of a sparse ``.npz`` feature matrix whose
            row indices are looked up among the node ids of ``G``.

    Raises:
        AssertionError: In ``with_test`` mode, if a sampled node is not an
            endpoint of any sampled edge.
    """
    if s_n is None:
        s_n = int(np.sqrt(G.number_of_nodes()))
    # The source feature matrix is the same for every sample, so read it and
    # the coordinates of its non-zero entries once instead of per iteration.
    full_feats = None
    if feature_path is not None and times > 0:
        full_feats = sparse.load_npz(feature_path)
        feat_rows, feat_cols = full_feats.nonzero()
    for t in range(times):
        t_dir = os.path.join(output_dir, 's{}'.format(t))
        n = random.randint(int(s_n / 2), 2 * s_n)
        Gs = utils.random_walk_induced_graph_sampling(G, n)
        mapping = dict(zip(Gs.nodes(), range(Gs.number_of_nodes())))
        if full_feats is not None:
            row = []
            col = []
            data = []
            for i, j in zip(feat_rows, feat_cols):
                if i in mapping:
                    row.append(mapping[i])
                    col.append(j)
                    data.append(full_feats[i, j])
            feats = sparse.csr_matrix((data, (row, col)),
                                      shape=(len(mapping),
                                             full_feats.shape[1]))
        Gs = nx.relabel_nodes(Gs, mapping)
        file_path = os.path.join(t_dir, 'graph.edgelist')
        file_test_path = os.path.join(t_dir, 'graph_test.edgelist')
        label_path = os.path.join(t_dir, 'label.txt')
        feature_save_path = os.path.join(t_dir, 'features.npz')
        if full_feats is not None:
            # Enter and immediately leave the writer so that whatever it sets
            # up (presumably the target directory -- confirm against utils) is
            # in place and its handle is released before save_npz writes the
            # real content. The original call dropped the returned object
            # without ever entering or closing it.
            with utils.write_with_create(feature_save_path):
                pass
            sparse.save_npz(feature_save_path, feats)
        if not with_test:
            print("sample graph, nodes: {}, edges: {}, save into {}".format(
                Gs.number_of_nodes(), Gs.number_of_edges(), t_dir))
            with utils.write_with_create(file_path) as f:
                for i, j in Gs.edges():
                    print(i, j, file=f)
            with utils.write_with_create(label_path) as f:
                for i, data in Gs.nodes(data=True):
                    if 'label' in data:
                        for j in data['label']:
                            print(i, j, file=f)
        else:
            G_train = nx.Graph()
            G_test = nx.Graph()
            # Shuffles the rows of the (u, v) pair array, i.e. the edge order.
            edges = np.random.permutation(list(Gs.edges()))
            nodes = set()
            for a, b in edges:
                if a not in nodes or b not in nodes:
                    G_train.add_edge(a, b)
                    nodes.add(a)
                    nodes.add(b)
                else:
                    G_test.add_edge(a, b)
            assert len(nodes) == Gs.number_of_nodes(), \
                "some sampled nodes have no edges and cannot be covered"
            assert len(nodes) == G_train.number_of_nodes(), \
                "training graph does not contain every sampled node"
            # Round away floating-point noise before truncating: (1 - 0.8) *
            # 10 is 1.9999999999999996, which a bare int() would turn into 1.
            num_test_edges = int(
                round((1 - radio) * Gs.number_of_edges(), 6))
            surplus = G_test.number_of_edges() - num_test_edges
            if surplus > 0:
                # Too many test candidates: hand the surplus back to train.
                moved = list(G_test.edges())[:surplus]
                G_train.add_edges_from(moved)
                G_test.remove_edges_from(moved)
            print(
                "sample graph,origin: {} {}, train: {} {}, test: {} {}".format(
                    Gs.number_of_nodes(), Gs.number_of_edges(),
                    G_train.number_of_nodes(), G_train.number_of_edges(),
                    G_test.number_of_nodes(), G_test.number_of_edges()))
            with utils.write_with_create(file_path) as f:
                for i, j in G_train.edges():
                    print(i, j, file=f)
            with utils.write_with_create(file_test_path) as f:
                for i, j in G_test.edges():
                    print(i, j, file=f)