コード例 #1
0
def prep_ws(inpath):
    """
    Preprocess web spam graph.

    The input is read line by line. The first line is skipped (presumably a
    header -- confirm against the dataset format). Every later line holds the
    out-neighbours of one node as whitespace-separated ``dst:value`` tokens;
    the source node id of a line is its 0-based position in the file, so the
    first line that contributes edges belongs to node 1. Only the part before
    the colon is used, i.e. the graph is treated as unweighted.

    Parameters
    ----------
    inpath : str
        Path of the file to read.

    Returns
    -------
    G
        The first element returned by ``pp.prep_graph`` when called with
        ``relabel=True`` and ``del_self_loops=False``.

    Raises
    ------
    ValueError
        If the text before ``:`` in a token is not an integer.
    """
    # Create an empty digraph
    G = nx.DiGraph()

    # The context manager guarantees the file is closed, also when a
    # malformed token makes int() raise half-way through the file.
    with open(inpath, 'r') as f:
        for src, line in enumerate(f):
            if src == 0:
                # First line never contributes edges
                continue
            for dst in line.split():
                # Drop the ':value' suffix, we consider the graph unweighted
                G.add_edge(src, int(dst.split(':')[0]))

    # Preprocess the graph; the node id mapping is not needed here
    G, _ = pp.prep_graph(G, relabel=True, del_self_loops=False)

    # Return the preprocessed graph
    return G
コード例 #2
0
def test_split():
    """
    Exercise graph loading, preprocessing and train/test edge splitting.

    Loads ``./data/network.edgelist`` as a directed, tab-delimited graph,
    restricts it to a random sample of ``subgraph_size`` nodes, preprocesses
    and stores that subgraph, splits its edges into train and test sets,
    prints the train edges and finally generates false edges for both sets.

    Raises
    ------
    ValueError
        Raised by ``random.sample`` if the loaded graph has fewer than
        ``subgraph_size`` nodes.
    """
    # Variables
    dataset_path = "./data/"
    output_path = "./data/"
    test_name = "network.edgelist"
    subgraph_size = 1000

    # Load a graph
    G = pp.load_graph(dataset_path + test_name,
                      delimiter="\t",
                      comments='#',
                      directed=True)

    # Restrict graph to a sub-graph of 'subgraph_size' nodes.
    # random.sample needs a real sequence: passing the node view directly
    # raises TypeError on Python >= 3.11, so materialise it as a list first.
    SG = G.subgraph(random.sample(list(G.nodes), subgraph_size)).copy()

    # Preprocess the graph; the node id mapping is not needed here
    SG, _ = pp.prep_graph(SG, relabel=True, del_self_loops=True)

    # Store the preprocessed subgraph
    pp.save_graph(SG, output_path + "prep_graph.edgelist", delimiter=",")

    # Alternatively, train/test splits can be computed one at a time
    train_E, test_E = stt.split_train_test(SG, train_frac=0.51, seed=99)

    print(train_E)

    # Compute set of false edges
    train_E_false, test_E_false = stt.generate_false_edges_owa(
        SG,
        train_E=train_E,
        test_E=test_E,
        num_fe_train=None,
        num_fe_test=None,
        seed=99)
コード例 #3
0
ファイル: __main__.py プロジェクト: shitouxyz123/EvalNE
def preprocess(setup, i):
    """
    Graph preprocessing routine.

    Loads the i-th network described by `setup`, preprocesses it and, when
    `setup.prep_nw_name` is set, stores the result under
    `setup.outpaths[i] + setup.prep_nw_name`.

    Parameters
    ----------
    setup
        Configuration object. The attributes read here are `verbose`,
        `inpaths`, `separators`, `comments`, `directed`, `relabel`,
        `del_selfloops`, `prep_nw_name`, `outpaths`, `delimiter` and
        `write_stats`.
    i : int
        Index selecting the network in the per-network lists of `setup`.

    Returns
    -------
    The first element returned by `pp.prep_graph`.
    """
    if setup.verbose:
        print('Preprocesing graph...')

    # Read the raw network from disk
    raw_graph = pp.load_graph(
        setup.inpaths[i],
        delimiter=setup.separators[i],
        comments=setup.comments[i],
        directed=setup.directed[i],
    )

    # Clean it up; the node id mapping is not used by this routine
    prepped_graph, _ = pp.prep_graph(
        raw_graph,
        relabel=setup.relabel,
        del_self_loops=setup.del_selfloops,
    )

    # Nothing to persist when no output name was configured
    if setup.prep_nw_name is None:
        return prepped_graph

    pp.save_graph(
        prepped_graph,
        output_path=setup.outpaths[i] + setup.prep_nw_name,
        delimiter=setup.delimiter,
        write_stats=setup.write_stats,
    )
    return prepped_graph
コード例 #4
0
def prep_fb(inpath):
    """
    Preprocess facebook wall post graph.

    The file at `inpath` is loaded as a directed, tab-delimited edge list
    with '#' as comment marker, every edge is reversed and the result is
    preprocessed with relabelling enabled and self-loops kept.

    Returns the first element returned by `pp.prep_graph`.
    """
    stored_graph = pp.load_graph(inpath,
                                 delimiter='\t',
                                 comments='#',
                                 directed=True)

    # Edges are stored as (destination, origin), so flip their direction
    flipped_graph = stored_graph.reverse()

    # The node id mapping is not needed by callers of this function
    prepped_graph, _ = pp.prep_graph(flipped_graph,
                                     relabel=True,
                                     del_self_loops=False)
    return prepped_graph
コード例 #5
0
def test():
    """
    Run a save/load/preprocess round trip on ./data/network.edgelist.

    Prints the stats of the original graph, of the graph after it has been
    saved and loaded again, and of the preprocessed graph, then prints the
    first ten entries of the id list returned by `pp.prep_graph` and calls
    `pp.get_redges_false` with an output file. All files are written to
    ./data/.
    """
    # Locations used throughout the test
    data_dir = "./data/"
    out_dir = "./data/"
    network_file = "network.edgelist"
    orig_copy = out_dir + "orig_graph.edgelist"
    prep_copy = out_dir + "prep_graph.edgelist"
    redges_file = out_dir + "redges_false.csv"

    # Read the input network
    original = pp.load_graph(data_dir + network_file,
                             delimiter=',',
                             comments='#',
                             directed=True)

    print("")
    print("Original graph stats:")
    print("-----------------------------------------")
    pp.get_stats(original)

    # Round trip: write the graph out and read it back in
    pp.save_graph(original, orig_copy, delimiter=",")
    reloaded = pp.load_graph(orig_copy,
                             delimiter=",",
                             comments='#',
                             directed=True)

    print("Has the same stats after being loaded?:")
    print("-----------------------------------------")
    pp.get_stats(reloaded)

    # Preprocess the reloaded copy, keeping self-loops and relabelling nodes
    prepped, ids = pp.prep_graph(reloaded, del_self_loops=False, relabel=True)

    print("Preprocessed graph stats (restricted to main cc):")
    print("-----------------------------------------")
    pp.get_stats(prepped)

    pp.save_graph(prepped, prep_copy, delimiter=",")

    print("Sample of 10 (oldNodeID, newNodeID):")
    print("-----------------------------------------")
    print(ids[0:10])

    pp.get_redges_false(prepped, redges_file)
コード例 #6
0
def preprocess(inpath, outpath, delimiter, directed):
    """
    Graph preprocessing routine.

    Loads the edge list at `inpath` (fields separated by `delimiter`, '#'
    starting a comment, directed or not according to `directed`),
    preprocesses it with relabelling and self-loop removal enabled, and
    stores the result, comma-delimited and with `write_stats=True`, as
    `outpath + "prep_graph.edgelist"`.

    Returns the first element returned by `pp.prep_graph`.
    """
    print('Preprocessing graph...')

    raw_graph = pp.load_graph(inpath,
                              delimiter=delimiter,
                              comments='#',
                              directed=directed)

    # The node id mapping is not needed by callers of this routine
    prepped_graph, _ = pp.prep_graph(raw_graph,
                                     relabel=True,
                                     del_self_loops=True)

    # `outpath` is used as a plain string prefix of the file name
    target_file = outpath + "prep_graph.edgelist"
    pp.save_graph(prepped_graph,
                  output_path=target_file,
                  delimiter=',',
                  write_stats=True)

    return prepped_graph
コード例 #7
0
# ---------------
# Preprocess data
# ---------------
# NOTE(review): `dataset_path`, `output_path` and `directed` are not defined
# in this fragment; they are expected to be set earlier in the script.

# Load the comma-delimited edge list; whether the graph is directed is
# controlled by the `directed` flag ('#' starts a comment line)
G = pp.load_graph(dataset_path, delimiter=",", comments='#', directed=directed)

# Get some graph statistics
pp.get_stats(G)

# Or store them to a file (`output_path` is used as a plain string prefix)
pp.get_stats(G, output_path + "stats.txt")

# Preprocess the graph with node relabelling and self-loop removal enabled;
# `ids` receives the second value returned by prep_graph
SG, ids = pp.prep_graph(G, relabel=True, del_self_loops=True)

# Get non-edges so that the reversed edge exists in the graph.
# Only done for directed graphs, so `redges` stays undefined otherwise.
if directed:
    redges = pp.get_redges_false(SG, output_path=output_path + "redges.csv")

# Store the preprocessed graph to a file, comma-delimited, with
# write_stats enabled
pp.save_graph(SG,
              output_path=output_path + "network_prep.edgelist",
              delimiter=',',
              write_stats=True)

# ----------------
# Split train test
# ----------------
コード例 #8
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Mara Alexandru Cristian
# Contact: [email protected]
# Date: 18/12/2018

# This simple example is the one presented in the README.md file.

from evalne.evaluation import evaluator
from evalne.preprocessing import preprocess as pp

# Load and preprocess the network using the default options of both calls;
# the second value returned by prep_graph is discarded
G = pp.load_graph('../evalne/tests/data/network.edgelist')
G, _ = pp.prep_graph(G)

# Create an evaluator and generate train/test edge split
# (the return value of compute_splits is not needed here)
nee = evaluator.Evaluator()
_ = nee.traintest_split.compute_splits(G)

# Set the baselines; `methods` is rebound further down for the
# embedding methods
methods = ['random_prediction', 'common_neighbours', 'jaccard_coefficient']

# Evaluate baselines
nee.evaluate_baseline(methods=methods)

try:
    # Check if OpenNE is installed
    import openne

    # Set embedding methods from OpenNE
    methods = ['node2vec', 'deepwalk', 'GraRep']