예제 #1
0
def run_test():
    """
    Run the baseline and Katz tests on the sample network.

    Loads ``./data/network.edgelist`` as an undirected graph, prints its
    statistics, computes one train/test split through an ``Evaluator`` and
    then runs ``test_baselines`` and ``test_katz``, printing the elapsed time
    of each of the three stages.
    """
    # Input settings
    directed = False
    filename = "./data/network.edgelist"

    # Read the test graph and print some stats about it
    G = pp.load_graph(filename, delimiter=",", comments='#', directed=directed)
    pp.get_stats(G)

    # Evaluator whose train/test split object is used below
    nee = evaluator.Evaluator()

    # Stage 1: compute a single train/test split (train_frac=0.9).
    # Alternative kept from the original for reference:
    # nee.traintest_split.read_splits('./data/data', 0, directed, verbose=False)
    t0 = time()
    splits = nee.traintest_split.compute_splits(G, train_frac=0.9)
    train_E, train_E_false, test_E, test_E_false = splits
    elapsed = time() - t0
    print("\nSplits computed in {} sec".format(elapsed))

    # Stage 2: baseline methods
    t0 = time()
    test_baselines(nee, directed)
    elapsed = time() - t0
    print("\nBaselines computed in {} sec".format(elapsed))

    # Stage 3: Katz
    t0 = time()
    test_katz(nee)
    elapsed = time() - t0
    print("\nKatz computed in {} sec".format(elapsed))
예제 #2
0
def test_split():
    """
    Exercise the train/test split utilities on a random sub-graph.

    Loads ``./data/network.edgelist`` as a directed graph, restricts it to a
    random sub-graph of at most ``subgraph_size`` nodes, preprocesses and
    saves that sub-graph, then computes one train/test edge split together
    with the corresponding sets of false edges. The train edges are printed.
    """
    # Variables
    dataset_path = "./data/"
    output_path = "./data/"
    test_name = "network.edgelist"
    subgraph_size = 1000

    # Load a graph
    G = pp.load_graph(dataset_path + test_name,
                      delimiter="\t",
                      comments='#',
                      directed=True)

    # Restrict graph to a sub-graph of (at most) 'subgraph_size' nodes.
    # random.sample() requires a sequence (non-sequence populations such as
    # node views are rejected on Python 3.11+) and raises ValueError when the
    # sample is larger than the population, so the nodes are materialised as
    # a list and the sample size is capped at the number of nodes available.
    nodes = list(G.nodes)
    sampled_nodes = random.sample(nodes, min(subgraph_size, len(nodes)))
    SG = G.subgraph(sampled_nodes).copy()

    # Preprocess the graph
    SG, ids = pp.prep_graph(SG, relabel=True, del_self_loops=True)

    # Store the preprocessed subgraph to a file
    pp.save_graph(SG, output_path + "prep_graph.edgelist", delimiter=",")

    # Alternatively, train/test splits can be computed one at a time
    train_E, test_E = stt.split_train_test(SG, train_frac=0.51, seed=99)

    print(train_E)

    # Compute set of false edges
    train_E_false, test_E_false = stt.generate_false_edges_owa(
        SG,
        train_E=train_E,
        test_E=test_E,
        num_fe_train=None,
        num_fe_test=None,
        seed=99)
예제 #3
0
def preprocess(setup, i):
    """
    Graph preprocessing routine.

    Loads the i-th network described by `setup`, preprocesses it and, when
    `setup.prep_nw_name` is set, stores the result under the i-th output path.

    Parameters
    ----------
    setup : object
        Configuration holder. The attributes read here are `verbose`,
        `inpaths`, `separators`, `comments`, `directed`, `relabel`,
        `del_selfloops`, `prep_nw_name`, `outpaths`, `delimiter` and
        `write_stats`.
    i : int
        Index of the network within the per-network lists of `setup`.

    Returns
    -------
    The preprocessed graph (first element returned by `pp.prep_graph`).
    """
    if setup.verbose:
        print('Preprocesing graph...')

    # Read the raw network using the per-network input settings
    raw_graph = pp.load_graph(
        setup.inpaths[i],
        delimiter=setup.separators[i],
        comments=setup.comments[i],
        directed=setup.directed[i],
    )

    # Only the graph is needed; the second returned value is not used
    prepped, _ = pp.prep_graph(
        raw_graph,
        relabel=setup.relabel,
        del_self_loops=setup.del_selfloops,
    )

    # Nothing to store when no output name was configured
    if setup.prep_nw_name is None:
        return prepped

    # Store the preprocessed graph next to the other outputs of network i
    target = setup.outpaths[i] + setup.prep_nw_name
    pp.save_graph(prepped,
                  output_path=target,
                  delimiter=setup.delimiter,
                  write_stats=setup.write_stats)
    return prepped
예제 #4
0
def test():
    """
    Round-trip a graph through save/load and preprocess it, printing stats.

    Loads ``./data/network.edgelist`` as a directed graph, saves it, loads the
    saved copy back, preprocesses that copy (relabelling nodes, keeping self
    loops) and saves the result. Graph statistics are printed after each step,
    followed by the first 10 entries of the ids returned by ``pp.prep_graph``.
    Finally ``pp.get_redges_false`` is called with ``redges_false.csv`` as its
    output file.
    """
    # Locations and reusable pieces
    data_dir = "./data/"
    out_dir = "./data/"
    network_file = "network.edgelist"
    rule = "-----------------------------------------"
    orig_copy = out_dir + "orig_graph.edgelist"

    # Read the original network
    G = pp.load_graph(data_dir + network_file,
                      delimiter=',', comments='#', directed=True)

    print("")
    print("Original graph stats:")
    print(rule)
    pp.get_stats(G)

    # Write the graph out and read it straight back in
    pp.save_graph(G, orig_copy, delimiter=",")
    G2 = pp.load_graph(orig_copy, delimiter=",", comments='#', directed=True)

    print("Has the same stats after being loaded?:")
    print(rule)
    pp.get_stats(G2)

    # Preprocess the reloaded graph (self loops are kept, nodes relabelled)
    GP, ids = pp.prep_graph(G2, del_self_loops=False, relabel=True)

    print("Preprocessed graph stats (restricted to main cc):")
    print(rule)
    pp.get_stats(GP)

    pp.save_graph(GP, out_dir + "prep_graph.edgelist", delimiter=",")

    print("Sample of 10 (oldNodeID, newNodeID):")
    print(rule)
    print(ids[0:10])

    pp.get_redges_false(GP, out_dir + "redges_false.csv")
예제 #5
0
def prep_fb(inpath):
    """
    Preprocess facebook wall post graph.

    Parameters
    ----------
    inpath : str
        Path of the tab-separated edgelist to load.

    Returns
    -------
    The preprocessed graph (first element returned by `pp.prep_graph`).
    """
    raw_graph = pp.load_graph(inpath, delimiter='\t', comments='#', directed=True)

    # The FB graph is stored as (destination, origin), so every edge is
    # reversed before preprocessing
    reversed_graph = raw_graph.reverse()

    # Relabel the nodes but keep self loops; the returned ids are not needed
    prepped, _ = pp.prep_graph(reversed_graph, relabel=True, del_self_loops=False)
    return prepped
예제 #6
0
def preprocess(inpath, outpath, delimiter, directed):
    """
    Graph preprocessing routine.

    Parameters
    ----------
    inpath : str
        Path of the edgelist to load.
    outpath : str
        Prefix to which "prep_graph.edgelist" is appended to build the
        output file path.
    delimiter : str
        Field separator used in the input file.
    directed : bool
        Passed to `pp.load_graph` as its `directed` argument.

    Returns
    -------
    The preprocessed graph (first element returned by `pp.prep_graph`).
    """
    print('Preprocessing graph...')

    # Read the input network
    raw_graph = pp.load_graph(
        inpath, delimiter=delimiter, comments='#', directed=directed)

    # Relabel nodes and drop self loops; the returned ids are not needed
    prepped, _ = pp.prep_graph(raw_graph, relabel=True, del_self_loops=True)

    # Persist the result (always comma separated) together with its stats
    target = outpath + "prep_graph.edgelist"
    pp.save_graph(prepped, output_path=target, delimiter=',', write_stats=True)

    return prepped
예제 #7
0
# Time count
start = time()

# Create folders for the results if these do not exist.
# exist_ok=True replaces the separate os.path.exists() check, which could
# race with another process creating the folder between check and creation.
os.makedirs(output_path, exist_ok=True)
os.makedirs(output_path + "lp_train_test_splits/", exist_ok=True)

# ---------------
# Preprocess data
# ---------------

# Load the data; whether the graph is directed is controlled by `directed`
G = pp.load_graph(dataset_path, delimiter=",", comments='#', directed=directed)

# Get some graph statistics
pp.get_stats(G)

# Or store them to a file
pp.get_stats(G, output_path + "stats.txt")

# Preprocess the graph
SG, ids = pp.prep_graph(G, relabel=True, del_self_loops=True)

# Get non-edges so that the reversed edge exists in the graph.
# NOTE: `redges` is only defined when `directed` is true.
if directed:
    redges = pp.get_redges_false(SG, output_path=output_path + "redges.csv")

# Store the graph to a file
예제 #8
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Mara Alexandru Cristian
# Contact: [email protected]
# Date: 18/12/2018

# This simple example is the one presented in the README.md file.

from evalne.evaluation import evaluator
from evalne.preprocessing import preprocess as pp

# Load and preprocess the network
G = pp.load_graph('../evalne/tests/data/network.edgelist')
# prep_graph returns a pair; only its first element (the graph) is kept
G, _ = pp.prep_graph(G)

# Create an evaluator and generate train/test edge split
nee = evaluator.Evaluator()
# The returned value is discarded; presumably the evaluator keeps the split
# internally for the evaluation below -- TODO confirm against the library
_ = nee.traintest_split.compute_splits(G)

# Set the baselines
methods = ['random_prediction', 'common_neighbours', 'jaccard_coefficient']

# Evaluate baselines
nee.evaluate_baseline(methods=methods)

try:
    # Check if OpenNE is installed
    import openne

    # Set embedding methods from OpenNE
    methods = ['node2vec', 'deepwalk', 'GraRep']