def prep_ws(inpath):
    """
    Preprocess web spam graph.

    The input is read line by line. The first line (index 0) is skipped;
    every following line describes the out-links of the node whose id is the
    0-based index of that line. Each whitespace-separated token on a line is
    expected to look like ``<dst>:<something>``; only the integer before the
    first ':' is used, so the resulting graph is unweighted.

    Parameters
    ----------
    inpath : str
        Path of the file to read.

    Returns
    -------
    G
        The directed graph after `pp.prep_graph` has been applied with
        ``relabel=True`` and ``del_self_loops=False``.

    Raises
    ------
    ValueError
        If the part of a token before ':' cannot be parsed as an integer.
    """
    # Create an empty digraph
    G = nx.DiGraph()

    # Read the file and create the graph. The context manager guarantees the
    # file handle is released even if parsing a line fails.
    with open(inpath, 'r') as f:
        for src, line in enumerate(f):
            if src == 0:
                # The first line carries no edges for this parser; skip it.
                continue
            for dst in line.split():
                dst_id = int(dst.split(':')[0])
                # We consider the graph unweighted
                G.add_edge(src, dst_id)

    # Preprocess the graph (the node id mapping returned alongside is unused)
    G, _ = pp.prep_graph(G, relabel=True, del_self_loops=False)

    # Return the preprocessed graph
    return G
def test_split():
    """
    Exercise the train/test split pipeline on a random sub-graph.

    Loads ``./data/network.edgelist`` as a directed graph, restricts it to a
    random sample of at most `subgraph_size` nodes, preprocesses and stores
    the sub-graph, then computes a train/test edge split and the matching
    sets of false edges.
    """
    # Variables
    dataset_path = "./data/"
    output_path = "./data/"
    test_name = "network.edgelist"
    subgraph_size = 1000

    # Load a graph
    G = pp.load_graph(dataset_path + test_name, delimiter="\t", comments='#', directed=True)

    # Restrict graph to a sub-graph of 'subgraph_size' nodes.
    # random.sample() requires a sequence (passing a set-like node view is an
    # error on recent Python versions), so materialise the nodes first. The
    # sample size is clamped so graphs smaller than 'subgraph_size' do not
    # raise ValueError.
    nodes = list(G.nodes)
    sample_size = min(subgraph_size, len(nodes))
    SG = G.subgraph(random.sample(nodes, sample_size)).copy()

    # Preprocess the graph
    SG, ids = pp.prep_graph(SG, relabel=True, del_self_loops=True)

    # Store the preprocessed subgraph
    pp.save_graph(SG, output_path + "prep_graph.edgelist", delimiter=",")

    # Alternatively, train/test splits can be computed one at a time
    train_E, test_E = stt.split_train_test(SG, train_frac=0.51, seed=99)
    print(train_E)

    # Compute set of false edges
    train_E_false, test_E_false = stt.generate_false_edges_owa(
        SG, train_E=train_E, test_E=test_E,
        num_fe_train=None, num_fe_test=None, seed=99)
def preprocess(setup, i):
    """
    Graph preprocessing routine.

    Loads the i-th network described by `setup`, preprocesses it and, when
    `setup.prep_nw_name` is set, writes the result next to the i-th output
    path.

    Parameters
    ----------
    setup : object
        Configuration holder. The attributes read here are `verbose`,
        `inpaths`, `separators`, `comments`, `directed`, `relabel`,
        `del_selfloops`, `prep_nw_name`, `outpaths`, `delimiter` and
        `write_stats`.
    i : int
        Index of the network inside the per-network lists of `setup`.

    Returns
    -------
    The preprocessed graph.
    """
    if setup.verbose:
        print('Preprocesing graph...')

    # Read the raw network from disk
    graph = pp.load_graph(
        setup.inpaths[i],
        delimiter=setup.separators[i],
        comments=setup.comments[i],
        directed=setup.directed[i],
    )

    # Clean it up; the node id mapping returned alongside is not needed here
    graph, _ = pp.prep_graph(
        graph,
        relabel=setup.relabel,
        del_self_loops=setup.del_selfloops,
    )

    # Nothing to persist when no output name was configured
    if setup.prep_nw_name is None:
        return graph

    # Persist the preprocessed network
    pp.save_graph(
        graph,
        output_path=setup.outpaths[i] + setup.prep_nw_name,
        delimiter=setup.delimiter,
        write_stats=setup.write_stats,
    )
    return graph
def prep_fb(inpath):
    """
    Preprocess facebook wall post graph.

    Parameters
    ----------
    inpath : str
        Path of the tab-separated edge list ('#' marks comment lines).

    Returns
    -------
    The reversed, preprocessed directed graph.
    """
    # The FB graph is stored as (destination, origin), so every edge is
    # flipped right after loading.
    flipped = pp.load_graph(inpath, delimiter='\t', comments='#', directed=True).reverse()

    # Preprocess and hand back only the graph; the id mapping is not needed
    prepped, _ = pp.prep_graph(flipped, relabel=True, del_self_loops=False)
    return prepped
def test():
    """
    Round-trip smoke test for the preprocessing helpers.

    Loads ``./data/network.edgelist``, prints its stats, saves and reloads
    it, prints the stats again, preprocesses the reloaded graph, stores the
    result and finally calls `pp.get_redges_false` on it.
    """
    # Locations and shared output decoration
    dataset_path = "./data/"
    output_path = "./data/"
    test_name = "network.edgelist"
    rule = "-----------------------------------------"
    saved_file = output_path + "orig_graph.edgelist"

    # Load the original graph and report on it
    original = pp.load_graph(dataset_path + test_name, delimiter=',', comments='#', directed=True)
    print("")
    print("Original graph stats:")
    print(rule)
    pp.get_stats(original)

    # Write it out and read it back in
    pp.save_graph(original, saved_file, delimiter=",")
    reloaded = pp.load_graph(saved_file, delimiter=",", comments='#', directed=True)

    # Stats of the reloaded copy, for comparison with the ones printed above
    print("Has the same stats after being loaded?:")
    print(rule)
    pp.get_stats(reloaded)

    # Preprocess the reloaded graph, then report on and store the result
    prepped, ids = pp.prep_graph(reloaded, del_self_loops=False, relabel=True)
    print("Preprocessed graph stats (restricted to main cc):")
    print(rule)
    pp.get_stats(prepped)
    pp.save_graph(prepped, output_path + "prep_graph.edgelist", delimiter=",")

    # Show the first entries of the node id mapping
    print("Sample of 10 (oldNodeID, newNodeID):")
    print(rule)
    print(ids[0:10])

    pp.get_redges_false(prepped, output_path + "redges_false.csv")
def preprocess(inpath, outpath, delimiter, directed):
    """
    Graph preprocessing routine.

    Parameters
    ----------
    inpath : str
        Path of the edge list to load ('#' marks comment lines).
    outpath : str
        Prefix to which "prep_graph.edgelist" is appended to build the
        output file name.
    delimiter : str
        Field delimiter of the input file.
    directed : bool
        Forwarded to `pp.load_graph` as its `directed` argument.

    Returns
    -------
    The preprocessed graph.
    """
    print('Preprocessing graph...')

    # Read the raw network
    graph = pp.load_graph(inpath, delimiter=delimiter, comments='#', directed=directed)

    # Relabel nodes and drop self loops; the id mapping is not used
    graph, _ = pp.prep_graph(graph, relabel=True, del_self_loops=True)

    # Persist the result as a comma-separated edge list
    target = outpath + "prep_graph.edgelist"
    pp.save_graph(graph, output_path=target, delimiter=',', write_stats=True)

    return graph
# --------------- # Preprocess data # --------------- # Load the data as a directed graph G = pp.load_graph(dataset_path, delimiter=",", comments='#', directed=directed) # Get some graph statistics pp.get_stats(G) # Or store them to a file pp.get_stats(G, output_path + "stats.txt") # Preprocess the graph SG, ids = pp.prep_graph(G, relabel=True, del_self_loops=True) # Get non-edges so that the reversed edge exists in the graph if directed: redges = pp.get_redges_false(SG, output_path=output_path + "redges.csv") # Store the graph to a file pp.save_graph(SG, output_path=output_path + "network_prep.edgelist", delimiter=',', write_stats=True) # ---------------- # Split train test # ----------------
#!/usr/bin/env python # -*- coding: utf-8 -*- # Author: Mara Alexandru Cristian # Contact: [email protected] # Date: 18/12/2018 # This simple example is the one presented in the README.md file. from evalne.evaluation import evaluator from evalne.preprocessing import preprocess as pp # Load and preprocess the network G = pp.load_graph('../evalne/tests/data/network.edgelist') G, _ = pp.prep_graph(G) # Create an evaluator and generate train/test edge split nee = evaluator.Evaluator() _ = nee.traintest_split.compute_splits(G) # Set the baselines methods = ['random_prediction', 'common_neighbours', 'jaccard_coefficient'] # Evaluate baselines nee.evaluate_baseline(methods=methods) try: # Check if OpenNE is installed import openne # Set embedding methods from OpenNE methods = ['node2vec', 'deepwalk', 'GraRep']