def do_link_prediction(
    NETWORK_FILE,
    TEST_EDGE_FILE,
    NEGATIVE_EDGE_FILE,
    DIRECTED,
    OUT_FILE,
    DIM,
    NUMBER_OF_CORES,
):
    """Run the link-prediction task on a node2vec embedding of one network.

    The graph is read from ``NETWORK_FILE``, walks are simulated and an
    embedding is learned with ``node2vec.Node2Vec``; the embedding is then
    handed to ``LinkPredictionTask`` together with the edges read from
    ``TEST_EDGE_FILE`` and ``NEGATIVE_EDGE_FILE``. The task writes its own
    result to ``OUT_FILE``.

    Args:
        NETWORK_FILE: Path of the network passed to ``read_graph``.
        TEST_EDGE_FILE: Path of the test edges passed to ``read_edge_file``.
        NEGATIVE_EDGE_FILE: Path of the negative edges passed to
            ``read_edge_file``.
        DIRECTED: Forwarded as ``directed`` to the reader and to node2vec.
        OUT_FILE: Destination given to ``LinkPredictionTask.write_result``.
        DIM: Forwarded as ``dimensions`` to node2vec.
        NUMBER_OF_CORES: Forwarded as ``workers`` to node2vec.
    """
    graph = read_graph(NETWORK_FILE, directed=DIRECTED)

    embedder = node2vec.Node2Vec(
        graph,
        directed=DIRECTED,
        dimensions=DIM,
        workers=NUMBER_OF_CORES,
    )
    embedder.simulate_walks()
    embedding = embedder.learn_embedding()

    positive_edges = read_edge_file(TEST_EDGE_FILE)
    sampled_negative_edges = read_edge_file(NEGATIVE_EDGE_FILE)

    # The task label is the network path and the dimension, tab-separated.
    task_label = "\t".join((NETWORK_FILE, str(DIM)))
    task = LinkPredictionTask(
        positive_edges,
        sampled_negative_edges,
        embedding,
        name=task_label,
        is_persona_emb=False,
    )
    task.do_link_prediction()
    task.write_result(OUT_FILE)
def do_link_prediction(
    NETWORK_FILE,
    TEST_EDGE_FILE,
    NEGATIVE_EDGE_FILE,
    DIRECTED,
    OUT_FILE,
    LAMBDA,
    DIM,
    NUMBER_OF_CORES,
):
    """Run the link-prediction task on a Persona2Vec embedding of one network.

    The graph is read from ``NETWORK_FILE`` and given to ``Persona2Vec``; the
    model's ``embedding`` and ``node_to_persona`` attributes are handed to
    ``LinkPredictionTask`` together with the edges read from
    ``TEST_EDGE_FILE`` and ``NEGATIVE_EDGE_FILE``. The task writes its own
    result to ``OUT_FILE``.

    Args:
        NETWORK_FILE: Path of the network passed to ``read_graph``.
        TEST_EDGE_FILE: Path of the test edges passed to ``read_edge_file``.
        NEGATIVE_EDGE_FILE: Path of the negative edges passed to
            ``read_edge_file``.
        DIRECTED: Forwarded as ``directed`` to the reader and to the model.
        OUT_FILE: Destination given to ``LinkPredictionTask.write_result``.
        LAMBDA: Forwarded as ``lambd`` to ``Persona2Vec``.
        DIM: Forwarded as ``dimensions`` to ``Persona2Vec``.
        NUMBER_OF_CORES: Forwarded as ``workers`` to ``Persona2Vec``.
    """
    graph = read_graph(NETWORK_FILE, directed=DIRECTED)

    persona_model = Persona2Vec(
        graph,
        lambd=LAMBDA,
        directed=DIRECTED,
        dimensions=DIM,
        workers=NUMBER_OF_CORES,
    )
    embedding = persona_model.embedding

    positive_edges = read_edge_file(TEST_EDGE_FILE)
    sampled_negative_edges = read_edge_file(NEGATIVE_EDGE_FILE)

    # The task label is the network path, lambda and dimension, tab-separated.
    task_label = "\t".join((NETWORK_FILE, str(LAMBDA), str(DIM)))
    task = LinkPredictionTask(
        positive_edges,
        sampled_negative_edges,
        embedding,
        name=task_label,
        is_persona_emb=True,
        node_to_persona=persona_model.node_to_persona,
    )
    task.do_link_prediction()
    task.write_result(OUT_FILE)
def main():
    """Build a Persona2Vec model from the command line and save its outputs.

    Parses the command-line arguments, prints them with ``tab_printer``,
    reads the input graph, constructs ``Persona2Vec`` with the
    ``"connected_component"`` clustering method, and then saves the persona
    network, both persona/node mappings, the base embedding and the persona
    embedding to the paths given in the arguments.
    """
    args = parse_args()
    tab_printer(args)

    graph = read_graph(args.input, args.weighted, args.directed)

    # Collect the model options first so the constructor call stays short.
    model_options = {
        "lambd": args.lambd,
        "clustering_method": "connected_component",
        "directed": args.directed,
        "num_walks_base": args.num_walks_base,
        "walk_length_base": args.walk_length_base,
        "window_size_base": args.window_size_base,
        "num_walks_persona": args.num_walks_persona,
        "walk_length_persona": args.walk_length_persona,
        "window_size_persona": args.window_size_persona,
        "p": args.p,
        "q": args.q,
        "dimensions": args.dimensions,
        "epoch_base": args.epoch_base,
        "epoch_persona": args.epoch_persona,
        "workers": args.workers,
    }
    persona_model = Persona2Vec(graph, **model_options)

    # Persist the persona graph and the mappings between personas and nodes.
    persona_model.save_persona_network(args.persona_network)
    persona_model.save_persona_to_node_mapping(args.persona_to_node)
    persona_model.save_node_to_persona_mapping(args.node_to_persona)

    # Persist both embeddings.
    persona_model.save_base_embedding(args.base_emb)
    persona_model.save_persona_embedding(args.persona_emb)
def train_test_set_split(IN_FILE, INDEX, DIRECTED):
    """Split a network into train/test sets and save the result.

    Reads the graph from ``IN_FILE``, runs ``NetworkTrainTestSplitter`` on it
    (train/test split followed by negative-edge generation) and saves the
    result under ``<IN_FILE without extension>_<INDEX>``.

    Args:
        IN_FILE: Path of the network passed to ``read_graph``.
        INDEX: Suffix appended to the output prefix after an underscore.
        DIRECTED: Forwarded as ``directed`` to the reader and the splitter.
    """
    import os.path

    G = read_graph(IN_FILE, directed=DIRECTED)

    # Strip the extension from the file-name component only. Splitting the
    # whole path on "." would truncate paths such as "./data/net.txt" or
    # "../run.1/net.txt" at the first dot in the directory part.
    file_name = os.path.basename(IN_FILE)
    directory_part = IN_FILE[: len(IN_FILE) - len(file_name)]
    # Keep the original rule of cutting the file name at its first dot, so
    # paths without dots in their directories map to the same output prefix.
    stem = file_name.split(".")[0]
    OUTPUT_PATH = "{}{}_{}".format(directory_part, stem, INDEX)

    splitter = NetworkTrainTestSplitter(G, directed=DIRECTED)
    splitter.train_test_split()
    splitter.generate_negative_edges()
    splitter.save_splitted_result(OUTPUT_PATH)
def do_link_prediction(
    NETWORK_FILE,
    TEST_EDGE_FILE,
    NEGATIVE_EDGE_FILE,
    DIRECTED,
    OUT_FILE,
    DIM,
    NUMBER_OF_CORES,
):
    """Score a node2vec embedding on link prediction and append the ROC AUC.

    A node2vec embedding is learned for the graph in ``NETWORK_FILE``. Each
    test edge and each negative edge is represented by the element-wise
    (Hadamard) product of its two endpoint vectors; a logistic regression is
    fitted on those vectors (label 1 for test edges, 0 for negative edges)
    and scored on the same vectors. One line, ``<NETWORK_FILE>\\t<DIM>\\t<auc>``,
    is appended to ``OUT_FILE``.

    Args:
        NETWORK_FILE: Path of the network passed to ``read_graph``.
        TEST_EDGE_FILE: Path of the positive edges passed to
            ``read_edge_file``.
        NEGATIVE_EDGE_FILE: Path of the negative edges passed to
            ``read_edge_file``.
        DIRECTED: Forwarded as ``directed`` to the reader and to node2vec.
        OUT_FILE: Result file, opened in append mode.
        DIM: Forwarded as ``dimensions`` to node2vec.
        NUMBER_OF_CORES: Forwarded as ``workers`` to node2vec.
    """
    G = read_graph(NETWORK_FILE, directed=DIRECTED)
    model = node2vec.Node2Vec(
        G, directed=DIRECTED, dimensions=DIM, workers=NUMBER_OF_CORES
    )
    model.simulate_walks()
    emb = model.learn_embedding()

    test_edges = read_edge_file(TEST_EDGE_FILE)
    negative_edges = read_edge_file(NEGATIVE_EDGE_FILE)

    # Edge features: element-wise product of the two endpoint embeddings.
    test_hadamard_vectors = [
        np.multiply(emb[src], emb[tag]) for src, tag in test_edges
    ]
    negative_hadamard_vectors = [
        np.multiply(emb[src], emb[tag]) for src, tag in negative_edges
    ]

    xs = np.concatenate([test_hadamard_vectors, negative_hadamard_vectors])
    ys = np.concatenate(
        [
            np.ones(len(test_hadamard_vectors)),
            np.zeros(len(negative_hadamard_vectors)),
        ]
    )

    clf = LogisticRegression(random_state=0).fit(xs, ys)
    # ROC AUC needs non-thresholded scores; hard 0/1 predictions collapse the
    # curve to a single operating point and do not give the real AUC.
    scores = clf.decision_function(xs)
    ROC_AUC_value = roc_auc_score(ys, scores)

    name = "\t".join([NETWORK_FILE, str(DIM)])
    # The context manager closes the file even if the write fails.
    with open(OUT_FILE, "a") as f:
        f.write("{}\t{}\n".format(name, str(ROC_AUC_value)))
def convert_network_files_for_splitter(NETWORK_FILE, CONVERTED_NETWORK_FILE, DIRECTED):
    """Relabel a network's nodes as integers and write it as an edge list.

    The graph read from ``NETWORK_FILE`` is passed through
    ``nx.convert_node_labels_to_integers`` with
    ``label_attribute="old_label"`` and the relabeled graph is written to
    ``CONVERTED_NETWORK_FILE`` with ``nx.write_edgelist``.

    Args:
        NETWORK_FILE: Path of the network passed to ``read_graph``.
        CONVERTED_NETWORK_FILE: Path the relabeled edge list is written to.
        DIRECTED: Forwarded as ``directed`` to ``read_graph``.

    Returns:
        The relabeled graph.
    """
    relabeled_graph = nx.convert_node_labels_to_integers(
        read_graph(NETWORK_FILE, directed=DIRECTED),
        label_attribute="old_label",
    )
    nx.write_edgelist(relabeled_graph, CONVERTED_NETWORK_FILE)
    return relabeled_graph