def karate_test(pos_train_file, pos_valid_file, pos_test_file,
                neg_train_file, neg_valid_file, neg_test_file,
                embed_graph, p, q, walk_length, num_walks, num_epochs,
                classifier, edge_embed_method, skipValidation, output):
    """Embed the positive training graph, then run link prediction on it.

    The function loads six edge files into ``CSFGraph`` objects, generates
    node2vec random walks over the positive training graph, trains a
    SkipGram word2vec model on those walks, writes the resulting embeddings
    via ``write_embeddings`` and finally drives a ``LinkPrediction`` run.

    Args:
        pos_train_file: Edge file for the positive training graph.
        pos_valid_file: Edge file for the positive validation graph.
        pos_test_file: Edge file for the positive test graph.
        neg_train_file: Edge file for the negative training graph.
        neg_valid_file: Edge file for the negative validation graph.
        neg_test_file: Edge file for the negative test graph.
        embed_graph: Handed to ``write_embeddings`` as the destination and
            to ``LinkPrediction`` (presumably the embedding file path --
            confirm against those helpers).
        p: Passed to ``N2vGraph`` as its second argument.
        q: Passed to ``N2vGraph`` as its third argument.
        walk_length: Second argument to ``simulate_walks``.
        num_walks: First argument to ``simulate_walks``.
        num_epochs: Number of epochs given to ``SkipGramWord2Vec``.
        classifier: Forwarded unchanged to ``LinkPrediction``.
        edge_embed_method: Forwarded unchanged to ``LinkPrediction``.
        skipValidation: Forwarded unchanged to ``LinkPrediction``.
        output: Forwarded unchanged to ``LinkPrediction``.
    """
    # Load the edge files in the fixed order pos(train, valid, test) then
    # neg(train, valid, test).
    edge_files = (
        pos_train_file, pos_valid_file, pos_test_file,
        neg_train_file, neg_valid_file, neg_test_file,
    )
    (pos_train_graph, pos_valid_graph, pos_test_graph,
     neg_train_graph, neg_valid_graph, neg_test_graph) = [
        CSFGraph(edge_file) for edge_file in edge_files
    ]

    # Node embedding: random walks over the positive training graph feed a
    # SkipGram word2vec model trained for ``num_epochs`` epochs.
    n2v = embiggen.random_walk_generator.N2vGraph(pos_train_graph, p, q)
    random_walks = n2v.simulate_walks(num_walks, walk_length)
    node_to_index = pos_train_graph.get_node_to_index_map()
    index_to_node = pos_train_graph.get_index_to_node_map()
    skipgram = SkipGramWord2Vec(
        random_walks,
        worddictionary=node_to_index,
        reverse_worddictionary=index_to_node,
        num_epochs=num_epochs,
    )
    skipgram.train()
    write_embeddings(embed_graph, skipgram.embedding, index_to_node)

    # Link prediction over the pos/neg train/valid/test graphs using the
    # classifier and edge-embedding method selected by the caller.
    link_predictor = LinkPrediction(
        pos_train_graph, pos_valid_graph, pos_test_graph,
        neg_train_graph, neg_valid_graph, neg_test_graph,
        embed_graph, edge_embed_method, classifier, skipValidation, output,
    )
    link_predictor.prepare_edge_and_node_labels()
    link_predictor.predict_links()
    link_predictor.output_classifier_results()
def setUp(self):
    """Prepare a trained CBOW model built from walks on the ppismall graph.

    Stores on ``self``: ``number_of_nodes_in_training``, ``n2v_graph``,
    ``walk_length``, ``num_walks``, ``walks`` and the trained ``cbow`` model.
    """
    # Resolve the training edge file relative to this test module.
    here = os.path.dirname(__file__)
    edges_path = os.path.abspath(
        os.path.join(here, 'data/ppismall/pos_train_edges'))
    csf_graph = CSFGraph(edges_path)

    # Node <-> index lookups required by the word2vec model.
    node_to_index = csf_graph.get_node_to_index_map()
    index_to_node = csf_graph.get_index_to_node_map()

    # node2vec graph with both p and q fixed to 1.
    self.number_of_nodes_in_training = csf_graph.node_count()
    self.n2v_graph = N2vGraph(csf_graph=csf_graph, p=1, q=1)

    # Random walks: 5 walks of length 10 are requested. The original note
    # states the result is a list of lists of ints.
    self.walk_length = 10
    self.num_walks = 5
    self.walks = self.n2v_graph.simulate_walks(
        num_walks=self.num_walks, walk_length=self.walk_length)

    # Build the continuous-bag-of-words model and train it for 2 epochs.
    self.cbow = ContinuousBagOfWordsWord2Vec(
        self.walks,
        worddictionary=node_to_index,
        reverse_worddictionary=index_to_node,
        num_epochs=2,
    )
    self.cbow.train()
def setUp(self):
    """Build a CBOW model fixture and a scratch directory for output files.

    Stores on ``self``: ``data_dir``, ``reverse_worddictionary``, ``model``
    (constructed but not trained in this method) and ``temp_dir_loc``.
    """
    # Locate the sample data relative to this test module.
    current_directory = os.path.dirname(__file__)
    self.data_dir = os.path.join(current_directory, 'data')
    pos_train = os.path.abspath(
        os.path.join(self.data_dir, 'ppismall', 'pos_train_edges'))

    # Read the training edges into a graph and fetch the index lookups.
    training_graph = CSFGraph(pos_train)
    worddictionary = training_graph.get_node_to_index_map()
    self.reverse_worddictionary = training_graph.get_index_to_node_map()

    # Generate random walks (arguments 5 and 10 are passed positionally to
    # simulate_walks; p and q are both 1).
    n2v_graph = N2vGraph(training_graph, 1, 1)
    walks = n2v_graph.simulate_walks(5, 10)

    # Construct the CBOW model; train() is not called here.
    self.model = ContinuousBagOfWordsWord2Vec(
        walks,
        worddictionary=worddictionary,
        reverse_worddictionary=self.reverse_worddictionary,
        num_epochs=2)

    # Create the temporary directory the tests write into. exist_ok=True
    # keeps setUp from failing with FileExistsError when a previous,
    # interrupted run left the directory behind.
    self.temp_dir_loc = os.path.abspath(os.path.join(self.data_dir, 'temp'))
    os.makedirs(self.temp_dir_loc, exist_ok=True)