def setUp(self):
    """Build a CBOW model from the small PPI training graph and a scratch dir.

    Sets ``self.data_dir``, ``self.reverse_worddictionary``, ``self.model``
    and ``self.temp_dir_loc``. The model is only constructed here; ``train()``
    is not called.
    """
    # locate the sample data next to this test module
    current_directory = os.path.dirname(__file__)
    self.data_dir = os.path.join(current_directory, 'data')
    pos_train = os.path.abspath(
        os.path.join(self.data_dir, 'ppismall', 'pos_train_edges'))

    # read data into graph
    training_graph = CSFGraph(pos_train)
    worddictionary = training_graph.get_node_to_index_map()
    self.reverse_worddictionary = training_graph.get_index_to_node_map()

    # generate random walks
    n2v_graph = N2vGraph(training_graph, 1, 1)
    walks = n2v_graph.simulate_walks(5, 10)

    # build the embedding model
    self.model = ContinuousBagOfWordsWord2Vec(
        walks,
        worddictionary=worddictionary,
        reverse_worddictionary=self.reverse_worddictionary,
        num_epochs=2)

    # Create the temporary output directory. exist_ok=True keeps setUp from
    # raising FileExistsError when an earlier run left the directory behind.
    self.temp_dir_loc = os.path.abspath(os.path.join(self.data_dir, 'temp'))
    os.makedirs(self.temp_dir_loc, exist_ok=True)
def setUp(self):
    """Load the small PPI graph, simulate walks and train a CBOW model.

    Sets ``self.number_of_nodes_in_training``, ``self.n2v_graph``,
    ``self.walk_length``, ``self.num_walks``, ``self.walks`` and
    ``self.cbow`` (already trained via ``train()``).
    """
    here = os.path.dirname(__file__)
    edge_path = os.path.abspath(
        os.path.join(here, 'data/ppismall/pos_train_edges'))
    graph = CSFGraph(edge_path)

    # lookup tables the model constructor needs
    node_to_index = graph.get_node_to_index_map()
    index_to_node = graph.get_index_to_node_map()

    # node2vec graph with p = q = 1
    self.number_of_nodes_in_training = graph.node_count()
    self.n2v_graph = N2vGraph(csf_graph=graph, p=1, q=1)

    # random walks
    self.walk_length = 10
    self.num_walks = 5
    self.walks = self.n2v_graph.simulate_walks(
        num_walks=self.num_walks,
        walk_length=self.walk_length)

    # build and train the CBOW model
    self.cbow = ContinuousBagOfWordsWord2Vec(
        self.walks,
        worddictionary=node_to_index,
        reverse_worddictionary=index_to_node,
        num_epochs=2)
    self.cbow.train()
def setUp(self):
    """Load ``data/unweighted_small_graph.txt`` into ``self.g``.

    Returns the string form of the graph.
    NOTE(review): unittest does not use setUp's return value -- confirm
    whether the ``return`` is intentional.
    """
    test_dir = os.path.dirname(__file__)
    graph_path = os.path.join(test_dir, 'data', 'unweighted_small_graph.txt')
    self.g = CSFGraph(graph_path)
    return str(self.g)
def setUp(self):
    """Load the small test graph (edge file plus node file) into ``self.graph``."""
    fixtures = os.path.join(os.path.dirname(__file__), 'data')
    self.graph = CSFGraph(
        edge_file=os.path.join(fixtures, 'small_graph_edges.tsv'),
        node_file=os.path.join(fixtures, 'small_graph_nodes.tsv'))
def setUp(self):
    """Load the small heterogeneous graph and cache two node indices.

    Sets ``self.graph``, ``self.nodes`` (result of ``graph.nodes()``) and the
    indices of nodes ``'g1'`` and ``'d1'`` as returned by the private
    ``__get_index`` helper.
    """
    fixtures = os.path.join(os.path.dirname(__file__), 'data')
    edges_path = os.path.join(fixtures, 'small_het_graph_edges.tsv')
    nodes_path = os.path.join(fixtures, 'small_het_graph_nodes.tsv')

    self.graph = CSFGraph(edge_file=edges_path, node_file=nodes_path)
    self.nodes = self.graph.nodes()
    self.g1index = self.__get_index('g1')
    self.d1index = self.__get_index('d1')
def karate_test(pos_train_file, pos_valid_file, pos_test_file,
                neg_train_file, neg_valid_file, neg_test_file,
                embed_graph, p, q, walk_length, num_walks, num_epochs,
                classifier, edge_embed_method, skipValidation, output):
    """Embed the positive training graph with SkipGram, then run link prediction.

    :param pos_train_file: edge file loaded as the positive training graph
    :param pos_valid_file: edge file loaded as the positive validation graph
    :param pos_test_file: edge file loaded as the positive test graph
    :param neg_train_file: edge file loaded as the negative training graph
    :param neg_valid_file: edge file loaded as the negative validation graph
    :param neg_test_file: edge file loaded as the negative test graph
    :param embed_graph: passed to ``write_embeddings`` and then to
        ``LinkPrediction`` (presumably the embeddings file path -- confirm)
    :param p: forwarded to ``N2vGraph``
    :param q: forwarded to ``N2vGraph``
    :param walk_length: second argument to ``simulate_walks``
    :param num_walks: first argument to ``simulate_walks``
    :param num_epochs: forwarded to ``SkipGramWord2Vec``
    :param classifier: forwarded to ``LinkPrediction``
    :param edge_embed_method: forwarded to ``LinkPrediction``
    :param skipValidation: forwarded to ``LinkPrediction``
    :param output: forwarded to ``LinkPrediction``
    """
    # Load the six edge lists, in the same order as the parameters.
    pos_train = CSFGraph(pos_train_file)
    pos_valid = CSFGraph(pos_valid_file)
    pos_test = CSFGraph(pos_test_file)
    neg_train = CSFGraph(neg_train_file)
    neg_valid = CSFGraph(neg_valid_file)
    neg_test = CSFGraph(neg_test_file)

    # Node embedding: random walks over the positive training graph feed a
    # SkipGram word2vec model trained for ``num_epochs`` epochs.
    walker = embiggen.random_walk_generator.N2vGraph(pos_train, p, q)
    walks = walker.simulate_walks(num_walks, walk_length)
    node_to_index = pos_train.get_node_to_index_map()
    index_to_node = pos_train.get_index_to_node_map()
    skipgram = SkipGramWord2Vec(
        walks,
        worddictionary=node_to_index,
        reverse_worddictionary=index_to_node,
        num_epochs=num_epochs)
    skipgram.train()
    write_embeddings(embed_graph, skipgram.embedding, index_to_node)

    # Link prediction on the pos/neg train/valid/test sets with the
    # requested classifier.
    predictor = LinkPrediction(
        pos_train, pos_valid, pos_test,
        neg_train, neg_valid, neg_test,
        embed_graph, edge_embed_method, classifier, skipValidation, output)
    predictor.prepare_edge_and_node_labels()
    predictor.predict_links()
    predictor.output_classifier_results()
def setUp(self):
    """Record fixture file paths and load the canonical edge file into ``self.g``."""
    fixtures = os.path.join(os.path.dirname(__file__), 'data')

    def fixture(file_name):
        # Full path of a file inside the test data directory.
        return os.path.join(fixtures, file_name)

    # files for canonical test graph
    self.edge_file = fixture('small_graph_edges.tsv')
    self.node_file = fixture('small_graph_nodes.tsv')

    # legacy and non-standard test files
    self.legacy_edge_file = fixture('small_graph_LEGACY.txt')
    self.tsv_no_subject = fixture('small_graph_edges_NO_SUBJECT.tsv')
    self.tsv_no_object = fixture('small_graph_edges_NO_OBJECT.tsv')
    self.node_file_missing_nodes = fixture('small_graph_nodes_MISSING_NODES.tsv')

    self.g = CSFGraph(edge_file=self.edge_file)
    # String conversion is invoked and the result discarded
    # (presumably to exercise __str__ -- confirm).
    str(self.g)
def setUp(self) -> None:
    """Load the six ppismall train/validation/test graphs and set the embeddings path."""
    # NOTE: this is a relative path, so it is resolved against the current
    # working directory when the tests run.
    self.file_dir = 'tests/data/ppismall_with_validation/'

    def load(file_name):
        # Read one edge file from the fixture directory into a CSFGraph.
        return CSFGraph(os.path.join(self.file_dir, file_name))

    self.pos_train_graph = load('pos_train_edges_max_comp_graph')
    self.pos_valid_graph = load('pos_validation_edges_max_comp_graph')
    self.pos_test_graph = load('pos_test_edges_max_comp_graph')
    self.neg_train_graph = load('neg_train_edges_max_comp_graph')
    self.neg_valid_graph = load('neg_validation_edges_max_comp_graph')
    self.neg_test_graph = load('neg_test_edges_max_comp_graph')

    self.test_embeddings = os.path.join(self.file_dir, 'test.embeddings')
def read_graphs():
    """
    Reads the pos_train, pos_valid, pos_test, neg_train, neg_valid and
    neg_test edge files named on the module-level ``args`` object with
    CSFGraph, and logs how long the loading took.

    :return: pos_train, pos_valid, pos_test, neg_train, neg_valid and
        neg_test graphs in CSFGraph format (as a 6-tuple, in that order)
    """
    started_at = time.time()

    positives = (
        CSFGraph(args.pos_train),
        CSFGraph(args.pos_valid),
        CSFGraph(args.pos_test),
    )
    negatives = (
        CSFGraph(args.neg_train),
        CSFGraph(args.neg_valid),
        CSFGraph(args.neg_test),
    )

    finished_at = time.time()
    logging.info("reading input edge lists files: {} seconds".format(
        finished_at - started_at))

    return positives + negatives
def test_nodetype2count_dictionary(self):
    """Check ``nodetype2count_dictionary`` with and without a node file."""
    typed_graph = CSFGraph(edge_file=self.edge_file, node_file=self.node_file)

    # graph from setUp
    untyped_counts = self.g.nodetype2count_dictionary
    self.assertIsInstance(untyped_counts, dict)
    self.assertEqual(untyped_counts['biolink:NamedThing'], 11)

    # graph built with the node file
    typed_counts = typed_graph.nodetype2count_dictionary
    self.assertEqual(typed_counts['biolink:Disease'], 3)
def test_count_edges_legacy_edge_file(self):
    """A graph built from the legacy edge file reports 6 edges."""
    legacy_graph = CSFGraph(edge_file=self.legacy_edge_file)
    edge_total = legacy_graph.edge_count()
    self.assertEqual(6, edge_total)
def test_csfgraph_constructor_accepts_node_file(self):
    """Constructing with both an edge file and a node file must not raise."""
    # No assertion: the test passes as long as the constructor returns.
    graph = CSFGraph(
        edge_file=self.edge_file,
        node_file=self.node_file,
    )
def test_csfgraph_accepts_edge_file(self):
    """Constructing with only an edge file must not raise."""
    # No assertion: the test passes as long as the constructor returns.
    graph = CSFGraph(
        edge_file=self.edge_file,
    )
def test_csfgraph_checks_for_object_column(self):
    """Loading ``self.tsv_no_object`` raises ``CSFGraphNoObjectColumnError``."""
    # file doesn't have object col
    self.assertRaises(
        CSFGraphNoObjectColumnError,
        CSFGraph,
        edge_file=self.tsv_no_object)
def test_csfgraph_populates_nodetype_to_index_map(self):
    """``nodetype_to_index_map['biolink:Disease']`` is ``[0, 1, 2]`` for the test graph."""
    typed_graph = CSFGraph(edge_file=self.edge_file, node_file=self.node_file)
    disease_indices = typed_graph.nodetype_to_index_map['biolink:Disease']
    self.assertEqual(disease_indices, [0, 1, 2])
def test_csfgraph_populates_index_to_nodetype_map(self):
    """``index_to_nodetype_map`` has 11 entries and index 0 is a Disease."""
    typed_graph = CSFGraph(edge_file=self.edge_file, node_file=self.node_file)
    type_by_index = typed_graph.index_to_nodetype_map
    self.assertEqual(11, len(type_by_index))
    self.assertEqual(type_by_index[0], 'biolink:Disease')
def setUp(self):
    """Load ``data/small_graph.txt`` into ``self.g``."""
    test_dir = os.path.dirname(__file__)
    graph_path = os.path.join(test_dir, 'data', 'small_graph.txt')
    self.g = CSFGraph(graph_path)
    # String conversion is invoked and the result discarded
    # (presumably to exercise __str__ -- confirm).
    str(self.g)
def test_csfgraph_tolerates_missing_node_info(self):
    """With the missing-nodes node file, index 2 gets the graph's default node type."""
    sparse_graph = CSFGraph(
        edge_file=self.edge_file,
        node_file=self.node_file_missing_nodes)
    assigned_type = sparse_graph.index_to_nodetype_map[2]
    self.assertEqual(assigned_type, sparse_graph.default_node_type)
def test_csfgraph_requires_arg(self):
    """Calling ``CSFGraph()`` with no arguments raises, and the message says 'missing'."""
    with self.assertRaises(Exception) as context:
        CSFGraph()  # missing edge arg

    # Check the message text outside the ``with`` block, where the captured
    # exception is available. The previous form,
    # ``assertTrue(str('missing' in context.exception))``, could never fail:
    # ``str(<bool>)`` is always a non-empty (truthy) string.
    self.assertIn('missing', str(context.exception))