Exemple #1
0
def karate_test(pos_train_file, pos_valid_file, pos_test_file, neg_train_file,
                neg_valid_file, neg_test_file, embed_graph, p, q, walk_length,
                num_walks, num_epochs, classifier, edge_embed_method,
                skipValidation, output):
    """Embed the positive training graph, then run link prediction.

    The six edge files are loaded into ``CSFGraph`` objects. Random walks are
    simulated on the positive training graph, a ``SkipGramWord2Vec`` model is
    trained on those walks, and the resulting embeddings are handed to
    ``write_embeddings``. A ``LinkPrediction`` object is then built from the
    six graphs and asked to prepare labels, predict links and output the
    classifier results.

    Args:
        pos_train_file, pos_valid_file, pos_test_file: inputs handed to
            ``CSFGraph`` for the positive train/validation/test graphs.
        neg_train_file, neg_valid_file, neg_test_file: inputs handed to
            ``CSFGraph`` for the negative train/validation/test graphs.
        embed_graph: passed to ``write_embeddings`` and ``LinkPrediction``
            (presumably the path of the embedding file -- confirm).
        p, q: forwarded unchanged to ``N2vGraph``.
        walk_length, num_walks: forwarded to ``simulate_walks``.
        num_epochs: number of epochs given to ``SkipGramWord2Vec``.
        classifier, edge_embed_method, skipValidation, output: forwarded
            unchanged to ``LinkPrediction``.
    """
    # Load the graphs in the same order as the parameters are declared.
    edge_files = (pos_train_file, pos_valid_file, pos_test_file,
                  neg_train_file, neg_valid_file, neg_test_file)
    (pos_train_graph, pos_valid_graph, pos_test_graph,
     neg_train_graph, neg_valid_graph, neg_test_graph) = [
        CSFGraph(edge_file) for edge_file in edge_files]

    # Node embedding: random walks on the positive training graph feed a
    # SkipGram word2vec model trained for `num_epochs` epochs.
    walker = embiggen.random_walk_generator.N2vGraph(pos_train_graph, p, q)
    random_walks = walker.simulate_walks(num_walks, walk_length)
    node_to_index = pos_train_graph.get_node_to_index_map()
    index_to_node = pos_train_graph.get_index_to_node_map()
    skipgram = SkipGramWord2Vec(
        random_walks,
        worddictionary=node_to_index,
        reverse_worddictionary=index_to_node,
        num_epochs=num_epochs,
    )
    skipgram.train()
    write_embeddings(embed_graph, skipgram.embedding, index_to_node)

    # Link prediction over the pos/neg train/valid/test graphs with the
    # caller-selected classifier and edge-embedding method.
    link_predictor = LinkPrediction(
        pos_train_graph, pos_valid_graph, pos_test_graph,
        neg_train_graph, neg_valid_graph, neg_test_graph,
        embed_graph, edge_embed_method, classifier,
        skipValidation, output,
    )
    link_predictor.prepare_edge_and_node_labels()
    link_predictor.predict_links()
    link_predictor.output_classifier_results()
Exemple #2
0
    def setUp(self):
        """Build and train a small CBOW model on walks over the ppismall graph.

        Stores on ``self``: ``number_of_nodes_in_training``, ``n2v_graph``,
        ``walk_length``, ``num_walks``, ``walks`` and the trained ``cbow``
        model.
        """
        # The training edge file lives in a data folder next to this module.
        edge_file = os.path.abspath(
            os.path.join(os.path.dirname(__file__),
                         'data/ppismall/pos_train_edges'))
        ppi_graph = CSFGraph(edge_file)

        # Node <-> index lookups required by the word2vec model.
        node_to_index = ppi_graph.get_node_to_index_map()
        index_to_node = ppi_graph.get_index_to_node_map()

        # Wrap the graph for walk generation, with p = q = 1.
        self.number_of_nodes_in_training = ppi_graph.node_count()
        self.n2v_graph = N2vGraph(csf_graph=ppi_graph, p=1, q=1)

        # Simulate the random walks (per the original note, the result is a
        # list of lists of ints).
        self.walk_length = 10
        self.num_walks = 5
        self.walks = self.n2v_graph.simulate_walks(
            num_walks=self.num_walks,
            walk_length=self.walk_length,
        )

        # Build the CBOW model on the walks and train it for two epochs.
        self.cbow = ContinuousBagOfWordsWord2Vec(
            self.walks,
            worddictionary=node_to_index,
            reverse_worddictionary=index_to_node,
            num_epochs=2,
        )
        self.cbow.train()
Exemple #3
0
    def setUp(self):
        """Build a CBOW model over the ppismall graph and a temp output dir.

        Stores on ``self``: ``data_dir``, ``reverse_worddictionary``,
        ``model`` (constructed but not trained here) and ``temp_dir_loc``.
        """
        # Locate the sample data next to this test module.
        current_directory = os.path.dirname(__file__)
        self.data_dir = os.path.join(current_directory, 'data')
        pos_train = os.path.abspath(
            os.path.join(self.data_dir, 'ppismall', 'pos_train_edges'))

        # Read the edge file into a graph and fetch the node <-> index maps.
        training_graph = CSFGraph(pos_train)
        worddictionary = training_graph.get_node_to_index_map()
        self.reverse_worddictionary = training_graph.get_index_to_node_map()

        # Generate random walks: p = q = 1, then simulate_walks(5, 10).
        n2v_graph = N2vGraph(training_graph, 1, 1)
        walks = n2v_graph.simulate_walks(5, 10)

        # Construct the embedding model; train() is not called in this
        # fixture.
        self.model = ContinuousBagOfWordsWord2Vec(
            walks,
            worddictionary=worddictionary,
            reverse_worddictionary=self.reverse_worddictionary,
            num_epochs=2)

        # Create the temporary directory that tests write to. exist_ok=True
        # keeps a directory left behind by an interrupted run from failing
        # every subsequent setUp with FileExistsError.
        # NOTE(review): cleanup is presumably done in tearDown -- confirm it
        # also removes stale files from earlier runs.
        self.temp_dir_loc = os.path.abspath(
            os.path.join(self.data_dir, 'temp'))
        os.makedirs(self.temp_dir_loc, exist_ok=True)