Beispiel #1
0
def disease_gene_embeddings(training_file, output_file, p, q, gamma, use_gamma,
                            walk_length, num_walks, dimensions, window_size,
                            workers, num_steps, display_step):
    """
    Learn node embeddings for the graph in ``training_file`` and write them
    to ``output_file``.

    The graph is loaded with ``CSFGraph``, walks are simulated with
    ``xn2v.hetnode2vec.N2vGraph``, the walks are converted from node labels
    to integer indices, and a ``SkipGramWord2Vec`` model is trained on them.

    :param training_file: path handed to ``CSFGraph``.
    :param output_file: path handed to ``model.write_embeddings``.
    :param p, q, gamma, use_gamma: forwarded unchanged to ``N2vGraph``.
    :param walk_length, num_walks: forwarded to ``simulate_walks``.
    :param dimensions, window_size, workers: accepted for interface
        compatibility; not referenced anywhere in this function body.
    :param num_steps: forwarded to the ``SkipGramWord2Vec`` constructor.
    :param display_step: forwarded to ``model.train``.
    """
    logging.basicConfig(level=logging.INFO)
    print("Reading training file %s" % training_file)
    graph = CSFGraph(training_file)
    print(graph)
    graph.print_edge_type_distribution()

    walker = xn2v.hetnode2vec.N2vGraph(graph, p, q, gamma, use_gamma)
    node_walks = walker.simulate_walks(num_walks, walk_length)
    node_to_index = graph.get_node_to_index_map()
    index_to_node = graph.get_index_to_node_map()

    # Translate every walk from node labels to their integer indices.
    index_walks = [[node_to_index[label] for label in walk]
                   for walk in node_walks]

    skipgram = SkipGramWord2Vec(index_walks,
                                worddictionary=node_to_index,
                                reverse_worddictionary=index_to_node,
                                num_steps=num_steps)
    skipgram.train(display_step=display_step)
    skipgram.write_embeddings(output_file)
Beispiel #2
0
def karate_test(training_file, test_file, output_file, p, q, gamma, use_gamma,
                walk_length, num_walks):
    """
    Train embeddings on a training graph and run link prediction against a
    test graph.

    The training graph is loaded with ``CSFGraph``, walks are simulated with
    ``xn2v.hetnode2vec.N2vGraph``, a ``SkipGramWord2Vec`` model is trained for
    1000 steps, the embeddings are written to disk, and ``LinkPrediction`` is
    run using the written embedding file.

    :param training_file: path handed to ``CSFGraph`` for the training graph.
    :param test_file: path handed to ``CSFGraph`` for the test graph.
    :param output_file: path the embeddings are written to. When empty or
        ``None`` the historical default ``'karate.embedded'`` is used.
    :param p, q, gamma, use_gamma: forwarded unchanged to ``N2vGraph``.
    :param walk_length, num_walks: forwarded to ``simulate_walks``.
    """
    training_graph = CSFGraph(training_file)
    hetgraph = xn2v.hetnode2vec.N2vGraph(training_graph, p, q, gamma,
                                         use_gamma)

    walks = hetgraph.simulate_walks(num_walks, walk_length)
    worddictionary = training_graph.get_node_to_index_map()
    reverse_worddictionary = training_graph.get_index_to_node_map()

    # Translate every walk from node labels to their integer indices.
    numberwalks = [[worddictionary[node] for node in walk] for walk in walks]

    model = SkipGramWord2Vec(numberwalks,
                             worddictionary=worddictionary,
                             reverse_worddictionary=reverse_worddictionary,
                             num_steps=1000)
    model.train(display_step=100)

    # Honour the caller-supplied path; it used to be ignored in favour of a
    # hard-coded file name. Keep that name as the fallback so callers that
    # pass nothing meaningful still get the previous behaviour.
    output_filename = output_file or 'karate.embedded'
    model.write_embeddings(output_filename)

    test_graph = CSFGraph(test_file)
    parameters = {
        'edge_embedding_method': "hadamard",
        'portion_false_edges': 1
    }

    # TODO: modify this part to work with new link prediction
    lp = LinkPrediction(
        training_graph, test_graph, output_filename, params=parameters
    )

    lp.predict_links()
    lp.output_Logistic_Reg_results()
    def test_embedding(self):
        """
        Run the walk / SkipGram pipeline on the bundled ``karate.train``
        graph and write the resulting embeddings to ``disease.embedded``
        in the same ``data`` directory next to this file.
        """
        data_dir = os.path.join(os.path.dirname(__file__), 'data')
        train_path = os.path.join(data_dir, 'karate.train')
        embedding_path = os.path.join(data_dir, 'disease.embedded')

        graph = CSFGraph(train_path)
        graph.print_edge_type_distribution()

        # All three N2vGraph weights are set to 1 and gamma use is disabled.
        p = q = gamma = 1
        use_gamma = False
        walker = xn2v.hetnode2vec.N2vGraph(graph, p, q, gamma, use_gamma)

        walk_length, num_walks = 80, 25
        node_walks = walker.simulate_walks(num_walks, walk_length)

        node_to_index = graph.get_node_to_index_map()
        index_to_node = graph.get_index_to_node_map()

        # Translate every walk from node labels to their integer indices.
        index_walks = [[node_to_index[label] for label in walk]
                       for walk in node_walks]

        skipgram = SkipGramWord2Vec(index_walks,
                                    worddictionary=node_to_index,
                                    reverse_worddictionary=index_to_node,
                                    num_steps=100)
        skipgram.train(display_step=10)
        skipgram.write_embeddings(embedding_path)
# Settings forwarded to N2vGraph and simulate_walks below.
p = q = gamma = 1
useGamma = False
walk_length = 80
num_walks = 100
# The next three values are defined here but not referenced in this snippet.
dimensions = 128
window_size = 10
workers = 8

# Build the walk generator over the positive training graph and sample walks.
hetgraph = xn2v.hetnode2vec.N2vGraph(pos_train_graph, p, q, gamma, useGamma)
walks = hetgraph.simulate_walks(num_walks, walk_length)

worddictionary = pos_train_graph.get_node_to_index_map()
reverse_worddictionary = pos_train_graph.get_index_to_node_map()

# Translate every walk from node labels to their integer indices.
walks_integer_nodes = [[worddictionary[node] for node in walk]
                       for walk in walks]

# Train a SkipGram model on the integer-encoded walks.
model = SkipGramWord2Vec(walks_integer_nodes,
                         worddictionary=worddictionary,
                         reverse_worddictionary=reverse_worddictionary,
                         num_steps=100)
model.train(display_step=2)
# Settings forwarded to N2vGraph and simulate_walks below.
p = q = gamma = 1
useGamma = False
walk_length = 80
num_walks = 100

# Build the walk generator over graph ``g`` and sample walks from it.
graph = xn2v.hetnode2vec.N2vGraph(g, p, q, gamma, useGamma)
walks = graph.simulate_walks(num_walks, walk_length)

# The next three values are defined here but not referenced in this snippet.
dimensions = 128
window_size = 10
workers = 8

worddictionary = g.get_node_to_index_map()
reverse_worddictionary = g.get_index_to_node_map()

# Translate every walk from node labels to their integer indices.
walks_integer_nodes = [[worddictionary[node] for node in walk]
                       for walk in walks]

# Train a SkipGram model on the integer-encoded walks.
model = SkipGramWord2Vec(walks_integer_nodes,
                         worddictionary=worddictionary,
                         reverse_worddictionary=reverse_worddictionary,
                         num_steps=100)
model.train(display_step=2)