def load_dms(removed_nodes: List[int],
             save_info: sl.MemoryAccess,
             num_iterations: int,
             use_specific_iter: int = None):
    """Lazily yield one distance matrix per requested iteration.

    When ``num_iterations == 1`` exactly one matrix is produced, for the
    iteration named by ``use_specific_iter`` (which must not be ``None``).
    Otherwise matrices for iterations ``0 .. num_iterations - 1`` are
    produced in ascending order and ``use_specific_iter`` is ignored.

    For every iteration the matrix is loaded through ``save_info`` when
    ``save_info.has_distance_matrix`` reports one; if not, the embedding is
    loaded and handed to ``cdm.calc_distances`` and that result is yielded.

    This is a generator, so nothing (including the assertion) runs until
    the caller starts iterating.
    """
    if num_iterations == 1:
        assert (use_specific_iter is not None)
        wanted_iterations = (use_specific_iter,)
    else:
        wanted_iterations = range(num_iterations)

    for iteration in wanted_iterations:
        if save_info.has_distance_matrix(removed_nodes=removed_nodes,
                                         iteration=iteration):
            # A matrix is already available for this iteration: reuse it.
            yield save_info.load_distance_matrix(removed_nodes=removed_nodes,
                                                 iteration=iteration)
            continue

        # No stored matrix: derive it from the stored embedding.
        embedding = save_info.load_embedding(removed_nodes=removed_nodes,
                                             iteration=iteration)
        yield cdm.calc_distances(model=embedding,
                                 save_info=save_info,
                                 removed_nodes=removed_nodes,
                                 iteration=iteration)
Exemple #2
0
def __calc_dm(graph: gc.Graph, removed_nodes: [int],
              save_info: sl.MemoryAccess, i: int) -> (int, pd.DataFrame):
    """Return ``(i, distance_matrix)`` for ``removed_nodes`` at iteration ``i``.

    If ``save_info`` already has a distance matrix for this combination it
    is loaded and returned. Otherwise the embedding is loaded, the matrix is
    computed with ``cd.calc_distances(..., save=False)``, explicitly stored
    through ``save_info.save_distance_matrix`` and then returned.

    The iteration index is echoed back as the first tuple element.
    """
    selector = dict(removed_nodes=removed_nodes, iteration=i)

    if save_info.has_distance_matrix(**selector):
        return i, save_info.load_distance_matrix(**selector)

    # NOTE(review): the original author remarks that load_embedding raises
    # when no embedding exists for this selector -- not verifiable from here.
    embedding = save_info.load_embedding(**selector)

    # save=False is passed to calc_distances; the matrix is stored by the
    # explicit save_distance_matrix call below instead.
    distances = cd.calc_distances(model=embedding,
                                  graph=graph,
                                  save_info=save_info,
                                  save=False,
                                  **selector)
    save_info.save_distance_matrix(dm=distances, **selector)

    return i, distances
Exemple #3
0
def train_node2vec_embedding(edge_list_path: str,
                             graph: Graph,
                             save_info: sl.MemoryAccess,
                             removed_nodes: [int],
                             iteration: int,
                             epochs: int,
                             dim: int,
                             walk_length: int,
                             num_of_walks_per_node: int,
                             window_size: int,
                             alpha: float,
                             return_embedding: bool = False,
                             check_for_existing: bool = True):
    """Train (or reuse) a node2vec embedding and store it via ``save_info``.

    The external ``node2vec`` binary located in ``config.NODE2VEC_SNAP_DIR``
    is run with ``-ow`` to write random walks to a temporary file; a gensim
    skip-gram ``Word2Vec`` model is then trained on those walks and saved
    with ``save_info.save_embedding``.

    Args:
        edge_list_path: Edge list file passed to node2vec as ``-i:``. A
            relative path is resolved by node2vec relative to
            ``config.NODE2VEC_SNAP_DIR`` (its working directory).
        graph: Not used by this function; kept for interface compatibility.
        save_info: Storage accessor used to name, load and save embeddings.
        removed_nodes: Nodes identifying the embedding variant.
        iteration: Iteration identifying the embedding variant.
        epochs: Passed to node2vec as ``-e:`` and to Word2Vec as ``iter``.
        dim: Passed to node2vec as ``-d:`` and to Word2Vec as ``size``.
        walk_length: Passed to node2vec as ``-l:``.
        num_of_walks_per_node: Passed to node2vec as ``-r:``.
        window_size: Passed to node2vec as ``-k:`` and to Word2Vec as
            ``window``.
        alpha: Passed to Word2Vec as ``alpha``.
        return_embedding: When true, return the (loaded or trained) model.
        check_for_existing: When true, skip training if
            ``<embedding name>.emb`` already exists.

    Returns:
        The embedding when ``return_embedding`` is true, otherwise ``None``.

    Raises:
        subprocess.CalledProcessError: node2vec exited with a non-zero
            status.
        FileNotFoundError: the node2vec executable could not be found.
    """
    target = save_info.get_embedding_name(removed_nodes=removed_nodes,
                                          iteration=iteration)

    if check_for_existing and os.path.exists(target + ".emb"):
        if return_embedding:
            return save_info.load_embedding(removed_nodes=removed_nodes,
                                            iteration=iteration)
        return None

    # Temporary file that receives the random walks written by node2vec.
    target_path = os.path.abspath(target + "_path.emb")
    snap_dir = os.path.abspath(config.NODE2VEC_SNAP_DIR)

    # An argument list (no shell) keeps paths containing spaces, quotes or
    # shell metacharacters intact and rules out command injection.
    command = [
        os.path.join(snap_dir, "node2vec"),
        "-i:" + edge_list_path,
        "-o:" + target_path,
        "-e:" + str(epochs),
        "-d:" + str(dim),
        "-l:" + str(walk_length),
        "-r:" + str(num_of_walks_per_node),
        "-k:" + str(window_size),
        "-ow",  # output random walks only
    ]

    class Walks:
        """Re-iterable view over the walks file, one token list per line.

        A class rather than a generator so that the file can be read from
        the start each time the consumer iterates over it.
        """

        def __init__(self, file):
            self.file = file

        def __iter__(self):
            with open(self.file, "r") as f:
                for line in f:
                    yield line.strip("\n").split(" ")

    try:
        # cwd= runs node2vec inside its own directory without changing the
        # working directory of this process; check=True surfaces a failed
        # run immediately instead of as a missing-file error later on.
        subprocess.run(command, cwd=snap_dir, check=True)

        # NOTE(review): ``size``/``iter`` are the keyword names the original
        # code used; confirm they match the installed gensim version.
        emb_result = gensim.models.Word2Vec(Walks(target_path),
                                            size=dim,
                                            iter=epochs,
                                            window=window_size,
                                            min_count=1,
                                            sg=1,
                                            workers=config.NUM_CORES,
                                            alpha=alpha)
    finally:
        # Always drop the temporary walks file, even if a step above failed.
        if os.path.exists(target_path):
            os.remove(target_path)

    save_info.save_embedding(removed_nodes, iteration, emb_result)

    if return_embedding:
        return emb_result
    return None