def detect_hubs(self):
    """Collect the unusually high-degree nodes of ``self.G`` into ``self.hubs``.

    A node is treated as a hub when its degree is strictly greater than the
    mean degree plus one standard deviation (``np.std`` with its default
    settings), both computed over every node of the graph.

    An empty graph has no hubs: ``self.hubs`` is set to ``[]`` and the
    statistics are reported as ``0.0`` instead of being computed on an
    empty sequence.
    """
    # Materialise the (node, degree) pairs once; they are traversed twice.
    node_degrees = list(self.G.degree())

    if node_degrees:
        degrees = [deg for _, deg in node_degrees]
        avg_degree = np.mean(degrees)
        std_degree = np.std(degrees)
    else:
        # Nothing to average over; avoid indexing into an empty zip result.
        avg_degree = std_degree = 0.0

    threshold = avg_degree + std_degree
    self.hubs = [node for node, deg in node_degrees if deg > threshold]

    debug('Hubs: ', self.hubs, ", avg degree: ", avg_degree, ", std degree: ", std_degree)
def run(opts: Options,
        method: Node2Vec,
        title=None,
        cluster2color=['green', 'red', 'blue', 'orange', 'purple', 'cyan'],
        num_clusters=6,
        zero_indexed=False,
        tsne_perplexity=12,
        draw=["graph", "tsne"],
        save=False,
        save_path=None):
    """Run the embedding pipeline, then optionally visualise and export it.

    Args:
        opts: Options forwarded unchanged to ``main``.
        method: Embedding method forwarded to ``main``; its ``model``
            attribute is read afterwards.
        title: Title passed to the drawing helpers and to the export.
            Required when ``save`` is truthy.
        cluster2color: Colours handed to ``cluster.cluster_colors``.
        num_clusters: Cluster count handed to ``cluster.cluster_colors``.
        zero_indexed: Forwarded to ``utils.color_by_index`` and
            ``utils.save_gefx``.
        tsne_perplexity: Perplexity passed to ``tsne.model``.
        draw: Collection of plot names; ``"graph"`` and ``"tsne"`` are
            recognised. A falsy value disables drawing entirely.
        save: When truthy, export the coloured graph via
            ``utils.save_gefx``.
        save_path: Destination passed to ``utils.save_gefx``. Required
            when ``save`` is truthy.

    Returns:
        The classification results returned by ``main``.

    Raises:
        AssertionError: If ``save`` is truthy and ``title`` or
            ``save_path`` is missing.
    """
    # NOTE: the list defaults are shared between calls; this function never
    # mutates them itself, and they are kept as-is for interface stability.

    # Validate the save arguments *before* the expensive pipeline so a bad
    # call fails immediately instead of after training and drawing. Explicit
    # raises (rather than ``assert`` statements) stay active under ``-O``;
    # AssertionError and the messages are kept for existing callers.
    if save:
        if not title:
            raise AssertionError("title must be set if save=True")
        if not save_path:
            raise AssertionError("save_path must be set if save=True")

    G, clf_results = main(opts, method)
    model = method.model

    # Colours are needed by both the drawing and the export step.
    if draw or save:
        debug("Generating colors...")
        colors = cluster.cluster_colors(graph=G,
                                        model=model,
                                        cluster2color=cluster2color,
                                        num_clusters=num_clusters)
        colors_idx = utils.color_by_index(colors, list(model.wv.vocab),
                                          zero_indexed)

    if draw:
        debug("Drawing...")
        if "graph" in draw:
            debug("Drawing graph...")
            layout = nx.drawing.layout.spring_layout(G)
            drawing.graph(graph=G,
                          model=model,
                          colors=colors,
                          layout=layout,
                          title=title)

        if "tsne" in draw:
            debug("Drawing TSNE...")
            tsne_model = tsne.model(model, perplexity=tsne_perplexity)
            drawing.tsne(tsne_model, colors_idx, title=title)

    if save:
        utils.save_gefx(G, colors, title, save_path, zero_indexed)

    return clf_results
    def preprocess_transition_probs(self):
        '''
        Preprocessing of transition probabilities for guiding the random walks.

        For every node, the weights of the edges to its (sorted) neighbors are
        normalized to sum to one and passed to ``self.alias_setup``; the
        results are stored in ``self.alias_nodes`` keyed by node.

        The edges are split into chunks via ``self.split_chunks`` and each
        chunk is handled by ``self.preprocess_edge_chunk`` in a
        multiprocessing pool. Every chunk result is a dict; they are merged
        into ``self.alias_edges``.
        '''
        G = self.G

        debug("Preprocessing nodes...")
        alias_nodes = {}
        for node in tqdm(G.nodes(), disable=(not DEBUG)):
            # get all weights of node's neighbors
            unnormalized_probs = [
                G[node][nbr]['weight'] for nbr in sorted(G.neighbors(node))
            ]
            # normalize weights with sum
            norm_const = sum(unnormalized_probs)
            normalized_probs = [
                float(u_prob) / norm_const for u_prob in unnormalized_probs
            ]
            alias_nodes[node] = self.alias_setup(normalized_probs)

        debug("Preprocessing edges...")
        alias_edges = {}

        # Split edges up in chunks.
        # os.cpu_count() may return None when the count cannot be determined;
        # fall back to a single worker instead of failing on ``None ** 2``.
        workers = os.cpu_count() or 1
        edge_divisor = workers**2
        # "+ 1" keeps the chunk size at least 1 even for very small graphs.
        chunk_size = int(G.size() / edge_divisor) + 1
        chunks = list(self.split_chunks(G.edges(), chunk_size))

        # Creating worker pool & running
        with mp.Pool(processes=workers) as pool:
            debug(
                f"Workers {workers}, chunks {len(chunks)} of size {chunk_size}"
            )

            result = list(
                tqdm(pool.imap(self.preprocess_edge_chunk, chunks),
                     total=len(chunks),
                     disable=(not DEBUG)))

            # Combining results into one
            for d in result:
                alias_edges.update(d)

            # Drop the per-chunk dicts before waiting for the workers to exit.
            del result
            pool.close()
            pool.join()

        self.alias_nodes = alias_nodes
        self.alias_edges = alias_edges
    def run(self):
        """Learn the embedding model from random walks.

        Preprocesses the walker's transition probabilities, simulates the
        walks configured in ``self.opts`` and trains a ``Word2Vec`` model
        (``sg=1``) on them. The trained model is stored in ``self.model``.
        """
        debug("Preprocessing transition probs...")
        self.walker.preprocess_transition_probs()

        debug("Simulating walks...")
        opts = self.opts
        walks = self.walker.simulate_walks(opts.num_walks, opts.walk_length)

        debug("Learning model...")
        # word2vec works on "words", so every node id in a walk becomes a str.
        sentences = [[str(node) for node in walk] for walk in walks]
        progress = tqdm(sentences, disable=(not DEBUG))
        self.model = Word2Vec(
            progress,
            workers=self.opts.workers,
            size=self.opts.dim,
            min_count=self.opts.min_count,
            window=self.opts.window,
            sg=1,
        )
 def __init__(self, opts: MethodOpts, h=1):
     """Log the given options and keep ``opts`` and ``h`` on the instance."""
     debug("Node2Vec opts: ", opts.__dict__)
     self.opts, self.h = opts, h
Beispiel #6
0
def main(opts: Options, model: node2vec.Node2Vec):
    """Read the graph, obtain embeddings and optionally evaluate a classifier.

    Args:
        opts: Run options. Read here: ``graph_format``, ``input``,
            ``weighted``, ``output``, ``label_file``, ``training_ratio``
            and ``clf_iterations``.
        model: Embedding method. Either loaded from ``opts.output`` (when
            that file exists) or trained on the graph and, if
            ``opts.output`` is set, saved there.

    Returns:
        A ``(G, clf_results)`` tuple; ``clf_results`` is ``None`` when no
        ``opts.label_file`` is given.

    Raises:
        ValueError: If ``opts.graph_format`` is neither ``'adjlist'`` nor
            ``'edgelist'``.
    """
    debug("Options: ", opts.__dict__)
    # Fixed seeds for both RNGs.
    random.seed(32)
    np.random.seed(32)

    # Reading graph
    debug("Reading graph...")
    if opts.graph_format == 'adjlist':
        G = graph.read_adjlist(filename=opts.input)
    elif opts.graph_format == 'edgelist':
        G = graph.read_edgelist(filename=opts.input, weighted=opts.weighted)
    else:
        # Fail with a clear message instead of an AttributeError on None below.
        raise ValueError(
            f"Unsupported graph format: {opts.graph_format!r} "
            "(expected 'adjlist' or 'edgelist')")

    debug(f"Graph: {len(G.nodes())} nodes, {G.size()} edges")

    # Loading/learning embeddings
    if opts.output and os.path.isfile(opts.output):
        debug(f"Model {opts.output} exists, loading from file...")
        model.load_embeddings(opts.output)
    else:
        debug("Training model...")
        model.init_walker(G)
        model.run()

        if opts.output:
            debug("Saving embeddings...")
            model.save_embeddings(opts.output)

    # Classification
    clf_results = None
    if opts.label_file:
        X, Y = classify.read_node_label(opts.label_file)
        debug("Training classifier using {:.2f}% nodes...".format(
            opts.training_ratio * 100))
        clf = classify.Classifier(vectors=model.get_vectors(),
                                  clf=LogisticRegression(solver='liblinear'))
        clf_results = clf.split_train_evaluate(X,
                                               Y,
                                               opts.training_ratio,
                                               iter=opts.clf_iterations)

    return G, clf_results
 def __init__(self, opts: MethodOpts, p=1.0, q=1.0):
     """Log the given options and keep ``opts``, ``p`` and ``q`` on the instance."""
     debug("Node2Vec opts: ", opts.__dict__)
     self.opts = opts
     self.p, self.q = p, q
Beispiel #8
0
 def __init__(self, opts: MethodOpts, jump_prob=0):
     """Log the given options and keep ``opts`` and ``jump_prob`` on the instance."""
     debug("Node2Vec opts: ", opts.__dict__)
     self.opts, self.jump_prob = opts, jump_prob