def graphAnalyzer(graph, kmeans=False):
    """Rank, cluster and plot the nodes of a GML graph.

    The graph is read from ``graph``, cleaned with ``removeIsolatedNodes`` and
    its first remaining node is removed. A PageRank score is then computed, a
    ForceAtlas2 layout is built, and the node positions are clustered. For
    every cluster the node with the highest PageRank is printed, and the graph
    is drawn with one random colour per cluster and saved to
    ``graph_with_layout.png``.

    Args:
        graph: Path of the ``.gml`` graph file to load.
        kmeans: When true, cluster the layout positions with
            ``Kmeans.kmeans``; otherwise use ``dbscan.dbscan``.

    Returns:
        None. Results are printed and the figure is written to disk.
    """
    G = nx.read_gml(graph)
    G = removeIsolatedNodes(G)  # removing meaningless nodes
    # NOTE(review): the first node is dropped unconditionally -- presumably the
    # page the crawl started from; confirm with the code producing the graph.
    G.remove_node(list(G.nodes)[0])

    # ----------------------------------- PageRank Computation --------------------------------------

    # creating a PageRank object, then keeping only its result
    pr = PageRank.PageRank(G)
    pr.constructDispersionMatrix(G)
    pr = pr.getPageRank()

    # ----------------------------------- Clustering Computation --------------------------------------

    # constructing network layout
    forceatlas2 = fa2.ForceAtlas2(
        # Behavior alternatives
        outboundAttractionDistribution=False,  # Dissuade hubs
        linLogMode=False,  # NOT IMPLEMENTED
        adjustSizes=False,  # Prevent overlap (NOT IMPLEMENTED)
        edgeWeightInfluence=0,

        # Performance
        jitterTolerance=.01,  # Tolerance
        barnesHutOptimize=True,
        barnesHutTheta=1.2,
        multiThreaded=False,  # NOT IMPLEMENTED

        # Tuning
        scalingRatio=1,
        strongGravityMode=True,
        gravity=200,
        # Log
        verbose=True)

    pos = forceatlas2.forceatlas2_networkx_layout(G, pos=None, iterations=1000)

    if kmeans:
        # converting positions into a list of np.array
        pos_list = [np.array([elt[0], elt[1]]) for elt in pos.values()]

        # clustering the nodes according to the kmeans algorithm
        # (presumably 8 clusters, tolerance 0.01, 300 iterations -- TODO confirm
        # against the Kmeans module)
        clusters = Kmeans.kmeans(pos_list, 8, 0.01, 300)

    else:
        pos = {key: np.array([elt[0], elt[1]]) for key, elt in pos.items()}
        # changing position format to be able to use it in DBSCAN
        pos_transf = dbscan.transf(pos)
        clusters = dbscan.dbscan(pos_transf, 40, 20)  # clustering

    cluster_with_pr = associatingPageRankToNode(pr, clusters)

    # sorting each cluster according to page rank result (best node last)
    for key, value in cluster_with_pr.items():
        cluster_with_pr[key] = sorted(value,
                                      key=lambda item: (item[1], item[0]))

    # rendering the suggested pages and their page rank
    node_names = list(G.nodes())  # built once instead of once per cluster
    title_pattern = re.compile(r'titles=(.*?)\&')
    print("\nThe recommanded pages are the following :")
    for ranked_nodes in cluster_with_pr.values():
        try:
            best = ranked_nodes[-1]
            node_index, page_rank = best[0], best[1]
            node_name = node_names[node_index]
        except IndexError:
            # empty cluster, or an index with no matching node: nothing to show
            continue
        # getting the title of the Wikipedia page; a label without a
        # "titles=...&" fragment is shown as is instead of crashing on None
        match = title_pattern.search(node_name)
        title = match.group(1) if match else node_name
        print("•", title, "- with a page rank of ", page_rank)

    # ----------------------------------- Graph Creation --------------------------------------

    # each node within a cluster have the same color; nodes that belong to no
    # cluster stay black
    colors = [
        "#" + "%06x" % random.randint(0, 0xFFFFFF)
        for _ in range(len(clusters) + 1)
    ]
    node_color = ['black'] * len(node_names)
    for key, value in clusters.items():
        for elt in value:
            node_color[elt] = colors[key]

    nx.draw(G.to_undirected(),
            pos,
            node_size=2,
            width=.05,
            edge_color='grey',
            node_color=node_color)
    plt.savefig("graph_with_layout.png")
# Example #2
# 0
def _compute_pos(
    adjacency_solid,
    layout=None,
    random_state=0,
    init_pos=None,
    adj_tree=None,
    root=0,
    layout_kwds=None,
):
    """Compute 2-D node positions for the graph described by ``adjacency_solid``.

    Args:
        adjacency_solid: Adjacency matrix of the graph to lay out; its
            ``shape[0]`` is used as the number of nodes.
        layout: Layout name. ``None`` means ``"fr"``. ``"fa"`` uses the
            ForceAtlas2 implementation of the ``fa2`` package (falling back to
            ``"fr"`` when it cannot be imported), ``"eq_tree"`` uses scanpy's
            ``hierarchy_pos`` on ``adj_tree``, and any other name is handed to
            igraph's ``Graph.layout``.
        random_state: Seed applied to both ``np.random`` and ``random``
            (this reseeds the global generators).
        init_pos: Optional initial coordinates; presumably an
            ``(n_nodes, 2)`` NumPy array -- it is copied, never modified.
        adj_tree: Adjacency matrix of the tree, used by ``"eq_tree"`` and by
            layouts whose name contains ``"rt"``.
        root: Root node (or list of root nodes) for the tree layouts.
        layout_kwds: Optional extra keyword arguments. For ``"fa"`` only
            ``"maxiter"`` / ``"iterations"`` are read; for seeded igraph
            layouts they are forwarded to ``Graph.layout``. The mapping passed
            in is not modified.

    Returns:
        A NumPy array with one position per node of the graph, in the
        iteration order of the networkx graph built from ``adjacency_solid``.

    Raises:
        ValueError: If ``"eq_tree"`` yields fewer positions than there are
            nodes, i.e. the tree is actually a forest.
    """
    import networkx as nx

    np.random.seed(random_state)
    random.seed(random_state)
    # Private copy: the default is None, and the igraph branch below adds a
    # "seed" entry that must not leak into the caller's mapping.
    layout_kwds = {} if layout_kwds is None else dict(layout_kwds)
    nx_g_solid = nx.Graph(adjacency_solid)
    if layout is None:
        layout = "fr"
    if layout == "fa":
        try:
            import fa2
        except ImportError:
            logg.warn(
                "Package 'fa2' is not installed, falling back to layout 'fr'. "
                "To use the faster and better ForceAtlas2 layout, "
                "install package 'fa2' (`pip install fa2`).")
            layout = "fr"
    if layout == "fa":
        if init_pos is None:
            init_coords = np.random.random((adjacency_solid.shape[0], 2))
        else:
            init_coords = init_pos.copy()
        forceatlas2 = fa2.ForceAtlas2(
            outboundAttractionDistribution=False,
            linLogMode=False,
            adjustSizes=False,
            edgeWeightInfluence=1.0,
            jitterTolerance=1.0,
            barnesHutOptimize=True,
            barnesHutTheta=1.2,
            multiThreaded=False,
            scalingRatio=2.0,
            strongGravityMode=False,
            gravity=1.0,
            verbose=False,
        )
        # "maxiter" takes precedence over "iterations"; 500 when neither is set
        iterations = layout_kwds.get("maxiter",
                                     layout_kwds.get("iterations", 500))
        pos_list = forceatlas2.forceatlas2(adjacency_solid,
                                           pos=init_coords,
                                           iterations=iterations)
        # the y coordinate is negated, as for the igraph layouts below
        pos = {n: [p[0], -p[1]] for n, p in enumerate(pos_list)}
    elif layout == "eq_tree":
        nx_g_tree = nx.Graph(adj_tree)
        from scanpy.plotting._utils import hierarchy_pos

        pos = hierarchy_pos(nx_g_tree, root)
        if len(pos) < adjacency_solid.shape[0]:
            raise ValueError("This is a forest and not a single tree. "
                             "Try another `layout`, e.g., {'fr'}.")
    else:
        # igraph layouts
        g = get_igraph_from_adjacency(adjacency_solid)
        if "rt" in layout:
            # tree layouts are computed on the tree graph, not the solid one
            g_tree = get_igraph_from_adjacency(adj_tree)
            root = root if isinstance(root, list) else [root]
            pos_list = g_tree.layout(layout, root=root).coords
        elif layout == "circle":
            pos_list = g.layout(layout).coords
        else:
            if init_pos is None:
                init_coords = np.random.random(
                    (adjacency_solid.shape[0], 2)).tolist()
            else:
                init_pos = init_pos.copy()
                init_pos[:, 1] *= -1  # to be checked
                init_coords = init_pos.tolist()
            layout_kwds["seed"] = init_coords
            try:
                pos_list = g.layout(layout, weights="weight",
                                    **layout_kwds).coords
            except AttributeError:  # hack for empty graphs...
                pos_list = g.layout(layout, **layout_kwds).coords
        pos = {n: [p[0], -p[1]] for n, p in enumerate(pos_list)}
    if len(pos) == 1:
        # a single node is placed at a fixed position
        pos[0] = (0.5, 0.5)
    pos_array = np.array([pos[n] for n in nx_g_solid])
    return pos_array
def fa2_pos(graph, gravity=40, scalingRatio=4, **kwargs):
    """Return a quick ForceAtlas2 layout of ``graph`` to use as seed positions.

    ``gravity``, ``scalingRatio`` and any extra keyword arguments are passed
    to ``fa2.ForceAtlas2``; the layout is then run for 100 iterations through
    ``forceatlas2_networkx_layout`` and its result is returned unchanged.
    """
    layout_options = dict(kwargs, gravity=gravity, scalingRatio=scalingRatio)
    engine = fa2.ForceAtlas2(**layout_options)
    return engine.forceatlas2_networkx_layout(graph, iterations=100)