コード例 #1
0
ファイル: test_asyn_fluid.py プロジェクト: ynux/networkx
def test_five_clique_ring():
    """Check that asyn_fluidc separates five 4-cliques joined in a ring.

    Edges are inserted clique by clique and then link by link, in a fixed
    order, because the expected partition is pinned to ``seed=9``.
    """
    suffixes = 'abcd'
    clique_ids = '12345'
    ring = Graph()

    # c1 .. c5: connect every unordered pair of members inside each clique
    for cid in clique_ids:
        for pos, first in enumerate(suffixes):
            for second in suffixes[pos + 1:]:
                ring.add_edge(cid + first, cid + second)

    # connections: member 'a' of each clique to member 'c' of the next one
    for pos, cid in enumerate(clique_ids):
        successor = clique_ids[(pos + 1) % len(clique_ids)]
        ring.add_edge(cid + 'a', successor + 'c')

    # ground truth: one community per clique
    expected = {
        frozenset(cid + suffix for suffix in suffixes) for cid in clique_ids
    }

    found = {frozenset(group) for group in asyn_fluidc(ring, 5, seed=9)}
    assert found == expected
コード例 #2
0
def test_two_nodes():
    """Two nodes joined by one edge, asked for two communities, are split
    into two singleton communities."""
    pair = Graph()
    pair.add_edge("a", "b")

    # ground truth
    expected = {frozenset(["a"]), frozenset(["b"])}

    found = set(map(frozenset, asyn_fluidc(pair, 2)))
    assert found == expected
コード例 #3
0
def test_single_node():
    """A graph holding one isolated node yields that node as the only
    community when a single community is requested."""
    lonely = Graph()
    lonely.add_node("a")

    # ground truth
    expected = {frozenset(["a"])}

    found = {frozenset(members) for members in asyn_fluidc(lonely, 1)}
    assert found == expected
コード例 #4
0
ファイル: test_asyn_fluid.py プロジェクト: ynux/networkx
def test_two_nodes():
    """Requesting two communities on a single edge gives one community per
    endpoint."""
    endpoints = ('a', 'b')

    graph = Graph()
    graph.add_edge(*endpoints)

    # ground truth: each endpoint alone
    expected = {frozenset([name]) for name in endpoints}

    detected = asyn_fluidc(graph, 2)
    assert {frozenset(members) for members in detected} == expected
コード例 #5
0
def algorithm_asyn_fluidc(G, gt_communities_count):
    """
    Detect communities with the asynchronous Fluid Communities algorithm.

    Reference: Parés F., Garcia-Gasulla D. et al., "Fluid Communities: A
    Competitive and Highly Scalable Community Detection Algorithm".
    https://arxiv.org/pdf/1703.09307.pdf

    G is handed to ``asyn_fluidc`` unchanged, gt_communities_count is the
    number of communities requested, the iteration cap is fixed at 100 and
    no seed is set. Returns a list with one list of nodes per community.
    """
    detected = asyn_fluidc(G, gt_communities_count, max_iter=100, seed=None)
    return [list(members) for members in detected]
コード例 #6
0
def get_benchmark_amis(G,gt):
    """Score four community-detection baselines against ``gt`` using AMI.

    Louvain, Fluid Communities (two communities requested), greedy
    modularity (FastGreedy) and Infomap are run on ``G`` in that order, and
    each labelling is compared with ``gt`` through
    ``metrics.adjusted_mutual_info_score``.

    Nodes are used directly as array indices, so they are presumably the
    integers 0 .. number_of_nodes - 1 -- TODO confirm with callers.

    Returns a dict mapping 'Louvain', 'Fluid', 'FastGreedy' and 'Infomap'
    to the corresponding score.
    """
    # Louvain
    # NOTE(review): labels are taken in the partition dict's iteration
    # order, unlike the other methods which index by node id -- confirm
    # that this order lines up with ``gt``.
    louvain_labels = list(community.best_partition(G).values())
    louvain_score = metrics.adjusted_mutual_info_score(gt, louvain_labels)

    def labels_from_groups(groups):
        # One label per node: the position of the group containing it.
        labels = np.zeros((nx.number_of_nodes(G),))
        for label, members in enumerate(groups):
            for member in members:
                labels[member] = label
        return labels

    # Fluid communities
    fluid_groups = [set(group) for group in asyn_fluidc(G, 2)]
    fluid_score = metrics.adjusted_mutual_info_score(
        gt, labels_from_groups(fluid_groups))

    # FastGreedy
    greedy_groups = list(greedy_modularity_communities(G))
    greedy_score = metrics.adjusted_mutual_info_score(
        gt, labels_from_groups(greedy_groups))

    # Infomap
    infomap = Infomap()
    for node in G.nodes:
        infomap.add_node(node)
    for edge in G.edges:
        # every edge is registered in both directions
        source, target = edge[0], edge[1]
        infomap.add_link(source, target)
        infomap.add_link(target, source)
    # Run the Infomap search algorithm to find optimal modules
    infomap.run()
    module_labels = np.zeros((nx.number_of_nodes(G),))
    for tree_node in infomap.tree:
        if tree_node.is_leaf:
            module_labels[tree_node.node_id] = tree_node.module_id
    infomap_score = metrics.adjusted_mutual_info_score(gt, module_labels)

    return {
        'Louvain': louvain_score,
        'Fluid': fluid_score,
        'FastGreedy': greedy_score,
        'Infomap': infomap_score,
    }
コード例 #7
0
ファイル: topology.py プロジェクト: nasimrahaman/CityGraph
    def add_edges_between_centroids(self, edge_types, num_centroids, rng, attribute_name='distance'):
        """
        Link the central nodes of the graph's clusters.

        The graph is clustered with the Fluid Communities algorithm and the
        highest-degree node of every cluster becomes its centroid. A minimum
        spanning tree over the complete centroid graph, weighted by
        ``self.distance``, selects the centroid pairs to connect; one edge
        per entry of ``edge_types`` is then added for each selected pair.

        :param iter edge_types: Types of the edges to add. It is iterated
            once per selected centroid pair.
        :param int num_centroids: Number of centroids.
        :param rng: Random number generator.
        :type rng: :py:class:`RandomGenerator<city_graph.utils.RandomGenerator>`
        :param str attribute_name: Name of the edge attribute holding the
            distance between two centroids.
        """

        print("[Topology] Starting building edges between %s central nodes." % num_centroids)

        # Calculate clusters
        # TODO: k-means might be a better fit, for two reasons:
        #  * this algorithm assumes clusters of equal density, which does not
        #    necessarily apply to cities (some areas are more crowded)
        #  * the implementation needs the graph to be fully connected to
        #    begin with, which looks like a limitation
        communities = asyn_fluidc(self.graph, k=num_centroids, seed=rng.rand_int())

        # Extract centroids: the node with the highest degree in each cluster
        centroids = []
        for community in communities:
            members = list(community)
            member_degrees = [self.graph.degree[node] for node in members]
            centroids.append(members[int(np.argmax(member_degrees))])

        # Temporary complete graph over the centroids. Combinations are used
        # because the graph is undirected and self-edges are not wanted.
        centroid_graph = Graph()
        centroid_graph.add_nodes_from(centroids)
        for first, second in combinations(centroids, 2):
            separation = self.distance(first, second)
            centroid_graph.add_edge(first, second, **{attribute_name: separation})

        # Keep only the subgraph with the minimum sum of edge weights
        # TODO: to investigate why we do this here...
        tree = minimum_spanning_tree(centroid_graph, weight=attribute_name)

        # Build edges, reusing the distances already stored on the tree
        edges_before = self.num_of_edges
        for first, second in tree.edges:
            for edge_type in edge_types:
                # TODO: an exception might be raised here because we now
                # check that there are no outgoing/incoming edges. Revisit
                # once an actual triangular matrix is used.
                with suppress(RuntimeError):
                    self.add_edge(first, second, edge_type, **tree[first][second])

        # Inform that edges have been built
        print("[Topology] %i edges have been created" % (self.num_of_edges - edges_before))
コード例 #8
0
def apply(frequents_label, frequents_encodings, parameters=None):
    """
    Apply a clustering algorithm (asyn_fluidc) on the encodings

    A complete graph with one node per encoding is built. Every edge gets a
    weight combining the Euclidean norm of the difference between the two
    encodings and the Jaccard overlap of the two labels' whitespace-separated
    tokens.

    NOTE(review): asyn_fluidc is called without any weight argument, so the
    edge weights may not influence the result -- confirm.

    Parameters
    ---------------
    frequents_label
        Label of the sequences
    frequents_encodings
        Encodings of the sequences
    parameters
        Parameters of the algorithm:
            nc => numbers of clusters
            p1 => weight of the first term
            p2 => weight of the second term

    Returns
    ----------------
    communities
        Communities
    """
    settings = {} if parameters is None else parameters

    nc = settings.get(NC, DEFAULT_NC)
    p1 = settings.get(P1, DEFAULT_P1)
    p2 = settings.get(P2, DEFAULT_P2)

    count = len(frequents_encodings)

    graph = nx.Graph()
    graph.add_nodes_from(range(count))

    for first in range(count):
        for second in range(first + 1, count):
            # first term: norm of the difference between the encodings
            distance = np.linalg.norm(frequents_encodings[first] -
                                      frequents_encodings[second])
            # second term: shared tokens over all tokens of both labels
            tokens_first = set(frequents_label[first].split())
            tokens_second = set(frequents_label[second].split())
            overlap = len(tokens_first & tokens_second) / len(tokens_first | tokens_second)
            graph.add_edge(first, second, weight=p1 * distance + p2 * overlap)

    return list(asyn_fluidc(graph, nc))
コード例 #9
0
def test_two_clique_communities():
    """Two triangles bridged by the edge c-d are recovered as two
    communities (result pinned to ``seed=7``)."""
    edges = [
        ("a", "b"), ("a", "c"), ("b", "c"),  # c1
        ("c", "d"),  # connection
        ("d", "e"), ("d", "f"), ("f", "e"),  # c2
    ]

    graph = Graph()
    for source, target in edges:
        graph.add_edge(source, target)

    # ground truth: one community per triangle
    expected = {frozenset("abc"), frozenset("def")}

    found = {frozenset(group) for group in asyn_fluidc(graph, 2, seed=7)}
    assert found == expected
コード例 #10
0
def test_two_clique_communities():
    """Check that asyn_fluidc with ``seed=7`` splits two bridged triangles
    into one community per triangle."""
    first_clique = ('ab', 'ac', 'bc')
    bridge = ('cd',)
    second_clique = ('de', 'df', 'fe')

    graph = Graph()
    # each two-letter string names the two endpoints of one edge
    for endpoints in first_clique + bridge + second_clique:
        graph.add_edge(endpoints[0], endpoints[1])

    # ground truth
    expected = set()
    expected.add(frozenset(['a', 'b', 'c']))
    expected.add(frozenset(['d', 'e', 'f']))

    detected = asyn_fluidc(graph, 2, seed=7)
    assert set(map(frozenset, detected)) == expected
コード例 #11
0
    return mutual_info


###########################################################
###########################################################
# Method: Fluid communities
###########################################################
# Raw data
# Fluid communities is only attempted on a connected graph; otherwise both
# result tables record the string 'failed' for this method.
# NOTE(review): connectivity is tested on G itself while asyn_fluidc receives
# G.to_undirected() -- confirm G is undirected at this point.
if not nx.is_connected(G):
    #print('---Fluid community requires connected graph, skipping raw version---')
    scores['fluid-raw'] = 'failed'
    runtimes['fluid-raw'] = 'failed'
else:
    time_s = time.time()
    comp = asyn_fluidc(G.to_undirected(), k=num_partitions)
    list_nodes = [frozenset(c) for c in comp]
    # Label vector: est_idx[node] is the position of the community holding
    # that node, so nodes are used directly as array indices.
    est_idx = np.zeros((num_nodes, ))
    for i in range(len(list_nodes)):
        for idx in list_nodes[i]:
            est_idx[idx] = i
    # The measured runtime covers the detection and the label assignment,
    # but not the scoring below.
    runtime = time.time() - time_s
    mutual_info = metrics.adjusted_mutual_info_score(database['labels'],
                                                     est_idx)
    scores['fluid-raw'] = mutual_info
    runtimes['fluid-raw'] = runtime

# Noisy data
if not nx.is_connected(nG):
    print(
        '---Fluid community requires connected graph, skipping noisy version---'
コード例 #12
0
def test_five_clique_ring():
    """Five 4-cliques linked into a ring must come back as five communities.

    The edge insertion order is kept fixed (cliques first, ring links last)
    since the expected outcome is pinned to ``seed=9``.
    """
    letters = ["a", "b", "c", "d"]
    numbers = ["1", "2", "3", "4", "5"]
    cliques = [[number + letter for letter in letters] for number in numbers]

    test = Graph()

    def add_clique(members):
        # Join each member with every member listed after it.
        for position, member in enumerate(members):
            for later_member in members[position + 1:]:
                test.add_edge(member, later_member)

    # c1 .. c5
    for members in cliques:
        add_clique(members)

    # connections: "a" of one clique to "c" of the following one
    for current, following in zip(numbers, numbers[1:] + numbers[:1]):
        test.add_edge(current + "a", following + "c")

    # ground truth
    ground_truth = {frozenset(members) for members in cliques}

    result = set(map(frozenset, asyn_fluidc(test, 5, seed=9)))
    assert result == ground_truth
コード例 #13
0
 def fluid_community(self, k=2):
     """
     Detect communities in ``self.G`` with the Fluid Communities algorithm.

     The graph is converted with ``to_undirected()`` first; the communities
     produced by ``asyn_fluidc`` for ``k`` communities are returned as a list.
     """
     communities = asyn_fluid.asyn_fluidc(self.G.to_undirected(), k)
     return list(communities)
コード例 #14
0
def five_clique_ring():
    """Check asyn_fluidc on five 4-cliques arranged in a ring.

    Not auto-tested (not named test_...) due to cross-version seed issues;
    python3.4 in particular gives different results.
    """
    member_pairs = (('a', 'b'), ('a', 'c'), ('a', 'd'),
                    ('b', 'c'), ('b', 'd'), ('c', 'd'))

    # c1 .. c5: the six edges of each clique, clique after clique
    clique_edges = [
        (number + left, number + right)
        for number in '12345'
        for left, right in member_pairs
    ]

    # connections closing the ring
    ring_edges = [('1a', '2c'), ('2a', '3c'), ('3a', '4c'),
                  ('4a', '5c'), ('5a', '1c')]

    graph = Graph()
    for left, right in clique_edges + ring_edges:
        graph.add_edge(left, right)

    # ground truth: the four members of every clique form one community
    expected = set()
    for number in '12345':
        expected.add(frozenset([number + 'a', number + 'b',
                                number + 'c', number + 'd']))

    detected = asyn_fluidc(graph, 5, seed=9)
    found = {frozenset(members) for members in detected}
    assert found == expected
コード例 #15
0
        # baseline 2: FastGreedy, Clauset-Newman-Moore greedy modularity maximization
        time_s = time.time()
        list_nodes = list(greedy_modularity_communities(G.to_undirected()))
        est_idx = np.zeros((num_nodes, ))
        for i in range(len(list_nodes)):
            for idx in list_nodes[i]:
                est_idx[idx] = i
        mutual_info[1, pn,
                    tn] = metrics.adjusted_mutual_info_score(gt, est_idx)
        runtime[1, pn, tn] = time.time() - time_s
        print('-- {}: runtime={:.4f}sec, mutual information={:.4f}.'.format(
            methods[1], runtime[1, pn, tn], mutual_info[1, pn, tn]))

        # baseline 4: Fluid Communities algorithm.
        time_s = time.time()
        comp = asyn_fluidc(G.to_undirected(), k=est_number)
        list_nodes = [frozenset(c) for c in comp]
        est_idx = np.zeros((num_nodes, ))
        for i in range(len(list_nodes)):
            for idx in list_nodes[i]:
                est_idx[idx] = i
        mutual_info[3, pn,
                    tn] = metrics.adjusted_mutual_info_score(gt, est_idx)
        runtime[3, pn, tn] = time.time() - time_s
        print('-- {}: runtime={:.4f}sec, mutual information={:.4f}.'.format(
            methods[3], runtime[3, pn, tn], mutual_info[3, pn, tn]))

        ot_dict = {
            'loss_type': 'L2',  # the key hyperparameters of GW distance
            'ot_method': 'proximal',
            'beta': 0.15,
コード例 #16
0
def partition_featurize_graph_fpdwl(G,k=100,dims=64,wl_steps=1,
                                    distribution_offset=0,distribution_exponent=0):
    """
    Partition+Anchor a graph using Fluid communities+Pagerank and produce node features using Degree+WL
    (Hence fpdwl)
    -----------
    Parameters:
    G : NetworkX graph
    k : number of blocks in partition
    dims : dimension of feature space; a degree >= dims gets an all-zero vector
    wl_steps : number of times wl_label is applied to the one-hot degree features
    distribution_offset, distribution_exponent : the weight of a node is
        (degree + distribution_offset) ** distribution_exponent before
        normalisation; with the default exponent 0 every weight is 1, so p
        is uniform
    -------
    Returns (a tuple, in this order):
    nodes : sorted list of node labels
    features : array whose i-th row is the degree+WL feature of nodes[i]
    p : array of node probabilities, aligned with nodes
    partition : list of the communities yielded by asyn_fluidc
    node_subset : sorted list of anchor node labels (one per community)
    node_subset_idx : positions of the anchors within nodes
    """
    pr = pagerank(G)

    # Partition graph via Fluid
    partition = list(asyn_fluidc(G,k))

    # Create anchors via PageRank: the best-ranked node of every block.
    # Sorting fixes an ordering on the anchors.
    anchors = sorted(max(block, key=lambda node: pr[node]) for block in partition)

    # Featurize using degrees and Weisfeiler-Lehman, starting from a one-hot
    # encoding of each node's degree.
    features = {}
    for node, deg in dict(nx.degree(G)).items():
        one_hot = np.zeros(dims)
        if deg < dims:
            one_hot[deg] += 1
        features[node] = one_hot
    for _ in range(wl_steps):
        features = wl_label(G,features)

    # Sorted node names and their features, row-aligned
    nodes = sorted(features)
    feature_matrix = np.array([features[node] for node in nodes])

    # Obtain probability vector
    p = np.array([(G.degree(n)+distribution_offset)**distribution_exponent for n in nodes])
    p = p/np.sum(p)

    # Indices of the anchor nodes in the node list; a lookup table avoids
    # rescanning the list once per anchor.
    position = {node: index for index, node in enumerate(nodes)}
    node_subset = anchors
    node_subset_idx = [position[anchor] for anchor in node_subset]

    return nodes, feature_matrix, p, partition, node_subset, node_subset_idx