Example #1
0
        if line.endswith("\\\n"):  # continuation line, buffer, goto next
            oldline = line.strip("\\\n")
            continue

        (headname, tails) = line.split(":")

        # head
        numfind = re.compile("^\d+")  # re to find the number of this word
        head = numfind.findall(headname)[0]  # get the number

        G.add_node(head)

        for tail in tails.split():
            if head == tail:
                print("skipping self loop", head, tail, file=sys.stderr)
            G.add_edge(head, tail)

    return G


if __name__ == '__main__':
    # Script entry point: load the Roget graph, report its size and
    # connectivity, then show it with a circular layout.
    G = roget_graph()
    print("Loaded roget_dat.txt containing 1022 categories.")

    node_total = nx.number_of_nodes(G)
    edge_total = nx.number_of_edges(G)
    print("digraph has %d nodes with %d edges" % (node_total, edge_total))

    # Connected components are counted on the undirected version of the graph.
    UG = G.to_undirected()
    component_total = nx.number_connected_components(UG)
    print(component_total, "connected components")

    nx.draw_circular(UG)
    plt.show()
Example #2
0
    def _build_clusters_at_least_one_and_at_most_logn(self, graph):
        """Build a multi-level cluster hierarchy over ``graph`` on ``self._network``.

        Steps, as implemented below:

        1. Compute the weighted diameter of ``graph`` and derive the hierarchy
           height as ``ceil(log2(diameter)) + 1``; the height is stored on the
           network via ``set_height_of_clusters``.
        2. Level 0: register one single-node cluster per graph node.
        3. For each level ``i + 1`` (``i`` in ``range(height)``): for every peer
           ``n`` in ``range(self._peer_count)``, collect the nodes within
           weighted distance ``2 ** (i + 1)`` of node ``str(n)`` as a candidate
           cluster, keep it only if ``self.at_most_logn`` accepts the combined
           membership and no identical cluster was already kept. Afterwards,
           while ``self.at_least_one`` reports a missing peer, add a
           single-node cluster for that peer.
        4. Materialise the kept clusters as ``Cluster`` objects and add them to
           the network; clusters at the top level are flagged as root.

        NOTE(review): ``at_most_logn`` / ``at_least_one`` are not defined in
        this block. From their use here they appear to return a negative value
        when the constraint holds and otherwise the id of an offending peer --
        confirm against their definitions.

        :param graph: networkx graph with a ``'weight'`` edge attribute. Peers
            are looked up as ``str(n)`` for ``n`` in ``range(self._peer_count)``,
            so node labels are presumably the decimal strings of the peer
            indices -- verify against the caller.
        :return: None; all results are written to ``self._network``.
        """
        paths_for_diameter = nx.shortest_path_length(graph, weight='weight')
        # Debug output only; note this runs the shortest-path computation a second time.
        for path in nx.shortest_path_length(graph, weight='weight'):
            print(path)

        # Reuse the precomputed path lengths so eccentricity/diameter respect edge weights.
        ecc = nx.eccentricity(graph, sp=dict(paths_for_diameter))
        diameter = nx.diameter(graph, e=ecc)

        print('The graph diameter is ', diameter)
        # The level-(i + 1) distance limit is 2 ** (i + 1), so this many levels
        # are needed before the limit reaches the diameter.
        height_of_cluster = math.ceil(math.log(diameter, 2)) + 1
        self._network.set_height_of_clusters(height_of_cluster)
        print('height_of_the hierarchy is ', height_of_cluster)
        # lowest level clusters
        print('lowest level clusters')
        self._network.add_cluster_level(0)

        # level 0 clusters: one single-node cluster per graph node
        for n in graph.nodes():
            # `paths` is only printed; the cluster itself always contains just `n`.
            paths = nx.single_source_dijkstra_path_length(graph, n, 0, weight='weight')
            print(paths)
            cluster_graph = graph.subgraph([n])
            cluster = Cluster('c' + str(n) + '_l' + '0', cluster_graph, 0, str(n))
            self._network.add_cluster(0, cluster)
            # self._network.draw_cluster(cluster.cluster_id)


        # form upper level clusters
        for i in range(int(height_of_cluster)):
            self._network.add_cluster_level(i + 1)
            # Flat list of every peer placed in a kept cluster at this level
            # (with repetitions); its Counter gives per-peer membership counts.
            clustered_peers_list = []
            # for naming the cluster properly
            cluster_ids_list = []

            print ('AT LEVEL ----- ', i + 1)
            # The distance limit doubles with every level.
            distance = pow(2, i + 1)
            print('THE DISTANCE LIMIT IS ', distance)
            # Maps cluster id -> list of member peers kept for this level.
            clusterize = {}

            n = 0
            # iterate over the peers once
            while n < self._peer_count:
                print('clustering peer ', n)

                # All nodes within `distance` (weighted) of peer n, including n itself.
                paths_found = nx.single_source_dijkstra_path_length(graph, str(n), distance, weight='weight')

                peers_to_cluster = []
                print('paths found in the level ', paths_found)
                tmp_peers_list_to_cluster = []
                # Both lists receive the same peers in the same order.
                for peer in paths_found:
                    peers_to_cluster.append(peer)
                    # clustered_peers_list.append(peer)
                    tmp_peers_list_to_cluster.append(peer)

                c_id = 'c' + str(n) + '_l' + str(i + 1)

                # for naming the clusters properly: the suffix is how many times
                # this base id has been generated so far at this level
                cluster_ids_list.append(c_id)
                cluster_ids_count = Counter(cluster_ids_list)

                c_id = c_id + "_" + str(cluster_ids_count[c_id])

                # Tentatively add the candidate to a copy so the membership
                # check can run without committing the candidate.
                temp_clustered_peers_list = copy.deepcopy(clustered_peers_list)
                temp_clustered_peers_list.extend(tmp_peers_list_to_cluster)
                duplicate = False
                # A candidate that fails this check is dropped without any adjustment.
                if self.at_most_logn(Counter(temp_clustered_peers_list)) < 0:
                    # check if duplicate peers or not
                    for inner_key, inner_value in clusterize.items():

                        if Counter(inner_value) == Counter(tmp_peers_list_to_cluster):
                            print ("FOUND DUPLICATE CLUSTER")
                            duplicate = True
                            break
                    print("DOESN'T VIOLATE AT MOST LOG N")
                    if not duplicate:
                        clustered_peers_list.extend(tmp_peers_list_to_cluster)
                        clusterize[c_id] = peers_to_cluster

                n += 1

            print("CHECKING MEMBERSHIP")
            print(self.at_least_one(Counter(clustered_peers_list)) < 0)
            print(self.at_most_logn(Counter(clustered_peers_list)) < 0)
            assert (self.at_most_logn(Counter(clustered_peers_list)) < 0)

            # if not built yet build a cluster and remove peers who appear more than logn times
            missing_cluster_id = self.at_least_one(Counter(clustered_peers_list))
            while missing_cluster_id > -1:
                print("PEER ", missing_cluster_id, " IS MISSING, BUILDING A CLUSTER AROUND IT.")
                paths_found = nx.single_source_dijkstra_path_length(graph, str(missing_cluster_id), distance, weight='weight')
                peers_to_cluster = []
                print('paths found in the level ', paths_found)
                tmp_peers_list_to_cluster = []
                for peer in paths_found:
                    peers_to_cluster.append(peer)
                    # clustered_peers_list.append(peer)
                    tmp_peers_list_to_cluster.append(peer)
                # todo creating single node cluster
                # The neighbourhood collected above is discarded here: the
                # cluster for a missing peer contains only that peer.
                tmp_peers_list_to_cluster = [str(missing_cluster_id)]
                c_id = 'c' + str(missing_cluster_id) + '_l' + str(i + 1)

                # for naming the clusters properly
                cluster_ids_list.append(c_id)
                cluster_ids_count = Counter(cluster_ids_list)

                c_id = c_id + "_" + str(cluster_ids_count[c_id])

                temp_clustered_peers_list = copy.deepcopy(clustered_peers_list)
                temp_clustered_peers_list.extend(tmp_peers_list_to_cluster)
                duplicate = False
                if self.at_most_logn(Counter(temp_clustered_peers_list)) < 0:
                    # check if duplicate peers or not
                    for inner_key, inner_value in clusterize.items():

                        if Counter(inner_value) == Counter(tmp_peers_list_to_cluster):
                            print("FOUND DUPLICATE CLUSTER")
                            duplicate = True
                            break
                    print("DOESN'T VIOLATE AT MOST LOG N")
                    if not duplicate:
                        print("WHY HERE")
                        clustered_peers_list.extend(tmp_peers_list_to_cluster)
                        clusterize[c_id] = tmp_peers_list_to_cluster
                else:
                    # find the peer that appears more than logn
                    excess_cluster_id = self.at_most_logn(Counter(temp_clustered_peers_list))

                    # Strip offending peers from the candidate until the
                    # combined membership passes the at-most check.
                    while excess_cluster_id != -1:
                        print(tmp_peers_list_to_cluster)
                        print(excess_cluster_id)
                        # list.remove raises ValueError if the reported peer is
                        # not part of the candidate.
                        tmp_peers_list_to_cluster.remove(str(excess_cluster_id))
                        tmp_clustered_peers_list = copy.deepcopy(clustered_peers_list)
                        tmp_clustered_peers_list.extend(tmp_peers_list_to_cluster)
                        # removing
                        print(excess_cluster_id, " APPEARS IN MORE THAN LOGN CLUSTER, REMOVING IT")
                        excess_cluster_id = self.at_most_logn(Counter(tmp_clustered_peers_list))
                    print("PREPARING THE CLUSTER FROM MODIFIED PEERS LIST ", tmp_peers_list_to_cluster)
                    clustered_peers_list.extend(tmp_peers_list_to_cluster)
                    clusterize[c_id] = tmp_peers_list_to_cluster
                    assert (self.at_most_logn(Counter(clustered_peers_list)) < 0)

                # Re-evaluate: the loop ends once no peer is reported missing.
                missing_cluster_id = self.at_least_one(Counter(clustered_peers_list))

            print("ASSERTING")
            assert (self.at_least_one(Counter(clustered_peers_list)) < 0)
            assert (self.at_most_logn(Counter(clustered_peers_list)) < 0)

            print(clusterize)
            print(Counter(clustered_peers_list))

            print("FINALLY ADDING CLUSTERS")
            for key in clusterize:
                print (len(clusterize[key]))
                # The `i` inside the comprehensions below is local to each
                # comprehension (Python 3) and does not clobber the level index `i`.
                print(nx.number_of_nodes(graph.subgraph([str(i) for i in clusterize[key]])))
                # Every listed member must be an actual node of `graph`, with no repeats.
                assert(nx.number_of_nodes(graph.subgraph([str(i) for i in clusterize[key]])) == len(clusterize[key]))
                cluster_graph = graph.subgraph([str(i) for i in clusterize[key]])
                cluster = Cluster(key, cluster_graph, i + 1)
                # Clusters at the top level are flagged as root; if several exist,
                # the last one processed ends up in `_root_cluster`.
                if i + 1 == height_of_cluster:
                    cluster.root = True
                    self._network._root_cluster = cluster
                self._network.add_cluster(i + 1, cluster)
                # self._network.draw_cluster(cluster.cluster_id)

            print("CLUSTERIZE ", clusterize)

        return
Example #3
0
        numfind = re.compile("^\d+")  # re to find the number of this word
        head = numfind.findall(headname)[0]  # get the number

        G.add_node(head)

        for tail in tails.split():
            if head == tail:
                print("skipping self loop", head, tail, file=sys.stderr)
            G.add_edge(head, tail)

    return G


if __name__ == '__main__':
    # Script entry point: load the Roget graph, report its size and
    # connectivity, then draw it with a circular layout.
    G = roget_graph()
    print("Loaded roget_dat.txt containing 1022 categories.")
    print("digraph has %d nodes with %d edges"
          % (nx.number_of_nodes(G), nx.number_of_edges(G)))
    # Connected components are counted on the undirected version of the graph.
    UG = G.to_undirected()
    print(nx.number_connected_components(UG), "connected components")

    # Tiny black nodes and thin grey edges keep the dense drawing readable.
    # The edge colour keyword is 'edge_color'; 'line_color' is not a networkx
    # drawing option (ignored by old releases, rejected by current ones).
    options = {
        'node_color': 'black',
        'node_size': 1,
        'edge_color': 'grey',
        'linewidths': 0,
        'width': 0.1,
    }
    nx.draw_circular(UG, **options)
    plt.show()
Example #4
0
    def _build_clusters_exactly_logn_membership(self, graph):
        """Build a cluster hierarchy in which peers end up in log2(n) clusters per level.

        Steps, as implemented below:

        1. Compute the weighted diameter of ``graph`` and derive the hierarchy
           height as ``ceil(log2(diameter)) + 1``.
        2. Level 0: register one single-node cluster per graph node.
        3. For each level ``i + 1``: repeatedly cycle over the peers
           ``0 .. self._peer_count - 1``, adding for each peer a cluster made
           of all nodes within weighted distance ``2 ** (i + 1)``, until every
           peer is a member of at least ``log2(self._peer_count)`` clusters.
           Surplus memberships are then trimmed by removing the peer from
           clusters, starting with the most recently added ones.
        4. Remove clusters left empty by the trimming and assert that the
           remaining memberships equal ``log2(self._peer_count)``.

        NOTE(review): the membership target is compared with ``==`` against a
        float, so the final assertion can only hold when
        ``log2(self._peer_count)`` is a whole number (and is computed exactly).

        :param graph: networkx graph with a ``'weight'`` edge attribute. Peers
            are looked up as ``str(n)`` for ``n`` in ``range(self._peer_count)``,
            so node labels are presumably the decimal strings of the peer
            indices -- verify against the caller.
        :return: None; all results are written to ``self._network``.
        """
        paths_for_diameter = nx.shortest_path_length(graph, weight='weight')
        # Debug output only; note this runs the shortest-path computation a second time.
        for path in nx.shortest_path_length(graph, weight='weight'):
            print(path)

        # Reuse the precomputed path lengths so eccentricity/diameter respect edge weights.
        ecc = nx.eccentricity(graph, sp=dict(paths_for_diameter))
        diameter = nx.diameter(graph, e=ecc)

        print('The graph diameter is ', diameter)
        height_of_cluster = math.ceil(math.log(diameter, 2)) + 1
        print('height_of_the hierarchy is ', height_of_cluster)
        # lowest level clusters
        print('lowest level clusters')
        self._network.add_cluster_level(0)

        # Level 0: one single-node cluster per graph node.
        for n in graph.nodes():
            # `paths` is only printed; the cluster itself always contains just `n`.
            paths = nx.single_source_dijkstra_path_length(graph, n, 0, weight='weight')
            print(paths)
            cluster_graph = graph.subgraph([n])
            cluster = Cluster('c' + str(n) + '_l' + '0', cluster_graph, 0)
            self._network.add_cluster(0, cluster)

        for i in range(int(height_of_cluster)):
            self._network.add_cluster_level(i + 1)
            # Flat list of every membership at this level (with repetitions);
            # its Counter gives the per-peer membership counts.
            clustered_peers_list = []
            # Used to give repeated base ids a running "_<k>" suffix.
            cluster_ids_list = []

            print('AT LEVEL ------- ', i + 1)
            # The distance limit doubles with every level.
            distance = pow(2, i + 1)

            print('THE DISTANCE LIMIT IS ', distance)
            n = 0
            while n < self._peer_count:
                incomplete = False
                print('clustering peer ', n)

                # All nodes within `distance` (weighted) of peer n, including n itself.
                paths_found = nx.single_source_dijkstra_path_length(graph, str(n), distance, weight='weight')
                peers_to_cluster = []
                print('paths found in the level ', paths_found)
                for peer in paths_found:
                    peers_to_cluster.append(int(peer))
                    clustered_peers_list.append(peer)

                cluster_graph = graph.subgraph([str(p) for p in peers_to_cluster])

                c_id = 'c' + str(n) + '_l' + str(i + 1)
                cluster_ids_list.append(c_id)
                cluster_ids_count = Counter(cluster_ids_list)
                c_id = c_id + "_" + str(cluster_ids_count[c_id])
                cluster = Cluster(c_id, cluster_graph, i + 1)
                self._network.add_cluster(i + 1, cluster)
                clustered_peers_count = Counter(clustered_peers_list)
                # The level is incomplete while any peer still has fewer than
                # log2(peer_count) memberships.
                for j in range(self._peer_count):
                    if clustered_peers_count[str(j)] < math.log(self._peer_count, 2):
                        incomplete = True

                if not incomplete:
                    # make peers fall in exactly log(n) clusters
                    for peer in range(self._peer_count):
                        # count of peers in all clusters
                        peer_count = clustered_peers_count[str(peer)]
                        while peer_count > math.log(self._peer_count, 2):
                            # Drop the peer from the most recently added cluster
                            # that still contains it, one cluster per pass.
                            for cluster in reversed(self._network.clusters_by_level(i + 1)):
                                if clustered_peers_count[str(peer)] > math.log(self._peer_count, 2) and cluster.graph.has_node(
                                        str(peer)):
                                    cluster.graph.remove_node(str(peer))

                                    peer_count = peer_count - 1
                                    if str(peer) in clustered_peers_list:
                                        clustered_peers_list.remove(str(peer))
                                    clustered_peers_count = Counter(clustered_peers_list)

                                    break

                    # Every peer now has the target membership: level finished.
                    break

                n += 1
                # Wrap around so peers keep receiving clusters until the level is complete.
                if n == self._peer_count:
                    n = 0

        # delete empty clusters
        for i in range(int(height_of_cluster)):
            # Iterate over a snapshot: removing clusters from the network while
            # iterating the collection it returns could skip entries.
            for cluster in list(self._network.clusters_by_level(i)):
                if nx.number_of_nodes(cluster.graph) == 0:
                    self._network.remove_cluster_by_id(cluster.cluster_id, cluster.level)

        for i in range(int(height_of_cluster)):
            for cluster in self._network.clusters_by_level(i):
                # Value comparison; `is not 0` tests identity and only works by
                # accident of small-int caching.
                assert (nx.number_of_nodes(cluster.graph) != 0)

        # make sure that there are log(n) peers in every level
        for i in range(int(height_of_cluster) - 1):
            print("Verifying in level ", i + 1)
            clustered_peer_list = []
            for cluster in self._network.clusters_by_level(i + 1):
                for node in cluster.graph.nodes:
                    clustered_peer_list.append(str(node))
            clustered_peer_count = Counter(clustered_peer_list)
            for cl in clustered_peer_count:
                assert (clustered_peer_count[cl] == math.log(self._peer_count, 2))