Ejemplo n.º 1
0
    def novel_edge_collection(self):
        """Feed every novel edge from the offline edge table into the disjoint set.

        Fetches the ``gid_x`` / ``gid_y`` columns of
        ``novel_cluster_edge_info_offline`` through ``ClusterDBModule``, logs
        how many rows were returned, and passes each pair to
        ``DisjointSet.merge``.

        NOTE(review): the ``DisjointSet`` created here is local and is not
        returned, so it presumably shares state across instances (e.g. a
        singleton) -- confirm against its definition.
        """
        db = ClusterDBModule()
        edges = db.get_noveldata_all(
            'novel_cluster_edge_info_offline',
            ['gid_x', 'gid_y'],
        )
        edge_count = len(edges)
        self.logger.info('novel edge number: {0}'.format(edge_count))

        union_find = DisjointSet()
        for left_gid, right_gid in edges:
            union_find.merge(left_gid, right_gid)
Ejemplo n.º 2
0
    def novel_cluster_update(self):
        """Push the disjoint set's (gid, rid) update tuples to the cluster DB.

        Asks ``DisjointSet.generate_update_tuple_list`` for the tuples, logs
        their count, then logs each pair and hands it to
        ``ClusterDBModule.update_novelclusterdirinfo_gid``.

        NOTE(review): the update list is requested from a freshly constructed
        ``DisjointSet``, so it presumably shares state across instances
        (e.g. a singleton) -- confirm against its definition.
        """
        pending = DisjointSet().generate_update_tuple_list()
        self.logger.info('novel cluster update number: {0}'.format(len(pending)))

        db = ClusterDBModule()
        for gid, rid in pending:
            message = 'gid: {0}, rid: {1}'.format(gid, rid)
            self.logger.info(message)
            db.update_novelclusterdirinfo_gid(gid, rid)
Ejemplo n.º 3
0
    def novel_cluster_update(self):
        """Push the disjoint set's (gid, rid) update tuples to the cluster DB.

        Same flow as a plain update loop, but each log line also carries the
        zero-based position of the tuple and the total tuple count so progress
        can be followed in the log.

        NOTE(review): the update list is requested from a freshly constructed
        ``DisjointSet``, so it presumably shares state across instances
        (e.g. a singleton) -- confirm against its definition.
        """
        pending = DisjointSet().generate_update_tuple_list()
        total = len(pending)
        self.logger.info('novel cluster update number: {0}'.format(total))

        db = ClusterDBModule()
        for position, pair in enumerate(pending):
            gid, rid = pair
            progress = 'index: {0}/{1}, gid: {2}, rid: {3}'.format(position, total, gid, rid)
            self.logger.info(progress)
            db.update_novelclusterdirinfo_gid(gid, rid)
Ejemplo n.º 4
0
    def novel_node_collection(self):
        """Register every novel node from the offline dir table with the disjoint set.

        Fetches the ``gid``, ``rid`` and ``site_status`` columns of
        ``novel_cluster_dir_info_offline`` through ``ClusterDBModule``, logs
        the row count, and calls ``DisjointSet.add_novel_node`` with each
        row's ``gid`` and ``site_status``. The ``rid`` column is fetched but
        not forwarded.

        NOTE(review): the ``DisjointSet`` created here is local and is not
        returned, so it presumably shares state across instances (e.g. a
        singleton) -- confirm against its definition.
        """
        db = ClusterDBModule()
        nodes = db.get_noveldata_all(
            'novel_cluster_dir_info_offline',
            ['gid', 'rid', 'site_status'],
        )
        # NOTE: the self.novel_node_check(nodes) validation step is disabled.
        node_count = len(nodes)
        self.logger.info('novel node number: {0}'.format(node_count))

        union_find = DisjointSet()
        for gid, _rid, site_status in nodes:
            union_find.add_novel_node(gid, site_status)
Ejemplo n.º 5
0
    def novel_node_collection(self):
        """Register novel nodes, then feed authority rids to the disjoint set.

        Step 1: fetch the ``gid``, ``rid`` and ``site_status`` columns of
        ``novel_cluster_dir_info_offline`` and pass each row to
        ``DisjointSet.add_novel_node``.

        Step 2: collect the results of
        ``ClusterDBModule.get_novelaggregationdir_rid`` for table ids 0..255
        and pass each collected rid to ``DisjointSet.agg_novel_node_rank``.

        NOTE(review): the ``DisjointSet`` created here is local and is not
        returned, so it presumably shares state across instances (e.g. a
        singleton) -- confirm against its definition.
        """
        db = ClusterDBModule()
        nodes = db.get_noveldata_all(
            'novel_cluster_dir_info_offline',
            ['gid', 'rid', 'site_status'],
        )
        self.logger.info('novel node number: {0}'.format(len(nodes)))
        union_find = DisjointSet()
        for gid, rid, site_status in nodes:
            union_find.add_novel_node(gid, rid, site_status)

        # Gather rids from each of the 256 table ids before ranking any of them.
        authority_rids = []
        for table_id in xrange(256):
            authority_rids.extend(db.get_novelaggregationdir_rid(table_id))
        authority_count = len(authority_rids)
        self.logger.info('novel authority node number: {0}'.format(authority_count))
        for authority_rid in authority_rids:
            union_find.agg_novel_node_rank(authority_rid)