Exemple #1
0
    def cluster_mhlink(self, num_tuples=255, tuple_size=3, table_size=2**20, thres=0.7,
                       min_cluster_size=3):
        """
        Cluster the mined-list database with LSH-based agglomerative clustering.

        Runs ``la.mhlink_cluster`` on ``self.ldb`` using ``la.list_overlap`` as
        the comparison function, passes every resulting list through
        ``la.list_sort_by_frequency_back``, and wraps the outcome in a new
        ``L1LSH`` object.

        Parameters
        ----------
        num_tuples, tuple_size, table_size, thres, min_cluster_size
            Forwarded unchanged to ``la.mhlink_cluster``; their exact meaning
            is defined by that helper.

        Returns
        -------
        L1LSH
            A new instance built with ``ldb`` set to the clustered lists.
        """
        # NOTE: the helper takes tuple_size *before* num_tuples, i.e. the
        # opposite of the order in which this method declares them.
        clustered = la.mhlink_cluster(
            self.ldb,
            tuple_size,
            num_tuples,
            table_size,
            la.list_overlap,
            thres,
            min_cluster_size,
        )

        reorder = la.list_sort_by_frequency_back
        la.listdb_apply_to_all(clustered, reorder)

        return L1LSH(ldb=clustered)
Exemple #2
0
    def cluster_sklearn(self, algorithm):
        """
        Cluster the mined-list database with a scikit-learn style estimator.

        The database is obtained via ``self.tocsr()``, converted to a dense
        array and handed to ``algorithm.fit``. If the fitted estimator exposes
        a ``cluster_centers_`` attribute, it is converted with
        ``ndarray_to_listdb``; otherwise the result is built by
        ``centers_from_labels`` from the sparse matrix and
        ``algorithm.labels_``. In both cases the lists held in the result's
        ``ldb`` attribute are passed through
        ``la.list_sort_by_frequency_back`` before returning.

        Parameters
        ----------
        algorithm
            An object providing ``fit`` and, after fitting, either
            ``cluster_centers_`` or ``labels_``.

        Returns
        -------
        The list database produced by ``ndarray_to_listdb`` or
        ``centers_from_labels``.
        """
        sparse_matrix = self.tocsr()
        dense_matrix = sparse_matrix.toarray()
        algorithm.fit(dense_matrix)

        # Estimators without explicit centers only provide per-sample labels.
        if not hasattr(algorithm, 'cluster_centers_'):
            result = centers_from_labels(sparse_matrix, algorithm.labels_)
        else:
            result = ndarray_to_listdb(algorithm.cluster_centers_)

        reorder = la.list_sort_by_frequency_back
        la.listdb_apply_to_all(result.ldb, reorder)

        return result