def cluster_mhlink(
    self,
    num_tuples=255,
    tuple_size=3,
    table_size=2**20,
    thres=0.7,
    min_cluster_size=3,
):
    """
    Agglomeratively cluster the database of mined lists using LSH.

    The work is delegated to ``la.mhlink_cluster``, which receives
    ``self.ldb`` together with the parameters below and
    ``la.list_overlap`` as its list-comparison callback.

    Args:
        num_tuples: forwarded to ``la.mhlink_cluster`` (default 255).
        tuple_size: forwarded to ``la.mhlink_cluster`` (default 3).
        table_size: forwarded to ``la.mhlink_cluster`` (default 2**20).
        thres: forwarded to ``la.mhlink_cluster`` (default 0.7);
            presumably the overlap threshold -- confirm against ``la``.
        min_cluster_size: forwarded to ``la.mhlink_cluster`` (default 3).

    Returns:
        An ``L1LSH`` instance built from the resulting cluster models.
    """
    # NOTE: the positional order here is tuple_size *then* num_tuples,
    # i.e. the reverse of this method's own signature. Presumably this
    # mirrors the signature of la.mhlink_cluster -- verify before changing.
    clustering_args = (
        self.ldb,
        tuple_size,
        num_tuples,
        table_size,
        la.list_overlap,
        thres,
        min_cluster_size,
    )
    cluster_models = la.mhlink_cluster(*clustering_args)

    # Apply the frequency-based sort helper to every list in the result.
    la.listdb_apply_to_all(cluster_models, la.list_sort_by_frequency_back)

    return L1LSH(ldb=cluster_models)
def cluster_sklearn(self, algorithm):
    """
    Cluster the database of mined lists with a scikit-learn style estimator.

    Args:
        algorithm: an object exposing ``fit(X)`` and, once fitted, either a
            ``cluster_centers_`` attribute or a ``labels_`` attribute.

    Returns:
        The object produced by ``ndarray_to_listdb`` (when the estimator
        exposes ``cluster_centers_``) or by ``centers_from_labels``
        (otherwise), after the frequency-based sort helper has been applied
        to its ``ldb`` attribute.
    """
    sparse_matrix = self.tocsr()

    # The estimator is fitted on the result of toarray(), not on the CSR
    # matrix itself (presumably a dense ndarray -- confirm for large inputs).
    algorithm.fit(sparse_matrix.toarray())

    # Prefer explicit cluster centres when the estimator provides them;
    # otherwise derive the centres from the per-sample labels.
    clustered = (
        ndarray_to_listdb(algorithm.cluster_centers_)
        if hasattr(algorithm, 'cluster_centers_')
        else centers_from_labels(sparse_matrix, algorithm.labels_)
    )

    la.listdb_apply_to_all(clustered.ldb, la.list_sort_by_frequency_back)
    return clustered