def __init__(self,
             n_components=10,
             n_clusters=3,
             selected_embedding='autoencoder',
             level_dims_in=LEVEL_DIMS_IN,
             level_dims_out=LEVEL_DIMS_OUT,
             dropout=DROPOUT,
             epochs=EPOCHS,
             _clustering=CLUSTERING,
             _embedding=EMBEDDING,
             **kwargs):
    """
    Initialise the clustering object with a DeepBase 'autoencoder' entry
    registered as an embedding, a clustering and a plotting-embedding
    option, then delegate the rest of the set-up to GranatumClustering.

    Parameters
    ----------
    n_components:
        Passed as ``n_components`` to the DeepBase used for embedding and
        forwarded to ``GranatumClustering.__init__``.
    n_clusters:
        Passed as ``n_components`` to the DeepBase used for clustering and
        forwarded to ``GranatumClustering.__init__``.
    selected_embedding:
        Forwarded to ``GranatumClustering.__init__``; defaults to
        ``'autoencoder'``.
    level_dims_in, level_dims_out, dropout, epochs:
        Stored on the instance and handed unchanged to every DeepBase
        built here.
    _clustering, _embedding:
        Mappings from algorithm name to estimator. They are shallow-copied
        before the 'autoencoder' entry is added, so neither the
        module-level defaults nor a caller-supplied mapping is modified.
    **kwargs:
        Forwarded untouched to ``GranatumClustering.__init__``.
    """
    self.level_dims_in = level_dims_in
    self.level_dims_out = level_dims_out
    self.dropout = dropout
    self.epochs = epochs

    def _make_autoencoder(size):
        """Build a DeepBase with this instance's settings and `size` as n_components."""
        return DeepBase(
            dropout=self.dropout,
            epochs=self.epochs,
            level_dims_in=self.level_dims_in,
            level_dims_out=self.level_dims_out,
            n_components=size)

    # The defaults are shared module-level mappings: writing into them
    # directly would let every new instance overwrite the 'autoencoder'
    # entry seen by all previously created instances. Work on copies.
    _embedding = dict(_embedding)
    _clustering = dict(_clustering)

    _embedding['autoencoder'] = _make_autoencoder(n_components)
    _clustering['autoencoder'] = _make_autoencoder(n_clusters)

    GranatumClustering.__init__(
        self,
        selected_embedding=selected_embedding,
        n_components=n_components,
        n_clusters=n_clusters,
        _embedding=_embedding,
        _clustering=_clustering,
        **kwargs)

    # NOTE(review): these two attributes are expected to be created by the
    # base initialiser above; if it binds them to module-level defaults
    # the assignments below are still shared across instances -- confirm.
    self._plotting_embedding['autoencoder'] = _make_autoencoder(2)
    self._find_best_cluster['autoencoder'] = {
        'method': _silhouette_analysis,
        'range': FIND_BEST_CLUSTER_RANGE
    }
def main():
    """
    Read a JSON request from stdin, run GranatumClustering on the assay it
    contains and write the JSON-encoded result to stdout.

    The whole decoded request is passed as keyword arguments to
    GranatumClustering; the matrix under ``assay.matrix`` is transposed
    before fitting (presumably so that rows become samples -- confirm
    against the producer of the request).
    """
    payload = json.load(stdin)
    clusterer = GranatumClustering(**payload)

    assay = payload['assay']
    transposed = np.array(assay['matrix']).T

    outcome = clusterer.fit(
        matrix=transposed,
        sample_ids=assay['sampleIds'],
        jsonify=True)

    stdout.write(json.dumps(outcome))
def test_5_default():
    """
    Fit once on the test dataset, then plot with every plotting embedding
    listed in PLOTTING_EMBEDDING and check the keys of each plot result.
    """
    expected_keys = (
        'embedding_for_plotting',
        'plot_figure_html',
        'plot_figure_png',
    )

    model = GranatumClustering(n_components=5)
    model.fit(TEST_DATASET)

    for name in PLOTTING_EMBEDDING:
        print('#### plotting embbeding used tested: {0}'.format(name))
        figures = model.plot(embedding=name)

        for key in expected_keys:
            assert key in figures
def test_4_default():
    """
    Fit with every clustering algorithm listed in CLUSTERING, leaving the
    search for the best number of clusters at its default setting, and
    check the keys of each fit result.
    """
    expected_keys = (
        'clusters',
        'n_clusters',
        'n_components',
        'embedding',
        'clustering_algorithm',
    )

    for algo in CLUSTERING:
        print('#### clustering algo tested: {0}'.format(algo))

        model = GranatumClustering(n_components=5,
                                   selected_clustering=algo)
        fitted = model.fit(TEST_DATASET)

        print('number of clusters found: {0}'.format(fitted['n_clusters']))

        for key in expected_keys:
            assert key in fitted
def test_3_default():
    """
    Fit with every clustering algorithm listed in CLUSTERING, with PCA as
    plotting embedding and the best-cluster-number search switched off,
    and check the keys of each fit result.
    """
    expected_keys = (
        'clusters',
        'n_clusters',
        'n_components',
        'embedding',
        'clustering_algorithm',
    )

    for algo in CLUSTERING:
        print('#### clustering algo tested: {0}'.format(algo))

        model = GranatumClustering(
            n_components=5,
            selected_plotting_embedding='PCA',
            selected_clustering=algo,
            find_best_number_of_cluster=False)
        fitted = model.fit(TEST_DATASET)

        for key in expected_keys:
            assert key in fitted
def test_2_default():
    """
    Fit with every embedding algorithm listed in EMBEDDING, with the
    best-cluster-number search switched off, and check the keys of each
    fit result.
    """
    expected_keys = (
        'clusters',
        'n_clusters',
        'n_components',
        'embedding',
        'clustering_algorithm',
    )

    for name in EMBEDDING:
        print('#### embedding tested: {0}'.format(name))

        model = GranatumClustering(
            n_components=5,
            find_best_number_of_cluster=False,
            selected_embedding=name)
        fitted = model.fit(TEST_DATASET)

        for key in expected_keys:
            assert key in fitted
def test_0_default():
    """
    Fit the test dataset with DBSCAN selected as clustering algorithm,
    then plot with the PCA embedding, checking the keys of both results.
    """
    model = GranatumClustering(n_components=5,
                               selected_clustering='DBSCAN')

    fitted = model.fit(TEST_DATASET)
    for key in ('clusters',
                'n_clusters',
                'n_components',
                'embedding',
                'clustering_algorithm'):
        assert key in fitted

    figures = model.plot(
        plot_figure_png=True,
        plot_figure_html=True,
        embedding='PCA',
        jsonify=False)
    for key in ('embedding_for_plotting',
                'plot_figure_html',
                'plot_figure_png'):
        assert key in figures
def main():
    """
    Smoke-run GranatumClustering (PCA embedding, WARD clustering) on a
    synthetic blob dataset generated with scikit-learn, then plot the
    result with the PCA embedding and check the keys of both outputs.
    """
    from sklearn.datasets import make_blobs

    # The blob labels returned alongside the data are not used here.
    blobs, _ = make_blobs(n_samples=300, n_features=200, centers=8)

    sample_ids = list(range(len(blobs)))
    metadata = {
        idx: {'dummy': 'dummy:{0}'.format(idx)}
        for idx in sample_ids
    }

    model = GranatumClustering(selected_embedding='PCA',
                               selected_clustering='WARD')

    fitted = model.fit(matrix=blobs,
                       metadata=metadata,
                       sample_ids=sample_ids)
    for key in ('clusters',
                'n_clusters',
                'n_components',
                'embedding',
                'clustering_algorithm'):
        assert key in fitted

    figures = model.plot(
        figsize=(600, 600),
        embedding='PCA',
        plot_figure_png=True,
        plot_figure_html=True,
        jsonify=False)
    for key in ('plot_figure_html', 'plot_figure_png'):
        assert key in figures