コード例 #1
0
    def __init__(self,
                 n_components=10,
                 n_clusters=3,
                 selected_embedding='autoencoder',
                 level_dims_in=LEVEL_DIMS_IN,
                 level_dims_out=LEVEL_DIMS_OUT,
                 dropout=DROPOUT,
                 epochs=EPOCHS,
                 _clustering=CLUSTERING,
                 _embedding=EMBEDDING,
                 **kwargs):
        """
        Initialise the clustering object and register 'autoencoder' entries.

        A ``DeepBase`` instance is stored under the ``'autoencoder'`` key of
        the embedding registry (built with ``n_components=n_components``),
        of the clustering registry (built with ``n_components=n_clusters``)
        and, after the parent initialiser has run, of
        ``self._plotting_embedding`` (built with ``n_components=2``).
        ``self._find_best_cluster['autoencoder']`` is set to use
        ``_silhouette_analysis`` over ``FIND_BEST_CLUSTER_RANGE``.

        Parameters
        ----------
        n_components:
            Forwarded to ``GranatumClustering.__init__`` and used as
            ``n_components`` of the embedding ``DeepBase``.
        n_clusters:
            Forwarded to ``GranatumClustering.__init__`` and used as
            ``n_components`` of the clustering ``DeepBase``.
        selected_embedding:
            Forwarded to ``GranatumClustering.__init__``.
        level_dims_in, level_dims_out, dropout, epochs:
            Stored on the instance and forwarded to every ``DeepBase``.
        _clustering, _embedding:
            Registries extended with an ``'autoencoder'`` entry and then
            handed to ``GranatumClustering.__init__``. The objects passed
            in (including the module-level defaults) are not modified.
        **kwargs:
            Forwarded unchanged to ``GranatumClustering.__init__``.
        """
        from copy import copy as _shallow_copy

        self.level_dims_in = level_dims_in
        self.level_dims_out = level_dims_out
        self.dropout = dropout
        self.epochs = epochs

        def _autoencoder(n_outputs):
            # All autoencoder entries share this instance's hyper-parameters
            # and differ only in the requested output size.
            return DeepBase(
                dropout=self.dropout,
                epochs=self.epochs,
                level_dims_in=self.level_dims_in,
                level_dims_out=self.level_dims_out,
                n_components=n_outputs)

        # Work on shallow copies: the defaults are module-level registries,
        # and writing into them would leak this instance's DeepBase objects
        # into every other instance (and into the caller's own dicts).
        _embedding = _shallow_copy(_embedding)
        _clustering = _shallow_copy(_clustering)

        _embedding['autoencoder'] = _autoencoder(n_components)
        _clustering['autoencoder'] = _autoencoder(n_clusters)

        GranatumClustering.__init__(self,
                                    selected_embedding=selected_embedding,
                                    n_components=n_components,
                                    n_clusters=n_clusters,
                                    _embedding=_embedding,
                                    _clustering=_clustering,
                                    **kwargs)

        # NOTE(review): these two attributes are expected to exist once the
        # parent initialiser has run. If the parent binds them to shared
        # module-level dicts they are still mutated in place here -- confirm.
        self._plotting_embedding['autoencoder'] = _autoencoder(2)

        self._find_best_cluster['autoencoder'] = {
            'method': _silhouette_analysis,
            'range': FIND_BEST_CLUSTER_RANGE
        }
コード例 #2
0
ファイル: main.py プロジェクト: granatumx/gbox-clustering
def main():
    """
    Read a JSON request from stdin, run the clustering and write the
    JSON-encoded result to stdout.

    The whole decoded payload is passed as keyword arguments to
    ``GranatumClustering``. The payload must also contain an ``'assay'``
    entry with ``'matrix'`` and ``'sampleIds'``; the matrix is transposed
    before fitting (presumably it arrives as features x samples -- confirm).
    """
    payload = json.load(stdin)

    clusterer = GranatumClustering(**payload)

    assay = payload['assay']
    transposed = np.array(assay['matrix']).T

    fit_output = clusterer.fit(
        matrix=transposed,
        sample_ids=assay['sampleIds'],
        jsonify=True,
    )

    stdout.write(json.dumps(fit_output))
コード例 #3
0
def test_5_default():
    """
    Fit once with n_components=5, then plot with every embedding listed
    in PLOTTING_EMBEDDING and check the keys of each plot result.
    """
    expected_keys = (
        'embedding_for_plotting',
        'plot_figure_html',
        'plot_figure_png',
    )

    model = GranatumClustering(n_components=5)
    model.fit(TEST_DATASET)

    for embedding_name in PLOTTING_EMBEDDING:
        print('#### plotting embbeding used tested: {0}'.format(
            embedding_name))

        plots = model.plot(embedding=embedding_name)

        for key in expected_keys:
            assert key in plots
コード例 #4
0
def test_4_default():
    """
    Fit with every algorithm listed in CLUSTERING, leaving the cluster
    number search option at its default, and check the result keys.

    NOTE(review): presumably the default enables the search for the best
    number of clusters -- confirm against GranatumClustering.
    """
    expected_keys = (
        'clusters',
        'n_clusters',
        'n_components',
        'embedding',
        'clustering_algorithm',
    )

    for algo_name in CLUSTERING:
        print('#### clustering algo tested: {0}'.format(algo_name))

        model = GranatumClustering(n_components=5,
                                   selected_clustering=algo_name)
        fitted = model.fit(TEST_DATASET)

        print('number of clusters found: {0}'.format(fitted['n_clusters']))

        for key in expected_keys:
            assert key in fitted
コード例 #5
0
def test_3_default():
    """
    Fit with every algorithm listed in CLUSTERING, using 'PCA' as the
    plotting embedding and find_best_number_of_cluster switched off,
    and check the result keys.
    """
    expected_keys = (
        'clusters',
        'n_clusters',
        'n_components',
        'embedding',
        'clustering_algorithm',
    )

    for algo_name in CLUSTERING:
        print('#### clustering algo tested: {0}'.format(algo_name))

        options = dict(
            n_components=5,
            selected_plotting_embedding='PCA',
            selected_clustering=algo_name,
            find_best_number_of_cluster=False,
        )
        fitted = GranatumClustering(**options).fit(TEST_DATASET)

        for key in expected_keys:
            assert key in fitted
コード例 #6
0
def test_2_default():
    """
    Fit with every embedding listed in EMBEDDING, with
    find_best_number_of_cluster switched off, and check the result keys.
    """
    expected_keys = (
        'clusters',
        'n_clusters',
        'n_components',
        'embedding',
        'clustering_algorithm',
    )

    for embedding_name in EMBEDDING:
        print('#### embedding tested: {0}'.format(embedding_name))

        model = GranatumClustering(n_components=5,
                                   find_best_number_of_cluster=False,
                                   selected_embedding=embedding_name)
        fitted = model.fit(TEST_DATASET)

        for key in expected_keys:
            assert key in fitted
コード例 #7
0
def test_0_default():
    """
    Fit the test dataset with DBSCAN clustering and n_components=5, check
    the result keys, then plot with the 'PCA' embedding (PNG and HTML
    requested, jsonify off) and check the plot keys.
    """
    model = GranatumClustering(n_components=5, selected_clustering='DBSCAN')
    fitted = model.fit(TEST_DATASET)

    for key in ('clusters',
                'n_clusters',
                'n_components',
                'embedding',
                'clustering_algorithm'):
        assert key in fitted

    plot_options = dict(plot_figure_png=True,
                        plot_figure_html=True,
                        embedding='PCA',
                        jsonify=False)
    plots = model.plot(**plot_options)

    for key in ('embedding_for_plotting',
                'plot_figure_html',
                'plot_figure_png'):
        assert key in plots
コード例 #8
0
def main():
    """
    Smoke-run the clustering on synthetic blobs.

    Generates 300 samples with 200 features around 8 centers, fits with
    the 'PCA' embedding and 'WARD' clustering, then plots with the 'PCA'
    embedding and asserts that the expected keys are present in both the
    fit result and the plot result.
    """
    from sklearn.datasets import make_blobs

    blobs, _labels = make_blobs(n_samples=300,
                                n_features=200,
                                centers=8)

    # One integer id per sample; each id also gets a dummy metadata record.
    sample_ids = list(range(len(blobs)))
    metadata = {}
    for idx in sample_ids:
        metadata[idx] = {'dummy': 'dummy:{0}'.format(idx)}

    model = GranatumClustering(selected_embedding='PCA',
                               selected_clustering='WARD')

    fitted = model.fit(matrix=blobs,
                       metadata=metadata,
                       sample_ids=sample_ids)

    for key in ('clusters',
                'n_clusters',
                'n_components',
                'embedding',
                'clustering_algorithm'):
        assert key in fitted

    plots = model.plot(figsize=(600, 600),
                       embedding='PCA',
                       plot_figure_png=True,
                       plot_figure_html=True,
                       jsonify=False)

    for key in ('plot_figure_html', 'plot_figure_png'):
        assert key in plots