Esempi in Python per DPGMM.bic, esempi in Python per sklearn.mixture.DPGMM.bic

Esempio n. 1

0

Mostra file

File: cluster.py Progetto: pmeier82/BOTMpy

    def _fit_dpgmm(self, x):
        # clustering
        k = max(self.crange)
        for r in xrange(self.repeats):
            # info
            if self.debug is True:
                print '\t[%s][c:%d][r:%d]' % (self.clus_type, k, r + 1),

            # fit and evaluate model
            model_kwargs = {}
            if 'alpha' in self.clus_kwargs:
                model_kwargs.update(alpha=self.clus_kwargs['alpha'])
            if 'conv_thresh' in self.clus_kwargs:
                model_kwargs.update(thresh=self.clus_kwargs['conv_thresh'])
            if 'max_iter' in self.clus_kwargs:
                model_kwargs.update(n_iter=self.clus_kwargs['max_iter'])

            model = DPGMM(n_components=k, covariance_type=self.cvtype,
                          **model_kwargs)
            model.fit(x)
            self._labels[r] = model.predict(x)
            self._parameters[r] = model.means_
            self._ll[r] = model.score(x).sum()

            # evaluate goodness of fit for this run
            #self._gof[r] = self.gof(x, self._ll[r], k)
            if self.gof_type == 'aic':
                self._gof[r] = model.aic(x)
            if self.gof_type == 'bic':
                self._gof[r] = model.bic(x)

            # debug
            if self.debug is True:
                print self._gof[r], model.n_components, model.weights_.shape[0]

Esempio n. 2

0

Mostra file

File: cluster.py Progetto: felfranke/BOTMpy

    def _fit_dpgmm(self, x):
        # clustering
        k = max(self.crange)
        for r in xrange(self.repeats):
            # info
            if self.debug is True:
                print '\t[%s][c:%d][r:%d]' % (self.clus_type, k, r + 1),

            # fit and evaluate model
            model_kwargs = {}
            if 'alpha' in self.clus_kwargs:
                model_kwargs.update(alpha=self.clus_kwargs['alpha'])
            if 'conv_thresh' in self.clus_kwargs:
                model_kwargs.update(thresh=self.clus_kwargs['conv_thresh'])
            if 'max_iter' in self.clus_kwargs:
                model_kwargs.update(n_iter=self.clus_kwargs['max_iter'])

            model = DPGMM(n_components=k,
                          covariance_type=self.cvtype,
                          **model_kwargs)
            model.fit(x)
            self._labels[r] = model.predict(x)
            self._parameters[r] = model.means_
            self._ll[r] = model.score(x).sum()

            # evaluate goodness of fit for this run
            #self._gof[r] = self.gof(x, self._ll[r], k)
            if self.gof_type == 'aic':
                self._gof[r] = model.aic(x)
            if self.gof_type == 'bic':
                self._gof[r] = model.bic(x)

            # debug
            if self.debug is True:
                print self._gof[r], model.n_components, model.weights_.shape[0]

Esempio n. 3

0

Mostra file

File: old_cluster_util.py Progetto: wingr/pywing

def get_best_dpgmm(X, num_c, cv_type, alpha, iters, n_init, rand_state=None):
    """Fit a DPGMM `n_init` times and return the results of the lowest-BIC fit.

    Parameters
    ----------
    X : array
        Data to fit; ``X.shape[0]`` sets the length of the fallback vectors.
    num_c : int
        Passed to DPGMM as ``n_components``.
    cv_type : str
        Passed to DPGMM as ``covariance_type``.
    alpha : float
        Passed to DPGMM as ``alpha``.
    iters
        Accepted for interface compatibility; not used by this function.
    n_init : int
        Number of independent fits to try.
    rand_state : optional
        Passed to DPGMM as ``random_state``.

    Returns
    -------
    list
        ``[labels, probabilities, bic, log_prob]`` taken from the fit with
        the lowest BIC. When no fit improves on an infinite BIC (for example
        ``n_init == 0``), the labels and probabilities are zero vectors of
        length ``X.shape[0]`` and ``bic`` and ``log_prob`` are ``None``.
    """
    best_bic = np.inf
    bic_dpgmm = None
    # Fallbacks returned when no fit is accepted. The returned names are now
    # the ones initialised here, so the function no longer raises NameError
    # in that case.
    lbl_vec = np.zeros(X.shape[0])
    prob_vec = np.zeros(X.shape[0])
    log_prob_dpgmm = None
    for _ in range(n_init):
        dpgmm = DPGMM(n_components=num_c, covariance_type=cv_type,
                      alpha=alpha, random_state=rand_state)
        dpgmm.fit(X)
        b = dpgmm.bic(X)
        if b < best_bic:
            # Remember the new threshold: without this every run passed the
            # test and the last fit, not the best one, was returned.
            best_bic = b
            bic_dpgmm = b
            lbl_vec = dpgmm.predict(X)
            prob_vec = dpgmm.predict_proba(X)
            log_prob_dpgmm = np.sum(dpgmm.score(X))
    return [lbl_vec, prob_vec, bic_dpgmm, log_prob_dpgmm]

Esempio n. 4

0

Mostra file

def plot_num_iters_dpgmm(X, num_c, cv_type, alpha, max_iters, n_init):
    """Plot the best BIC of a DPGMM against its number of iterations.

    For every iteration count in ``1 .. max_iters - 1`` the model is fitted
    `n_init` times and the lowest BIC of those fits is kept.

    Parameters
    ----------
    X : array
        Data to fit and to score.
    num_c : int
        Passed to DPGMM as ``n_components``.
    cv_type : str
        Passed to DPGMM as ``covariance_type``.
    alpha : float
        Passed to DPGMM as ``alpha``.
    max_iters : int
        Exclusive upper bound of the iteration counts that are tried.
    n_init : int
        Number of fits per iteration count.

    Returns
    -------
    The matplotlib figure holding the BIC curve.
    """
    iter_counts = np.arange(1, max_iters)
    bic = []
    for iters in iter_counts:
        best_bic = np.inf
        for _ in range(n_init):
            # The model previously received the undefined names `comp` and
            # `a`; the values are the `num_c` and `alpha` parameters.
            dpgmm = DPGMM(n_components=num_c, covariance_type=cv_type,
                          alpha=alpha, n_iter=iters)
            dpgmm.fit(X)
            b = dpgmm.bic(X)
            if b < best_bic:
                best_bic = b
        bic.append(best_bic)
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot(iter_counts, bic)
    ax.set_title('BIC vs. Number of Iterations DPGMM')
    ax.set_xlabel('Number of iterations')
    ax.set_ylabel('BIC score')
    return fig

Esempio n. 5

0

Mostra file

def plot_alpha_dpgmm(X, num_c, cv_type, alphas, iters, n_init):
    """Plot the best BIC of a DPGMM for each concentration value in `alphas`.

    For every alpha the model (``n_components=num_c``,
    ``covariance_type=cv_type``, ``n_iter=iters``) is fitted to `X` `n_init`
    times and the lowest BIC of those fits is plotted against alpha.

    Returns the matplotlib figure holding the curve.
    """
    def lowest_bic(alpha_value):
        # best (smallest) BIC over n_init fits; inf when no fit is run
        lowest = np.inf
        for _ in range(n_init):
            model = DPGMM(n_components=num_c, covariance_type=cv_type,
                          alpha=alpha_value, n_iter=iters)
            model.fit(X)
            score = model.bic(X)
            if score < lowest:
                lowest = score
        return lowest

    scores = [lowest_bic(alpha_value) for alpha_value in alphas]

    figure, axes = plt.subplots(figsize=(10, 8))
    axes.plot(alphas, scores, 'bo-', lw=2)
    axes.set_title('BIC vs. Alpha DPGMM')
    axes.set_xlabel('Alpha')
    axes.set_ylabel('BIC score')
    return figure

Esempio n. 6

0

Mostra file

File: plot_clustering.py Progetto: rafaelvalle/music_segmentation

def plotClustering(fullpath, order=1, sr=4, cutoff=.1, n_singv=3,
                   feature='chroma', dim_red='SVD', round_to=0, normalize=1,
                   scale=1, length=4, clustering='KMEANS'):
    """Cluster sliding windows of several feature variants and plot them.

    The feature named `feature` is extracted for `fullpath` together with its
    beat times, and derived variants are built from it with `lpf` and
    `dim_red_fn`. For each of six variants, windows of `length` consecutive
    rows are flattened and clustered; the variant itself, the positions where
    the cluster assignment changes and the codebook (centroids) are drawn.
    For K-means the BIC, inertia and silhouette curves over K = 2..16 are
    drawn as well. The figure is saved twice using the input path without
    its extension as prefix (full size, then a smaller "_small" version) and
    is closed afterwards.

    Parameters
    ----------
    fullpath : str
        Path of the input file; split into name and extension.
    order, sr, cutoff
        Forwarded to `lpf`.
    n_singv
        Forwarded to `dim_red_fn`.
    feature : str
        Name of the feature to extract; also used in labels and file names.
    dim_red : str
        Name forwarded to `dim_red_fn`; also used in labels and file names.
    round_to, normalize, scale
        Forwarded to `extractFeature`.
    length : int
        Number of consecutive rows stacked into one clustering sample.
    clustering : str
        'KMEANS' or 'DPGMM'.

    Raises
    ------
    ValueError
        If `clustering` is neither 'KMEANS' nor 'DPGMM'.
    """
    # Fail early: an unknown method used to surface only as a NameError
    # after feature extraction and part of the plotting had already run.
    if clustering not in ('KMEANS', 'DPGMM'):
        raise ValueError(
            "clustering must be 'KMEANS' or 'DPGMM', got {!r}".format(
                clustering))

    feat = {}
    print ('Analyzing {} with feature {}, order {}, sr {}, cutoff {}, '
           'n_singv {}, scale {} normalize {}, round_to {}'.format(
               fullpath, feature, order, sr, cutoff, n_singv, scale, normalize,
               round_to))
    # extract filename, filepath and beat aligned feature
    filename, file_ext = os.path.splitext(fullpath)

    # extract filter and apply pre-processing
    feat[feature], beat_times = extractFeature(
        filename, file_ext, feature, scale, round_to, normalize,
        beat_sync=True, save=True)

    feat['LPF'] = lpf(feat[feature], cutoff, sr, order)
    feat[dim_red] = dim_red_fn(dim_red, feat[feature], n_singv)
    feat['{}(LPF)'.format(dim_red)] = dim_red_fn(
        dim_red, feat['LPF'], n_singv)
    feat['LPF({})'.format(dim_red)] = lpf(feat[dim_red], cutoff, sr, order)
    feat['{}-LPF'.format(feature)] = feat[feature] - feat['LPF']
    feat['LPF({}-LPF)'.format(feature)] = lpf(
        feat['{}-LPF'.format(feature)], cutoff, sr, order)
    feat['{}(LPF({}-LPF))'.format(dim_red, feature)] = dim_red_fn(dim_red,
        feat['LPF({}-LPF)'.format(feature)], n_singv)

    # create vars for plotting
    ts = np.arange(0, len(feat[feature]))
    step_size = max(1, int(len(ts) * .01))
    fig = plt.figure(figsize=(98, 64))
    fig.suptitle('feature {} order {}, cutoff {}, sr {}'.format(
        feature, order, cutoff, sr))

    # two grid rows per plotted variant (6 variants -> 12 rows)
    gs = mpl.gridspec.GridSpec(12, 4, width_ratios=[1, 1, 1, 1])
    i = 0
    print("\tPlot data and pre-processing")
    for name in (feature,
                 '{}-LPF'.format(feature),
                 '{}(LPF)'.format(dim_red),
                 'LPF({})'.format(dim_red),
                 'LPF({}-LPF)'.format(feature),
                 '{}(LPF({}-LPF))'.format(dim_red, feature)):
        data = feat[name]

        # stack `length` consecutive rows into one flat sample per position
        data_wide = np.array([data[m:m+length, :]
                              for m in xrange(len(data)-length)])
        data_wide = data_wide.reshape(
            data_wide.shape[0], data_wide.shape[1]*data_wide.shape[2])

        # build codebook using kmeans or DP-GMM
        if clustering == 'KMEANS':
            K_MIN, K_MAX = 2, 16
            KM = [KMeans(n_clusters=l, init='k-means++').fit(data_wide)
                  for l in xrange(K_MIN, K_MAX+1)]

            # compute scores to assess fit
            scores_bic = [computeBic(KM[x], data_wide) for x in xrange(len(KM))]
            scores_inertia = [KM[x].inertia_ for x in xrange(len(KM))]
            scores_silhouette = [silhouette_score(data_wide, KM[x].labels_,
                                                  metric='euclidean')
                                 for x in xrange(len(KM))]

            # get best clusters
            idx_best_bic = findElbow(np.dstack(
                (xrange(K_MIN, K_MAX+1), scores_bic))[0])
            idx_best_inertia = findElbow(np.dstack(
                (xrange(K_MIN, K_MAX+1), scores_inertia))[0])
            idx_best_silhouette = findElbow(np.dstack(
                (xrange(K_MIN, K_MAX+1), scores_silhouette))[0])
            idx_best = int(np.median(
                (idx_best_bic, idx_best_inertia, idx_best_silhouette))) + 1
            # the +1 could step past the last fitted model; stay in range
            idx_best = min(idx_best, len(KM) - 1)

            # get clusters and cluster allocations given best K
            k_best = idx_best + K_MIN
            centroids = KM[idx_best].cluster_centers_
            centroid_idx = KM[idx_best].labels_
        elif clustering == 'DPGMM':
            n_components = 12
            dpgmm = DPGMM(
                n_components=n_components, tol=1e-3, n_iter=32, alpha=1000,
                covariance_type='diag', verbose=True)
            dpgmm.fit(data_wide)

            # No model-selection scores are computed here: the elbow panels
            # below only apply to the K-means sweep. The former
            # silhouette_score call read `centroids` before it was assigned
            # and its result was discarded anyway.

            # get clusters and cluster allocations given best K
            k_best = dpgmm.means_.shape[0]
            centroids = dpgmm.means_
            centroid_idx = np.argmax(dpgmm.predict_proba(data_wide), axis=1)
        # plot data
        if data.shape[1] == 3:
            data = data.reshape(1, data.shape[0], data.shape[1])
        else:
            data = data.T

        ax = fig.add_subplot(gs[i, :])
        ax.set_title(name)
        ax.imshow(data,
                  interpolation='nearest',
                  origin='low',
                  aspect='auto',
                  cmap=plt.cm.Oranges)
        xlabels = ["{}:{}".format(int(x / 60), int(x % 60))
                   for x in beat_times[::step_size]]
        ax.set_xticks(ts[::step_size])
        ax.set_xticklabels(xlabels, rotation=60)
        ax.grid(False)

        # plot clustering on raw feature
        changes = np.hstack(([True], centroid_idx[:-1] != centroid_idx[1:]))
        # when two neighbouring positions are both change points, keep only
        # the later one
        for c in xrange(changes.shape[0]-1):
            if changes[c] and changes[c+1]:
                changes[c] = False
        ax_twin = ax.twiny()
        ax_twin.set_xlim(ax.get_xlim())
        ax_twin.set_xticks(np.argwhere(changes)[:, 0])
        ax_twin.set_xticklabels(centroid_idx[changes])
        ax_twin.grid(False)

        # plot codebook (centroids)
        ax = fig.add_subplot(gs[i+1, 0])
        ax.set_title(name)

        # `//` keeps the reshape sizes integral on every Python version
        if centroids.shape[1] == 3:
            centroids = centroids.reshape(
                1, centroids.shape[0], centroids.shape[1])
        elif centroids.shape[1] == n_singv * length:
            centroids = centroids.reshape(
                1, centroids.shape[0]*length, centroids.shape[1]//length)
        else:
            centroids = centroids.reshape(
                centroids.shape[0] * length,
                centroids.shape[1] // length).T
        ax.imshow(centroids,
                  interpolation='nearest',
                  origin='low',
                  aspect='auto',
                  cmap=plt.cm.Oranges)
        # one tick per centroid: each centroid spans `length` columns
        ax.set_xticks(xrange(0, centroids.shape[1], length))
        ax.set_xticklabels(xrange(k_best))
        ax.grid(False)

        # plot elbow curves; the scores, K range and elbow indices only
        # exist for the K-means sweep
        if clustering == 'KMEANS':
            curves = (('BIC', scores_bic, idx_best_bic),
                      ('INERTIA', scores_inertia, idx_best_inertia),
                      ('SILHOUETTE', scores_silhouette, idx_best_silhouette))
            for col, (label, scores, idx) in enumerate(curves, 1):
                ax = fig.add_subplot(gs[i+1, col])
                ax.set_title('{}, {} best K {}'.format(
                    name, label, idx+K_MIN))
                ax.plot(xrange(K_MIN, K_MAX+1), scores, 'b*-')
                ax.set_xlim((K_MIN, K_MAX+1))
                ax.set_xlabel('Number of clusters')
                ax.set_ylabel('Score')
                ax.grid(True)
                ax.axvline(idx+K_MIN, color='r')
        i += 2

    # save with large size
    plt.tight_layout()
    plt.savefig("{}_clustering_{}_{}_r_{}_n_{}_s_{}_l_{}_{}.png".format(
        filename, feature, cutoff, round_to, normalize, scale, length, dim_red))

    # save with smaller size
    fig.set_figwidth(36)
    fig.set_figheight(24)
    plt.tight_layout()
    plt.savefig("{}_clustering_{}_{}_r_{}_n_{}_s_{}_l_{}_{}_small.png".format(
        filename, feature, cutoff, round_to, normalize, scale, length, dim_red))

    plt.close(fig)

Esempio n. 7

0

Mostra file

File: dpgmm_test.py Progetto: lzamparo/SdA_reduce

# Fit a DPGMM on random samples of growing size and report AIC/BIC for each.
for chunks in np.arange(1, opts.size, step = 3):
  # Number of points to sample: total size of the first `chunks` chunks
  size = np.sum(chunk_sizes[:chunks])

  # Fit a Dirichlet process mixture of Gaussians using up to  ten components
  dpgmm = DPGMM(n_components=10, alpha=10.0, covariance_type='full')
  # Draw `size` rows of X_unlabeled in random order
  indices = np.arange(X_unlabeled.shape[0])
  np.random.shuffle(indices)
  X = X_unlabeled[indices[:size],]

  print("fitting a model with", size, "data points")
  with timeit():
    dpgmm.fit(X)
  print("Done!")
  print("AIC for this model & data: ", dpgmm.aic(X))
  print("BIC for this model & data: ", dpgmm.bic(X))
  Y_hat = dpgmm.predict(X)
  # Count the distinct labels that were assigned; the largest label index
  # (np.max) is not the number of components that received points.
  n_used = np.unique(Y_hat).size
  print ("Model assigned points to", n_used, "components")
  

# How can I best check this out? 
#color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm'])
#for i, (clf, title) in enumerate([(gmm, 'GMM'),
                                  #(dpgmm, 'Dirichlet Process GMM')]):
    #splot = plt.subplot(2, 1, 1 + i)
    #Y_ = clf.predict(X)
    #for i, (mean, covar, color) in enumerate(zip(
            #clf.means_, clf._get_covars(), color_iter)):
        #v, w = linalg.eigh(covar)
        #u = w[0] / linalg.norm(w[0])
        ## as the DP will not use every component it has access to

Esempio n. 8

0

Mostra file

def plotClustering(fullpath,
                   order=1,
                   sr=4,
                   cutoff=.1,
                   n_singv=3,
                   feature='chroma',
                   dim_red='SVD',
                   round_to=0,
                   normalize=1,
                   scale=1,
                   length=4,
                   clustering='KMEANS'):
    """Cluster sliding windows of several feature variants and plot them.

    The feature named `feature` is extracted for `fullpath` together with its
    beat times, and derived variants are built from it with `lpf` and
    `dim_red_fn`. For each of six variants, windows of `length` consecutive
    rows are flattened and clustered; the variant itself, the positions where
    the cluster assignment changes and the codebook (centroids) are drawn.
    For K-means the BIC, inertia and silhouette curves over K = 2..16 are
    drawn as well. The figure is saved twice using the input path without
    its extension as prefix (full size, then a smaller "_small" version) and
    is closed afterwards.

    Parameters
    ----------
    fullpath : str
        Path of the input file; split into name and extension.
    order, sr, cutoff
        Forwarded to `lpf`.
    n_singv
        Forwarded to `dim_red_fn`.
    feature : str
        Name of the feature to extract; also used in labels and file names.
    dim_red : str
        Name forwarded to `dim_red_fn`; also used in labels and file names.
    round_to, normalize, scale
        Forwarded to `extractFeature`.
    length : int
        Number of consecutive rows stacked into one clustering sample.
    clustering : str
        'KMEANS' or 'DPGMM'.

    Raises
    ------
    ValueError
        If `clustering` is neither 'KMEANS' nor 'DPGMM'.
    """
    # Fail early: an unknown method used to surface only as a NameError
    # after feature extraction and part of the plotting had already run.
    if clustering not in ('KMEANS', 'DPGMM'):
        raise ValueError(
            "clustering must be 'KMEANS' or 'DPGMM', got {!r}".format(
                clustering))

    feat = {}
    print(
        'Analyzing {} with feature {}, order {}, sr {}, cutoff {}, '
        'n_singv {}, scale {} normalize {}, round_to {}'.format(
            fullpath, feature, order, sr, cutoff, n_singv, scale, normalize,
            round_to))
    # extract filename, filepath and beat aligned feature
    filename, file_ext = os.path.splitext(fullpath)

    # extract filter and apply pre-processing
    feat[feature], beat_times = extractFeature(filename,
                                               file_ext,
                                               feature,
                                               scale,
                                               round_to,
                                               normalize,
                                               beat_sync=True,
                                               save=True)

    feat['LPF'] = lpf(feat[feature], cutoff, sr, order)
    feat[dim_red] = dim_red_fn(dim_red, feat[feature], n_singv)
    feat['{}(LPF)'.format(dim_red)] = dim_red_fn(dim_red, feat['LPF'], n_singv)
    feat['LPF({})'.format(dim_red)] = lpf(feat[dim_red], cutoff, sr, order)
    feat['{}-LPF'.format(feature)] = feat[feature] - feat['LPF']
    feat['LPF({}-LPF)'.format(feature)] = lpf(feat['{}-LPF'.format(feature)],
                                              cutoff, sr, order)
    feat['{}(LPF({}-LPF))'.format(dim_red, feature)] = dim_red_fn(
        dim_red, feat['LPF({}-LPF)'.format(feature)], n_singv)

    # create vars for plotting
    ts = np.arange(0, len(feat[feature]))
    step_size = max(1, int(len(ts) * .01))
    fig = plt.figure(figsize=(98, 64))
    fig.suptitle('feature {} order {}, cutoff {}, sr {}'.format(
        feature, order, cutoff, sr))

    # two grid rows per plotted variant (6 variants -> 12 rows)
    gs = mpl.gridspec.GridSpec(12, 4, width_ratios=[1, 1, 1, 1])
    i = 0
    print("\tPlot data and pre-processing")
    for name in (feature, '{}-LPF'.format(feature), '{}(LPF)'.format(dim_red),
                 'LPF({})'.format(dim_red), 'LPF({}-LPF)'.format(feature),
                 '{}(LPF({}-LPF))'.format(dim_red, feature)):
        data = feat[name]

        # stack `length` consecutive rows into one flat sample per position
        data_wide = np.array([
            data[m:m + length, :]
            for m in xrange(len(data) - length)
        ])
        data_wide = data_wide.reshape(data_wide.shape[0],
                                      data_wide.shape[1] * data_wide.shape[2])

        # build codebook using kmeans or DP-GMM
        if clustering == 'KMEANS':
            K_MIN, K_MAX = 2, 16
            KM = [
                KMeans(n_clusters=l, init='k-means++').fit(data_wide)
                for l in xrange(K_MIN, K_MAX + 1)
            ]

            # compute scores to assess fit
            scores_bic = [
                computeBic(KM[x], data_wide) for x in xrange(len(KM))
            ]
            scores_inertia = [KM[x].inertia_ for x in xrange(len(KM))]
            scores_silhouette = [
                silhouette_score(data_wide, KM[x].labels_, metric='euclidean')
                for x in xrange(len(KM))
            ]

            # get best clusters
            idx_best_bic = findElbow(
                np.dstack((xrange(K_MIN, K_MAX + 1), scores_bic))[0])
            idx_best_inertia = findElbow(
                np.dstack((xrange(K_MIN, K_MAX + 1), scores_inertia))[0])
            idx_best_silhouette = findElbow(
                np.dstack((xrange(K_MIN, K_MAX + 1), scores_silhouette))[0])
            idx_best = int(
                np.median(
                    (idx_best_bic, idx_best_inertia, idx_best_silhouette))) + 1
            # the +1 could step past the last fitted model; stay in range
            idx_best = min(idx_best, len(KM) - 1)

            # get clusters and cluster allocations given best K
            k_best = idx_best + K_MIN
            centroids = KM[idx_best].cluster_centers_
            centroid_idx = KM[idx_best].labels_
        elif clustering == 'DPGMM':
            n_components = 12
            dpgmm = DPGMM(n_components=n_components,
                          tol=1e-3,
                          n_iter=32,
                          alpha=1000,
                          covariance_type='diag',
                          verbose=True)
            dpgmm.fit(data_wide)

            # No model-selection scores are computed here: the elbow panels
            # below only apply to the K-means sweep. The former
            # silhouette_score call read `centroids` before it was assigned
            # and its result was discarded anyway.

            # get clusters and cluster allocations given best K
            k_best = dpgmm.means_.shape[0]
            centroids = dpgmm.means_
            centroid_idx = np.argmax(dpgmm.predict_proba(data_wide), axis=1)
        # plot data
        if data.shape[1] == 3:
            data = data.reshape(1, data.shape[0], data.shape[1])
        else:
            data = data.T

        ax = fig.add_subplot(gs[i, :])
        ax.set_title(name)
        ax.imshow(data,
                  interpolation='nearest',
                  origin='low',
                  aspect='auto',
                  cmap=plt.cm.Oranges)
        xlabels = [
            "{}:{}".format(int(x / 60), int(x % 60))
            for x in beat_times[::step_size]
        ]
        ax.set_xticks(ts[::step_size])
        ax.set_xticklabels(xlabels, rotation=60)
        ax.grid(False)

        # plot clustering on raw feature
        changes = np.hstack(([True], centroid_idx[:-1] != centroid_idx[1:]))
        # when two neighbouring positions are both change points, keep only
        # the later one
        for c in xrange(changes.shape[0] - 1):
            if changes[c] and changes[c + 1]:
                changes[c] = False
        ax_twin = ax.twiny()
        ax_twin.set_xlim(ax.get_xlim())
        ax_twin.set_xticks(np.argwhere(changes)[:, 0])
        ax_twin.set_xticklabels(centroid_idx[changes])
        ax_twin.grid(False)

        # plot codebook (centroids)
        ax = fig.add_subplot(gs[i + 1, 0])
        ax.set_title(name)

        # `//` keeps the reshape sizes integral on every Python version
        if centroids.shape[1] == 3:
            centroids = centroids.reshape(1, centroids.shape[0],
                                          centroids.shape[1])
        elif centroids.shape[1] == n_singv * length:
            centroids = centroids.reshape(1, centroids.shape[0] * length,
                                          centroids.shape[1] // length)
        else:
            centroids = centroids.reshape(centroids.shape[0] * length,
                                          centroids.shape[1] // length).T
        ax.imshow(centroids,
                  interpolation='nearest',
                  origin='low',
                  aspect='auto',
                  cmap=plt.cm.Oranges)
        # one tick per centroid: each centroid spans `length` columns
        ax.set_xticks(xrange(0, centroids.shape[1], length))
        ax.set_xticklabels(xrange(k_best))
        ax.grid(False)

        # plot elbow curves; the scores, K range and elbow indices only
        # exist for the K-means sweep
        if clustering == 'KMEANS':
            curves = (('BIC', scores_bic, idx_best_bic),
                      ('INERTIA', scores_inertia, idx_best_inertia),
                      ('SILHOUETTE', scores_silhouette, idx_best_silhouette))
            for col, (label, scores, idx) in enumerate(curves, 1):
                ax = fig.add_subplot(gs[i + 1, col])
                ax.set_title('{}, {} best K {}'.format(
                    name, label, idx + K_MIN))
                ax.plot(xrange(K_MIN, K_MAX + 1), scores, 'b*-')
                ax.set_xlim((K_MIN, K_MAX + 1))
                ax.set_xlabel('Number of clusters')
                ax.set_ylabel('Score')
                ax.grid(True)
                ax.axvline(idx + K_MIN, color='r')
        i += 2

    # save with large size
    plt.tight_layout()
    plt.savefig("{}_clustering_{}_{}_r_{}_n_{}_s_{}_l_{}_{}.png".format(
        filename, feature, cutoff, round_to, normalize, scale, length,
        dim_red))

    # save with smaller size
    fig.set_figwidth(36)
    fig.set_figheight(24)
    plt.tight_layout()
    plt.savefig("{}_clustering_{}_{}_r_{}_n_{}_s_{}_l_{}_{}_small.png".format(
        filename, feature, cutoff, round_to, normalize, scale, length,
        dim_red))

    plt.close(fig)