Python pca Beispiele, pytraj.pca Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: test_pca.py Projekt: josejames00/pytraj

    def test_pca_noref_nofit(self):
        '''PCA without a reference structure and without any fitting.

        from drroe: " Also, not fitting at all should be considered a legitimate option -
        you may want to include global rotational and translational motion in your eigenvectors."

        pytraj: pt.pca(traj, mask, n_vecs=2, fit=False)
        '''

        # cpptraj script used as the reference: covariance matrix,
        # diagonalization and projection, with no rms-fit step at all.
        cpptraj_script = '''
        parm data/tz2.parm7
        trajin data/tz2.nc

        matrix covar name MyMatrix !@H=
        createcrd CRD1
        run

        # Step three. Diagonalize matrix.
        runanalysis diagmatrix MyMatrix vecs 2 name MyEvecs

        # Step four. Project saved fit coordinates along eigenvectors 1 and 2
        crdaction CRD1 projection evecs MyEvecs !@H= out project.dat beg 1 end 2
        '''

        traj = pt.load("data/tz2.nc", "data/tz2.parm7")

        # run the cpptraj side (no reference involved)
        cpptraj_state = pt.load_cpptraj_state(cpptraj_script)
        cpptraj_state.run()

        heavy_atoms = '!@H='

        result_plain = pt.pca(traj, heavy_atoms, n_vecs=2, fit=False)
        result_with_ref = pt.pca(traj,
                                 heavy_atoms,
                                 n_vecs=2,
                                 fit=False,
                                 ref=3,
                                 ref_mask='@CA')

        # the last two datasets of the cpptraj state are compared with the
        # pytraj projections
        expected = cpptraj_state.data[-2:].values

        # compare magnitudes only (absolute values on both sides)
        aa_eq(np.abs(result_plain[0]), np.abs(expected), decimal=3)
        # called with fit=False: passing ref/ref_mask must give the same
        # projection as the no-fit cpptraj run
        aa_eq(np.abs(result_with_ref[0]), np.abs(expected), decimal=3)

Beispiel #2

0

Datei anzeigen

Datei: test_pca.py Projekt: hongbo-zhu-cn/pytraj

    def test_traj_on_disk_fit_to_given_reference(self):
        """Check that pca gives matching projections for an iterload-ed and a load-ed trajectory.

        Each trajectory is fitted to its own first frame; the iterload-ed
        trajectory is then analysed a second time and must reproduce its
        first result.
        """
        traj_on_disk = pt.iterload(fn('tz2.nc'), fn('tz2.parm7'))
        traj_on_mem = pt.load(fn('tz2.nc'), fn('tz2.parm7'))
        ref0 = traj_on_disk[0]
        ref1 = traj_on_mem[0]

        # options shared by every pca call in this test
        pca_options = dict(mask='@CA', n_vecs=2, fit=True)

        proj_disk, _ = pt.pca(traj_on_disk, ref=ref0, **pca_options)
        proj_mem, _ = pt.pca(traj_on_mem, ref=ref1, **pca_options)
        aa_eq(np.abs(proj_disk), np.abs(proj_mem))

        # repeat on the same iterload-ed trajectory, see
        # https://github.com/Amber-MD/pytraj/issues/1452
        proj_disk_again, _ = pt.pca(traj_on_disk, ref=ref0, **pca_options)
        aa_eq(np.abs(proj_disk), np.abs(proj_disk_again))

Beispiel #3

0

Datei anzeigen

    def test_pca_with_ref_with_different_mask_from_matrix(self):
        '''PCA with a reference: fit on !@H= (ref_mask), build the covariance matrix and projection on *.

        from drroe: "You should be able to supply separate masks for fitting and creating the covariance matrix
        It is common enough for example to only perform rms-fitting on heavy atoms while still wanting all atoms in eigenvectors."

        pytraj: pt.pca(traj, mask=mask_matrix, n_vecs=2, ref=ref, ref_mask=mask_ref)
        '''

        # cpptraj reference script: fit on heavy atoms, matrix on all atoms
        cpptraj_script = '''
        parm data/tz2.parm7
        trajin data/tz2.nc

        reference data/tz2.rst7

        # only perform fitting on heavy atoms
        rms reference !@H=

        # all atoms
        matrix covar name MyMatrix *
        createcrd CRD1
        run

        # Step three. Diagonalize matrix.
        runanalysis diagmatrix MyMatrix vecs 2 name MyEvecs

        # Step four. Project saved fit coordinates along eigenvectors 1 and 2
        # all atoms
        crdaction CRD1 projection evecs MyEvecs * out project.dat beg 1 end 2
        '''

        traj = pt.load("data/tz2.nc", "data/tz2.parm7")
        ref = pt.load('data/tz2.rst7', traj.top)

        cpptraj_state = pt.load_cpptraj_state(cpptraj_script)
        cpptraj_state.run()

        pca_result = pt.pca(traj, mask='*', n_vecs=2, ref=ref, ref_mask='!@H=')
        projection = pca_result[0]

        # last two cpptraj datasets versus the pytraj projection,
        # compared by absolute value
        expected = cpptraj_state.data[-2:].values
        aa_eq(np.abs(projection), np.abs(expected), decimal=3)

Beispiel #4

0

Datei anzeigen

Datei: test_pca.py Projekt: josejames00/pytraj

    def test_pca_with_ref_with_different_mask_from_matrix(self):
        '''Reference given; !@H= is the fitting mask, * is the mask for covariance matrix and projection.

        from drroe: "You should be able to supply separate masks for fitting and creating the covariance matrix
        It is common enough for example to only perform rms-fitting on heavy atoms while still wanting all atoms in eigenvectors."

        pytraj: pt.pca(traj, mask=mask_matrix, n_vecs=2, ref=ref, ref_mask=mask_ref)
        '''

        fit_mask = '!@H='
        matrix_mask = '*'

        # equivalent cpptraj workflow used as the expected result
        reference_script = '''
        parm data/tz2.parm7
        trajin data/tz2.nc

        reference data/tz2.rst7

        # only perform fitting on heavy atoms
        rms reference !@H=

        # all atoms
        matrix covar name MyMatrix *
        createcrd CRD1
        run

        # Step three. Diagonalize matrix.
        runanalysis diagmatrix MyMatrix vecs 2 name MyEvecs

        # Step four. Project saved fit coordinates along eigenvectors 1 and 2
        # all atoms
        crdaction CRD1 projection evecs MyEvecs * out project.dat beg 1 end 2
        '''

        traj = pt.load("data/tz2.nc", "data/tz2.parm7")
        ref = pt.load('data/tz2.rst7', traj.top)

        state = pt.load_cpptraj_state(reference_script)
        state.run()

        pytraj_out = pt.pca(traj,
                            mask=matrix_mask,
                            n_vecs=2,
                            ref=ref,
                            ref_mask=fit_mask)
        cpptraj_out = state.data[-2:].values

        # absolute values are compared on both sides
        aa_eq(np.abs(pytraj_out[0]), np.abs(cpptraj_out), decimal=3)

Beispiel #5

0

Datei anzeigen

    def test_pca_noref(self):
        '''test_pca_noref: PCA without a user-supplied reference.

        pytraj: pt.pca(traj, mask, n_vecs=2)
        '''

        # cpptraj script template; the topology and trajectory paths are
        # filled in from tz2_top / tz2_trajin below
        script_template = '''
        # Step one. Generate average structure.
        # RMS-Fit to first frame to remove global translation/rotation.
        parm {tz2_top}
        trajin {tz2_trajin}

        rms first !@H=
        average crdset AVG
        run

        # Step two. RMS-Fit to average structure. Calculate covariance matrix.
        # Save the fit coordinates.
        rms ref AVG !@H=
        matrix covar name MyMatrix !@H=
        createcrd CRD1
        run

        # Step three. Diagonalize matrix.
        runanalysis diagmatrix MyMatrix vecs 2 name MyEvecs

        # Step four. Project saved fit coordinates along eigenvectors 1 and 2
        crdaction CRD1 projection evecs MyEvecs !@H= out project.dat beg 1 end 2
        '''
        cpptraj_script = script_template.format(tz2_top=tz2_top,
                                                tz2_trajin=tz2_trajin)

        traj = pt.load(fn('tz2.nc'), fn('tz2.parm7'))

        # cpptraj side of the comparison
        cpptraj_state = pt.load_cpptraj_state(cpptraj_script)
        cpptraj_state.run()

        pca_result = pt.pca(traj, '!@H=', n_vecs=2)
        projection = pca_result[0]
        expected = cpptraj_state.data[-2:].values

        # compare by absolute value
        aa_eq(np.abs(projection), np.abs(expected), decimal=3)

Beispiel #6

0

Datei anzeigen

Datei: test_pca.py Projekt: josejames00/pytraj

    def test_pca_noref(self):
        '''PCA called without a reference, checked against the cpptraj workflow.

        pytraj: pt.pca(traj, mask, n_vecs=2)
        '''

        heavy_atoms = '!@H='

        # four-step cpptraj workflow that provides the expected projection
        cpptraj_script = '''
        # Step one. Generate average structure.
        # RMS-Fit to first frame to remove global translation/rotation.
        parm data/tz2.parm7
        trajin data/tz2.nc

        rms first !@H=
        average crdset AVG
        run

        # Step two. RMS-Fit to average structure. Calculate covariance matrix.
        # Save the fit coordinates.
        rms ref AVG !@H=
        matrix covar name MyMatrix !@H=
        createcrd CRD1
        run

        # Step three. Diagonalize matrix.
        runanalysis diagmatrix MyMatrix vecs 2 name MyEvecs

        # Step four. Project saved fit coordinates along eigenvectors 1 and 2
        crdaction CRD1 projection evecs MyEvecs !@H= out project.dat beg 1 end 2
        '''

        traj = pt.load("data/tz2.nc", "data/tz2.parm7")

        # no reference is passed on either side
        reference_run = pt.load_cpptraj_state(cpptraj_script)
        reference_run.run()

        pytraj_out = pt.pca(traj, heavy_atoms, n_vecs=2)
        cpptraj_out = reference_run.data[-2:].values

        # absolute values on both sides
        aa_eq(np.abs(pytraj_out[0]), np.abs(cpptraj_out), decimal=3)

Beispiel #7

0

Datei anzeigen

    def test_pca_with_ref(self):
        '''PCA with a user-supplied reference structure.

        from drroe: "If the user provides their own reference structure, do not create an average structure"

        pytraj: pt.pca(traj, mask, n_vecs=2, ref=ref)
        '''

        # cpptraj script fitting to the given reference (no average structure)
        cpptraj_script = '''
        parm data/tz2.parm7
        trajin data/tz2.nc
        reference data/tz2.rst7

        rms reference !@H=

        matrix covar name MyMatrix !@H=
        createcrd CRD1
        run

        # Step three. Diagonalize matrix.
        runanalysis diagmatrix MyMatrix vecs 2 name MyEvecs

        # Step four. Project saved fit coordinates along eigenvectors 1 and 2
        crdaction CRD1 projection evecs MyEvecs !@H= out project.dat beg 1 end 2
        '''

        traj = pt.load("data/tz2.nc", "data/tz2.parm7")
        ref = pt.load('data/tz2.rst7', traj.top)

        cpptraj_state = pt.load_cpptraj_state(cpptraj_script)
        cpptraj_state.run()

        pca_result = pt.pca(traj, '!@H=', n_vecs=2, ref=ref)
        projection = pca_result[0]
        expected = cpptraj_state.data[-2:].values

        # compare by absolute value
        aa_eq(np.abs(projection), np.abs(expected), decimal=3)

Beispiel #8

0

Datei anzeigen

Datei: test_pca.py Projekt: josejames00/pytraj

    def test_pca_with_ref(self):
        '''Reference structure supplied by the caller; compared with cpptraj.

        from drroe: "If the user provides their own reference structure, do not create an average structure"

        pytraj: pt.pca(traj, mask, n_vecs=2, ref=ref)
        '''

        heavy_atoms = '!@H='

        # expected values come from this cpptraj run, fitted to tz2.rst7
        reference_script = '''
        parm data/tz2.parm7
        trajin data/tz2.nc
        reference data/tz2.rst7

        rms reference !@H=

        matrix covar name MyMatrix !@H=
        createcrd CRD1
        run

        # Step three. Diagonalize matrix.
        runanalysis diagmatrix MyMatrix vecs 2 name MyEvecs

        # Step four. Project saved fit coordinates along eigenvectors 1 and 2
        crdaction CRD1 projection evecs MyEvecs !@H= out project.dat beg 1 end 2
        '''

        traj = pt.load("data/tz2.nc", "data/tz2.parm7")
        ref = pt.load('data/tz2.rst7', traj.top)

        state = pt.load_cpptraj_state(reference_script)
        state.run()

        pytraj_out = pt.pca(traj, heavy_atoms, n_vecs=2, ref=ref)
        cpptraj_out = state.data[-2:].values

        # absolute values are compared on both sides
        aa_eq(np.abs(pytraj_out[0]), np.abs(cpptraj_out), decimal=3)

Beispiel #9

0

Datei anzeigen

 def test_pca_raise(self):
     """pt.pca on the iterload-ed tz2 trajectory is expected to raise ValueError."""
     lazy_traj = pt.iterload('data/tz2.nc', 'data/tz2.parm7')
     with self.assertRaises(ValueError):
         pt.pca(lazy_traj, n_vecs=2, mask='@CA')

Beispiel #10

0

Datei anzeigen

Datei: test_pca.py Projekt: josejames00/pytraj

 def test_pca_raise(self):
     """Expect ValueError when pt.pca is called on the iterload-ed tz2 trajectory."""
     traj_on_disk = pt.iterload('data/tz2.nc', 'data/tz2.parm7')
     # callable form: assertRaises invokes pt.pca with the given arguments
     self.assertRaises(ValueError, pt.pca, traj_on_disk, n_vecs=2, mask='@CA')

Beispiel #11

0

Datei anzeigen

Datei: test_pca.py Projekt: hongbo-zhu-cn/pytraj

 def test_raises(self):
     """pt.pca given a single frame (instead of a trajectory) must raise ValueError."""
     lazy_traj = pt.iterload(fn('tz2.nc'), fn('tz2.parm7'))
     first_frame = lazy_traj[0]
     # callable form of pytest.raises: calls pt.pca(first_frame, mask='@CA')
     pytest.raises(ValueError, pt.pca, first_frame, mask='@CA')

Beispiel #12

0

Datei anzeigen

Datei: example_pca.py Projekt: lambdalisue/pytraj

import pytraj as pt

# Load the tz2 test trajectory together with its topology.
traj = pt.load('../tests/data/tz2.nc', '../tests/data/tz2.parm7')

# PCA on the atoms selected by '@CA', keeping three eigenvectors.
data = pt.pca(traj, mask='@CA', n_vecs=3)

# Show the pca docstring first, then a banner, then the result.
print(pt.pca.__doc__)

for banner_line in ('##################', 'output'):
    print(banner_line)
print(data)

Beispiel #13

0

Datei anzeigen

Datei: make_pca.py Projekt: wutobias/collection

def _subspace_overlap(vecs_a, vecs_b, n_vecs):
    """Sum the squared cosines between the first n_vecs rows of two vector sets, divided by n_vecs."""
    total = 0
    for pc_i in range(n_vecs):
        for pc_j in range(n_vecs):
            total += (np.dot(vecs_a[pc_i], vecs_b[pc_j]) /
                      (np.linalg.norm(vecs_a[pc_i]) *
                       np.linalg.norm(vecs_b[pc_j])))**2
    return total / n_vecs


def main():
    """Run a PCA on a trajectory, write data files and plots, optionally compare with a second trajectory.

    All settings are read from the module-level ``args`` object (attributes
    used here: traj, parm, riter, evec, mask, mask_proj, prefix, start,
    traj_proj, parm_proj, start_proj, zscore).

    Files written (all prefixed or suffixed with ``args.prefix``):
      * ``<prefix>_sele.dat``       index and atom of every selected atom
      * ``<prefix>_eigen_vec.dat``  the first three eigenvectors as columns
      * ``<prefix>_pcadata.dat``    transposed projection data
      * PNG scatter plots for every pair of principal components, an
        eigenvector-component plot and a variance plot

    When both ``args.traj_proj`` and ``args.parm_proj`` are given, the second
    trajectory is projected onto the eigenvectors of the first one, extra
    plots are written and the subspace overlap is printed. When
    ``args.zscore`` is also set, a z-score of that overlap against
    ``args.riter`` shuffled trajectories is printed.

    The function runs on Python 2 and Python 3: print is called with a single
    pre-formatted string and floor division is used for sizes.
    """
    traj = pt.load(args.traj, args.parm)

    rnd_iter = args.riter
    rnd_vecs = args.evec

    # the projection mask falls back to the PCA mask
    if args.mask_proj is None:
        args.mask_proj = args.mask

    print("Mask     :  %s" % (args.mask,))
    print("Mask proj:  %s" % (args.mask_proj,))

    if rnd_vecs < 1:
        # default number of vectors: 3 * (number of selected atoms) - 6
        rnd_vecs = 3 * traj[args.mask].xyz.shape[1] - 6

    # all index pairs (i, j) with i < j
    pairs = [(n_i, n_j)
             for n_i in range(rnd_vecs)
             for n_j in range(n_i + 1, rnd_vecs)]

    sele = pt.select(traj.top, args.mask)
    sele_txt = "".join("%d %s\n" % (s_i, traj.top.atomlist[s])
                       for s_i, s in enumerate(sele))

    # context manager closes the file even if the write fails
    with open("%s_sele.dat" % args.prefix, "w") as sele_file:
        sele_file.write(sele_txt)

    n_vecs = rnd_vecs
    pca_data, eigen = pt.pca(traj[args.start:], mask=args.mask, n_vecs=n_vecs)
    eigen_val = eigen[0]
    eigen_vec = eigen[1]
    np.savetxt("%s_eigen_vec.dat" % args.prefix,
               np.c_[eigen_vec[0], eigen_vec[1], eigen_vec[2]])
    np.savetxt("%s_pcadata.dat" % args.prefix, pca_data.T)

    # Plot PCA (raw strings keep the LaTeX "\AA" without an invalid escape)
    for pc_i, pc_j in pairs:

        plt.scatter(pca_data[pc_i],
                    pca_data[pc_j],
                    marker='o',
                    c="r",
                    alpha=0.5)
        plt.xlabel(r"PC%d [$\AA$]" % pc_i)
        plt.ylabel(r"PC%d [$\AA$]" % pc_j)
        plt.title("PCA PC%d vs. PC%d" % (pc_i, pc_j))
        plt.savefig("PC%d-vs-PC%s_%s.png" % (pc_i, pc_j, args.prefix))
        plt.close('all')

    # Plot atom contribution
    for pc_i in range(3):

        l = eigen_vec[pc_i].shape[0]
        # floor division: reshape needs an integer, also on Python 3
        n_atoms = l // 3
        c = np.linalg.norm(eigen_vec[pc_i].reshape((n_atoms, 3)), axis=1)
        a = np.arange(n_atoms) + 1
        plt.plot(a, c, label="PC%s" % pc_i, alpha=0.5)
        plt.legend()

    plt.xlim(0, l // 3 + 1)
    plt.xlabel("Atom ID")
    plt.ylabel("Eigenvector components")
    plt.title("Eigenvectors")
    plt.savefig("Eigenvectors_%s.png" % args.prefix)
    plt.close('all')

    total_var = np.sum(eigen_val)

    plt.scatter(range(1, n_vecs + 1), (np.cumsum(eigen_val) / total_var) * 100,
                label="Cumulative Variance")
    plt.plot(range(1, n_vecs + 1), (eigen_val / total_var) * 100,
             "g--",
             label="Variance")
    plt.legend()
    plt.xlabel("Eigenvector #")
    plt.ylabel("Variance explained [%]")
    plt.title("Variance explained by PC Eigenvectors")
    plt.savefig("Variance_%s.png" % args.prefix, dpi=1000)
    plt.close('all')

    if args.traj_proj is not None and args.parm_proj is not None:

        traj_proj = pt.load(args.traj_proj, args.parm_proj)
        # Return value is discarded; presumably called for its superposition
        # side effect on traj_proj -- TODO confirm.
        pt.rmsd(traj_proj, mask=args.mask_proj)
        projection_data = pt.projection(traj_proj[args.start_proj:],
                                        args.mask_proj,
                                        eigenvalues=eigen_val,
                                        eigenvectors=eigen_vec,
                                        scalar_type='covar')
        np.savetxt("%s_pcadata_proj.dat" % args.prefix, projection_data.T)

        for pc_i, pc_j in pairs:

            # original data (red) together with the projection (green)
            plt.scatter(pca_data[pc_i],
                        pca_data[pc_j],
                        marker='o',
                        c="r",
                        alpha=0.5)
            plt.scatter(projection_data[pc_i],
                        projection_data[pc_j],
                        marker='o',
                        c="g",
                        alpha=0.5)
            plt.xlabel(r"PC%d [$\AA$]" % pc_i)
            plt.ylabel(r"PC%d [$\AA$]" % pc_j)
            plt.title("PCA PC%d vs. PC%d with projection" % (pc_i, pc_j))
            plt.savefig("PC%d-vs-PC%s_%s_projection.png" %
                        (pc_i, pc_j, args.prefix))
            plt.close('all')

            # projection only
            plt.scatter(projection_data[pc_i],
                        projection_data[pc_j],
                        marker='o',
                        c="g",
                        alpha=0.5)
            plt.xlabel(r"PC%d [$\AA$]" % pc_i)
            plt.ylabel(r"PC%d [$\AA$]" % pc_j)
            plt.title("PCA PC%d vs. PC%d only projection" % (pc_i, pc_j))
            plt.savefig("PC%d-vs-PC%d_%s_only_projection.png" %
                        (pc_i, pc_j, args.prefix))
            plt.close('all')

        # PCA of the second trajectory; only its eigenvectors are needed
        _, eigen_2 = pt.pca(traj_proj[args.start_proj:],
                            mask=args.mask_proj,
                            n_vecs=n_vecs)
        eigen_vec_2 = eigen_2[1]

        overlap = _subspace_overlap(eigen_vec, eigen_vec_2, rnd_vecs)
        print("Vector space spanned by traj-1 overlap with traj-2 subspace (%d vecs): %6.3f" % (
            rnd_vecs, overlap))

        if args.zscore is not None:

            overlap_rnd = np.zeros(rnd_iter)

            for r in range(rnd_iter):

                ### make random traj
                # NOTE(review): t1_rnd is the same object as traj (no copy),
                # and frame f is overwritten with the shuffled coordinates of
                # frame args.start + f. Left as in the original -- confirm
                # whether a copy and index args.start + f were intended.
                t1_rnd = traj
                for f in range(t1_rnd.xyz[args.start:].shape[0]):

                    idxs = np.arange(t1_rnd.xyz[args.start + f].shape[0])
                    perm = np.random.permutation(idxs)
                    t1_rnd[f] = t1_rnd.xyz[args.start + f][perm]

                _, eigen_t1_rnd = pt.pca(t1_rnd[args.start:],
                                         mask=args.mask,
                                         n_vecs=n_vecs)

                eigen_vec_1_rnd = eigen_t1_rnd[1]

                overlap_rnd[r] = _subspace_overlap(eigen_vec, eigen_vec_1_rnd,
                                                   n_vecs)

            z_score = (overlap - np.mean(overlap_rnd)) / np.std(overlap_rnd)

            print("Z-score                                                              : %6.3f" % z_score)

Beispiel #14

0

Datei anzeigen

def main():
    """Cluster trajectory frames with KMeans for 2..19 clusters and write silhouette plots.

    All settings are read from the module-level ``args`` object (attributes
    used here: traj, parm, stride, pca, mask, start, prefix).

    * ``args.pca == "no"``: the flattened Cartesian coordinates of the
      masked atoms are used as features, one row per frame.
    * otherwise: a PCA is run first, the first three eigenvectors are saved
      to ``<prefix>_eigen_vec.dat``, PC scatter plots, an eigenvector plot
      and a variance plot are written, and the PCA projections are used as
      features.

    For every cluster count a silhouette plot is saved as
    ``<prefix>_silhouette_n=<n>.png``. When ``args.pca == "yes"`` the figure
    also contains a scatter plot of the first two features with the cluster
    centers.
    """
    X = pt.load(args.traj, args.parm, stride=args.stride)

    if args.pca == "no":

        X = X[args.mask].xyz[args.start:]
        shape = X.shape
        # one row per frame, x/y/z of all selected atoms flattened
        X = X.reshape((shape[0], shape[1] * 3))

    else:

        n_vecs = 3 * X[args.mask].xyz[args.start:].shape[1] - 6
        pca_data, eigen = pt.pca(X[args.start:], n_vecs=n_vecs, mask=args.mask)
        eigen_val = eigen[0]
        eigen_vec = eigen[1]
        np.savetxt("%s_eigen_vec.dat" % args.prefix,
                   np.c_[eigen_vec[0], eigen_vec[1], eigen_vec[2]])

        # index pairs (i, j) with i < j among the first three components
        pairs = [(n_i, n_j) for n_i in range(3) for n_j in range(n_i + 1, 3)]

        # Plot PCA (raw strings keep the LaTeX "\AA" without an invalid escape)
        for pc_i, pc_j in pairs:

            plt.scatter(pca_data[pc_i],
                        pca_data[pc_j],
                        marker='o',
                        c="r",
                        alpha=0.5)
            plt.xlabel(r"PC%d [$\AA$]" % pc_i)
            plt.ylabel(r"PC%d [$\AA$]" % pc_j)
            plt.title("PCA PC%d vs. PC%d" % (pc_i, pc_j))
            plt.savefig("PC%d-vs-PC%s_%s.png" % (pc_i, pc_j, args.prefix),
                        dpi=1000)
            plt.close('all')

        # Plot atom contribution
        for pc_i in range(3):

            l = eigen_vec[pc_i].shape[0]
            # floor division: reshape needs an integer size on Python 3
            n_atoms = l // 3
            c = np.linalg.norm(eigen_vec[pc_i].reshape((n_atoms, 3)), axis=1)
            a = np.arange(n_atoms) + 1
            plt.plot(a, c, label="PC%s" % pc_i, alpha=0.5)
            plt.legend()

        plt.xlim(0, l // 3 + 1)
        plt.xlabel("Atom ID")
        plt.ylabel("Eigenvector components")
        plt.title("Eigenvectors")
        plt.savefig("Eigenvectors_%s.png" % args.prefix, dpi=1000)
        plt.close('all')

        total_var = np.sum(eigen_val)

        plt.scatter(range(1, n_vecs + 1),
                    (np.cumsum(eigen_val) / total_var) * 100,
                    label="Cumulative Variance")
        plt.plot(range(1, n_vecs + 1), (eigen_val / total_var) * 100,
                 "g--",
                 label="Eigenvector Variance")
        plt.legend()
        plt.xlabel("Eigenvector #")
        plt.ylabel("Fractional of Variance explained [%]")
        plt.title("Explained total variance explained by PCA")
        plt.savefig("Variance_%s.png" % args.prefix, dpi=1000)
        plt.close('all')

        # pca_data is indexed as pca_data[component] in the plots above, so
        # frames run along the second axis. KMeans and the X[:, 0] / X[:, 1]
        # scatter below need one row per frame, hence the transpose.
        X = pca_data.T

    range_n_clusters = range(2, 20)

    # "spectral" was renamed to "nipy_spectral" in newer matplotlib; use
    # whichever colormap this installation provides.
    colormap = getattr(cm, "nipy_spectral", None) or cm.spectral

    for n_clusters in range_n_clusters:
        if args.pca == "yes":
            # silhouette plot plus a scatter plot of the clustered data
            fig, (ax1, ax2) = plt.subplots(1, 2)
            fig.set_size_inches(18, 7)
        else:
            # subplots(1, 1) returns a single Axes, not a sequence
            fig, ax1 = plt.subplots(1, 1)

        # The 1st subplot is the silhouette plot
        # The silhouette coefficient can range from -1, 1 but in this example all
        # lie within [-0.1, 1]
        ax1.set_xlim([-0.1, 1])
        # The (n_clusters+1)*10 is for inserting blank space between silhouette
        # plots of individual clusters, to demarcate them clearly.
        ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])

        # Initialize the clusterer with n_clusters value and a random generator
        # seed of rand; the seed is printed so a run can be reproduced.
        rand = np.random.randint(99999)
        print("Random seed is %d." % rand)
        clusterer = KMeans(n_clusters=n_clusters, random_state=rand)
        cluster_labels = clusterer.fit_predict(X)

        # The silhouette_score gives the average value for all the samples.
        # This gives a perspective into the density and separation of the formed
        # clusters
        silhouette_avg = silhouette_score(X, cluster_labels)
        print("For n_clusters =", n_clusters,
              "The average silhouette_score is :", silhouette_avg)

        # Compute the silhouette scores for each sample
        sample_silhouette_values = silhouette_samples(X, cluster_labels)

        y_lower = 10
        for i in range(n_clusters):
            # Aggregate the silhouette scores for samples belonging to
            # cluster i, and sort them
            ith_cluster_silhouette_values = \
                sample_silhouette_values[cluster_labels == i]

            ith_cluster_silhouette_values.sort()

            size_cluster_i = ith_cluster_silhouette_values.shape[0]
            y_upper = y_lower + size_cluster_i

            color = colormap(float(i) / n_clusters)
            ax1.fill_betweenx(np.arange(y_lower, y_upper),
                              0,
                              ith_cluster_silhouette_values,
                              facecolor=color,
                              edgecolor=color,
                              alpha=0.7)

            # Label the silhouette plots with their cluster numbers at the middle
            ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

            # Compute the new y_lower for next plot
            y_lower = y_upper + 10  # 10 for the 0 samples

        ax1.set_title("The silhouette plot for the various clusters.")
        ax1.set_xlabel("The silhouette coefficient values")
        ax1.set_ylabel("Cluster label")

        # The vertical line for average silhouette score of all the values
        ax1.axvline(x=silhouette_avg, color="red", linestyle="--")

        ax1.set_yticks([])  # Clear the yaxis labels / ticks
        ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])

        if args.pca == "yes":

            # 2nd Plot showing the actual clusters formed
            colors = colormap(cluster_labels.astype(float) / n_clusters)
            ax2.scatter(X[:, 0],
                        X[:, 1],
                        marker='.',
                        s=30,
                        lw=0,
                        alpha=0.7,
                        c=colors)

            # Labeling the clusters
            centers = clusterer.cluster_centers_
            # Draw white circles at cluster centers
            ax2.scatter(centers[:, 0],
                        centers[:, 1],
                        marker='o',
                        c="white",
                        alpha=1,
                        s=200)

            # write each cluster index on top of its center
            for i, c in enumerate(centers):
                ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1, s=50)

            ax2.set_title("The visualization of the clustered data.")
            ax2.set_xlabel("Feature space for the 1st feature")
            ax2.set_ylabel("Feature space for the 2nd feature")

        plt.suptitle(
            ("Silhouette analysis for KMeans clustering on sample data "
             "with n_clusters = %d" % n_clusters),
            fontsize=14,
            fontweight='bold')

        plt.savefig("%s_silhouette_n=%d.png" % (args.prefix, n_clusters),
                    dpi=1000)
        plt.close('all')