Python PCA 예제들, prody.PCA Python 예제들

예제 #1

0

파일 보기

파일: calculatePCA4PELE.py 프로젝트: nostrumbiodiscovery/pele_platform

    def calcPCA(self, ensemble, logger):
        """Run a PCA on *ensemble* and export its leading modes to an NMD file.

        A ``prody.PCA`` model named after the ensemble title is created, its
        covariance matrix is built from *ensemble* and the modes are
        calculated.  Up to the first ten modes are then written, together
        with ``self.selection_ref_structure``, to
        ``<ensemble title>_pca_modes.nmd``.  If ``self.vmd`` equals ``True``
        the written file is also opened through ``prody.viewNMDinVMD``.

        ensemble: ProDy ensemble with the structure information
        logger: logger-like object; only its ``info`` method is called

        return: tuple ``(pca, outputname)`` -- the ``prody.PCA`` object and
                the name of the NMD file that was written
        """
        logger.info("Calculate PCA")

        title = ensemble.getTitle()
        model = prody.PCA(title)
        model.buildCovariance(ensemble)

        logger.info("PCA")
        model.calcModes()
        logger.info(repr(model))

        nmd_name = title + "_pca_modes.nmd"
        leading_modes = model[:10]
        prody.writeNMD(nmd_name, leading_modes, self.selection_ref_structure)

        # Explicit comparison kept on purpose: only values equal to True
        # trigger the viewer.
        open_in_vmd = self.vmd == True
        if open_in_vmd:
            prody.viewNMDinVMD(nmd_name)

        logger.info(f"PCA is saved in: {nmd_name}")
        return model, nmd_name

예제 #2

0

파일 보기

def calcPCA(filenameEnsemble):
    """Build a PCA model from the structure file *filenameEnsemble*.

    The file is parsed with ``subset='ca'``, its coordinate sets are loaded
    into an ensemble named ``'ensemble'`` (using the parsed structure's
    active coordinates as reference), the ensemble is iteratively
    superposed, and a PCA titled ``'Ubiquitin'`` is computed from it.

    NOTE(review): ``dy`` is presumably an alias for the ProDy package
    defined elsewhere in the file -- confirm.

    Returns the PCA object after covariance and mode calculation.
    """
    parsed = dy.parsePDB(filenameEnsemble, subset='ca')

    conformations = dy.Ensemble('ensemble')
    conformations.setCoords(parsed.getCoords())
    conformations.addCoordset(parsed.getCoordsets())
    conformations.iterpose()

    model = dy.PCA('Ubiquitin')
    model.buildCovariance(conformations)
    model.calcModes()
    return model

예제 #3

0

파일 보기

파일: TestPCAMetric.py 프로젝트: ztypaker/pyProCT

    def test_eigenvalues(self):
        """The largest PCAMetric eigenvalue must match ProDy's first eigenvalue."""
        # Value under test: covariance matrix and top eigenvalue from PCAMetric.
        metric_cov = PCAMetric.create_covariance_matrix(testPCAMetric.coordsets)
        metric_top_eigval = PCAMetric.calculate_biggest_eigenvalue(metric_cov)

        # Reference: a ProDy PCA restricted to a single mode.
        reference = prody.PCA('pcametric_pca')
        reference.buildCovariance(testPCAMetric.ensemble)
        reference.calcModes(n_modes=1)
        prody_top_eigval = reference.getEigvals()[0]

        self.assertAlmostEqual(prody_top_eigval, metric_top_eigval, places=10)

예제 #4

0

파일 보기

파일: TestPCAMetric.py 프로젝트: ztypaker/pyProCT

    def test_covariance_matrix_vs_prody(self):
        """PCAMetric's covariance matrix must agree with the one ProDy builds."""
        # Matrix under test.
        metric_cov = PCAMetric.create_covariance_matrix(testPCAMetric.coordsets)

        # Reference matrix, read from the PCA object's private ``_cov``
        # attribute after ProDy has built it.
        reference = prody.PCA('pcametric_pca')
        reference.buildCovariance(testPCAMetric.ensemble)
        reference_cov = reference._cov

        # Agreement is required to 10 decimals.
        numpy.testing.assert_almost_equal(metric_cov, reference_cov, decimal=10)

예제 #5

0

파일 보기

def get_pca_fluctuations(ensemble, limit=3):
    """
    Compute squared fluctuations from the leading PCA modes of an ensemble.

    A PCA is built from the covariance of *ensemble*, its modes are
    calculated, and the squared fluctuations of the first *limit* modes are
    returned.

    NOTE(review): ``pd`` here is used as the ProDy namespace (``PCA``,
    ``calcSqFlucts``), not pandas -- confirm against the file's imports.

    Parameters
    ----------
    ensemble
        pd.PDBEnsemble object
    limit
        number of PCA modes to consider (upper bound of the mode slice)

    Returns
    -------
    array of squared fluctuations per aligned residue
    """
    model = pd.PCA()
    model.buildCovariance(ensemble)
    model.calcModes()

    leading_modes = model[:limit]
    return pd.calcSqFlucts(leading_modes)

예제 #6

0

파일 보기

파일: prody_pca.py 프로젝트: SHZ66/ProDy

def prody_pca(coords, **kwargs):
    """Perform PCA calculations for PDB or DCD format *coords* file.

    Options missing from *kwargs* are filled in from ``DEFAULTS``.  The file
    type is taken from the extension of *coords* (a trailing ``.gz`` is
    ignored): ``.dcd`` selects the trajectory branch, anything else is
    parsed as a multi-model PDB file.

    Options read from *kwargs*:

    * ``outdir`` -- existing directory that receives all output files
    * ``prefix`` -- output file prefix; the values ``'_pca'`` and ``'_eda'``
      are prepended with the title of the input
    * ``nmodes`` -- number of modes (passed to ``calcModes`` and used to
      slice the model for NMD output)
    * ``select`` -- atom selection string
    * ``quiet``, ``aligned`` -- forwarded to covariance building and
      superposition
    * ``altloc`` -- forwarded to ``parsePDB`` for the structure that
      accompanies a DCD file, when it is not ``None``
    * ``psf`` / ``pdb`` -- structure file accompanying a DCD trajectory
    * ``nproc`` -- if truthy, BLAS threads are limited with ``threadpoolctl``
    * ``outnpz``, ``outscipion``, ``extend``, ``outall``, ``outeig``,
      ``outcov``, ``outcc``, ``outhm``, ``outsf``, ``outproj`` -- select the
      numerical outputs; ``numdelim``, ``numext``, ``numformat`` control
      how arrays are written
    * ``figall``, ``figcc``, ``figsf``, ``figproj`` -- select the figures;
      ``figformat``, ``figwidth``, ``figheight``, ``figdpi`` control how
      they are saved.  ``figproj`` is a whitespace separated list of
      1-based mode specifications such as ``"1"``, ``"1,2"`` or ``"1-3"``.

    Raises:
        IOError: ``outdir`` is not an existing directory.
        ValueError: the DCD file has fewer than two frames, the PDB file has
            fewer than two models, the selection matches no atoms, or the
            selected atom count does not match the DCD file.
        ImportError: ``nproc`` is set but ``threadpoolctl`` is unavailable.
    """

    for key in DEFAULTS:
        kwargs.setdefault(key, DEFAULTS[key])

    from os.path import isdir, splitext, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import prody
    LOGGER = prody.LOGGER

    prefix = kwargs.get('prefix')
    nmodes = kwargs.get('nmodes')
    selstr = kwargs.get('select')
    quiet = kwargs.pop('quiet', False)
    altloc = kwargs.get('altloc')

    # Look through a trailing ".gz" to find the real file type.
    ext = splitext(coords)[1].lower()
    if ext == '.gz':
        ext = splitext(coords[:-3])[1].lower()

    if ext == '.dcd':
        pdb = kwargs.get('psf') or kwargs.get('pdb')
        if pdb:
            if splitext(pdb)[1].lower() == '.psf':
                pdb = prody.parsePSF(pdb)
            elif altloc is None:
                # No explicit choice: let parsePDB apply its own default.
                pdb = prody.parsePDB(pdb)
            else:
                # Previously passed the undefined name ``altlocs``.
                pdb = prody.parsePDB(pdb, altloc=altloc)
        dcd = prody.DCDFile(coords)
        if prefix == '_pca' or prefix == '_eda':
            prefix = dcd.getTitle() + prefix

        if len(dcd) < 2:
            raise ValueError('DCD file must have multiple frames')
        if pdb:
            select = pdb.select(selstr)
            if select is None:
                # Fail with a clear message instead of a TypeError or
                # AttributeError further down.
                raise ValueError('selection {0} do not match any atoms'.format(
                    repr(selstr)))
            if pdb.numAtoms() == dcd.numAtoms():
                dcd.setAtoms(select)
                LOGGER.info('{0} atoms are selected for calculations.'.format(
                    len(select)))
            else:
                if select.numAtoms() != dcd.numAtoms():
                    raise ValueError('number of selected atoms ({0}) does '
                                     'not match number of atoms in the DCD '
                                     'file ({1})'.format(
                                         select.numAtoms(), dcd.numAtoms()))
                if pdb.numCoordsets():
                    dcd.setCoords(select.getCoords())

        else:
            # No structure given: build a bare atom group from the
            # trajectory coordinates so NMD output still has atoms.
            select = prody.AtomGroup()
            select.setCoords(dcd.getCoords())
        pca = prody.PCA(dcd.getTitle())

        nproc = kwargs.get('nproc')
        if nproc:
            try:
                from threadpoolctl import threadpool_limits
            except ImportError:
                raise ImportError(
                    'Please install threadpoolctl to control threads')

            with threadpool_limits(limits=nproc, user_api="blas"):
                if len(dcd) > 1000:
                    # Long trajectory: build the covariance matrix from the
                    # DCD file object itself.
                    pca.buildCovariance(dcd,
                                        aligned=kwargs.get('aligned'),
                                        quiet=quiet)
                    pca.calcModes(nmodes)
                    ensemble = dcd
                else:
                    # Short trajectory: load all frames and use SVD.
                    ensemble = dcd[:]
                    if not kwargs.get('aligned'):
                        ensemble.iterpose(quiet=quiet)
                    pca.performSVD(ensemble)
                nmodes = pca.numModes()
        else:
            if len(dcd) > 1000:
                pca.buildCovariance(dcd,
                                    aligned=kwargs.get('aligned'),
                                    quiet=quiet)
                pca.calcModes(nmodes)
                ensemble = dcd
            else:
                ensemble = dcd[:]
                if not kwargs.get('aligned'):
                    ensemble.iterpose(quiet=quiet)
                pca.performSVD(ensemble)
            nmodes = pca.numModes()

    else:
        pdb = prody.parsePDB(coords)
        if pdb.numCoordsets() < 2:
            raise ValueError('PDB file must contain multiple models')

        if prefix == '_pca' or prefix == '_eda':
            prefix = pdb.getTitle() + prefix

        select = pdb.select(selstr)
        # Validate before len(select); the check used to come after the
        # first log call and could never produce its message.
        if select is None:
            raise ValueError('selection {0} do not match any atoms'.format(
                repr(selstr)))
        LOGGER.info('{0} atoms are selected for calculations.'.format(
            len(select)))
        LOGGER.info('{0} atoms will be used for PCA calculations.'.format(
            len(select)))
        ensemble = prody.Ensemble(select)
        pca = prody.PCA(pdb.getTitle())
        if not kwargs.get('aligned'):
            ensemble.iterpose()

        nproc = kwargs.get('nproc')
        if nproc:
            try:
                from threadpoolctl import threadpool_limits
            except ImportError:
                raise ImportError(
                    'Please install threadpoolctl to control threads')

            with threadpool_limits(limits=nproc, user_api="blas"):
                pca.performSVD(ensemble)
        else:
            pca.performSVD(ensemble)

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(pca, join(outdir, prefix))

    if kwargs.get('outscipion'):
        prody.writeScipionModes(outdir, pca)

    prody.writeNMD(join(outdir, prefix + '.nmd'), pca[:nmodes], select)

    extend = kwargs.get('extend')
    if extend:
        if pdb:
            if extend == 'all':
                extended = prody.extendModel(pca[:nmodes], select, pdb)
            else:
                extended = prody.extendModel(pca[:nmodes], select,
                                             select | pdb.bb)
            prody.writeNMD(
                join(outdir, prefix + '_extended_' + extend + '.nmd'),
                *extended)
        else:
            prody.LOGGER.warn('Model could not be extended, provide a PDB or '
                              'PSF file.')

    # Distinct names for the numeric-output settings so they neither shadow
    # the ``format`` builtin nor reuse the input-extension variable.
    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    numext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    if outall or kwargs.get('outeig'):
        prody.writeArray(join(outdir, prefix + '_evectors' + numext),
                         pca.getArray(),
                         delimiter=delim,
                         format=numformat)
        prody.writeArray(join(outdir, prefix + '_evalues' + numext),
                         pca.getEigvals(),
                         delimiter=delim,
                         format=numformat)
    if outall or kwargs.get('outcov'):
        prody.writeArray(join(outdir, prefix + '_covariance' + numext),
                         pca.getCovariance(),
                         delimiter=delim,
                         format=numformat)
    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        # Computed once and shared by the array and heatmap outputs.
        cross_corr = prody.calcCrossCorr(pca)
        if outall or kwargs.get('outcc'):
            prody.writeArray(join(outdir,
                                  prefix + '_cross-correlations' + numext),
                             cross_corr,
                             delimiter=delim,
                             format=numformat)
        if outall or kwargs.get('outhm'):
            resnums = select.getResnums()
            hmargs = {} if resnums is None else {'resnums': resnums}
            prody.writeHeatmap(join(outdir, prefix + '_cross-correlations.hm'),
                               cross_corr,
                               xlabel='Residue',
                               ylabel='Residue',
                               title=pca.getTitle() + ' cross-correlations',
                               **hmargs)

    if outall or kwargs.get('outsf'):
        prody.writeArray(join(outdir, prefix + '_sqfluct' + numext),
                         prody.calcSqFlucts(pca),
                         delimiter=delim,
                         format=numformat)
    if outall or kwargs.get('outproj'):
        prody.writeArray(join(outdir, prefix + '_proj' + numext),
                         prody.calcProjection(ensemble, pca),
                         delimiter=delim,
                         format=numformat)

    figall = kwargs.get('figall')
    figcc = kwargs.get('figcc')
    figsf = kwargs.get('figsf')
    figproj = kwargs.get('figproj')

    if figall or figcc or figsf or figproj:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            figformat = kwargs.get('figformat').lower()
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            if figall or figcc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(pca)
                plt.savefig(join(outdir, prefix + '_cc.' + figformat),
                            dpi=dpi,
                            format=figformat)
                plt.close('all')
            if figall or figsf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(pca)
                plt.savefig(join(outdir, prefix + '_sf.' + figformat),
                            dpi=dpi,
                            format=figformat)
                plt.close('all')
            # Projections need an explicit subspace specification; with
            # only ``figall`` set there is nothing to parse (this used to
            # call ``.split()`` on None).
            if figproj:
                indices = []
                for item in figproj.split():
                    try:
                        if '-' in item:
                            # "a-b": inclusive 1-based range of modes.
                            bounds = item.split('-')
                            if len(bounds) != 2:
                                raise ValueError(item)
                            indices.append(
                                list(range(int(bounds[0]) - 1,
                                           int(bounds[1]))))
                        elif ',' in item:
                            # "a,b": explicit list of 1-based modes.
                            indices.append(
                                [int(i) - 1 for i in item.split(',')])
                        else:
                            indices.append(int(item) - 1)
                    except ValueError:
                        # Skip malformed specifications, but say so rather
                        # than swallowing every possible exception.
                        LOGGER.warn('Projection specification {0} could not '
                                    'be parsed and is skipped.'.format(
                                        repr(item)))
                for index in indices:
                    plt.figure(figsize=(width, height))
                    prody.showProjection(ensemble, pca[index])
                    # Entries are either a single int or a list of ints.
                    modes = index if isinstance(index, list) else [index]
                    labels = [str(i + 1) for i in modes]
                    plt.savefig(join(
                        outdir,
                        prefix + '_proj_' + '_'.join(labels) + '.' +
                        figformat),
                                dpi=dpi,
                                format=figformat)
                    plt.close('all')

예제 #7

0

파일 보기

    os.mkdir(output_dir)
  for pdb_FN, conf in zip(new_pdb_FNs,ensemble):
    trans = conf.getTransformation()
    chain_coords = prody.parsePDB(pdb_FN)
    trans.apply(chain_coords)
    outFN = os.path.join(output_dir,os.path.basename(pdb_FN))
    prody.writePDB(outFN, chain_coords)

# NOTE: this script uses Python 2 print statements.
# Report how many conformations the ensemble built above contains.
print 'There are %d structures in the ensemble\n'%len(ensemble)

print "\n*** Principal Components Analysis ***"
# File used to store the PCA model between runs (relative to the current
# working directory; os.path.join with a single argument returns it as is).
pca_FN = os.path.join('prody.pca.npz')
if os.path.exists(pca_FN):
  # Reuse the previously saved model instead of recomputing it.
  pca = prody.loadModel(pca_FN)
else:
  pca = prody.PCA()
  pca.buildCovariance(ensemble) # Build covariance matrix
  pca.calcModes() # Calculate modes
  # pca_FN[:-8] strips the 8-character '.pca.npz' suffix, leaving 'prody';
  # presumably saveModel appends that suffix again -- confirm against the
  # ProDy documentation.
  prody.saveModel(pca, filename=pca_FN[:-8])

# Make sure the 'figures' directory exists.
if not os.path.isdir('figures'):
  os.makedirs('figures')

# Imported here, right before the plotting code that follows.
import matplotlib.pyplot as plt

if not os.path.isfile('rmsd.png'):
  rmsd = prody.calcRMSD(ensemble)
  plt.clf()
  plt.plot(rmsd);
  plt.xlabel('Conformation index');
  plt.ylabel('RMSD (A)');