예제 #1
0
def calc_average(trajectory, cutoff):
    """Build an AtomGroup whose coordinates are per-atom "network averages".

    The trajectory is wrapped in a ``prody.PDBEnsemble`` and iteratively
    superposed.  The effective cutoff is the mean ensemble RMSD multiplied by
    *cutoff*.  For every atom, the coordinates of that atom across all
    coordinate sets are passed to ``get_network_average`` together with the
    effective cutoff, and the result becomes that atom's output position.

    Parameters
    ----------
    trajectory :
        Object providing ``getCoords``, ``getNames``, ``getResnums`` and
        ``getResnames`` and accepted by ``prody.PDBEnsemble``.
    cutoff :
        Multiplier applied to the mean RMSD of the superposed ensemble.

    Returns
    -------
    prody.AtomGroup
        New atom group titled 'Cartesian average coordinates' carrying the
        names, residue numbers and residue names of *trajectory*.
    """
    output = prody.AtomGroup('Cartesian average coordinates')
    output_coords = trajectory.getCoords()
    # Coordinates are set first and only then the per-atom labels, as in the
    # original ordering; they are overwritten with the averages at the end.
    output.setCoords(trajectory.getCoords())
    output.setNames(trajectory.getNames())
    output.setResnums(trajectory.getResnums())
    output.setResnames(trajectory.getResnames())

    ensemble = prody.PDBEnsemble(trajectory)
    ensemble.iterpose()

    # Keep the caller's multiplier untouched; derive the absolute cutoff.
    scaled_cutoff = numpy.mean(ensemble.getRMSDs()) * cutoff

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Using cutoff of {}'.format(scaled_cutoff))

    input_coords = ensemble.getCoordsets()

    n_atoms = output_coords.shape[0]
    for i in range(n_atoms):
        # NOTE(review): the message says "residue" but the loop runs over
        # rows of the coordinate array; wording kept for log compatibility.
        print('Computing residue {} of {}:'.format(i + 1, n_atoms))
        output_coords[i, :] = get_network_average(input_coords[:, i, :],
                                                  scaled_cutoff)

    output.setCoords(output_coords)
    return output
예제 #2
0
 def to_prody(self, res):
     """Wrap this object's atoms in a prody ``Residue``.

     Coordinates come from stacking ``self.bb + self.sc``; atom names come
     from ``self.atom_names``.  Every atom gets the same residue name
     (looked up from ``self.name``) and the residue number of *res*.

     Parameters
     ----------
     res :
         Object exposing ``getResnum()``; only its residue number is used.

     Returns
     -------
     prody.Residue
     """
     import prody as pr

     n_atoms = len(self.atom_names)

     atom_group = pr.AtomGroup()
     atom_group.setCoords(torch.stack(self.bb + self.sc).numpy())
     atom_group.setNames(self.atom_names)

     three_letter = ONE_TO_THREE_LETTER_MAP[VOCAB._int2char[self.name]]
     atom_group.setResnames([three_letter] * n_atoms)
     atom_group.setResnums([res.getResnum()] * n_atoms)

     # NOTE(review): the index list is all zeros rather than 0..n_atoms-1;
     # kept exactly as written -- confirm this is what Residue expects.
     atom_indices = [0] * n_atoms
     return pr.Residue(atom_group, atom_indices, None)
    def test_get_ligand_with_no_ligands(self):
        """get_ligand returns None when handed a freshly built AtomGroup."""
        cruncher = file_manipulation.PDBCruncer()

        ligand = cruncher.get_ligand("dummy_protein_id",
                                     "dummy_filename",
                                     None,
                                     prody.AtomGroup(),
                                     None)

        self.assertIsNone(ligand)
예제 #4
0
def convert_chimera_molecule_to_prody(molecule):
    """
    Build a prody atom group that mirrors a chimera molecule.

    Parameters
    ----------
    molecule : chimera.Molecule

    Returns
    -------
    prody_molecule : prody.AtomGroup
        Carries coordinates, elements, serials, names, residue numbers,
        residue names, chain ids, B-factors, masses, title and bonds copied
        from ``molecule``.  Residue numbers are shifted so that the smallest
        residue position in the molecule becomes 0.
    chimera2prody : dict
        Maps each chimera atom's ``serialNumber`` to that atom's position in
        ``molecule.atoms`` (the row used for it in the prody arrays).

    Raises
    ------
    TypeError
        If any attribute lookup on ``molecule`` or its atoms fails.
    """
    prody_molecule = prody.AtomGroup()
    try:
        first_position = min(res.id.position for res in molecule.residues)
        atoms = list(molecule.atoms)

        chimera2prody = {atom.serialNumber: row
                         for row, atom in enumerate(atoms)}

        prody_molecule.setCoords([tuple(atom.coord()) for atom in atoms])
        prody_molecule.setElements([atom.element.name for atom in atoms])
        prody_molecule.setSerials([atom.serialNumber for atom in atoms])
        prody_molecule.setNames([atom.name for atom in atoms])
        prody_molecule.setResnums(
            [atom.residue.id.position - first_position for atom in atoms])
        prody_molecule.setResnames([atom.residue.type for atom in atoms])
        prody_molecule.setChids([atom.residue.id.chainId for atom in atoms])
        prody_molecule.setBetas([atom.bfactor for atom in atoms])
        prody_molecule.setMasses([atom.element.mass for atom in atoms])
        prody_molecule.setTitle(str(molecule.name))

        # Bonds are expressed as pairs of prody row indices.
        bond_pairs = []
        for bond in molecule.bonds:
            begin, end = bond.atoms[0], bond.atoms[1]
            bond_pairs.append((chimera2prody[begin.serialNumber],
                               chimera2prody[end.serialNumber]))
        prody_molecule.setBonds(bond_pairs)

    except AttributeError:
        raise TypeError(
            'Attribute not found. Molecule must be a chimera.Molecule')

    return prody_molecule, chimera2prody
예제 #5
0
def calc_average(trajectory):
    """Return an AtomGroup holding the mean coordinates of *trajectory*.

    The trajectory is wrapped in a ``prody.PDBEnsemble`` and iteratively
    superposed; the coordinate sets are then averaged along axis 0.

    Parameters
    ----------
    trajectory :
        Object providing ``getCoords``, ``getNames``, ``getResnums`` and
        ``getResnames`` and accepted by ``prody.PDBEnsemble``.

    Returns
    -------
    prody.AtomGroup
        New atom group titled 'Cartesian average coordinates' with the
        names, residue numbers and residue names of *trajectory*.
    """
    averaged = prody.AtomGroup('Cartesian average coordinates')
    # Coordinates go in first, then the per-atom labels; the coordinates are
    # replaced by the ensemble mean below.
    averaged.setCoords(trajectory.getCoords())
    averaged.setNames(trajectory.getNames())
    averaged.setResnums(trajectory.getResnums())
    averaged.setResnames(trajectory.getResnames())

    superposed = prody.PDBEnsemble(trajectory)
    superposed.iterpose()

    averaged.setCoords(numpy.mean(superposed.getCoordsets(), axis=0))
    return averaged
예제 #6
0
def get_closest_frame(trajectory, average_structure):
    """Return the trajectory frame with the lowest RMSD after superposition.

    The trajectory is wrapped in a ``prody.PDBEnsemble``, *average_structure*
    is handed to the ensemble through ``setCoords`` (presumably making it the
    superposition reference -- confirm against ProDy), the ensemble is
    superposed, and the coordinate set with the smallest RMSD is copied into
    a new atom group.

    Parameters
    ----------
    trajectory :
        Object providing ``getCoords``, ``getNames``, ``getResnums`` and
        ``getResnames`` and accepted by ``prody.PDBEnsemble``.
    average_structure :
        Value passed unchanged to ``PDBEnsemble.setCoords``.

    Returns
    -------
    prody.AtomGroup
        New atom group (title kept as 'Cartesian average coordinates') with
        the labels of *trajectory* and the coordinates of the selected frame.
    """
    closest = prody.AtomGroup('Cartesian average coordinates')
    closest.setCoords(trajectory.getCoords())
    closest.setNames(trajectory.getNames())
    closest.setResnums(trajectory.getResnums())
    closest.setResnames(trajectory.getResnames())

    frames = prody.PDBEnsemble(trajectory)
    frames.setCoords(average_structure)
    frames.superpose()

    best_frame = numpy.argmin(frames.getRMSDs())
    closest.setCoords(frames.getCoordsets(best_frame))
    return closest
예제 #7
0
파일: prody_pca.py 프로젝트: SHZ66/ProDy
def _limit_blas_threads(nproc):
    """Return a threadpoolctl limiter restricting BLAS to *nproc* threads."""
    try:
        from threadpoolctl import threadpool_limits
    except ImportError:
        raise ImportError(
            'Please install threadpoolctl to control threads')
    return threadpool_limits(limits=nproc, user_api="blas")


def _run_dcd_pca(pca, dcd, nmodes, aligned, quiet):
    """Fill *pca* from *dcd* and return the ensemble used for projections."""
    if len(dcd) > 1000:
        # Long trajectories: build the covariance from the DCD object itself
        # instead of slicing it into an in-memory ensemble.
        pca.buildCovariance(dcd, aligned=aligned, quiet=quiet)
        pca.calcModes(nmodes)
        return dcd

    ensemble = dcd[:]
    if not aligned:
        ensemble.iterpose(quiet=quiet)
    pca.performSVD(ensemble)
    return ensemble


def _parse_projection_indices(spec):
    """Turn a spec such as ``'1 2-4 5,6'`` into zero-based mode indices.

    Each whitespace-separated item yields one entry: ``'a-b'`` a list of the
    inclusive range, ``'a,b,...'`` a list of the given modes, and a bare
    number a single int.  Items that are not valid integers are skipped.
    ``None`` or an empty spec yields an empty list.
    """
    indices = []
    for item in (spec or '').split():
        try:
            if '-' in item:
                bounds = item.split('-')
                if len(bounds) == 2:
                    indices.append(
                        list(range(int(bounds[0]) - 1, int(bounds[1]))))
            elif ',' in item:
                indices.append([int(i) - 1 for i in item.split(',')])
            else:
                indices.append(int(item) - 1)
        except ValueError:
            # Best effort: malformed items are ignored, not fatal.
            continue
    return indices


def prody_pca(coords, **kwargs):
    """Perform PCA calculations for PDB or DCD format *coords* file.

    Options missing from *kwargs* are filled in from ``DEFAULTS``.  The keys
    read are: ``outdir``, ``prefix``, ``nmodes``, ``select``, ``quiet``,
    ``altloc``, ``psf``, ``pdb``, ``nproc``, ``aligned``, ``extend``,
    ``outnpz``, ``outscipion``, ``outall``, ``outeig``, ``outcov``,
    ``outcc``, ``outhm``, ``outsf``, ``outproj``, ``numdelim``, ``numext``,
    ``numformat``, ``figall``, ``figcc``, ``figsf``, ``figproj``,
    ``figformat``, ``figwidth``, ``figheight`` and ``figdpi``.

    An NMD file is always written to ``outdir``; the other numerical and
    graphical outputs are written when the corresponding option is set.

    Raises
    ------
    IOError
        If ``outdir`` is not an existing directory.
    ValueError
        If the DCD file has fewer than two frames, the PDB file has fewer
        than two models, the selection matches no atoms, or the number of
        selected atoms does not match the DCD file.
    ImportError
        If ``nproc`` is set but threadpoolctl is not installed.
    """

    for key in DEFAULTS:
        if key not in kwargs:
            kwargs[key] = DEFAULTS[key]

    from numbers import Integral
    from os.path import isdir, splitext, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import prody
    LOGGER = prody.LOGGER

    prefix = kwargs.get('prefix')
    nmodes = kwargs.get('nmodes')
    selstr = kwargs.get('select')
    quiet = kwargs.pop('quiet', False)
    altloc = kwargs.get('altloc')
    aligned = kwargs.get('aligned')
    nproc = kwargs.get('nproc')

    ext = splitext(coords)[1].lower()
    if ext == '.gz':
        ext = splitext(coords[:-3])[1].lower()

    if ext == '.dcd':
        pdb = kwargs.get('psf') or kwargs.get('pdb')
        if pdb:
            if splitext(pdb)[1].lower() == '.psf':
                pdb = prody.parsePSF(pdb)
            else:
                # The original passed an undefined name here (NameError);
                # forward the ``altloc`` option only when it is set.
                pdbargs = {} if altloc is None else {'altloc': altloc}
                pdb = prody.parsePDB(pdb, **pdbargs)
        dcd = prody.DCDFile(coords)
        if prefix in ('_pca', '_eda'):
            prefix = dcd.getTitle() + prefix

        if len(dcd) < 2:
            raise ValueError('DCD file must have multiple frames')
        if pdb:
            select = pdb.select(selstr)
            if select is None:
                # Same error as the PDB branch instead of a TypeError or
                # AttributeError further down.
                raise ValueError('selection {0} do not match any atoms'.format(
                    repr(selstr)))
            if pdb.numAtoms() == dcd.numAtoms():
                dcd.setAtoms(select)
                LOGGER.info('{0} atoms are selected for calculations.'.format(
                    len(select)))
            else:
                if select.numAtoms() != dcd.numAtoms():
                    raise ValueError('number of selected atoms ({0}) does '
                                     'not match number of atoms in the DCD '
                                     'file ({1})'.format(
                                         select.numAtoms(), dcd.numAtoms()))
                if pdb.numCoordsets():
                    dcd.setCoords(select.getCoords())

        else:
            select = prody.AtomGroup()
            select.setCoords(dcd.getCoords())
        pca = prody.PCA(dcd.getTitle())

        if nproc:
            with _limit_blas_threads(nproc):
                ensemble = _run_dcd_pca(pca, dcd, nmodes, aligned, quiet)
                nmodes = pca.numModes()
        else:
            ensemble = _run_dcd_pca(pca, dcd, nmodes, aligned, quiet)
            nmodes = pca.numModes()

    else:
        pdb = prody.parsePDB(coords)
        if pdb.numCoordsets() < 2:
            raise ValueError('PDB file must contain multiple models')

        if prefix in ('_pca', '_eda'):
            prefix = pdb.getTitle() + prefix

        select = pdb.select(selstr)
        # Validate before calling len(); the original logged first and so
        # raised TypeError on an empty selection.
        if select is None:
            raise ValueError('selection {0} do not match any atoms'.format(
                repr(selstr)))
        LOGGER.info('{0} atoms are selected for calculations.'.format(
            len(select)))
        LOGGER.info('{0} atoms will be used for PCA calculations.'.format(
            len(select)))
        ensemble = prody.Ensemble(select)
        pca = prody.PCA(pdb.getTitle())
        if not aligned:
            ensemble.iterpose()

        if nproc:
            with _limit_blas_threads(nproc):
                pca.performSVD(ensemble)
        else:
            pca.performSVD(ensemble)

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(pca, join(outdir, prefix))

    if kwargs.get('outscipion'):
        prody.writeScipionModes(outdir, pca)

    prody.writeNMD(join(outdir, prefix + '.nmd'), pca[:nmodes], select)

    extend = kwargs.get('extend')
    if extend:
        if pdb:
            if extend == 'all':
                extended = prody.extendModel(pca[:nmodes], select, pdb)
            else:
                extended = prody.extendModel(pca[:nmodes], select,
                                             select | pdb.bb)
            prody.writeNMD(
                join(outdir, prefix + '_extended_' + extend + '.nmd'),
                *extended)
        else:
            prody.LOGGER.warn('Model could not be extended, provide a PDB or '
                              'PSF file.')
    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    numext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    if outall or kwargs.get('outeig'):
        prody.writeArray(join(outdir, prefix + '_evectors' + numext),
                         pca.getArray(),
                         delimiter=delim,
                         format=numformat)
        prody.writeArray(join(outdir, prefix + '_evalues' + numext),
                         pca.getEigvals(),
                         delimiter=delim,
                         format=numformat)
    if outall or kwargs.get('outcov'):
        prody.writeArray(join(outdir, prefix + '_covariance' + numext),
                         pca.getCovariance(),
                         delimiter=delim,
                         format=numformat)
    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        crosscorr = prody.calcCrossCorr(pca)
        if outall or kwargs.get('outcc'):
            prody.writeArray(join(outdir,
                                  prefix + '_cross-correlations' + numext),
                             crosscorr,
                             delimiter=delim,
                             format=numformat)
        if outall or kwargs.get('outhm'):
            resnums = select.getResnums()
            hmargs = {} if resnums is None else {'resnums': resnums}
            prody.writeHeatmap(join(outdir, prefix + '_cross-correlations.hm'),
                               crosscorr,
                               xlabel='Residue',
                               ylabel='Residue',
                               title=pca.getTitle() + ' cross-correlations',
                               **hmargs)

    if outall or kwargs.get('outsf'):
        prody.writeArray(join(outdir, prefix + '_sqfluct' + numext),
                         prody.calcSqFlucts(pca),
                         delimiter=delim,
                         format=numformat)
    if outall or kwargs.get('outproj'):
        prody.writeArray(join(outdir, prefix + '_proj' + numext),
                         prody.calcProjection(ensemble, pca),
                         delimiter=delim,
                         format=numformat)

    figall = kwargs.get('figall')
    figcc = kwargs.get('figcc')
    figsf = kwargs.get('figsf')
    figproj = kwargs.get('figproj')

    if figall or figcc or figsf or figproj:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            figformat = kwargs.get('figformat').lower()
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            if figall or figcc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(pca)
                plt.savefig(join(outdir, prefix + '_cc.' + figformat),
                            dpi=dpi,
                            format=figformat)
                plt.close('all')
            if figall or figsf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(pca)
                plt.savefig(join(outdir, prefix + '_sf.' + figformat),
                            dpi=dpi,
                            format=figformat)
                plt.close('all')
            if figall or figproj:
                # With ``figall`` alone there is no projection spec; the
                # helper then yields no indices instead of crashing.
                for index in _parse_projection_indices(figproj):
                    plt.figure(figsize=(width, height))
                    prody.showProjection(ensemble, pca[index])
                    if isinstance(index, Integral):
                        index = [index]
                    labels = [str(i + 1) for i in index]
                    plt.savefig(join(
                        outdir,
                        prefix + '_proj_' + '_'.join(labels) + '.'
                        + figformat),
                                dpi=dpi,
                                format=figformat)
                    plt.close('all')