Ejemplo n.º 1
0
    def score(self, multi=False, details=None, **kwargs):
        """Reset the ensembles, optionally drop perturbed sites, then score.

        Both ensembles first get their reference coordinates reset from the
        stored original atoms. When ``kwargs["exclude_sites"]`` is truthy,
        the stored atoms and every coordinate set are reduced using the
        sites listed in ``kwargs["this_score_idxs"]``, the two ensembles are
        rebuilt from what is left, and ``self.ref_cds`` is cleared. This
        reduction rebinds ``self.ref_original``, ``self.this_original``,
        ``self.ref_structures`` and ``self.structures``, so it persists
        after the call.

        Args:
            multi: selects which private scorer is called (see the note at
                the end of the body).
            details: accepted but not used by this method.
            **kwargs: must contain ``"exclude_sites"``, and
                ``"this_score_idxs"`` when exclusion is requested. The whole
                dict is passed to the scorer as its ``kwargs`` argument.

        Returns:
            Whatever the selected private scorer returns.

        Raises:
            KeyError: if ``"exclude_sites"`` is missing from ``kwargs``.
        """
        # Point both ensembles back at the stored reference atoms.
        self.ref_structures.setCoords(self.ref_original)
        self.structures.setCoords(self.this_original)

        if kwargs["exclude_sites"]:
            resindices = self.ref_original.getResindices()

            # Residue-index values that differ from the first element of
            # every entry in this_score_idxs; used below as positions to keep.
            atom_keep = resindices[np.logical_and.reduce(
                [resindices != site[0] for site in kwargs["this_score_idxs"]])]

            self.ref_original = self.ref_original[atom_keep]
            self.this_original = self.this_original[atom_keep]

            trimmed_ref = prody.Ensemble()
            trimmed_this = prody.Ensemble()
            trimmed_ref.setAtoms(self.ref_original)
            trimmed_this.setAtoms(self.this_original)

            # Same filter, but the site values are read from the transposed
            # array form of this_score_idxs and applied to the coordinate
            # axis of the stored coordinate sets.
            site_values = np.array(kwargs["this_score_idxs"]).T[0, 0, :]
            coord_keep = resindices[np.logical_and.reduce(
                [resindices != site for site in site_values])]

            trimmed_ref.addCoordset(
                self.ref_structures.getCoordsets()[:, coord_keep, :])
            trimmed_this.addCoordset(
                self.structures.getCoordsets()[:, coord_keep, :])

            self.ref_structures = trimmed_ref
            self.structures = trimmed_this

            self.ref_cds = None

        # NOTE(review): multi=True dispatches to _score_single and
        # multi=False to _score_multi. This looks inverted but is kept
        # exactly as found -- confirm the intended mapping.
        scorer = self._score_single if multi else self._score_multi
        return scorer(kwargs=kwargs)
Ejemplo n.º 2
0
def get_scaling_factors(original,
                        runs,
                        cd_function,
                        exclude_application_site=True,
                        fitting_operation=None,
                        fitting_string=None,
                        target_cd=1.0):
    """Evaluate ``cd_function`` on the displaced structures of every run.

    For each run, every row of ``run.results`` is reshaped to ``(-1, 3)``
    and added to the coordinates of ``original``. The displaced structures
    are optionally fitted back onto ``original`` and collected into an
    ensemble. ``cd_function`` is then called with the (repeated) original
    coordinates and the displaced coordinates, and its result is stored in
    the rows of the output that correspond to the run's force application
    sites.

    Args:
        original: atom container providing ``copy``, ``select``,
            ``getCoords``, ``getCoordsets`` and ``getResindices``.
        runs: sequence of run objects exposing ``results`` (2-D array, one
            displacement vector per row) and ``force_sets`` (each with an
            ``origin`` atom). Must not be empty.
        cd_function: callable taking ``(reference_coordsets,
            displaced_coordsets)``.
        exclude_application_site: when true, the application sites are
            removed from both coordinate arrays before ``cd_function`` is
            called.
        fitting_operation: optional callable used to fit each displaced
            structure onto ``original``; it is first called on the
            ``fitting_string`` selections to obtain a transformation, then
            on the full structures with that transformation.
        fitting_string: selection string used when ``fitting_operation`` is
            given.
        target_cd: accepted for interface compatibility; not used here.

    Returns:
        A 3-tuple ``(out, out, coords)``: the same score array twice
        (shape ``(len(original), runs[0].results.shape[0])``) followed by
        an array of the unfiltered displaced coordinate sets of every run.
    """
    out = np.zeros((len(original), runs[0].results.shape[0]))
    coords_out = []

    atoms = original
    resindices = atoms.getResindices()

    # The selection on the undisplaced atoms does not change between
    # displacements, so it is built once.
    fitting_original_atoms = None
    if fitting_operation is not None:
        fitting_original_atoms = atoms.select(fitting_string)

    for r in runs:
        this_original = original.copy()

        # Positions of the force application sites. These are needed to
        # address the output rows regardless of whether the sites are
        # excluded from scoring (previously this name was undefined when
        # exclude_application_site was false).
        this_idxs = [np.where(resindices == fs.origin.getResindex())[0][0]
                     for fs in r.force_sets]

        if exclude_application_site:
            ref_original = this_original.select(
                " and ".join(["(not resindex %s)" % s for s in this_idxs]))
            ref_original_c = ref_original.getCoordsets()
        else:
            ref_original_c = this_original.getCoordsets()

        # One copy of the reference coordinates per displacement vector.
        ref_original_c = np.repeat(ref_original_c, r.results.shape[0], axis=0)

        fitted_ref_structures = prody.Ensemble(title="displaced_fitted_structures")
        fitted_ref_structures.setAtoms(atoms)

        for displacements in r.results:
            displaced_atoms = original.copy()

            # reshape(-1, 3) avoids the float produced by "/" on Python 3.
            displaced_atoms.setCoords(
                atoms.getCoords() + displacements.reshape((-1, 3)))

            if fitting_operation is not None:
                fitting_displaced_atoms = displaced_atoms.select(fitting_string)
                transformation = fitting_operation(
                    fitting_displaced_atoms,
                    fitting_original_atoms).transformation
                this_ref_fit = fitting_operation(
                    displaced_atoms, atoms, transformation=transformation)
                fitted_ref_structures.addCoordset(this_ref_fit.transformed())
            else:
                fitted_ref_structures.addCoordset(displaced_atoms)

        if exclude_application_site:
            filtered_fitted_ref_structures = prody.Ensemble(
                title="displaced_fitted_structures")
            filtered_fitted_ref_structures.setAtoms(ref_original)
            filtered_fitted_ref_structures.addCoordset(
                np.delete(fitted_ref_structures.getCoordsets(), this_idxs, axis=1))
        else:
            filtered_fitted_ref_structures = fitted_ref_structures

        coords_out.append(fitted_ref_structures.getCoordsets())
        out[this_idxs, :] = cd_function(
            ref_original_c, filtered_fitted_ref_structures.getCoordsets())

    return out, out, np.array(coords_out)
Ejemplo n.º 3
0
def assign_pcs(args):
    """Project the frames of a PDB or DCD file onto selected modes.

    Args:
        args: 6-tuple ``(fn, topf, eda, pcs, sel, outf)`` -- input file,
            topology PDB (used for DCD input), mode container, indices
            into ``eda``, atom selection string, and an optional output
            path.

    Returns:
        The projection array for ``.dcd`` input (also written to ``outf``
        when it is not ``None``). ``None`` for input ending in ``pdb``
        (the projection is only printed) and for unsupported file types.
    """
    fn, topf, eda, pcs, sel, outf = args

    if fn.endswith("pdb"):
        atoms = prody.parsePDB(fn).select(sel).copy()

        ensemble = prody.Ensemble('A single pdb file ensemble')
        ensemble.setCoords(atoms.getCoords())
        ensemble.addCoordset(atoms.getCoordsets())
        ensemble.iterpose()

        # The PDB branch only prints the projection; nothing is returned.
        print(prody.calcProjection(ensemble, eda[pcs]))
        return

    if not fn.endswith(".dcd"):
        print("Unsupport file type: %s" % fn)
        return None

    topology = prody.parsePDB(topf)
    selected = topology.select(sel)

    trajectory = prody.Trajectory(fn)
    trajectory.link(topology)
    trajectory.setCoords(topology)
    trajectory.setAtoms(selected)

    PCs = prody.calcProjection(trajectory, eda[pcs])
    if outf is not None:
        column_names = " ".join("PC%d" % (i + 1) for i in pcs)
        np.savetxt(outf, PCs, fmt="%.4f", header=column_names, comments="")
    return PCs
Ejemplo n.º 4
0
def get_traj_rmsds(reference, trajectory):
    """Return the RMSDs of the trajectory frames against the reference.

    The atoms matching ``'backbone or name OC2'`` are selected in both
    inputs. The reference selection supplies the ensemble's reference
    coordinates, the trajectory selection supplies its coordinate sets,
    and the ensemble is superposed before the RMSDs are read.
    """
    selection = 'backbone or name OC2'
    ref_atoms = reference.select(selection)
    traj_atoms = trajectory.select(selection)

    frames = prody.Ensemble('trajectory ensemble')
    frames.setCoords(ref_atoms.getCoords())
    frames.addCoordset(traj_atoms.getCoordsets())
    frames.superpose()

    return frames.getRMSDs()
Ejemplo n.º 5
0
def calcPCA(filenameEnsemble):
    """Build a PCA model from the 'ca' subset of a multi-model PDB file.

    The file is parsed with ``subset='ca'``, its coordinate sets are
    loaded into an ensemble and iteratively superposed, and the
    covariance matrix and modes are then computed.

    Returns:
        The populated PCA object (titled ``'Ubiquitin'``).
    """
    ca_atoms = dy.parsePDB(filenameEnsemble, subset='ca')

    models = dy.Ensemble('ensemble')
    models.setCoords(ca_atoms.getCoords())
    models.addCoordset(ca_atoms.getCoordsets())
    models.iterpose()

    analysis = dy.PCA('Ubiquitin')
    analysis.buildCovariance(models)
    analysis.calcModes()
    return analysis
Ejemplo n.º 6
0
def find_close(native_name, traj_name, skip_frames):
    """Measure how closely a trajectory approaches a native structure.

    The models of ``traj_name`` from index ``skip_frames`` onwards are
    superposed onto the native coordinates. For every atom the smallest
    squared deviation from its native position over those models is
    taken, and the square root of the mean of these minima is returned.
    """
    native = prody.parsePDB(native_name)
    traj = prody.parsePDB(traj_name)

    # Drop the first `skip_frames` models of the trajectory.
    kept_frames = traj.getCoordsets()[skip_frames:, ...]

    ensemble = prody.Ensemble('ensemble')
    ensemble.setCoords(native.getCoords())
    ensemble.addCoordset(kept_frames)
    ensemble.superpose()

    # Squared distance per (frame, atom), then the best frame per atom.
    delta = ensemble.getCoordsets() - native.getCoords()
    sq_dist = numpy.sum(delta ** 2, axis=2)
    closest = numpy.min(sq_dist, axis=0)

    return numpy.sqrt(numpy.sum(closest) / float(closest.shape[0]))
Ejemplo n.º 7
0
    def setUpClass(cls):
        """Write the test PDB, parse it, and build the iterposed ensemble.

        Sets on the class: ``pdb_path``, ``pdb`` (parsed with
        ``subset='calpha'``), ``not_iterposed_coordsets`` (a copy of the
        coordinate sets taken before superposition), ``ensemble`` and
        ``coordsets`` (the ensemble's coordinate sets after ``iterpose``).
        """
        # Generate and read the pdb. The context manager guarantees the
        # file is flushed and closed before ProDy reads it back.
        cls.pdb_path = "tmp_pdb.pdb"
        with open(cls.pdb_path, "w") as handle:
            handle.write(amber_short_ca_contents)

        # Silencing ProDy is best-effort; a failure must not abort set-up.
        try:
            prody.confProDy(verbosity='none')
        except Exception:
            # Call form works on both Python 2 and Python 3.
            print("Impossible to silent prody")
        cls.pdb = prody.parsePDB(cls.pdb_path, subset='calpha')

        # Save coordsets before superposition
        cls.not_iterposed_coordsets = numpy.array(cls.pdb.getCoordsets())

        # Do Prody iterposition
        cls.ensemble = prody.Ensemble('pca_test_ensemble')
        cls.ensemble.setCoords(cls.pdb.getCoords())
        cls.ensemble.addCoordset(cls.pdb.getCoordsets())
        cls.ensemble.iterpose()
        cls.coordsets = cls.ensemble.getCoordsets()
Ejemplo n.º 8
0
    def run(self):
        """Run the reference job and every perturbation job, then score.

        The reference run is executed first and its displaced (optionally
        fitted) structures are collected into an ensemble. Each entry of
        ``self.runs`` is then executed in turn, paired with the next
        element of the product of ``self.refs`` and ``self.sets``. Its
        displaced structures are collected the same way and scored against
        the reference ensemble with ``self.scoring_function``. Average,
        minimum and maximum scores are stored in ``self.scores``,
        ``self.min_scores`` and ``self.max_scores``.

        When ``self.do_write`` is not ``None``, the requested quantities
        are put on ``self.write_queue`` as ``(kind, indices, values)``
        tuples.

        NOTE(review): the F_DR / FP_XYZ / P_XYZ buffers are allocated once,
        queued for the reference run and then overwritten in place for each
        perturbation run. Whether the queue consumer has copied them by
        then cannot be seen from this method.
        """
        # Membership tests against an empty tuple are all False, which is
        # the "nothing requested" case.
        requested = self.do_write if self.do_write is not None else ()
        write_DF = DF in requested
        write_DR = DR in requested
        write_F_DR = F_DR in requested
        write_P_XYZ = P_XYZ in requested
        write_FP_XYZ = FP_XYZ in requested
        write_RAW_SCORES = RAW_SCORES in requested
        write_CD = CD in requested

        ref_idxs = [ref.origin.getResindex() for ref in self.refs]
        ref_combinations = [[j] for j in self.refs]
        combinations = itertools.product(*(ref_combinations + self.sets))

        # One axis of length len(self.indices) per perturbation set.
        scores_shape = [len(self.indices) for _ in self.sets]
        self.scores = np.zeros(scores_shape)
        self.max_scores = np.zeros(scores_shape)
        self.min_scores = np.zeros(scores_shape)
        displaced_atoms = self.atoms.copy()

        self.ref_run.prepare()
        self.ref_run.run()

        fitted_ref_structures = prody.Ensemble(title="displaced_fitted_structures")
        fitted_ref_structures.setAtoms(self.atoms)

        # "//" keeps the shapes integral on Python 3 as well as Python 2.
        if write_F_DR:
            F_DR_values = np.zeros(self.ref_run.results.shape)
        if write_FP_XYZ:
            FP_XYZ_values = np.zeros((self.ref_run.results.shape[0],
                                      self.ref_run.results.shape[1] // 3, 3))
        if write_P_XYZ:
            P_XYZ_values = np.zeros((self.ref_run.results.shape[0],
                                     self.ref_run.results.shape[1] // 3, 3))

        for d, displacements in enumerate(self.ref_run.results):
            displaced_atoms.setCoords(
                self.atoms.getCoords()
                + displacements.reshape((displacements.shape[0] // 3, 3)))
            if self.fitting_operation is not None:
                fitting_original_atoms = self.atoms.select(self.fitting_string)
                fitting_displaced_atoms = displaced_atoms.select(self.fitting_string)
                transformation = self.fitting_operation(
                    fitting_displaced_atoms, fitting_original_atoms).transformation
                this_ref_fit = self.fitting_operation(
                    displaced_atoms, self.atoms, transformation=transformation)
                fitted_ref_structures.addCoordset(this_ref_fit.transformed())
                if write_F_DR:
                    F_DR_values[d, :] = (this_ref_fit.transformed().getCoords()
                                         - self.atoms.getCoords()).flatten()
                if write_FP_XYZ:
                    FP_XYZ_values[d, :] = this_ref_fit.transformed().getCoords()
            else:
                fitted_ref_structures.addCoordset(displaced_atoms)

            if write_P_XYZ:
                P_XYZ_values[d, :] = displaced_atoms.getCoords()

        if self.do_write is not None:
            if write_DR:
                self.write_queue.put((DR, ref_idxs, self.ref_run.results))
            if write_DF:
                self.write_queue.put((DF, ref_idxs, self.ref_run.F))
            if write_FP_XYZ:
                self.write_queue.put((FP_XYZ, ref_idxs, FP_XYZ_values))
            if write_F_DR:
                self.write_queue.put((F_DR, ref_idxs, F_DR_values))
            if write_P_XYZ:
                self.write_queue.put((P_XYZ, ref_idxs, P_XYZ_values))

            # NOTE(review): the reference results are only released when
            # writing is enabled; kept as found.
            del self.ref_run.results
            del self.ref_run.F

        for r, run in enumerate(self.runs):

            # next() works on Python 2.6+ and Python 3, unlike .next().
            this_comb = next(combinations)
            this_ref = this_comb[:len(self.refs)]
            this_perturbation = this_comb[-len(self.sets):]
            fitted_structures = prody.Ensemble(title="displaced_fitted_structures")
            fitted_structures.setAtoms(self.atoms)

            this_idxs = [this.origin.getResindex() for this in this_comb]

            run.prepare()
            run.run()

            for d, displacements in enumerate(run.results):

                displaced_atoms.setCoords(
                    self.atoms.getCoords()
                    + displacements.reshape((displacements.shape[0] // 3, 3)))

                if self.fitting_operation is not None:
                    fitting_original_atoms = self.atoms.select(self.fitting_string)
                    fitting_displaced_atoms = displaced_atoms.select(self.fitting_string)
                    transformation = self.fitting_operation(
                        fitting_displaced_atoms, fitting_original_atoms).transformation
                    this_fit = self.fitting_operation(
                        displaced_atoms, self.atoms, transformation=transformation)
                    fitted_structures.addCoordset(this_fit.transformed())
                    if write_F_DR:
                        # Use the fit of THIS run; the previous code reused
                        # the last reference fit here.
                        F_DR_values[d, :] = (this_fit.transformed().getCoords()
                                             - self.atoms.getCoords()).flatten()
                    if write_FP_XYZ:
                        FP_XYZ_values[d, :] = this_fit.transformed().getCoords()
                else:
                    fitted_structures.addCoordset(displaced_atoms)

                if write_P_XYZ:
                    P_XYZ_values[d, :] = displaced_atoms.getCoords()

            ref_score_idxs = [np.where(c.origin.getResindex() == self.indices)
                              for c in this_ref]
            this_score_idxs = [np.where(c.origin.getResindex() == self.indices)
                               for c in this_comb]

            self.score_kwargs["ref_idxs"] = ref_idxs
            self.score_kwargs["this_idxs"] = this_idxs
            self.score_kwargs["ref_score_idxs"] = ref_score_idxs
            self.score_kwargs["this_score_idxs"] = this_score_idxs

            score_idxs = [np.where(c.origin.getResindex() == self.indices)
                          for c in this_perturbation]
            score = self.scoring_function(self.atoms,
                                          fitted_ref_structures,
                                          fitted_structures,
                                          metadata=[fs.metadata for fs in run.force_sets],
                                          )

            # The reference values produced for the first run are reused
            # for every later run.
            if r == 0:
                ref_cds = score.ref_cds
            else:
                score.ref_cds = ref_cds

            avg_s, min_s, max_s, details = score.get(**self.score_kwargs)

            self.scores[np.array(score_idxs)] = avg_s
            self.min_scores[np.array(score_idxs)] = min_s
            self.max_scores[np.array(score_idxs)] = max_s

            if self.do_write is not None:
                if write_DR:
                    self.write_queue.put((DR, this_idxs, run.results))
                if write_DF:
                    self.write_queue.put((DF, this_idxs, run.F))
                if write_FP_XYZ:
                    self.write_queue.put((FP_XYZ, this_idxs, FP_XYZ_values))
                if write_F_DR:
                    self.write_queue.put((F_DR, this_idxs, F_DR_values))
                if write_P_XYZ:
                    self.write_queue.put((P_XYZ, this_idxs, P_XYZ_values))
                if write_RAW_SCORES:
                    self.write_queue.put((RAW_SCORES, this_idxs, details[RAW_SCORES]))
                if write_CD:
                    self.write_queue.put((ref_CD, this_idxs, details[CD][0]))
                    self.write_queue.put((dfs_CD, this_idxs, details[CD][1]))

            del run.F
            del run.results
Ejemplo n.º 9
0
def _limit_blas_threads(nproc):
    """Return a threadpoolctl context limiting BLAS to *nproc* threads."""
    try:
        from threadpoolctl import threadpool_limits
    except ImportError:
        raise ImportError(
            'Please install threadpoolctl to control threads')
    return threadpool_limits(limits=nproc, user_api="blas")


def _pca_from_dcd(pca, dcd, nmodes, aligned, quiet):
    """Fill *pca* from *dcd* and return the ensemble that was analysed."""
    if len(dcd) > 1000:
        # Long trajectories: build the covariance frame by frame instead
        # of loading every frame into memory.
        pca.buildCovariance(dcd, aligned=aligned, quiet=quiet)
        pca.calcModes(nmodes)
        return dcd

    ensemble = dcd[:]
    if not aligned:
        ensemble.iterpose(quiet=quiet)
    pca.performSVD(ensemble)
    return ensemble


def prody_pca(coords, **kwargs):
    """Perform PCA calculations for PDB or DCD format *coords* file.

    Options missing from *kwargs* are filled in from ``DEFAULTS``. The
    model is always written as an NMD file into ``outdir``; the remaining
    ``out*`` and ``fig*`` options select additional numerical and
    graphical output.

    Raises:
        IOError: if ``outdir`` is not an existing directory.
        ValueError: if the input has fewer than two frames/models, if the
            selection matches no atoms, or if the number of selected atoms
            does not match the DCD file.
        ImportError: if ``nproc`` is given and threadpoolctl is missing.
    """

    for key in DEFAULTS:
        if key not in kwargs:
            kwargs[key] = DEFAULTS[key]

    from numbers import Integral
    from os.path import isdir, splitext, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import prody
    LOGGER = prody.LOGGER

    prefix = kwargs.get('prefix')
    nmodes = kwargs.get('nmodes')
    selstr = kwargs.get('select')
    quiet = kwargs.pop('quiet', False)
    altloc = kwargs.get('altloc')
    aligned = kwargs.get('aligned')
    nproc = kwargs.get('nproc')

    ext = splitext(coords)[1].lower()
    if ext == '.gz':
        ext = splitext(coords[:-3])[1].lower()

    if ext == '.dcd':
        pdb = kwargs.get('psf') or kwargs.get('pdb')
        if pdb:
            if splitext(pdb)[1].lower() == '.psf':
                pdb = prody.parsePSF(pdb)
            else:
                # The keyword is ``altloc``; the previous code referenced
                # an undefined name here. It is only forwarded when set so
                # that parsePDB's own default applies otherwise.
                pdb_kwargs = {} if altloc is None else {'altloc': altloc}
                pdb = prody.parsePDB(pdb, **pdb_kwargs)
        dcd = prody.DCDFile(coords)
        if prefix in ('_pca', '_eda'):
            prefix = dcd.getTitle() + prefix

        if len(dcd) < 2:
            raise ValueError('DCD file must have multiple frames')
        if pdb:
            select = pdb.select(selstr)
            if select is None:
                raise ValueError('selection {0} do not match any atoms'.format(
                    repr(selstr)))
            if pdb.numAtoms() == dcd.numAtoms():
                dcd.setAtoms(select)
                LOGGER.info('{0} atoms are selected for calculations.'.format(
                    len(select)))
            else:
                if select.numAtoms() != dcd.numAtoms():
                    raise ValueError('number of selected atoms ({0}) does '
                                     'not match number of atoms in the DCD '
                                     'file ({1})'.format(
                                         select.numAtoms(), dcd.numAtoms()))
                if pdb.numCoordsets():
                    dcd.setCoords(select.getCoords())

        else:
            select = prody.AtomGroup()
            select.setCoords(dcd.getCoords())
        pca = prody.PCA(dcd.getTitle())

        if nproc:
            with _limit_blas_threads(nproc):
                ensemble = _pca_from_dcd(pca, dcd, nmodes, aligned, quiet)
        else:
            ensemble = _pca_from_dcd(pca, dcd, nmodes, aligned, quiet)
        nmodes = pca.numModes()

    else:
        pdb = prody.parsePDB(coords)
        if pdb.numCoordsets() < 2:
            raise ValueError('PDB file must contain multiple models')

        if prefix in ('_pca', '_eda'):
            prefix = pdb.getTitle() + prefix

        select = pdb.select(selstr)
        # Validate before len(select) is evaluated for the log messages.
        if select is None:
            raise ValueError('selection {0} do not match any atoms'.format(
                repr(selstr)))
        LOGGER.info('{0} atoms are selected for calculations.'.format(
            len(select)))
        LOGGER.info('{0} atoms will be used for PCA calculations.'.format(
            len(select)))
        ensemble = prody.Ensemble(select)
        pca = prody.PCA(pdb.getTitle())
        if not aligned:
            ensemble.iterpose()

        if nproc:
            with _limit_blas_threads(nproc):
                pca.performSVD(ensemble)
        else:
            pca.performSVD(ensemble)

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(pca, join(outdir, prefix))

    if kwargs.get('outscipion'):
        prody.writeScipionModes(outdir, pca)

    prody.writeNMD(join(outdir, prefix + '.nmd'), pca[:nmodes], select)

    extend = kwargs.get('extend')
    if extend:
        if pdb:
            if extend == 'all':
                extended = prody.extendModel(pca[:nmodes], select, pdb)
            else:
                extended = prody.extendModel(pca[:nmodes], select,
                                             select | pdb.bb)
            prody.writeNMD(
                join(outdir, prefix + '_extended_' + extend + '.nmd'),
                *extended)
        else:
            prody.LOGGER.warn('Model could not be extended, provide a PDB or '
                              'PSF file.')
    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    ext = kwargs.get('numext')
    numformat = kwargs.get('numformat')

    if outall or kwargs.get('outeig'):
        prody.writeArray(join(outdir, prefix + '_evectors' + ext),
                         pca.getArray(),
                         delimiter=delim,
                         format=numformat)
        prody.writeArray(join(outdir, prefix + '_evalues' + ext),
                         pca.getEigvals(),
                         delimiter=delim,
                         format=numformat)
    if outall or kwargs.get('outcov'):
        prody.writeArray(join(outdir, prefix + '_covariance' + ext),
                         pca.getCovariance(),
                         delimiter=delim,
                         format=numformat)
    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        cc = prody.calcCrossCorr(pca)
        if outall or kwargs.get('outcc'):
            prody.writeArray(join(outdir,
                                  prefix + '_cross-correlations' + ext),
                             cc,
                             delimiter=delim,
                             format=numformat)
        if outall or kwargs.get('outhm'):
            resnums = select.getResnums()
            hmargs = {} if resnums is None else {'resnums': resnums}
            prody.writeHeatmap(join(outdir, prefix + '_cross-correlations.hm'),
                               cc,
                               xlabel='Residue',
                               ylabel='Residue',
                               title=pca.getTitle() + ' cross-correlations',
                               **hmargs)

    if outall or kwargs.get('outsf'):
        prody.writeArray(join(outdir, prefix + '_sqfluct' + ext),
                         prody.calcSqFlucts(pca),
                         delimiter=delim,
                         format=numformat)
    if outall or kwargs.get('outproj'):
        prody.writeArray(join(outdir, prefix + '_proj' + ext),
                         prody.calcProjection(ensemble, pca),
                         delimiter=delim,
                         format=numformat)

    figall = kwargs.get('figall')
    cc = kwargs.get('figcc')
    sf = kwargs.get('figsf')
    sp = kwargs.get('figproj')

    if figall or cc or sf or sp:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            figformat = kwargs.get('figformat')
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            figformat = figformat.lower()
            if figall or cc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(pca)
                plt.savefig(join(outdir, prefix + '_cc.' + figformat),
                            dpi=dpi,
                            format=figformat)
                plt.close('all')
            if figall or sf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(pca)
                plt.savefig(join(outdir, prefix + '_sf.' + figformat),
                            dpi=dpi,
                            format=figformat)
                plt.close('all')
            if figall or sp:
                # Each whitespace-separated item is "a-b" (1-based
                # inclusive range), "a,b,..." (list) or a single index.
                # ``sp`` may be empty when only ``figall`` was requested.
                indices = []
                for spec in (sp or '').split():
                    try:
                        if '-' in spec:
                            bounds = spec.split('-')
                            if len(bounds) == 2:
                                indices.append(
                                    list(range(int(bounds[0]) - 1,
                                               int(bounds[1]))))
                        elif ',' in spec:
                            indices.append(
                                [int(i) - 1 for i in spec.split(',')])
                        else:
                            indices.append(int(spec) - 1)
                    except ValueError:
                        # Malformed items are skipped, as before, but the
                        # user is now told about it.
                        LOGGER.warning('Projection specification {0} could '
                                       'not be parsed and is ignored.'
                                       .format(repr(spec)))
                for index in indices:
                    plt.figure(figsize=(width, height))
                    prody.showProjection(ensemble, pca[index])
                    if isinstance(index, Integral):
                        index = [index]
                    index = [str(i + 1) for i in index]
                    plt.savefig(join(
                        outdir,
                        prefix + '_proj_' + '_'.join(index) + '.' + figformat),
                                dpi=dpi,
                                format=figformat)
                    plt.close('all')