Esempio n. 1
0
    def _start_inline(self, directories):
        """Run the configured QM code in every job directory, one after another.

        A ``run.sh`` script holding the command line is first written into
        each directory. Every directory is then entered in turn and the
        command is executed through the shell, unless the expected output file
        is already present there. The working directory that was current
        before a job is restored afterwards, also when the job raises.

        Parameters
        ----------
        directories : sequence of str
            Paths of the job directories.

        Raises
        ------
        ValueError
            If ``self.code`` is not one of Gaussian, PSI4 or TeraChem.
        """
        if self.code == Code.Gaussian:
            cmd = self.gaussian_binary + ' < input.gjf > output.gau 2>&1'
            output_name = "output.gau"
        elif self.code == Code.PSI4:
            cmd = self.psi4_binary + " -i psi4.in -o psi4.out 2>&1"
            output_name = "psi4.out"
        elif self.code == Code.TeraChem:
            cmd = self.terachem_binary + " -i terachem.in > terachem.out 2>&1"
            output_name = "terachem.out"
        else:
            # Without this branch an unknown code only surfaced later as an
            # unbound local variable while writing the first run.sh.
            raise ValueError("Unsupported QM code: {}".format(self.code))

        bar = ProgressBar(len(directories), description="Running QM Calculations")

        for d in directories:
            script = os.path.join(d, "run.sh")
            # The context manager closes the file even if the write fails
            with open(script, "w") as f:
                f.write("#!/bin/sh\n%s\n\n" % (cmd))
            os.chmod(script, 0o700)

        for directory in directories:
            cwd = os.getcwd()
            try:
                os.chdir(directory)
                # An existing output file marks the job as already done
                if not os.path.exists(output_name):
                    subprocess.call(cmd, shell=True)
            finally:
                os.chdir(cwd)
            bar.progress()
        bar.stop()
Esempio n. 2
0
 def _start(self, directories):
     """Run the QM jobs found in `directories`, updating a progress bar.

     For inline execution each directory is entered and the configured code
     (Gaussian or PSI4) is launched unless its output file already exists.
     The working directory that was current before each job is restored
     afterwards, whether or not the job raised.

     Parameters
     ----------
     directories : sequence of str
         Paths of the job directories.
     """
     bar = ProgressBar(len(directories),
                       description="Running QM Calculations")
     for job_dir in directories:
         start_dir = os.getcwd()
         try:
             if self.execution == Execution.Inline:
                 os.chdir(job_dir)
                 if self.code == Code.Gaussian:
                     if not os.path.exists("output.gau"):
                         # Binary path is quoted because the line goes through the shell
                         gaussian_cmd = ('"' + self.gaussian_binary +
                                         '" < input.gjf > output.gau 2>&1')
                         subprocess.call(gaussian_cmd, shell=True)
                 elif self.code == Code.PSI4:
                     if not os.path.exists("psi4.out"):
                         psi4_args = [self.psi4_binary, "-i", "psi4.in",
                                      "-o", "psi4.out"]
                         subprocess.call(psi4_args)
         finally:
             os.chdir(start_dir)
         bar.progress()
     bar.stop()
Esempio n. 3
0
    def __init__(self, data, lag, units='frames', dimensions=None):
        """Fit a pyemma TICA model on `data`.

        Parameters
        ----------
        data : Metric or data object
            If a ``Metric``, its simulations are projected on the fly and fed
            to the model piece by piece. Otherwise an object exposing ``dat``
            and ``fstep`` whose data is fitted in memory.
        lag : int or float
            The lag time, expressed in `units`.
        units : str
            Units of `lag`. Only 'frames' is accepted when `data` is a Metric.
        dimensions : optional
            Column selection applied to each trajectory before fitting. When
            None all columns are used.

        Raises
        ------
        RuntimeError
            If `data` is a Metric and `units` is not 'frames', or if the lag
            converts to 0 frames for in-memory data.
        """
        from pyemma.coordinates.transform.tica import TICA as TICApyemma

        self.data = data
        self.dimensions = dimensions

        if not isinstance(data, Metric):
            # In-memory TICA
            lag = unitconvert(units, 'frames', lag, data.fstep)
            if lag == 0:
                raise RuntimeError('Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA.')

            self.tic = TICApyemma(lag)
            if self.dimensions is not None:
                # Sub-select dimensions for fitting
                trajectories = [traj[:, self.dimensions].copy() for traj in data.dat]
            else:
                trajectories = data.dat.tolist()
            self.tic.fit(trajectories)
            return

        # Memory efficient TICA projecting trajectories on the fly
        if units != 'frames':
            raise RuntimeError('Cannot use delayed projection TICA with units other than frames for now. Report this to HTMD issues.')
        self.tic = TICApyemma(lag)

        bar = ProgressBar(len(data.simulations))
        for batch in _projectionGenerator(data, _getNcpus()):
            for item in batch:
                if item is None:
                    continue
                features = item[0]
                if self.dimensions is not None:
                    # Sub-select dimensions for fitting
                    features = features[:, self.dimensions]
                self.tic.partial_fit(features)
            bar.progress(len(batch))
        bar.stop()
Esempio n. 4
0
File: tica.py Progetto: jhprinz/htmd
    def __init__(self, data, lag, units='frames'):
        """Build a TICA model from a Metric or from in-memory data.

        Parameters
        ----------
        data : Metric or data object
            If a ``Metric``, its simulations are projected on the fly and fed
            to the model piece by piece. Otherwise an object exposing ``dat``
            and ``fstep`` whose data is passed to pyemma in one go.
        lag : int or float
            The lag time, expressed in `units`.
        units : str
            Units of `lag`. Only 'frames' is accepted when `data` is a Metric.

        Raises
        ------
        RuntimeError
            If `data` is a Metric and `units` is not 'frames', or if the lag
            converts to 0 frames for in-memory data.
        """
        from pyemma.coordinates import tica
        # data.dat.tolist() might be better?
        self.data = data
        if isinstance(data, Metric):
            if units != 'frames':
                raise RuntimeError(
                    'Cannot use delayed projection TICA with units other than frames for now. Report this to HTMD issues.'
                )
            metr = data
            from pyemma.coordinates.transform.tica import TICA
            self.tic = TICA(lag)

            p = ProgressBar(len(metr.simulations))
            for proj in _projectionGenerator(metr, _getNcpus()):
                for pro in proj:
                    # presumably None marks a simulation that could not be
                    # projected; indexing it would raise TypeError
                    if pro is None:
                        continue
                    self.tic.partial_fit(pro[0])
                p.progress(len(proj))
            p.stop()
        else:
            lag = unitconvert(units, 'frames', lag, data.fstep)
            if lag == 0:
                raise RuntimeError(
                    'Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA.'
                )
            self.tic = tica(data.dat.tolist(), lag=lag)
Esempio n. 5
0
def progressbar(seq, description=None, total=None):
    """Yield the items of `seq` while advancing a progress bar.

    Parameters
    ----------
    seq : iterable
        The items to iterate over. Iterators and plain iterables both work.
    description : str, optional
        Passed to the ProgressBar as its description.
    total : int, optional
        Passed to the ProgressBar as the expected number of items.

    Yields
    ------
    The items of `seq`, in order.
    """
    p = ProgressBar(total, description=description)
    try:
        for item in seq:
            yield item
            # Progress is reported after the yield: reporting before it made
            # the last call go over the total, which stop() cannot undo.
            p.progress()
    finally:
        # Ending the generator by falling off the end replaces re-raising
        # StopIteration, which PEP 479 turns into a RuntimeError. The finally
        # also stops the bar when the consumer abandons the loop early.
        p.stop()
Esempio n. 6
0
def progressbar(seq, description=None, total=None):
    """Wrap an iterable so that consuming it drives a progress bar.

    Parameters
    ----------
    seq : iterable
        The items to iterate over. Iterators and plain iterables both work.
    description : str, optional
        Passed to the ProgressBar as its description.
    total : int, optional
        Passed to the ProgressBar as the expected number of items.

    Yields
    ------
    The items of `seq`, in order.
    """
    p = ProgressBar(total, description=description)
    try:
        for element in seq:
            yield element
            # Had to put progress after yield because otherwise the last call
            # goes over the total and stop() cannot decrement it.
            p.progress()
    finally:
        # The generator now ends by returning. Re-raising StopIteration from
        # inside a generator is converted to RuntimeError (PEP 479).
        p.stop()
Esempio n. 7
0
    def fit(self, data):
        """ Compute the centroids of data.

        The first center is a randomly chosen data point. Every further center
        is the point that is currently furthest from its assigned center. The
        search ends once `n_clusters` centers exist or the largest remaining
        distance is 0. Results are stored in `cluster_centers_`, `centerFrames`,
        `labels_`, `clusterSize` and `distance`.

        Parameters
        ----------
        data : np.ndarray
            A 2D array of data. Columns are features and rows are data examples.
        """
        if len(self.cluster_centers_) != 0:
            logger.warning('Clustering already exists. Reclustering data!')
            self.cluster_centers_ = []
            self.centerFrames = []
            self.clusterSize = []

        # Seed: one random point is the first center and owns every point
        n_points = np.size(data, 0)
        seed_idx = np.random.randint(n_points)
        self.cluster_centers_.append(data[seed_idx, :])
        self.centerFrames.append(seed_idx)
        self.labels_ = np.zeros(n_points, dtype=int)

        # Distance of every point to the center it is currently assigned to
        nearest = self._dist(self.cluster_centers_, data)
        next_label = 1

        bar = ProgressBar(self.n_clusters)
        while len(self.cluster_centers_) < self.n_clusters and not np.max(nearest) == 0:
            # The point furthest away from all centers becomes a new center
            far_idx = np.argmax(nearest)
            far_point = data[far_idx, :]
            self.centerFrames.append(far_idx)
            self.cluster_centers_.append(far_point)

            # Points that are closer to the new center than to their old one
            to_new = self._dist(far_point, data)
            closer = nearest > to_new

            # Move those points to the new cluster
            self.labels_[closer] = next_label
            nearest[closer] = to_new[closer]

            next_label += 1
            bar.progress()
        bar.stop()

        # Final bookkeeping
        self.clusterSize = np.bincount(self.labels_)
        self.distance = nearest
        self.cluster_centers_ = np.array(self.cluster_centers_)
Esempio n. 8
0
def tileMembrane(memb, xmin, ymin, xmax, ymax, buffer=1.5):
    """ Tile a membrane in the X and Y dimensions to reach a specific size.

    The tile size is taken from the extent of the water coordinates of `memb`.
    Each replica is shifted so that the minimum water corner lands on its grid
    position, residues reaching beyond `xmax` or `ymax` are removed, and the
    replica gets its own segid ``M<k>``.

    Parameters
    ----------
    memb : Molecule
        The membrane patch to replicate. All work is done on a copy.
    xmin : float
        X position where the first tile is placed.
    ymin : float
        Y position where the first tile is placed.
    xmax : float
        Upper X limit. Residues with any atom beyond it are removed.
    ymax : float
        Upper Y limit. Residues with any atom beyond it are removed.
    buffer : float
        Extra spacing added to the tile size when positioning consecutive
        tiles.

    Returns
    -------
    megamemb :
        A big membrane Molecule
    """
    from htmd.progress.progress import ProgressBar
    from htmd.molecule.molecule import Molecule

    memb = memb.copy()
    memb.resid = sequenceID(memb.resid)

    # Query the water coordinates once; both the origin and the size need them
    water = memb.get('coords', 'water')
    minmemb = np.min(water, axis=0).flatten()
    size = (np.max(water, axis=0) - np.min(water, axis=0)).flatten()

    xreps = int(np.ceil((xmax - xmin) / size[0]))
    yreps = int(np.ceil((ymax - ymin) / size[1]))

    logger.info('Replicating Membrane {}x{}'.format(xreps, yreps))

    # Loop-invariant pieces: shift back to the origin and the trimming selection
    xorigin = -float(minmemb[0])
    yorigin = -float(minmemb[1])
    outside = 'same resid as (x > {} or y > {})'.format(xmax, ymax)

    megamemb = Molecule()
    bar = ProgressBar(xreps * yreps, description='Replicating Membrane')
    k = 0
    for x in range(xreps):
        for y in range(yreps):
            tmpmemb = memb.copy()
            xpos = xmin + x * (size[0] + buffer)
            ypos = ymin + y * (size[1] + buffer)

            tmpmemb.moveBy([xorigin + xpos, yorigin + ypos, 0])
            tmpmemb.remove(outside, _logger=False)
            tmpmemb.set('segid', 'M{}'.format(k))

            megamemb.append(tmpmemb)
            k += 1
            bar.progress()
    bar.stop()
    # Membranes don't tile perfectly. Need to remove waters that clash with lipids of other tiles
    # Some clashes will still occur between periodic images however
    megamemb.remove('same fragment as water and within 1.5 of not water', _logger=False)
    return megamemb
Esempio n. 9
0
    def __init__(self, data, lag):
        """Build a TICA model from a Metric or from in-memory data.

        Parameters
        ----------
        data : Metric or data object
            If a ``Metric``, every simulation is projected individually and
            fed to the model piece by piece. Otherwise an object exposing
            ``dat`` whose data is passed to pyemma in one go.
        lag : int
            The lag time in frames.
        """
        from pyemma.coordinates import tica
        # data.dat.tolist() might be better?
        self.data = data
        if isinstance(data, Metric):
            from pyemma.coordinates.transform.tica import TICA
            self.tic = TICA(lag)

            p = ProgressBar(len(data.simulations))
            for i in range(len(data.simulations)):
                d, _, _ = data._projectSingle(i)
                # Fix for pyemma bug. Remove eventually:
                # failed projections and trajectories shorter than the lag
                # are left out of the fit.
                if d is not None and d.shape[0] >= lag:
                    self.tic.partial_fit(d)
                # Advance for skipped simulations too, so the bar can reach
                # its total.
                p.progress()
            p.stop()
        else:
            self.tic = tica(data.dat.tolist(), lag=lag)
Esempio n. 10
0
def tileMembrane(memb, xmin, ymin, xmax, ymax):
    """ Tile the membrane in the X and Y dimensions to reach a specific size.

    The tile size is taken from the extent of the water coordinates of `memb`.
    Each replica is shifted so that the minimum water corner lands on its grid
    position, residues reaching beyond `xmax` or `ymax` are removed, and the
    replica gets its own segid ``M<k>``.

    Parameters
    ----------
    memb : Molecule
        The membrane patch to replicate. All work is done on a copy.
    xmin : float
        X position where the first tile is placed.
    ymin : float
        Y position where the first tile is placed.
    xmax : float
        Upper X limit. Residues with any atom beyond it are removed.
    ymax : float
        Upper Y limit. Residues with any atom beyond it are removed.

    Returns
    -------
    megamemb :
        A big membrane Molecule
    """
    from htmd.progress.progress import ProgressBar
    from htmd.molecule.molecule import Molecule

    memb = memb.copy()
    memb.resid = sequenceID(memb.resid)

    # Query the water coordinates once; both the origin and the size need them
    water = memb.get('coords', 'water')
    minmemb = np.min(water, axis=0).flatten()
    size = (np.max(water, axis=0) - np.min(water, axis=0)).flatten()

    xreps = int(np.ceil((xmax - xmin) / size[0]))
    yreps = int(np.ceil((ymax - ymin) / size[1]))

    logger.info('Replicating Membrane {}x{}'.format(xreps, yreps))

    # Loop-invariant pieces: shift back to the origin and the trimming selection
    xorigin = -float(minmemb[0])
    yorigin = -float(minmemb[1])
    sel = 'same resid as (x > {} or y > {})'.format(xmax, ymax)

    megamemb = Molecule()
    bar = ProgressBar(xreps * yreps, description='Replicating Membrane')
    k = 0
    for x in range(xreps):
        for y in range(yreps):
            tmpmemb = memb.copy()
            xpos = xmin + x * size[0]
            ypos = ymin + y * size[1]

            tmpmemb.moveBy([xorigin + xpos, yorigin + ypos, 0])
            tmpmemb.remove(sel, _logger=False)
            tmpmemb.set('segid', 'M{}'.format(k))

            megamemb.append(tmpmemb)
            k += 1
            bar.progress()
    bar.stop()
    return megamemb
Esempio n. 11
0
    def _run_qm_jobs_inline(self, directory):
        """Run every pending ``.gjf`` job found below `directory`.

        The tree under `directory` is walked. A ``.gjf`` file counts as
        pending when no file with the same name and an ``.out`` extension
        exists next to it. Each pending job is run by calling the binary
        stored in ``self._env['BIN_G09']`` with the input and output paths as
        arguments.

        Parameters
        ----------
        directory : str
            Root of the directory tree to search.

        Raises
        ------
        subprocess.CalledProcessError
            If the binary exits with a non-zero status.
        """
        cmd = self._env['BIN_G09']

        jobs = []
        for root, _dirs, files in os.walk(directory):
            for f in files:
                if not f.endswith(".gjf"):
                    continue
                # Swap only the trailing extension; str.replace would also
                # rewrite a ".gjf" occurring earlier in the file name.
                op = f[:-len(".gjf")] + ".out"
                fno = os.path.join(root, op)
                if not os.path.exists(fno):
                    jobs.append((os.path.join(root, f), fno))

        if jobs:
            bar = ProgressBar(len(jobs), description="Running QM Calculations")
            for fni, fno in jobs:
                subprocess.check_output([cmd, fni, fno])
                bar.progress()
            bar.stop()
Esempio n. 12
0
    def _run_qm_jobs_inline(self, directory):
        """Execute all ``.gjf`` inputs under `directory` that have no output yet.

        An input is skipped when a sibling file with the ``.out`` extension
        already exists. The remaining inputs are run one at a time with the
        binary stored in ``self._env['BIN_G09']``, which receives the input
        and output paths as its two arguments.

        Parameters
        ----------
        directory : str
            Root of the directory tree to search.

        Raises
        ------
        subprocess.CalledProcessError
            If the binary exits with a non-zero status.
        """
        cmd = self._env['BIN_G09']
        suffix = ".gjf"

        pending = []
        for root, _dirs, files in os.walk(directory):
            for f in files:
                if f.endswith(suffix):
                    # Only the final extension is replaced. str.replace would
                    # change every ".gjf" inside the name.
                    out_path = os.path.join(root, f[:-len(suffix)] + ".out")
                    if not os.path.exists(out_path):
                        pending.append((os.path.join(root, f), out_path))

        if pending:
            bar = ProgressBar(len(pending), description="Running QM Calculations")
            for in_path, out_path in pending:
                subprocess.check_output([cmd, in_path, out_path])
                bar.progress()
            bar.stop()
Esempio n. 13
0
    def __init__(self, data, lag, units='frames'):
        """Build a TICA model from a Metric or from in-memory data.

        Parameters
        ----------
        data : Metric or data object
            If a ``Metric``, every simulation is projected individually and
            fed to the model piece by piece. Otherwise an object exposing
            ``dat`` whose data is passed to pyemma in one go.
        lag : int or float
            The lag time, expressed in `units`.
        units : str
            Units of `lag`. It is converted to frames using ``data.fstep``.
        """
        from pyemma.coordinates import tica
        # data.dat.tolist() might be better?
        self.data = data
        # Convert once for both branches. The in-memory branch used to pass the
        # raw lag on, so `units` was silently ignored there.
        # NOTE(review): assumes `data` exposes `fstep` in both cases - confirm.
        lag = unitconvert(units, 'frames', lag, data.fstep)
        if isinstance(data, Metric):
            from pyemma.coordinates.transform.tica import TICA
            self.tic = TICA(lag)

            p = ProgressBar(len(data.simulations))
            for i in range(len(data.simulations)):
                d, _, _ = data._projectSingle(i)
                # Fix for pyemma bug. Remove eventually:
                # failed projections and trajectories shorter than the lag
                # are left out of the fit.
                if d is not None and d.shape[0] >= lag:
                    self.tic.partial_fit(d)
                # Advance for skipped simulations too, so the bar can reach
                # its total.
                p.progress()
            p.stop()
        else:
            self.tic = tica(data.dat.tolist(), lag=lag)
Esempio n. 14
0
def tileMembrane(memb, xmin, ymin, xmax, ymax):
    """ Tile the membrane in the X and Y dimensions to reach a specific size.

    The number of tiles per axis is the requested extent divided by the extent
    of the water coordinates of `memb`, rounded up. Every tile is a copy of
    `memb` moved to its grid position, trimmed of residues that reach beyond
    `xmax` or `ymax`, and labelled with the segid ``M<k>``.

    Parameters
    ----------
    memb : Molecule
        The membrane patch to replicate. All work is done on a copy.
    xmin : float
        X position where the first tile is placed.
    ymin : float
        Y position where the first tile is placed.
    xmax : float
        Upper X limit. Residues with any atom beyond it are removed.
    ymax : float
        Upper Y limit. Residues with any atom beyond it are removed.

    Returns
    -------
    megamemb :
        A big membrane Molecule
    """
    from htmd.progress.progress import ProgressBar
    from htmd.molecule.molecule import Molecule

    memb = memb.copy()
    memb.resid = sequenceID(memb.resid)

    # One selection query serves both the origin and the tile size
    water_coords = memb.get('coords', 'water')
    minmemb = np.min(water_coords, axis=0).flatten()
    size = (np.max(water_coords, axis=0) - np.min(water_coords, axis=0)).flatten()

    xreps = int(np.ceil((xmax - xmin) / size[0]))
    yreps = int(np.ceil((ymax - ymin) / size[1]))

    logger.info('Replicating Membrane {}x{}'.format(xreps, yreps))

    # Neither the trimming selection nor the origin shift depends on the tile
    sel = 'same resid as (x > {} or y > {})'.format(xmax, ymax)
    to_origin_x = -float(minmemb[0])
    to_origin_y = -float(minmemb[1])

    megamemb = Molecule()
    bar = ProgressBar(xreps * yreps, description='Replicating Membrane')
    k = 0
    for x in range(xreps):
        xpos = xmin + x * size[0]
        for y in range(yreps):
            ypos = ymin + y * size[1]

            tmpmemb = memb.copy()
            tmpmemb.moveBy([to_origin_x + xpos, to_origin_y + ypos, 0])
            tmpmemb.remove(sel, _logger=False)
            tmpmemb.set('segid', 'M{}'.format(k))

            megamemb.append(tmpmemb)
            k += 1
            bar.progress()
    bar.stop()
    return megamemb
Esempio n. 15
0
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto `ndim` dimensions

        An incremental PCA is fitted on the weighted trajectories one at a
        time, then each weighted trajectory is transformed with the fitted
        model.

        Parameters
        ----------
        ndim : int
            The number of dimensions we want to project the data on. It is
            passed to the PCA as its number of components.

        Returns
        -------
        projdata : :class:`MetricData <htmd.metricdata.MetricData>` object
            A copy of the data given to the constructor, with each trajectory replaced by its projection

        Example
        -------
        >>> gw = GWPCA(data)
        >>> dataproj = gw.project(5)
        """
        from sklearn.decomposition import IncrementalPCA
        from htmd.progress.progress import ProgressBar
        from htmd.metricdata import MetricData

        def weighted_trajectories():
            # Yields each weighted trajectory and ticks a fresh progress bar
            # once the caller is done with it.
            bar = ProgressBar(len(self.data.dat))
            for traj in self.data.dat:
                yield traj * self.weights
                bar.progress()
            bar.stop()

        pca = IncrementalPCA(n_components=ndim, batch_size=10000)

        # First pass: fit the model incrementally
        for weighted in weighted_trajectories():
            pca.partial_fit(weighted)

        # Second pass: project every trajectory into a copy of the data
        projdata = self.data.copy()
        for idx, weighted in enumerate(weighted_trajectories()):
            projdata.dat[idx] = pca.transform(weighted)

        # projdataconc = pca.fit_transform(self.weighedconcat)
        # projdata.dat = projdata.deconcatenate(projdataconc)
        return projdata
Esempio n. 16
0
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto `ndim` dimensions

        The weighted trajectories are used twice: once to fit an incremental
        PCA trajectory by trajectory, and once to transform each of them with
        the fitted model.

        Parameters
        ----------
        ndim : int
            The number of dimensions we want to project the data on. It is
            passed to the PCA as its number of components.

        Returns
        -------
        projdata : :class:`MetricData <htmd.metricdata.MetricData>` object
            A copy of the data given to the constructor, with each trajectory replaced by its projection

        Example
        -------
        >>> gw = GWPCA(data)
        >>> dataproj = gw.project(5)
        """
        from sklearn.decomposition import IncrementalPCA
        from htmd.progress.progress import ProgressBar
        from htmd.metricdata import MetricData

        model = IncrementalPCA(n_components=ndim, batch_size=10000)

        # Fitting pass
        fit_bar = ProgressBar(len(self.data.dat))
        for traj in self.data.dat:
            scaled = traj * self.weights
            model.partial_fit(scaled)
            fit_bar.progress()
        fit_bar.stop()

        # Projection pass, written into a copy so the source data is kept
        projdata = self.data.copy()
        proj_bar = ProgressBar(len(self.data.dat))
        for idx, traj in enumerate(self.data.dat):
            scaled = traj * self.weights
            projdata.dat[idx] = model.transform(scaled)
            proj_bar.progress()
        proj_bar.stop()

        # projdataconc = pca.fit_transform(self.weighedconcat)
        # projdata.dat = projdata.deconcatenate(projdataconc)
        return projdata
Esempio n. 17
0
    def _start_inline(self, directories):
        """Execute the QM jobs in `directories` sequentially in this process.

        The command line for the configured code is saved as an executable
        ``run.sh`` in every directory. Afterwards each directory is entered
        and the command is run through the shell, except where the output
        file of the code already exists. The previous working directory is
        always restored, including when a job raises.

        Parameters
        ----------
        directories : sequence of str
            Paths of the job directories.

        Raises
        ------
        ValueError
            If ``self.code`` is not one of Gaussian, PSI4 or TeraChem.
        """
        if self.code == Code.Gaussian:
            cmd = self.gaussian_binary + ' < input.gjf > output.gau 2>&1'
            output_name = "output.gau"
        elif self.code == Code.PSI4:
            cmd = self.psi4_binary + " -i psi4.in -o psi4.out 2>&1"
            output_name = "psi4.out"
        elif self.code == Code.TeraChem:
            cmd = self.terachem_binary + " -i terachem.in > terachem.out 2>&1"
            output_name = "terachem.out"
        else:
            # An unknown code used to fail later with an unbound `cmd`
            raise ValueError("Unsupported QM code: {}".format(self.code))

        bar = ProgressBar(len(directories),
                          description="Running QM Calculations")

        for d in directories:
            script = os.path.join(d, "run.sh")
            # Closed by the context manager even when writing fails
            with open(script, "w") as f:
                f.write("#!/bin/sh\n%s\n\n" % (cmd))
            os.chmod(script, 0o700)

        for directory in directories:
            cwd = os.getcwd()
            try:
                os.chdir(directory)
                # Jobs whose output file exists are not run again
                if not os.path.exists(output_name):
                    subprocess.call(cmd, shell=True)
            finally:
                os.chdir(cwd)
            bar.progress()
        bar.stop()
Esempio n. 18
0
    def fitSoftTorsion(self, angle, geomopt=True):
        """Fit the parameters of one soft dihedral against a QM rotamer scan.

        36 rotamers of the dihedral are generated and passed to a QM
        calculation. The MM energy is evaluated with the dihedral zeroed out,
        and the dihedral parameters are then optimised with 64 bounded
        L-BFGS-B runs, keeping the parameter set with the lowest objective
        value. The fitted parameters are written back to ``self._prm``.

        Parameters
        ----------
        angle : array of int
            Atom indices of the dihedral. They must equal the ``atoms`` of one
            of ``self._soft_dihedrals``.
        geomopt : bool
            Passed to the QM calculation as ``optimize``. When False, every
            other soft dihedral is frozen as well and the results go to the
            "dihedral-single-point" directory instead of "dihedral-opt".

        Returns
        -------
        ret :
            The fitting set built from the QM results, extended with
            ``mm_zeroed``, ``mm_delta``, ``phis``, ``mm_fitted`` and ``chisq``.

        Raises
        ------
        ValueError
            If `angle` does not match any soft dihedral.
        OSError
            If the output directory cannot be created.
        """

        bkp_coords = self.coords.copy()

        phi_to_fit = None
        frozens = []

        for d in self._soft_dihedrals:
            if (d.atoms == angle).all():
                phi_to_fit = d
                frozens.append(d.atoms)
            else:
                if not geomopt:
                    frozens.append(d.atoms)

        if phi_to_fit is None:
            raise ValueError("specified phi is not a recognised soft dihedral")

        self._makeDihedralUnique(phi_to_fit)

        atoms = phi_to_fit.atoms
        equivs = phi_to_fit.equivalents

        # Number of rotamers for each dihedral to compute
        nrotamer = 36

        # Create a copy of molecule with nrotamer frames
        mol = self.copy()
        for _ in range(nrotamer - 1):
            mol.appendFrames(self)
        assert mol.numFrames == nrotamer

        # Set rotamer coordinates
        # (own loop variable, so the `angle` argument is not overwritten)
        rotamer_angles = np.linspace(-np.pi, np.pi, num=nrotamer, endpoint=False)
        for frame, rotamer_angle in enumerate(rotamer_angles):
            mol.frame = frame
            mol.setDihedral(atoms, rotamer_angle, bonds=mol.bonds)

        dirname = "dihedral-single-point"
        if geomopt:
            dirname = "dihedral-opt"

        dih_name = "%s-%s-%s-%s" % (self.name[atoms[0]], self.name[atoms[1]],
                                    self.name[atoms[2]], self.name[atoms[3]])

        fitdir = os.path.join(self.outdir, dirname, dih_name,
                              self.output_directory_name())

        try:
            os.makedirs(fitdir, exist_ok=True)
        except OSError as e:
            # Keep the original error chained so the real cause stays visible
            raise OSError(
                'Directory {} could not be created. Check if you have permissions.'
                .format(fitdir)) from e

        qmset = QMCalculation(mol,
                              charge=self.netcharge,
                              directory=fitdir,
                              frozen=frozens,
                              optimize=geomopt,
                              theory=self.theory,
                              solvent=self.solvent,
                              basis=self.basis,
                              execution=self.execution,
                              code=self.qmcode)

        ret = self._makeDihedralFittingSetFromQMResults(atoms, qmset.results())

        # Get the initial parameters of the dihedral we are going to fit

        param = self._prm.dihedralParam(self._rtf.type_by_index[atoms[0]],
                                        self._rtf.type_by_index[atoms[1]],
                                        self._rtf.type_by_index[atoms[2]],
                                        self._rtf.type_by_index[atoms[3]])

        # Save these parameters as the best fit (fit to beat)
        # Layout: indices 0-5 hold k0, 6-11 hold phi0, index 12 starts at 0
        best_param = np.zeros((13))
        for t in range(6):
            best_param[t] = param[t].k0
            best_param[t + 6] = param[t].phi0
        best_param[12] = 0.

        # Evaluate the mm potential with this dihedral zeroed out
        # The objective function will try to fit to the delta between
        # the QM potential and this modified mm potential

        for t in param:
            t.k0 = t.phi0 = 0.
            #t.e14 = 1.  # Use whatever e14 has been inherited for the type
        self._prm.updateDihedral(param)

        ffeval = FFEvaluate(self)

        # Now evaluate the ff without the dihedral being fitted
        for t in range(ret.N):
            mm_zeroed = ffeval.run(ret.coords[t][:, :, 0])['total']
            ret.mm_delta.append(ret.qm[t] - mm_zeroed)
            ret.mm_zeroed.append(mm_zeroed)

        # Shift both energy series so that their minimum is zero
        mmin1 = min(ret.mm_zeroed)
        mmin2 = min(ret.mm_delta)
        for t in range(ret.N):
            ret.mm_zeroed[t] = ret.mm_zeroed[t] - mmin1
            ret.mm_delta[t] = ret.mm_delta[t] - mmin2

        self._fitDihedral_results = ret
        self._fitDihedral_phi = param

        # Now measure all of the soft dihedrals phis that are mapped to this dihedral
        # (own loop variable, so `atoms` keeps naming the fitted dihedral)
        ret.phis = []
        for iframe in range(ret.N):
            ret.phis.append([ret.phi[iframe]])
            for equiv_atoms in equivs:
                equiv_angle = dihedralAngle(ret.coords[iframe][equiv_atoms, :, 0])
                ret.phis[iframe].append(equiv_angle)

        best_chisq = self._fitDihedral_objective(best_param)

        nattempts = 64
        bar = ProgressBar(nattempts, description="Fitting")
        for attempt in range(nattempts):

            (bounds, start) = self._fitDihedral_make_bounds(attempt)

            xopt = optimize.minimize(self._fitDihedral_objective,
                                     start,
                                     method="L-BFGS-B",
                                     bounds=bounds,
                                     options={'disp': False})

            chisq = self._fitDihedral_objective(xopt.x)
            if chisq < best_chisq:
                best_chisq = chisq
                best_param = xopt.x
            bar.progress()
        bar.stop()

        # Update the target dihedral with the optimized parameters
        for t in range(6):
            param[t].k0 = best_param[0 + t]
            param[t].phi0 = best_param[6 + t]

        self._prm.updateDihedral(param)
        param = self._prm.dihedralParam(self._rtf.type_by_index[atoms[0]],
                                        self._rtf.type_by_index[atoms[1]],
                                        self._rtf.type_by_index[atoms[2]],
                                        self._rtf.type_by_index[atoms[3]])

        # Finally evaluate the fitted potential
        ffeval = FFEvaluate(self)
        for t in range(ret.N):
            ret.mm_fitted.append(ffeval.run(ret.coords[t][:, :, 0])['total'])
        mmin = min(ret.mm_fitted)
        chisq = 0.

        for t in range(ret.N):
            ret.mm_fitted[t] = ret.mm_fitted[t] - mmin
            delta = ret.mm_fitted[t] - ret.qm[t]
            chisq = chisq + (delta * delta)
        ret.chisq = chisq

        # TODO Score it
        self.coords = bkp_coords

        return ret
Esempio n. 19
0
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto the top `ndim` TICA dimensions

        Parameters
        ----------
        ndim : int
            The number of TICA dimensions we want to project the data on. If None is given it will choose a number
            of dimensions to cover 95% of the kinetic variance.

        Returns
        -------
        dataTica : :class:`MetricData <htmd.metricdata.MetricData>` object
            A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the TICA projected data

        Raises
        ------
        RuntimeError
            If the data is held in memory and has fewer dimensions than `ndim`.

        Example
        -------
        >>> from htmd.projections.tica import TICA
        >>> tica = TICA(data,20)
        >>> dataTica = tica.project(5)
        """
        if ndim is not None:
            self.tic.set_params(dim=ndim)

        keepdata = []
        keepdim = None
        keepdimdesc = None
        if isinstance(self.data, Metric):  # Memory efficient TICA projecting trajectories on the fly
            proj = []
            refs = []
            fstep = None

            metr = self.data
            p = ProgressBar(len(metr.simulations))
            k = -1
            droppedsims = []
            for projecteddata in _projectionGenerator(metr, _getNcpus()):
                for pro in projecteddata:
                    k += 1
                    if pro is None:
                        # Remember which simulations gave no projection so they
                        # can be removed from the simulation list below
                        droppedsims.append(k)
                        continue
                    if self.dimensions is not None:
                        numDimensions = pro[0].shape[1]
                        keepdim = np.setdiff1d(range(numDimensions), self.dimensions)
                        keepdata.append(pro[0][:, keepdim])
                        proj.append(self.tic.transform(pro[0][:, self.dimensions]).astype(np.float32))  # Sub-select dimensions for projecting
                    else:
                        proj.append(self.tic.transform(pro[0]).astype(np.float32))
                    refs.append(pro[1])
                    if fstep is None:
                        fstep = pro[2]
                p.progress(len(projecteddata))
            p.stop()

            simlist = self.data.simulations
            simlist = np.delete(simlist, droppedsims)
            ref = np.array(refs, dtype=object)
            parent = None
            if self.dimensions is not None:
                from htmd.projections.metric import _singleMolfile
                from htmd.molecule.molecule import Molecule
                (single, molfile) = _singleMolfile(metr.simulations)
                if single:
                    keepdimdesc = metr.getMapping(Molecule(molfile))
                    keepdimdesc = keepdimdesc.iloc[keepdim]
        else:
            if ndim is not None and self.data.numDimensions < ndim:
                raise RuntimeError('TICA cannot increase the dimensionality of your data. Your data has {} dimensions and you requested {} TICA dimensions'.format(self.data.numDimensions, ndim))

            if self.dimensions is not None:
                keepdim = np.setdiff1d(range(self.data.numDimensions), self.dimensions)
                keepdata = [x[:, keepdim] for x in self.data.dat]
                if self.data.description is not None:
                    keepdimdesc = self.data.description.iloc[keepdim]
            proj = self.tic.get_output()
            simlist = self.data.simlist
            ref = self.data.ref
            fstep = self.data.fstep
            parent = self.data

        # If TICA is done on a subset of dimensions, combine non-projected data with projected data
        if self.dimensions is not None:
            proj = [np.hstack((kept, projected)) for kept, projected in zip(keepdata, proj)]

        if ndim is None:
            ndim = self.tic.dimension()
            logger.info('Kept {} dimension(s) to cover 95% of kinetic variance.'.format(ndim))

        from htmd.metricdata import MetricData
        datatica = MetricData(dat=np.array(proj), simlist=simlist, ref=ref, fstep=fstep, parent=parent)
        from pandas import DataFrame, concat
        # One description row per TICA dimension
        datatica.description = DataFrame({
            'type': ['tica'] * ndim,
            'atomIndexes': [-1] * ndim,
            'description': ['TICA dimension {}'.format(i + 1) for i in range(ndim)],
        })

        if self.dimensions is not None and keepdimdesc is not None:  # If TICA is done on a subset of dims
            # DataFrame.append was removed in pandas 2.0; concat gives the same
            # result: kept-dimension rows first, then the TICA rows.
            datatica.description = concat([keepdimdesc, datatica.description], ignore_index=True)

        return datatica
Esempio n. 20
0
def simlist(datafolders, topologies, inputfolders=None):
    """Builds the array of simulations by name-matching data, topology and input paths

    Parameters
    ----------
    datafolders : str list
        Directories holding the trajectories, one simulation per directory
    topologies : str list
        Topology files, or folders containing a topology file, matching the trajectories in datafolders.
        A single string can be given when one structure corresponds to all trajectories.
    inputfolders : optional, str list
        Directories holding the input files which produced the trajectories in datafolders

    Returns
    -------
    sims : np.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        The simulations, ordered by natural sort of their names. Data folders in which no trajectory is
        detected are left out.

    Raises
    ------
    FileNotFoundError
        If no data folders or topologies are given, a topology path does not exist, or no topology/input
        matches the name of a data folder
    NotADirectoryError
        If a data folder or an input folder is not a directory
    RuntimeError
        If two data folders resolve to the same simulation name

    Examples
    --------
    >>> simlist(glob('./test/data/*/'), glob('./test/input/*/'), glob('./test/input/*/'))
    >>> simlist(glob('./test/data/*/'), glob('./test/input/*/*.pdb'), glob('./test/input/*/'))
    """
    from htmd.util import ensurelist
    import natsort

    def _require_directories(paths):
        # Fail on the first path which is not an existing directory
        for path in paths:
            if not os.path.isdir(path):
                raise NotADirectoryError('{}'.format(path))

    if not datafolders:
        raise FileNotFoundError('No data folders were given, check your arguments.')
    if not topologies:
        raise FileNotFoundError('No molecule files were given, check your arguments.')

    topologies = ensurelist(topologies)
    datafolders = ensurelist(datafolders)
    _require_directories(datafolders)
    if inputfolders:
        inputfolders = ensurelist(inputfolders)
        _require_directories(inputfolders)

    # The globs are matched by simulation name because the input glob can contain
    # more folders than the data glob (e.g. simulations not retrieved yet).
    data_by_name = {}
    for path in datafolders:
        name = _simName(path)
        if name in data_by_name:
            raise RuntimeError('Duplicate simulation name detected. Cannot name-match directories.')
        data_by_name[name] = path

    topo_by_name = {}
    for path in topologies:
        if not os.path.exists(path):
            raise FileNotFoundError('File {} does not exist'.format(path))
        topo_by_name[_simName(path)] = path

    input_by_name = {_simName(path): path for path in inputfolders} if inputfolders else {}

    logger.debug('Starting listing of simulations.')
    sims = []
    sorted_names = natsort.natsorted(data_by_name)
    single_topology = len(topologies) <= 1

    from htmd.progress.progress import ProgressBar
    bar = ProgressBar(len(sorted_names), description='Creating simlist')
    for name in sorted_names:
        trajectories = _autoDetectTrajectories(data_by_name[name])

        if trajectories:
            if single_topology:
                molfile = topologies[0]
            elif name in topo_by_name:
                molfile = topo_by_name[name]
            else:
                raise FileNotFoundError('Did not find molfile with folder name ' + name + ' in the given glob')

            if os.path.isdir(molfile):
                molfile = _autoDetectTopology(molfile)

            if not inputfolders:
                inputf = []
            elif name in input_by_name:
                inputf = input_by_name[name]
            else:
                raise FileNotFoundError('Did not find input with folder name ' + name + ' in the given glob')

            numframes = [_readNumFrames(traj) for traj in trajectories]
            # Simulation ids are consecutive over the simulations actually kept
            sims.append(Sim(simid=len(sims), parent=None, input=inputf, trajectory=trajectories,
                            molfile=molfile, numframes=numframes))

        bar.progress()
    bar.stop()
    logger.debug('Finished listing of simulations.')
    return np.array(sims, dtype=object)
Esempio n. 21
0
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto the top `ndim` TICA dimensions

        Parameters
        ----------
        ndim : int
            The number of TICA dimensions we want to project the data on. If None is given it will choose a number
            of dimensions to cover 95% of the kinetic variance.

        Returns
        -------
        dataTica : :class:`MetricData <htmd.metricdata.MetricData>` object
            A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the TICA projected data

        Example
        -------
        >>> from htmd.projections.tica import TICA
        >>> tica = TICA(data,20)
        >>> dataTica = tica.project(5)
        """
        if ndim is not None:
            # Since pyEMMA 2.1 the dimension is set through set_params instead of self.tic._dim
            self.tic.set_params(dim=ndim)

        if isinstance(self.data, Metric):
            # NOTE (carried over from the original code): doesn't project on correct number of dimensions
            proj = []
            refs = []
            fstep = None
            droppedsims = []

            numsims = len(self.data.simulations)
            p = ProgressBar(numsims)
            for i in range(numsims):
                d, r, f = self.data._projectSingle(i)
                if d is None:
                    # Simulation could not be projected: remember its index to drop it from the simlist
                    droppedsims.append(i)
                else:
                    if fstep is None:
                        fstep = f  # The first projected simulation defines the frame step
                    refs.append(r)
                    proj.append(self.tic.transform(d))
                # Advance for dropped simulations as well, otherwise the bar never reaches its total
                p.progress()
            p.stop()

            simlist = np.delete(self.data.simulations, droppedsims)
            ref = np.array(refs, dtype=object)
            parent = None
        else:
            proj = self.tic.get_output()
            simlist = self.data.simlist
            ref = self.data.ref
            fstep = self.data.fstep
            parent = self.data

        if ndim is None:
            logger.info('Kept {} dimension(s) to cover 95% of kinetic variance.'.format(self.tic.dimension()))

        from htmd.metricdata import MetricData
        datatica = MetricData(dat=np.array(proj, dtype=object), simlist=simlist, ref=ref, fstep=fstep, parent=parent)
        return datatica
Esempio n. 22
0
    def _run_qm_jobs_lsf(self, directory):
        """Submit the pending Gaussian jobs found under `directory` to LSF and wait for their outputs.

        Every ``.gjf`` file below `directory` which has no ``.out`` file next to it gets an LSF script
        (``<input>.lsf``). The script is submitted with the configured ``BIN_BSUB`` binary unless a
        ``<input>.jobid`` file already exists. The method then polls every 10 seconds until all the expected
        output files are readable. Nothing is done when no pending input is found.

        Parameters
        ----------
        directory : str
            Directory which is searched recursively for ``.gjf`` input files
        """
        cmd = self._env['BIN_G09']

        fni = []
        fno = []
        for root, _dirs, files in os.walk(directory):
            for f in files:
                if f.endswith(".gjf"):
                    # The output lives next to the input, so the existence check has to use the
                    # joined path (the bare file name would be resolved against the CWD).
                    op = os.path.join(root, f.replace(".gjf", ".out"))
                    if not os.path.exists(op):
                        fni.append(os.path.join(root, f))
                        fno.append(op)

        if not fni:
            return

        for fin, fout in zip(fni, fno):
            # Make an LSF script
            fpbs = fin + ".lsf"
            if not os.path.exists(fpbs):
                with open(fpbs, "w") as f:
                    print("#BSUB -n %d" % self._config.NCORES, file=f)
                    print("#BSUB -R \"span[ptile=%d]\"" % self._config.NCORES, file=f)
                    print("#BSUB -W 24:00", file=f)
                    print("#BSUB -J gaussian", file=f)
                    print("#BSUB -app gaussian", file=f)
                    print("#BSUB -o /dev/null", file=f)
                    print("#BSUB -M %d000" % self._config.MEMORY, file=f)
                    print("\nmodule load gaussian\n", file=f)

                    print("cd \"%s\"" % directory, file=f)
                    # Paths are quoted like the cd target above so that spaces do not split them
                    print("\"%s\" \"%s\" \"%s\"" % (cmd, fin, fout), file=f)

            # Look to see if there is already a job submitted. If not, submit it
            fpbsstate = fin + ".jobid"
            if not os.path.exists(fpbsstate):
                # Submit, saving the bsub output (job id) to file
                subprocess.check_output("\"" + self._env['BIN_BSUB'] +
                                        "\" < \"" + fpbs + "\" > \"" +
                                        fpbsstate + "\"",
                                        shell=True)

        # Finally monitor progress. Continue until all jobs have produced an output
        # NB TODO FIXME: should also poll the queue to see if job is still live
        bar = ProgressBar(len(fni), description="Running QM Calculations")
        complete = False
        lastcount = 0
        while not complete:
            count = 0
            complete = True
            for out in fno:
                try:
                    os.stat(out)  # Try to flush any cache (for NFS)
                except OSError:
                    pass  # Not there yet; os.access below decides
                if os.access(out, os.R_OK):
                    count += 1
                else:
                    complete = False

            # Only advance the bar by the outputs which appeared since the last poll
            while lastcount < count:
                bar.progress()
                lastcount += 1
            time.sleep(10)
        bar.stop()
        time.sleep(5)  # A bit of time for any outputfile to complete writing
Esempio n. 23
0
    def fitSoftTorsion(self, phi, geomopt=True):
        """Fit the parameters of one soft dihedral to a QM scan of that torsion.

        The dihedral is rotated from -180 to 170 degrees in 10 degree steps, the resulting conformations are
        passed to a :class:`QMCalculation`, and the 13 fit parameters (k and phi for n=1..6 plus a shift) are
        optimised with L-BFGS-B from 64 different starts/bounds given by `_fitDihedral_make_bounds`. The best
        parameters are written back with `self._prm.updateDihedral`. `self._fitDihedral_results` and
        `self._fitDihedral_phi` are set as a side effect, and `self.coords` is restored before returning.

        Parameters
        ----------
        phi : array-like
            The atoms of the dihedral to fit. It is compared element-wise to the `atoms` of each entry of
            `self._soft_dihedrals`.
        geomopt : bool
            If True only the fitted dihedral is frozen and the QM calculation optimises the geometry.
            If False all soft dihedrals are frozen and no optimisation is requested.

        Returns
        -------
        ret :
            The fitting set created by `_makeDihedralFittingSetFromQMResults`, with `mm_zeroed`, `mm_delta`,
            `phis`, `mm_fitted` and `chisq` filled in

        Raises
        ------
        ValueError
            If `phi` does not match any of the soft dihedrals
        OSError
            If the output directory of the QM calculations cannot be created
        """
        bkp_coords = self.coords.copy()

        phi_to_fit = None
        frozens = []
        for d in self._soft_dihedrals:
            if (d.atoms == phi).all():
                phi_to_fit = d
                frozens.append(d.atoms)
            elif not geomopt:
                frozens.append(d.atoms)
        if not phi_to_fit:
            raise ValueError("specified phi is not a recognised soft dihedral")
        self._makeDihedralUnique(phi_to_fit)

        atoms = phi_to_fit.atoms
        left = phi_to_fit.left
        right = phi_to_fit.right
        equivs = phi_to_fit.equivalents

        step = 10  # degrees
        nstep = 360 // step
        cset = np.zeros((self.natoms, 3, nstep))

        # One conformation per scan angle (named `angle` so that the `phi` argument is not shadowed)
        for i, angle in enumerate(range(-180, 180, step)):
            cset[:, :, i] = setPhi(self.coords[:, :, 0], atoms, left, right, angle)

        mol = self.copy()
        mol.coords = cset

        dirname = "dihedral-opt" if geomopt else "dihedral-single-point"
        dih_name = "%s-%s-%s-%s" % (self.name[atoms[0]], self.name[atoms[1]], self.name[atoms[2]], self.name[atoms[3]])
        fitdir = os.path.join(self.outdir, dirname, dih_name, self.output_directory_name())

        try:
            os.makedirs(fitdir, exist_ok=True)
        except OSError as e:
            # Keep the friendlier message but preserve the original cause
            raise OSError('Directory {} could not be created. Check if you have permissions.'.format(fitdir)) from e

        qmset = QMCalculation(mol, charge=self.netcharge, directory=fitdir, frozen=frozens, optimize=geomopt, theory=self.theory, solvent=self.solvent,
                              basis=self.basis, execution=self.execution, code=self.qmcode)

        ret = self._makeDihedralFittingSetFromQMResults(atoms, qmset.results())

        # Get the initial parameters of the dihedral we are going to fit
        param = self._prm.dihedralParam(self._rtf.type_by_index[atoms[0]],
                                        self._rtf.type_by_index[atoms[1]],
                                        self._rtf.type_by_index[atoms[2]],
                                        self._rtf.type_by_index[atoms[3]])

        # Save these parameters as the best fit (fit to beat)
        best_param = np.zeros((13))
        for t in range(6):
            best_param[t] = param[t].k0
            best_param[t + 6] = param[t].phi0
        best_param[12] = 0.

        # Evaluate the mm potential with this dihedral zeroed out.
        # The objective function will try to fit to the delta between
        # the QM potential and this modified mm potential
        for t in param:
            t.k0 = t.phi0 = 0.
            t.e14 = 1.  # Always fit with e14 scaling of 1. per CHARMM
        self._prm.updateDihedral(param)

        # Now evaluate the ff without the dihedral being fitted
        ffe = FFEvaluate(self)
        for t in range(ret.N):
            mm_zeroed = (ffe.evaluate(ret.coords[t])["total"])
            ret.mm_delta.append(ret.qm[t] - mm_zeroed)
            ret.mm_zeroed.append(mm_zeroed)

        # Shift both energy series so that their minimum is zero
        mmin1 = min(ret.mm_zeroed)
        mmin2 = min(ret.mm_delta)
        for t in range(ret.N):
            ret.mm_zeroed[t] = ret.mm_zeroed[t] - mmin1
            ret.mm_delta[t] = ret.mm_delta[t] - mmin2

        self._fitDihedral_results = ret
        self._fitDihedral_phi = param

        # Now measure all of the soft dihedrals phis that are mapped to this dihedral
        ret.phis = []
        for i in range(ret.N):
            ret.phis.append([ret.phi[i]])
            for e in equivs:
                ret.phis[i].append(getPhi(ret.coords[i], e))

        # Set up the fit. There are 13 parameters: k,phi for n=1,2,3,4,5,6 and a shift
        best_chisq = self._fitDihedral_objective(best_param)

        # Try the 64 bounds/start combinations and keep the best result
        bar = ProgressBar(64, description="Fitting")
        for i in range(64):
            (bounds, start) = self._fitDihedral_make_bounds(i)

            xopt = optimize.minimize(self._fitDihedral_objective, start, method="L-BFGS-B", bounds=bounds,
                                     options={'disp': False})

            chisq = self._fitDihedral_objective(xopt.x)
            if chisq < best_chisq:
                best_chisq = chisq
                best_param = xopt.x
            bar.progress()
        bar.stop()

        # Update the target dihedral with the optimized parameters
        for i in range(6):
            param[i].k0 = best_param[0 + i]
            param[i].phi0 = best_param[6 + i]

        self._prm.updateDihedral(param)
        param = self._prm.dihedralParam(self._rtf.type_by_index[atoms[0]],
                                        self._rtf.type_by_index[atoms[1]],
                                        self._rtf.type_by_index[atoms[2]],
                                        self._rtf.type_by_index[atoms[3]])

        # Finally evaluate the fitted potential
        ffe = FFEvaluate(self)
        for t in range(ret.N):
            ret.mm_fitted.append(ffe.evaluate(ret.coords[t])["total"])
        mmin = min(ret.mm_fitted)
        chisq = 0.

        for t in range(ret.N):
            ret.mm_fitted[t] = ret.mm_fitted[t] - mmin
            delta = ret.mm_fitted[t] - ret.qm[t]
            chisq = chisq + (delta * delta)
        ret.chisq = chisq

        # TODO Score it
        self.coords = bkp_coords

        return ret
Esempio n. 24
0
File: tica.py Progetto: jhprinz/htmd
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto the top `ndim` TICA dimensions

        Parameters
        ----------
        ndim : int
            The number of TICA dimensions we want to project the data on. If None is given it will choose a number
            of dimensions to cover 95% of the kinetic variance.

        Returns
        -------
        dataTica : :class:`MetricData <htmd.metricdata.MetricData>` object
            A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the TICA projected data.
            Its `map` describes one 'tica' entry per projected dimension.

        Example
        -------
        >>> from htmd.projections.tica import TICA
        >>> tica = TICA(data,20)
        >>> dataTica = tica.project(5)
        """
        if ndim is not None:
            # Since pyEMMA 2.1 the dimension is set through set_params instead of self.tic._dim
            self.tic.set_params(dim=ndim)

        if isinstance(self.data, Metric):
            # NOTE (carried over from the original code): doesn't project on correct number of dimensions
            proj = []
            refs = []
            fstep = None

            metr = self.data
            p = ProgressBar(len(metr.simulations))
            k = -1  # Running index of the simulation over all the batches
            droppedsims = []
            for projecteddata in _projectionGenerator(metr, _getNcpus()):
                for pro in projecteddata:
                    k += 1
                    if pro is None:
                        # Simulation could not be projected: remember its index to drop it from the simlist
                        droppedsims.append(k)
                        continue
                    proj.append(self.tic.transform(pro[0]))
                    refs.append(pro[1])
                    if fstep is None:
                        fstep = pro[2]
                p.progress(len(projecteddata))
            p.stop()

            simlist = np.delete(self.data.simulations, droppedsims)
            ref = np.array(refs, dtype=object)
            parent = None
        else:
            proj = self.tic.get_output()
            simlist = self.data.simlist
            ref = self.data.ref
            fstep = self.data.fstep
            parent = self.data

        if ndim is None:
            # Resolve the automatically chosen dimensionality: the map below needs an actual
            # number (range(None) would raise a TypeError).
            ndim = self.tic.dimension()
            logger.info('Kept {} dimension(s) to cover 95% of kinetic variance.'.format(ndim))

        from htmd.metricdata import MetricData
        datatica = MetricData(dat=np.array(proj, dtype=object),
                              simlist=simlist,
                              ref=ref,
                              fstep=fstep,
                              parent=parent)

        from pandas import DataFrame
        # One row per TICA dimension; projected dimensions have no atom indexes, hence -1
        datatica.map = DataFrame({
            'type': ['tica'] * ndim,
            'indexes': [-1] * ndim,
            'description': ['TICA dimension {}'.format(i + 1) for i in range(ndim)]
        })

        return datatica
Esempio n. 25
0
    def project(self, ndim=None):
        """ Projects the data object given to the constructor onto the top `ndim` TICA dimensions

        Parameters
        ----------
        ndim : int
            The number of TICA dimensions we want to project the data on. If None is given it will choose a number
            of dimensions to cover 95% of the kinetic variance.

        Returns
        -------
        dataTica : :class:`MetricData <htmd.metricdata.MetricData>` object
            A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the TICA projected data

        Example
        -------
        >>> from htmd.projections.tica import TICA
        >>> tica = TICA(data,20)
        >>> dataTica = tica.project(5)
        """
        if ndim is not None:
            # Since pyEMMA 2.1 the dimension is set through set_params instead of self.tic._dim
            self.tic.set_params(dim=ndim)

        if isinstance(self.data, Metric):
            # NOTE (carried over from the original code): doesn't project on correct number of dimensions
            proj = []
            refs = []
            fstep = None
            droppedsims = []

            p = ProgressBar(len(self.data.simulations))
            for i in range(len(self.data.simulations)):
                d, r, f = self.data._projectSingle(i)
                if d is None:
                    # Nothing could be projected for this simulation: it is removed from the simlist below.
                    # The bar is still advanced so that it can reach its total.
                    droppedsims.append(i)
                    p.progress()
                    continue
                if fstep is None:
                    fstep = f  # The first projected simulation defines the frame step
                refs.append(r)
                proj.append(self.tic.transform(d))
                p.progress()
            p.stop()

            simlist = self.data.simulations
            simlist = np.delete(simlist, droppedsims)
            ref = np.array(refs, dtype=object)
            parent = None
        else:
            proj = self.tic.get_output()
            simlist = self.data.simlist
            ref = self.data.ref
            fstep = self.data.fstep
            parent = self.data

        if ndim is None:
            logger.info(
                'Kept {} dimension(s) to cover 95% of kinetic variance.'.
                format(self.tic.dimension()))

        from htmd.metricdata import MetricData
        datatica = MetricData(dat=np.array(proj, dtype=object),
                              simlist=simlist,
                              ref=ref,
                              fstep=fstep,
                              parent=parent)
        return datatica
Esempio n. 26
0
def tileMembrane(memb, xmin, ymin, xmax, ymax, buffer=1.5):
    """ Tile a membrane in the X and Y dimensions to reach a specific size.

    The tile size is taken from the extent of the water coordinates of `memb`. Copies of the membrane are
    placed on a grid starting at (`xmin`, `ymin`), residues with atoms beyond `xmax` or `ymax` are removed
    from each copy, and every non-empty copy gets its own segids (``M<k>`` for non-water, ``MW<k>`` for water).

    Parameters
    ----------
    memb : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The membrane to be tiled
    xmin : float
        Minimum x coordinate
    ymin : float
        Minimum y coordinate
    xmax : float
        Maximum x coordinate
    ymax : float
        Maximum y coordinate
    buffer : float
        Buffer distance between tiles

    Returns
    -------
    megamemb :
        A big membrane Molecule
    """
    from htmd.progress.progress import ProgressBar
    from htmd.molecule.molecule import Molecule

    memb = memb.copy()
    memb.resid = sequenceID(
        (memb.resid, memb.insertion, memb.chain, memb.segid))

    # The water coordinates define the origin and the size of one tile; fetch them only once
    watercoords = memb.get('coords', 'water')
    minmemb = np.min(watercoords, axis=0).flatten()
    size = np.max(watercoords, axis=0).flatten() - minmemb

    xreps = int(np.ceil((xmax - xmin) / size[0]))
    yreps = int(np.ceil((ymax - ymin) / size[1]))

    logger.info('Replicating Membrane {}x{}'.format(xreps, yreps))

    megamemb = Molecule()
    bar = ProgressBar(xreps * yreps, description='Replicating Membrane')
    k = 0  # Index of the next non-empty tile, used in the segids
    for x in range(xreps):
        for y in range(yreps):
            tmpmemb = memb.copy()
            xpos = xmin + x * (size[0] + buffer)
            ypos = ymin + y * (size[1] + buffer)

            tmpmemb.moveBy(
                [-float(minmemb[0]) + xpos, -float(minmemb[1]) + ypos, 0])
            tmpmemb.remove('same resid as (x > {} or y > {})'.format(
                xmax, ymax),
                           _logger=False)

            if tmpmemb.numAtoms != 0:
                tmpmemb.set('segid', 'M{}'.format(k), sel='not water')
                tmpmemb.set('segid', 'MW{}'.format(k), sel='water')

                megamemb.append(tmpmemb)
                k += 1

            # Count empty tiles as processed too, otherwise the bar never reaches its total
            bar.progress()
    bar.stop()

    # Membranes don't tile perfectly. Need to remove waters that clash with lipids of other tiles
    # Some clashes will still occur between periodic images however
    megamemb.remove('same resid as water and within 1.5 of not water',
                    _logger=False)
    return megamemb
Esempio n. 27
0
  def fitSoftDihedral( self, phi, geomopt=True ):
    """Fit the parameters of one soft dihedral to a QM scan of that torsion.

    The dihedral is rotated from -180 to 170 degrees in 10 degree steps, the resulting conformations are
    passed to a QMCalculation, and the 13 fit parameters (k and phi for n=1..6 plus a shift) are optimised
    with L-BFGS-B from 64 different starts/bounds given by `_fitDihedral_make_bounds`. The best parameters
    are written back with `self._prm.updateDihedral`. `self._fitDihedral_results` and
    `self._fitDihedral_phi` are set as a side effect, and `self.coords` is restored before returning.

    Parameters
    ----------
    phi : array-like
        The atoms of the dihedral to fit. It is compared element-wise to the `atoms` of each entry of
        `self._soft_dihedrals`.
    geomopt : bool
        If True only the fitted dihedral is frozen and the QM calculation optimises the geometry.
        If False all soft dihedrals are frozen and no optimisation is requested.

    Returns
    -------
    ret :
        The fitting set created by `_makeDihedralFittingSetFromQMResults`, with `mm_zeroed`, `mm_delta`,
        `phis`, `mm_fitted` and `chisq` filled in

    Raises
    ------
    ValueError
        If `phi` does not match any of the soft dihedrals
    """
    bkp_coords = self.coords.copy()

    phi_to_fit = None
    frozens = []
    for d in self._soft_dihedrals:
      if (d.atoms == phi).all():
        phi_to_fit = d
        frozens.append(d.atoms)
      elif not geomopt:
        frozens.append(d.atoms)
    if not phi_to_fit:
      raise ValueError( "specified phi is not a recognised soft dihedral" )
    self._makeDihedralUnique( phi_to_fit )

    atoms = phi_to_fit.atoms
    left = phi_to_fit.left
    right = phi_to_fit.right
    equivs = phi_to_fit.equivalents

    step = 10 # degrees
    nstep = 360 // step
    cset = np.zeros( ( self.natoms, 3, nstep ) )

    # One conformation per scan angle (named `angle` so that the `phi` argument is not shadowed)
    for i, angle in enumerate( range( -180, 180, step ) ):
      cset[:,:,i] = setPhi( self.coords[:,:,0], atoms, left, right, angle )

    mol = self.copy()
    mol.coords = cset

    dirname = "dihedral-opt" if geomopt else "dihedral-single-point"

    try:
      os.mkdir( dirname )
    except OSError:
      # Best effort, as before: ignore filesystem errors (e.g. the directory already exists)
      pass
    dih_name = "%s-%s-%s-%s" % ( self.name[atoms[0]], self.name[atoms[1]], self.name[atoms[2]], self.name[atoms[3]] )
    qmset = QMCalculation( mol, charge=self.netcharge, directory=os.path.join( dirname, dih_name ), frozen=frozens, optimize=geomopt )

    ret = self._makeDihedralFittingSetFromQMResults( atoms, qmset.results() )

    # Get the initial parameters of the dihedral we are going to fit
    param = self._prm.dihedralParam( self._rtf.type_by_index[ atoms[0] ],
                                     self._rtf.type_by_index[ atoms[1] ],
                                     self._rtf.type_by_index[ atoms[2] ],
                                     self._rtf.type_by_index[ atoms[3] ] )

    # Save these parameters as the best fit (fit to beat)
    best_param = np.zeros((13))
    for t in range(6):
      best_param[t] = param[t].k0
      best_param[t+6] = param[t].phi0
    best_param[12] = 0.

    # Evaluate the mm potential with this dihedral zeroed out.
    # The objective function will try to fit to the delta between
    # the QM potential and this modified mm potential
    for t in param:
      t.k0 = t.phi0 = 0.
      t.e14 = 1. # Always fit with e14 scaling of 1. per CHARMM
    self._prm.updateDihedral( param )

    # Now evaluate the ff without the dihedral being fitted
    ffe = FFEvaluate( self )
    for t in range(ret.N):
      mm_zeroed = ( ffe.evaluate( ret.coords[t] )["total"] )
      ret.mm_delta.append( ret.qm[t] - mm_zeroed )
      ret.mm_zeroed.append( mm_zeroed )

    # Shift both energy series so that their minimum is zero
    mmin1 = min( ret.mm_zeroed )
    mmin2 = min( ret.mm_delta )
    for t in range(ret.N):
      ret.mm_zeroed[t] = ret.mm_zeroed[t] - mmin1
      ret.mm_delta[t] = ret.mm_delta[t] - mmin2

    self._fitDihedral_results = ret
    self._fitDihedral_phi = param

    # Now measure all of the soft dihedrals phis that are mapped to this dihedral
    ret.phis = []
    for i in range(ret.N):
      ret.phis.append( [ ret.phi[i] ] )
      for e in equivs:
        ret.phis[i].append( getPhi( ret.coords[i], e ) )

    # Set up the fit. There are 13 parameters: k,phi for n=1,2,3,4,5,6 and a shift
    best_chisq = self._fitDihedral_objective( best_param )

    # Try the 64 bounds/start combinations and keep the best result
    bar = ProgressBar(64, description="Fitting")
    for i in range(64):
      ( bounds, start ) = self._fitDihedral_make_bounds( i )

      xopt = optimize.minimize( self._fitDihedral_objective, start, method="L-BFGS-B", bounds=bounds, options={'disp': False } )

      chisq = self._fitDihedral_objective( xopt.x )
      if chisq < best_chisq:
        best_chisq = chisq
        best_param = xopt.x
      bar.progress()
    bar.stop()

    # Update the target dihedral with the optimized parameters
    for i in range(6):
      param[i].k0 = best_param[0+i]
      param[i].phi0 = best_param[6+i]

    self._prm.updateDihedral( param )
    param = self._prm.dihedralParam( self._rtf.type_by_index[ atoms[0] ],
                                     self._rtf.type_by_index[ atoms[1] ],
                                     self._rtf.type_by_index[ atoms[2] ],
                                     self._rtf.type_by_index[ atoms[3] ] )

    # Finally evaluate the fitted potential
    ffe = FFEvaluate( self )
    for t in range(ret.N):
      ret.mm_fitted.append( ffe.evaluate( ret.coords[t] )["total"] )
    mmin = min( ret.mm_fitted )
    chisq = 0.

    for t in range(ret.N):
      ret.mm_fitted[t] = ret.mm_fitted[t] - mmin
      delta = ret.mm_fitted[t] - ret.qm[t]
      chisq = chisq + ( delta * delta )
    ret.chisq = chisq

    # TODO Score it
    self.coords = bkp_coords

    return ret
Esempio n. 28
0
    def _run_qm_jobs_lsf(self, directory):
        """Run through LSF all the Gaussian inputs under `directory` which have no output yet.

        For each ``.gjf`` file without a ``.out`` file in the same folder, an ``<input>.lsf`` job script
        is written (if missing) and submitted with the ``BIN_BSUB`` binary, storing the bsub output in
        ``<input>.jobid``. Inputs which already have a ``.jobid`` file are not submitted again. The method
        blocks, polling every 10 seconds, until every expected output file is readable. It returns
        immediately when there is nothing to run.

        Parameters
        ----------
        directory : str
            Directory which is searched recursively for ``.gjf`` input files
        """
        cmd = self._env['BIN_G09']

        fni = []
        fno = []
        for root, _dirs, files in os.walk(directory):
            for f in files:
                if not f.endswith(".gjf"):
                    continue
                # os.walk yields bare file names: join with root before testing for the output,
                # otherwise the check is done relative to the current working directory.
                op = os.path.join(root, f.replace(".gjf", ".out"))
                if not os.path.exists(op):
                    fni.append(os.path.join(root, f))
                    fno.append(op)

        if not fni:
            return

        for fin, fout in zip(fni, fno):
            # Make an LSF script
            fpbs = fin + ".lsf"
            if not os.path.exists(fpbs):
                with open(fpbs, "w") as f:
                    print("#BSUB -n %d" % (self._config.NCORES), file=f)
                    print("#BSUB -R \"span[ptile=%d]\"" % (self._config.NCORES), file=f)
                    print("#BSUB -W 24:00", file=f)
                    print("#BSUB -J gaussian", file=f)
                    print("#BSUB -app gaussian", file=f)
                    print("#BSUB -o /dev/null", file=f)
                    print("#BSUB -M %d000" % (self._config.MEMORY), file=f)
                    print("\nmodule load gaussian\n", file=f)

                    print("cd \"%s\"" % (directory), file=f)
                    # Quote the paths like the cd target above so that spaces do not split them
                    print("\"%s\" \"%s\" \"%s\"" % (cmd, fin, fout), file=f)

            # Look to see if there is already a job submitted. If not, submit it
            fpbsstate = fin + ".jobid"
            if not os.path.exists(fpbsstate):
                # Submit, saving the bsub output (job id) to file
                subprocess.check_output(
                    "\"" + self._env['BIN_BSUB'] + "\" < \"" + fpbs + "\" > \"" + fpbsstate + "\"", shell=True)

        # Finally monitor progress. Continue until all jobs have produced an output
        # NB TODO FIXME: should also poll the queue to see if job is still live
        bar = ProgressBar(len(fni), description="Running QM Calculations")
        complete = False
        lastcount = 0
        while not complete:
            count = 0
            complete = True
            for out in fno:
                try:
                    os.stat(out)  # Try to flush any cache (for NFS)
                except OSError:
                    pass  # Not there yet; os.access below decides
                if os.access(out, os.R_OK):
                    count += 1
                else:
                    complete = False

            # Only advance the bar by the outputs which appeared since the last poll
            while lastcount < count:
                bar.progress()
                lastcount += 1
            time.sleep(10)
        bar.stop()
        time.sleep(5)  # A bit of time for any outputfile to complete writing
Esempio n. 29
0
def simlist(datafolders, molfiles, inputfolders=None):
    """Build the list of simulations by name-matching data, molecule and input paths

    Each data folder is keyed by the name returned by ``_simName`` and paired with the molecule file
    (and, when given, the input folder) that maps to the same name. Data folders in which no
    trajectories are found are skipped; the remaining ones get consecutive ids in natural sort order.

    Parameters
    ----------
    datafolders : str list
        A list of directories, each containing a single trajectory. A single string is treated as a
        one-element list.
    molfiles : str list
        A list of pdb files corresponding to the trajectories in datafolders. Can also be a single
        string (or one-element list) naming one structure which is then used for all trajectories.
    inputfolders : optional, str list
        A list of directories, each containing the input files used to produce the trajectories in
        datafolders. A single string is treated as a one-element list.

    Return
    ------
    sims : np.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A list of simulations

    Raises
    ------
    NameError
        If `datafolders` or `molfiles` is empty, if two data folders map to the same simulation name,
        or if a simulation has no matching molecule file / input folder.

    Examples
    --------
    >>> simlist(glob('./test/data/*/'), glob('./test/input/*/*.pdb'), glob('./test/input/*/'))
    """
    if not datafolders:
        raise NameError('No data folders were given, check your arguments.')
    if not molfiles:
        raise NameError('No molecule files were given, check your arguments.')

    # A bare string is shorthand for a list holding just that string.
    molfiles = [molfiles] if isinstance(molfiles, str) else molfiles
    datafolders = [datafolders] if isinstance(datafolders, str) else datafolders
    if inputfolders and isinstance(inputfolders, str):
        inputfolders = [inputfolders]

    # The globs may contain more input folders than data folders (e.g. data not retrieved yet),
    # so everything is matched through the simulation name rather than by position.
    folder_by_name = {}
    for path in datafolders:
        name = _simName(path)
        if name in folder_by_name:
            raise NameError(
                'Duplicate simulation name detected. Cannot name-match directories.'
            )
        folder_by_name[name] = path

    mol_by_name = {_simName(path): path for path in molfiles}
    input_by_name = {}
    if inputfolders:
        input_by_name = {_simName(path): path for path in inputfolders}

    logger.info('Starting listing of simulations.')
    ordered_names = natsort.natsorted(folder_by_name.keys())
    bar = ProgressBar(len(ordered_names), description='Creating simlist')
    sims = []
    for name in ordered_names:
        trajectories = _listTrajectories(folder_by_name[name])

        if trajectories:
            # With several molecule files the one named like the simulation is required;
            # a single molecule file is shared by every simulation.
            if len(molfiles) > 1:
                if name not in mol_by_name:
                    raise NameError('Did not find molfile with folder name ' + name +
                                    ' in the given glob')
                molfile = mol_by_name[name]
            else:
                molfile = molfiles[0]

            if inputfolders:
                if name not in input_by_name:
                    raise NameError('Did not find input with folder name ' + name +
                                    ' in the given glob')
                inputf = input_by_name[name]
            else:
                inputf = []

            # Ids count only the simulations actually kept, hence len(sims).
            sims.append(
                Sim(simid=len(sims),
                    parent=None,
                    input=inputf,
                    trajectory=trajectories,
                    molfile=molfile))

        bar.progress()
    bar.stop()
    logger.info('Finished listing of simulations.')
    return np.array(sims, dtype=object)
Esempio n. 30
0
def simlist(datafolders, topologies, inputfolders=None):
    """Build the list of simulations by name-matching data, topology and input paths

    Each data folder is keyed by the name returned by ``_simName`` and paired with the topology
    (and, when given, the input folder) that maps to the same name. Data folders in which no
    trajectories are detected are skipped; the remaining ones get consecutive ids in natural sort
    order. The number of frames of every trajectory is read and stored with the simulation.

    Parameters
    ----------
    datafolders : str list
        A list of directories, each containing a single trajectory
    topologies : str list
        A list of topology files or folders containing a topology file corresponding to the trajectories in
        datafolders. Can also be a single string to a single structure which corresponds to all trajectories.
    inputfolders : optional, str list
        A list of directories, each containing the input files used to produce the trajectories in datafolders

    Return
    ------
    sims : np.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A list of simulations

    Raises
    ------
    FileNotFoundError
        If `datafolders` or `topologies` is empty, if a topology path does not exist, or if a
        simulation has no matching topology / input folder.
    NotADirectoryError
        If an entry of `datafolders` or `inputfolders` is not a directory.
    RuntimeError
        If two data folders map to the same simulation name.

    Examples
    --------
    >>> simlist(glob('./test/data/*/'), glob('./test/input/*/'), glob('./test/input/*/'))
    >>> simlist(glob('./test/data/*/'), glob('./test/input/*/*.pdb'), glob('./test/input/*/'))
    """
    from htmd.util import ensurelist
    import natsort

    if not datafolders:
        raise FileNotFoundError(
            'No data folders were given, check your arguments.')
    if not topologies:
        raise FileNotFoundError(
            'No molecule files were given, check your arguments.')

    topologies = ensurelist(topologies)
    datafolders = ensurelist(datafolders)
    for path in datafolders:
        if not os.path.isdir(path):
            raise NotADirectoryError('{}'.format(path))
    if inputfolders:
        inputfolders = ensurelist(inputfolders)
        for path in inputfolders:
            if not os.path.isdir(path):
                raise NotADirectoryError('{}'.format(path))

    # The globs may contain more input folders than data folders (e.g. data not retrieved yet),
    # so everything is matched through the simulation name rather than by position.
    folder_by_name = {}
    for path in datafolders:
        name = _simName(path)
        if name in folder_by_name:
            raise RuntimeError(
                'Duplicate simulation name detected. Cannot name-match directories.'
            )
        folder_by_name[name] = path

    topo_by_name = {}
    for path in topologies:
        if not os.path.exists(path):
            raise FileNotFoundError('File {} does not exist'.format(path))
        topo_by_name[_simName(path)] = path

    input_by_name = {}
    if inputfolders:
        input_by_name = {_simName(path): path for path in inputfolders}

    logger.debug('Starting listing of simulations.')
    ordered_names = natsort.natsorted(folder_by_name.keys())

    from htmd.progress.progress import ProgressBar
    bar = ProgressBar(len(ordered_names), description='Creating simlist')
    sims = []
    for name in ordered_names:
        trajectories = _autoDetectTrajectories(folder_by_name[name])

        if trajectories:
            # With several topologies the one named like the simulation is required;
            # a single topology is shared by every simulation.
            if len(topologies) > 1:
                if name not in topo_by_name:
                    raise FileNotFoundError(
                        'Did not find molfile with folder name ' + name +
                        ' in the given glob')
                molfile = topo_by_name[name]
            else:
                molfile = topologies[0]

            # A folder stands for "the topology file inside it".
            if os.path.isdir(molfile):
                molfile = _autoDetectTopology(molfile)

            if inputfolders:
                if name not in input_by_name:
                    raise FileNotFoundError(
                        'Did not find input with folder name ' + name +
                        ' in the given glob')
                inputf = input_by_name[name]
            else:
                inputf = []

            numframes = list(map(_readNumFrames, trajectories))
            # Ids count only the simulations actually kept, hence len(sims).
            sims.append(
                Sim(simid=len(sims),
                    parent=None,
                    input=inputf,
                    trajectory=trajectories,
                    molfile=molfile,
                    numframes=numframes))

        bar.progress()
    bar.stop()
    logger.debug('Finished listing of simulations.')
    return np.array(sims, dtype=object)
Esempio n. 31
0
def simlist(datafolders, molfiles, inputfolders=None):
    """Build the list of simulations by name-matching data, molecule and input paths

    Each data folder is keyed by the name returned by ``_simName`` and paired with the molecule file
    (and, when given, the input folder) that maps to the same name. Data folders for which
    ``_listXTCs`` returns nothing are skipped; the remaining ones get consecutive ids in natural
    sort order.

    Parameters
    ----------
    datafolders : str list
        A list of directories, each containing a single trajectory. A single string is treated as a
        one-element list.
    molfiles : str list
        A list of pdb files corresponding to the trajectories in datafolders. Can also be a single
        string (or one-element list) naming one structure which is then used for all trajectories.
    inputfolders : optional, str list
        A list of directories, each containing the input files used to produce the trajectories in
        datafolders. A single string is treated as a one-element list.

    Return
    ------
    sims : np.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A list of simulations

    Raises
    ------
    NameError
        If `datafolders` or `molfiles` is empty, if two data folders map to the same simulation name,
        or if a simulation has no matching molecule file / input folder.

    Examples
    --------
    >>> simlist(glob('./test/data/*/'), glob('./test/input/*/*.pdb'), glob('./test/input/*/'))
    """
    if not datafolders:
        raise NameError('No data folders were given, check your arguments.')
    if not molfiles:
        raise NameError('No molecule files were given, check your arguments.')

    # A bare string is shorthand for a list holding just that string.
    molfiles = [molfiles] if isinstance(molfiles, str) else molfiles
    datafolders = [datafolders] if isinstance(datafolders, str) else datafolders
    if inputfolders and isinstance(inputfolders, str):
        inputfolders = [inputfolders]

    # The globs may contain more input folders than data folders (e.g. data not retrieved yet),
    # so everything is matched through the simulation name rather than by position.
    data_lookup = {}
    for entry in datafolders:
        simname = _simName(entry)
        if simname in data_lookup:
            raise NameError('Duplicate simulation name detected. Cannot name-match directories.')
        data_lookup[simname] = entry

    mol_lookup = {_simName(entry): entry for entry in molfiles}
    input_lookup = {}
    if inputfolders:
        input_lookup = {_simName(entry): entry for entry in inputfolders}

    logger.info('Starting listing of simulations.')
    sorted_names = natsort.natsorted(data_lookup.keys())
    bar = ProgressBar(len(sorted_names), description='Creating simlist')
    sims = []
    for simname in sorted_names:
        trajectories = _listXTCs(data_lookup[simname])

        if trajectories:
            # Several molecule files: the one named like the simulation is mandatory.
            # One molecule file: it is shared by every simulation.
            if len(molfiles) > 1:
                if simname not in mol_lookup:
                    raise NameError('Did not find molfile with folder name ' + simname + ' in the given glob')
                molfile = mol_lookup[simname]
            else:
                molfile = molfiles[0]

            if inputfolders:
                if simname not in input_lookup:
                    raise NameError('Did not find input with folder name ' + simname + ' in the given glob')
                inputf = input_lookup[simname]
            else:
                inputf = []

            # Ids count only the simulations actually kept, hence len(sims).
            sims.append(Sim(simid=len(sims), parent=None, input=inputf, trajectory=trajectories, molfile=molfile))

        bar.progress()
    bar.stop()
    logger.info('Finished listing of simulations.')
    return np.array(sims, dtype=object)
Esempio n. 32
0
def solvate(mol,
            pad=None,
            minmax=None,
            negx=0,
            posx=0,
            negy=0,
            posy=0,
            negz=0,
            posz=0,
            buffer=2.4,
            watsize=65.4195,
            prefix='WT',
            keysel='name OH2',
            rotate=False,
            rotsel='all',
            rotinc=36,
            spdb=None,
            spsf=None,
            stop=None):
    """ Solvates the system in a water box

    A copy of `mol` is solvated by tiling replicas of the water box read from `spdb` on a regular
    grid with spacing `watsize`. Every replica gets its own segid; waters flagged by the overlap
    check and waters outside the requested dimensions are dropped before the replica is appended.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The molecule object we want to solvate. It is copied, the original is left untouched. If it
        has several frames only frame 0 is kept.
    pad : float
        The padding to add to the minmax in all dimensions. You can specify different padding in each dimension using
        the negx, negy, negz, posx, posy, posz options. This option will override any values in the neg and pos options.
    minmax : list
        Min and max dimensions. Should be a 2D matrix of the form [[minx, miny, minz], [maxx, maxy, maxz]]. Any
        nested sequence (list, tuple) or array of that shape is accepted. If none is given, it is calculated from
        the minimum and maximum coordinates in the mol.
    negx : float
        The padding in the -x dimension
    posx : float
        The padding in the +x dimension
    negy : float
        The padding in the -y dimension
    posy : float
        The padding in the +y dimension
    negz : float
        The padding in the -z dimension
    posz : float
        The padding in the +z dimension
    buffer : float
        How much buffer space to leave empty between waters and other molecules
    watsize : float
        The size of the water box
    prefix : str
        The prefix used for water segments
    keysel : str
        The key selection for water atoms. Not read by this implementation.
    rotate : bool
        Enable automated rotation of molecule to fit best in box. Not implemented: a true value raises.
    rotsel : str
        The selection of atoms to rotate. Not read by this implementation.
    rotinc : float
        The increment in degrees to rotate. Not read by this implementation.
    spdb : str
        The path to the water pdb file. Defaults to the ``wat.pdb`` located next to this source file.
    spsf : str
        The path to the water psf file. Not read by this implementation.
    stop : str
        The path to the water topology file. Not read by this implementation.

    Returns
    -------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A solvated molecule

    Raises
    ------
    NameError
        If `spdb` is not an existing file, or if `rotate` is true.

    Examples
    --------
    >>> smol = solvate(mol, pad=10)
    >>> smol = solvate(mol, minmax=[[-20, -20, -20],[20, 20, 20]])
    """
    mol = mol.copy()
    if mol.numFrames > 1:
        logger.warning(
            'Multiple frames in Molecule. Solvate keeps only frame 0 and discards the rest.'
        )
        mol.coords = np.atleast_3d(mol.coords[:, :, 0])

    if spdb is None:
        spdb = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            'wat.pdb')

    if not os.path.isfile(spdb):
        raise NameError('No solvent pdb file found in ' + spdb)
    logger.info('Using water pdb file at: ' + spdb)
    water = Molecule(spdb)

    if pad is not None:
        # A uniform pad overrides all six per-direction paddings.
        negx = posx = negy = posy = negz = posz = pad

    if rotate:
        raise NameError('Rotation not implemented yet')

    # Calculate min max coordinates from molecule
    if mol.numAtoms > 0:
        coords = mol.get('coords')
        minmol = np.min(coords, axis=0)
        maxmol = np.max(coords, axis=0)
    else:
        minmol = [np.inf, np.inf, np.inf]
        maxmol = [-np.inf, -np.inf, -np.inf]

    if minmax is None:
        minc = minmol
        maxc = maxmol
    else:
        # asarray (rather than an isinstance(list) check) so that tuples and other nested
        # sequences support the 2D indexing below; arrays pass through unchanged.
        minmax = np.asarray(minmax)
        minc = minmax[0, :]
        maxc = minmax[1, :]

    xmin = float(minc[0] - negx)
    xmax = float(maxc[0] + posx)
    ymin = float(minc[1] - negy)
    ymax = float(maxc[1] + posy)
    zmin = float(minc[2] - negz)
    zmax = float(maxc[2] + posz)

    dx = xmax - xmin
    dy = ymax - ymin
    dz = zmax - zmin

    # Number of water box replicas needed along each axis
    nx = int(np.ceil((dx + 2 * buffer) / watsize))
    ny = int(np.ceil((dy + 2 * buffer) / watsize))
    nz = int(np.ceil((dz + 2 * buffer) / watsize))

    # Calculate number of preexisting water segments with given prefix
    if mol.numAtoms > 0:
        preexist = len(
            np.unique(mol.get('segid', sel='segid "{}.*"'.format(prefix))))
    else:
        preexist = 0

    numsegs = nx * ny * nz
    logger.info('Replicating ' + str(numsegs) + ' water segments, ' + str(nx) +
                ' by ' + str(ny) + ' by ' + str(nz))

    # Check that we won't run out of segment name characters, and switch to
    # using hexadecimal or alphanumeric naming schemes in cases where decimal
    # numbered segnames won't fit into the field width.
    # Only the length of the probe names matters here, so the upper-case hex probe is
    # equivalent to the lower-case hex names generated below.
    testsegname = '{0}{1:d}'.format(prefix, numsegs + preexist)
    testsegnamehex = '{0}{1:X}'.format(prefix, numsegs + preexist)
    writemode = 'decimal'
    if len(testsegname) > 4 and len(testsegnamehex) <= 4:
        writemode = 'hex'
        logger.warning(
            'Warning: decimal naming would overrun segname field. Using hexadecimal segnames instead...'
        )
    elif len(testsegnamehex) > 4:
        writemode = 'alphanum'
        logger.warning(
            'Warning: decimal or hex naming would overrun segname field. Using alphanumeric segnames instead...'
        )

    # Bounding box of the solute grown by the buffer: only replicas intersecting it
    # need the overlap check.
    minx = minmol[0] - buffer
    miny = minmol[1] - buffer
    minz = minmol[2] - buffer
    maxx = maxmol[0] + buffer
    maxy = maxmol[1] + buffer
    maxz = maxmol[2] + buffer

    bar = ProgressBar(numsegs, description='Solvating')
    waterboxes = np.empty(numsegs, dtype=object)
    n = preexist  # running segment number, continues after preexisting segments
    w = 0  # index of the current replica in waterboxes
    for i in range(nx):
        movex = xmin + i * watsize
        xoverlap = not (movex > maxx or movex + watsize < minx)

        for j in range(ny):
            movey = ymin + j * watsize
            yoverlap = not (movey > maxy or movey + watsize < miny)

            for k in range(nz):
                movez = zmin + k * watsize
                zoverlap = not (movez > maxz or movez + watsize < minz)

                if writemode == 'decimal':
                    segname = '{0}{1:d}'.format(prefix, n)
                elif writemode == 'hex':
                    segname = '{0}{1:x}'.format(prefix, n)
                elif writemode == 'alphanum':
                    # Three letters starting at 'A', taken from the base-26 digits of n
                    upper, low = divmod(n, 26)
                    high, mid = divmod(upper, 26)
                    segname = '{0}{1:c}{2:c}{3:c}'.format(
                        prefix, 65 + high, 65 + mid, 65 + low)

                box = water.copy()
                box.moveBy([movex, movey, movez])
                box.set('segid', segname)
                waterboxes[w] = box

                # The replica is appended to mol temporarily so the helpers can evaluate it
                # together with the solute; it is filtered out of mol again right below.
                mol.append(box)
                watsel = mol.segid == segname

                selover = np.zeros(len(watsel), dtype=bool)
                if xoverlap and yoverlap and zoverlap:  # Remove water overlapping with other segids
                    selover = _overlapWithOther(mol, segname, buffer)
                # Remove water outside the boundaries
                selout = _outOfBoundaries(mol, segname, xmin, xmax, ymin, ymax,
                                          zmin, zmax)
                sel = selover | selout

                mol.filter(mol.segid != segname, _logger=False)
                # sel is indexed over all atoms of mol; restrict it to this replica's atoms
                box.filter(np.invert(sel[watsel]), _logger=False)
                n += 1
                w += 1
                bar.progress()
    bar.stop()

    # Append the surviving waters, skipping replicas that ended up empty
    waters = 0
    for box in waterboxes:
        waters += box.numAtoms
        if box.numAtoms != 0:
            mol.append(box)

    # Atom count divided by 3 gives the number of water molecules reported
    logger.info('{} water molecules were added to the system.'.format(
        int(waters / 3)))
    return mol
Esempio n. 33
0
def solvate(mol, pad=None, minmax=None, negx=0, posx=0, negy=0, posy=0, negz=0, posz=0, buffer=2.4, watsize=65.4195,
            prefix='WT', keysel='name OH2', rotate=False, rotsel='all', rotinc=36, spdb=None,
            spsf=None, stop=None):
    """ Solvates the system in a water box

    A copy of `mol` is solvated by tiling replicas of the water box read from `spdb` on a regular
    grid with spacing `watsize`. Every replica gets its own segid; waters flagged by the overlap
    check and waters outside the requested dimensions are dropped before the replica is appended.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The molecule object we want to solvate. It is copied, the original is left untouched. If it
        has several frames only frame 0 is kept.
    pad : float
        The padding to add to the minmax in all dimensions. You can specify different padding in each dimension using
        the negx, negy, negz, posx, posy, posz options. This option will override any values in the neg and pos options.
    minmax : list
        Min and max dimensions. Should be a 2D matrix of the form [[minx, miny, minz], [maxx, maxy, maxz]]. Any
        nested sequence (list, tuple) or array of that shape is accepted. If none is given, it is calculated from
        the minimum and maximum coordinates in the mol.
    negx : float
        The padding in the -x dimension
    posx : float
        The padding in the +x dimension
    negy : float
        The padding in the -y dimension
    posy : float
        The padding in the +y dimension
    negz : float
        The padding in the -z dimension
    posz : float
        The padding in the +z dimension
    buffer : float
        How much buffer space to leave empty between waters and other molecules
    watsize : float
        The size of the water box
    prefix : str
        The prefix used for water segments
    keysel : str
        The key selection for water atoms. Not read by this implementation.
    rotate : bool
        Enable automated rotation of molecule to fit best in box. Not implemented: a true value raises.
    rotsel : str
        The selection of atoms to rotate. Not read by this implementation.
    rotinc : float
        The increment in degrees to rotate. Not read by this implementation.
    spdb : str
        The path to the water pdb file. Defaults to the ``wat.pdb`` located next to this source file.
    spsf : str
        The path to the water psf file. Not read by this implementation.
    stop : str
        The path to the water topology file. Not read by this implementation.

    Returns
    -------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A solvated molecule

    Raises
    ------
    NameError
        If `spdb` is not an existing file, or if `rotate` is true.

    Examples
    --------
    >>> smol = solvate(mol, pad=10)
    >>> smol = solvate(mol, minmax=[[-20, -20, -20],[20, 20, 20]])
    """
    mol = mol.copy()
    if mol.numFrames > 1:
        logger.warning('Multiple frames in Molecule. Solvate keeps only frame 0 and discards the rest.')
        mol.coords = np.atleast_3d(mol.coords[:, :, 0])

    if spdb is None:
        spdb = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'wat.pdb')

    if not os.path.isfile(spdb):
        raise NameError('No solvent pdb file found in ' + spdb)
    logger.info('Using water pdb file at: ' + spdb)
    water = Molecule(spdb)

    if pad is not None:
        # A uniform pad overrides all six per-direction paddings.
        negx = posx = negy = posy = negz = posz = pad

    if rotate:
        raise NameError('Rotation not implemented yet')

    # Calculate min max coordinates from molecule
    if mol.numAtoms > 0:
        coords = mol.get('coords')
        minmol = np.min(coords, axis=0)
        maxmol = np.max(coords, axis=0)
    else:
        minmol = [np.inf, np.inf, np.inf]
        maxmol = [-np.inf, -np.inf, -np.inf]

    if minmax is None:
        minc = minmol
        maxc = maxmol
    else:
        # asarray (rather than an isinstance(list) check) so that tuples and other nested
        # sequences support the 2D indexing below; arrays pass through unchanged.
        minmax = np.asarray(minmax)
        minc = minmax[0, :]
        maxc = minmax[1, :]

    xmin = float(minc[0] - negx)
    xmax = float(maxc[0] + posx)
    ymin = float(minc[1] - negy)
    ymax = float(maxc[1] + posy)
    zmin = float(minc[2] - negz)
    zmax = float(maxc[2] + posz)

    dx = xmax - xmin
    dy = ymax - ymin
    dz = zmax - zmin

    # Number of water box replicas needed along each axis
    nx = int(np.ceil((dx + 2 * buffer) / watsize))
    ny = int(np.ceil((dy + 2 * buffer) / watsize))
    nz = int(np.ceil((dz + 2 * buffer) / watsize))

    # Calculate number of preexisting water segments with given prefix
    if mol.numAtoms > 0:
        preexist = len(np.unique(mol.get('segid', sel='segid "{}.*"'.format(prefix))))
    else:
        preexist = 0

    numsegs = nx * ny * nz
    logger.info('Replicating ' + str(numsegs) + ' water segments, ' + str(nx) + ' by ' + str(ny) + ' by ' + str(nz))

    # Check that we won't run out of segment name characters, and switch to
    # using hexadecimal or alphanumeric naming schemes in cases where decimal
    # numbered segnames won't fit into the field width.
    # Only the length of the probe names matters here, so the upper-case hex probe is
    # equivalent to the lower-case hex names generated below.
    testsegname = '{0}{1:d}'.format(prefix, numsegs + preexist)
    testsegnamehex = '{0}{1:X}'.format(prefix, numsegs + preexist)
    writemode = 'decimal'
    if len(testsegname) > 4 and len(testsegnamehex) <= 4:
        writemode = 'hex'
        logger.warning('Warning: decimal naming would overrun segname field. Using hexadecimal segnames instead...')
    elif len(testsegnamehex) > 4:
        writemode = 'alphanum'
        logger.warning('Warning: decimal or hex naming would overrun segname field. Using alphanumeric segnames instead...')

    # Bounding box of the solute grown by the buffer: only replicas intersecting it
    # need the overlap check.
    minx = minmol[0] - buffer
    miny = minmol[1] - buffer
    minz = minmol[2] - buffer
    maxx = maxmol[0] + buffer
    maxy = maxmol[1] + buffer
    maxz = maxmol[2] + buffer

    bar = ProgressBar(numsegs, description='Solvating')
    waterboxes = np.empty(numsegs, dtype=object)
    n = preexist  # running segment number, continues after preexisting segments
    w = 0  # index of the current replica in waterboxes
    for i in range(nx):
        movex = xmin + i * watsize
        xoverlap = not (movex > maxx or movex + watsize < minx)

        for j in range(ny):
            movey = ymin + j * watsize
            yoverlap = not (movey > maxy or movey + watsize < miny)

            for k in range(nz):
                movez = zmin + k * watsize
                zoverlap = not (movez > maxz or movez + watsize < minz)

                if writemode == 'decimal':
                    segname = '{0}{1:d}'.format(prefix, n)
                elif writemode == 'hex':
                    segname = '{0}{1:x}'.format(prefix, n)
                elif writemode == 'alphanum':
                    # Three letters starting at 'A', taken from the base-26 digits of n
                    upper, low = divmod(n, 26)
                    high, mid = divmod(upper, 26)
                    segname = '{0}{1:c}{2:c}{3:c}'.format(prefix, 65 + high, 65 + mid, 65 + low)

                box = water.copy()
                box.moveBy([movex, movey, movez])
                box.set('segid', segname)
                waterboxes[w] = box

                # The replica is appended to mol temporarily so the helpers can evaluate it
                # together with the solute; it is filtered out of mol again right below.
                mol.append(box)
                watsel = mol.segid == segname

                selover = np.zeros(len(watsel), dtype=bool)
                if xoverlap and yoverlap and zoverlap:  # Remove water overlapping with other segids
                    selover = _overlapWithOther(mol, segname, buffer)
                # Remove water outside the boundaries
                selout = _outOfBoundaries(mol, segname, xmin, xmax, ymin, ymax, zmin, zmax)
                sel = selover | selout

                mol.filter(mol.segid != segname, _logger=False)
                # sel is indexed over all atoms of mol; restrict it to this replica's atoms
                box.filter(np.invert(sel[watsel]), _logger=False)
                n += 1
                w += 1
                bar.progress()
    bar.stop()

    # Append the surviving waters, skipping replicas that ended up empty
    waters = 0
    for box in waterboxes:
        waters += box.numAtoms
        if box.numAtoms != 0:
            mol.append(box)

    # Atom count divided by 3 gives the number of water molecules reported
    logger.info('{} water molecules were added to the system.'.format(int(waters / 3)))
    return mol