def contactVecToMatrix(vector, atomIndexes):
    """Expand a per-pair vector into a symmetric matrix over atom groups.

    Parameters
    ----------
    vector : array-like with a ``dtype`` attribute
        One value per atom-group pair; ``vector[i]`` belongs to ``atomIndexes[i]``.
    atomIndexes : iterable of two-element mutable sequences
        Each entry holds the two atom groups of one pair. The input is deep-copied,
        so the caller's object is not modified.

    Returns
    -------
    matrix : np.ndarray
        Square array of ``vector.dtype`` holding each value at ``[row, col]`` and ``[col, row]``.
    mapping : np.ndarray
        Integer array of the same shape holding the index into ``vector`` for each
        filled cell and -1 everywhere else.
    uqAtomGroups : list
        The unique atom groups, sorted by their first element; position in this
        list is the row/column index used in ``matrix`` and ``mapping``.
    """
    from copy import deepcopy

    # Private copy: the pair entries are normalised to lists in place below
    pairs = deepcopy(list(atomIndexes))

    # Collect the distinct atom groups
    uqAtomGroups = []
    for pair in pairs:
        for side in (0, 1):
            pair[side] = ensurelist(pair[side])
            if pair[side] not in uqAtomGroups:
                uqAtomGroups.append(pair[side])
    # Order the groups by the first atom of each group
    uqAtomGroups = sorted(uqAtomGroups, key=lambda group: group[0])

    num = len(uqAtomGroups)
    matrix = np.zeros((num, num), dtype=vector.dtype)
    mapping = np.full((num, num), -1, dtype=int)
    for i, value in enumerate(vector):
        row = uqAtomGroups.index(pairs[i][0])
        col = uqAtomGroups.index(pairs[i][1])
        matrix[row, col] = matrix[col, row] = value
        mapping[row, col] = mapping[col, row] = i
    return matrix, mapping, uqAtomGroups
def __init__(self, contype, selection, width, restraints, axes='xyz', fbcentre=None, fbcentresel=None):
    """Store a restraint definition and validate ``width`` and ``fbcentre``.

    Parameters
    ----------
    contype
        Stored as ``self.type``.
    selection
        Stored as ``self.selection``.
    width
        A single value or a sequence; normalised with ``ensurelist`` and required
        to have length 1 or 3.
    restraints
        Stored as ``self.restraints``.
    axes
        Stored as ``self.axes``.
    fbcentre
        Optional; when given it is normalised with ``ensurelist`` and must have length 3.
    fbcentresel
        Stored as ``self.fbcentresel``.

    Raises
    ------
    RuntimeError
        If ``width`` or ``fbcentre`` has an unsupported number of elements.
    """
    self.type = contype
    self.selection = selection
    self.width = ensurelist(width)
    self.restraints = restraints
    self.axes = axes
    self.fbcentre = fbcentre
    self.fbcentresel = fbcentresel

    if len(self.width) not in (1, 3):
        raise RuntimeError(
            'Restraint width must be either a single value or a list of 3 values for the xyz dimensions'
        )

    if self.fbcentre is None:
        return
    self.fbcentre = ensurelist(self.fbcentre)
    if len(self.fbcentre) != 3:
        raise RuntimeError('Restraint fbcentre must be a list of 3 values for the xyz coordinates')
def contactVecToMatrix(vector, atomIndexes):
    """Turn a vector of per-pair values into a symmetric group-by-group matrix.

    Parameters
    ----------
    vector : array-like with a ``dtype`` attribute
        ``vector[i]`` is the value associated with the pair ``atomIndexes[i]``.
    atomIndexes : iterable of two-element mutable sequences
        The two atom groups of every pair. A deep copy is taken before the
        entries are normalised, so the argument itself is left untouched.

    Returns
    -------
    matrix : np.ndarray
        ``(num, num)`` array of ``vector.dtype`` filled symmetrically with the values.
    mapping : np.ndarray
        ``(num, num)`` integer array with the ``vector`` index of each filled cell,
        -1 for cells that correspond to no pair.
    uqAtomGroups : list
        Unique atom groups sorted by their first element.
    """
    from copy import deepcopy

    groupPairs = deepcopy(list(atomIndexes))

    # Calculating the unique atom groups in the mapping
    uqAtomGroups = []
    for entry in groupPairs:
        entry[0] = ensurelist(entry[0])
        entry[1] = ensurelist(entry[1])
        for group in (entry[0], entry[1]):
            if group not in uqAtomGroups:
                uqAtomGroups.append(group)
    uqAtomGroups = sorted(uqAtomGroups, key=lambda grp: grp[0])  # Sort by first atom in each atom list

    size = len(uqAtomGroups)
    matrix = np.zeros((size, size), dtype=vector.dtype)
    mapping = np.full((size, size), -1, dtype=int)
    for idx, val in enumerate(vector):
        first, second = groupPairs[idx][0], groupPairs[idx][1]
        row = uqAtomGroups.index(first)
        col = uqAtomGroups.index(second)
        matrix[row, col] = val
        matrix[col, row] = val
        mapping[row, col] = idx
        mapping[col, row] = idx
    return matrix, mapping, uqAtomGroups
def _createJobScript(self, fname, workdir, runsh):
    """Write the Slurm batch script for one job and make it executable.

    Parameters
    ----------
    fname : str
        Path of the script file to create (overwritten if it exists).
    workdir : str
        Job working directory; converted to an absolute path before use.
    runsh : str
        Text written verbatim after the ``cd`` into ``workdir``.

    Notes
    -----
    When ``self.datadir`` is set, the directory ``<datadir>/<basename of workdir>``
    is created while the script is generated (not when the job runs) and a ``mv``
    of ``*.<trajext>`` into it is appended to the script. Creating it is
    idempotent: an already existing directory is reused and missing parents are
    created.
    """
    workdir = os.path.abspath(workdir)
    with open(fname, 'w') as f:
        f.write('#!/bin/bash\n')
        f.write('#\n')
        f.write('#SBATCH --job-name={}\n'.format(self.jobname))
        f.write('#SBATCH --partition={}\n'.format(','.join(ensurelist(self.partition))))
        if self.ngpu != 0:
            # The gres line is assembled piecewise so gpu_mem can be appended to it
            f.write('#SBATCH --gres=gpu:{}'.format(self.ngpu))
            if self.gpumemory is not None:
                f.write(',gpu_mem:{}'.format(self.gpumemory))
            f.write('\n')
        f.write('#SBATCH --cpus-per-task={}\n'.format(self.ncpu))
        f.write('#SBATCH --mem={}\n'.format(self.memory))
        f.write('#SBATCH --priority={}\n'.format(self.priority))
        f.write('#SBATCH --workdir={}\n'.format(workdir))
        f.write('#SBATCH --output={}\n'.format(self.outputstream))
        f.write('#SBATCH --error={}\n'.format(self.errorstream))
        if self.envvars is not None:
            f.write('#SBATCH --export={}\n'.format(self.envvars))
        if self.walltime is not None:
            f.write('#SBATCH --time={}\n'.format(self.walltime))
        if self.mailtype is not None and self.mailuser is not None:
            f.write('#SBATCH --mail-type={}\n'.format(self.mailtype))
            f.write('#SBATCH --mail-user={}\n'.format(self.mailuser))
        if self.nodelist is not None:
            f.write('#SBATCH --nodelist={}\n'.format(','.join(ensurelist(self.nodelist))))
        if self.exclude is not None:
            f.write('#SBATCH --exclude={}\n'.format(','.join(ensurelist(self.exclude))))
        if self.account is not None:
            f.write('#SBATCH --account={}\n'.format(self.account))
        # Trap kill signals to create sentinel file
        f.write('\ntrap "touch {}" EXIT SIGTERM\n'.format(os.path.normpath(os.path.join(workdir, self._sentinel))))
        f.write('\n')
        if self.prerun is not None:
            for call in ensurelist(self.prerun):
                f.write('{}\n'.format(call))
        f.write('\ncd {}\n'.format(workdir))
        f.write('{}'.format(runsh))

        # Move completed trajectories
        if self.datadir is not None:
            datadir = os.path.abspath(self.datadir)
            simname = os.path.basename(os.path.normpath(workdir))
            # create directory for new file; tolerate re-generation of the same job
            odir = os.path.join(datadir, simname)
            os.makedirs(odir, exist_ok=True)
            f.write('\nmv *.{} {}'.format(self.trajext, odir))

    # Owner-only read/write/execute
    os.chmod(fname, 0o700)
def validate(self, object, basedir=None):
    """Check that every given class derives from one of the accepted classes.

    Parameters
    ----------
    object
        A class or a list of classes to check.
    basedir
        Not used by this method.

    Returns
    -------
    list
        ``object`` normalised to a list by ``ensurelist``.

    Raises
    ------
    ValueError
        If any element is not a subclass of at least one entry of ``self.classname``.
    """
    accepted = ensurelist(self.classname)
    candidates = ensurelist(object)
    for candidate in candidates:
        if not any(issubclass(candidate, cl) for cl in accepted):
            raise ValueError('Value must be subclass of {}'.format(self.classname))
    return candidates
def show(self, quiet=False):
    """ Builds the Acemd configuration file string

    Parameters
    ----------
    quiet : bool
        If False (default) the string is printed to stdout and nothing is returned.
        If True nothing is printed and the string is returned.

    Returns
    -------
    conf : str or None
        The string of the configuration file when ``quiet`` is True, otherwise None
    """
    from htmd.util import ensurelist
    text = ''
    maxwidth = np.max([len(k) for k in self.__dict__.keys()])

    keys = sorted(list(self.__dict__.keys()))
    # Restraints go after all plain options; tolerate objects without that attribute
    if 'restraints' in keys:
        keys.append(keys.pop(keys.index('restraints')))
    keys.append(keys.pop(keys.index('run')))  # Move the run command to the end

    for cmd in keys:
        if cmd == 'restraints' and self.restraints is not None:
            for r in ensurelist(self.restraints):
                text += '{}\n'.format(r.format(maxwidth))
        elif not cmd.startswith('_') and self.__dict__[cmd] is not None:
            val = self.__dict__[cmd]
            # Prefer the registered output name over the stored value
            if cmd in self._outnames:
                val = self._outnames[cmd]
            text += '{name: <{maxwidth}}\t{val:}\n'.format(name=cmd, val=val, maxwidth=maxwidth)

    if not quiet:
        print(text)
    else:
        return text
def _singleMolfile(sims):
    """Decide whether all simulations can share a single topology.

    Parameters
    ----------
    sims : Molecule or np.ndarray
        Either a Molecule or an array of objects exposing a ``molfile`` attribute.

    Returns
    -------
    single : bool
        True when only one distinct molfile exists, or when all distinct molfiles
        load into Molecules that ``mol_equal`` reports equal (coordinates excluded).
    molfile : tuple or list
        One of the distinct molfile tuples when ``single`` is True, otherwise ``[]``.

    Raises
    ------
    RuntimeError
        If ``sims`` is an empty array.
    """
    from htmd.molecule.molecule import mol_equal
    from htmd.util import ensurelist

    if isinstance(sims, Molecule):
        return False, []
    if not isinstance(sims, np.ndarray):
        return False, []

    uqmolfiles = list({tuple(ensurelist(s.molfile)) for s in sims})
    if len(uqmolfiles) == 0:
        raise RuntimeError('No molfiles found in simlist')
    if len(uqmolfiles) == 1:
        return True, uqmolfiles[0]

    # If more than one molfile load them and see if they are different Molecules
    ref = Molecule(uqmolfiles[0], _logger=False)
    for candidate in uqmolfiles[1:]:
        mol = Molecule(candidate, _logger=False)
        if not mol_equal(ref, mol, exceptFields=['coords']):
            return False, []
    return True, uqmolfiles[0]
def __init__(self, contype, selection, width, restraints, axes='xyz', fbcentre=None, fbcentresel=None):
    """Record the restraint settings, checking ``width`` and ``fbcentre`` sizes.

    ``width`` is normalised with ``ensurelist`` and must contain 1 or 3 values.
    ``fbcentre``, when not None, is normalised the same way and must contain
    exactly 3 values. All other arguments are stored unchanged
    (``contype`` under the attribute name ``type``).

    Raises
    ------
    RuntimeError
        If ``width`` or ``fbcentre`` has the wrong number of values.
    """
    self.type = contype
    self.selection = selection
    self.width = ensurelist(width)
    self.restraints = restraints
    self.axes = axes
    self.fbcentre = fbcentre
    self.fbcentresel = fbcentresel

    nwidth = len(self.width)
    if not (nwidth == 1 or nwidth == 3):
        raise RuntimeError('Restraint width must be either a single value or a list of 3 values for the xyz '
                           'dimensions')

    if self.fbcentre is not None:
        centre = ensurelist(self.fbcentre)
        self.fbcentre = centre
        if len(centre) != 3:
            raise RuntimeError('Restraint fbcentre must be a list of 3 values for the xyz coordinates')
def validate(self, object, basedir=None):
    """Verify that each supplied class is a subclass of an allowed class.

    Both ``object`` and ``self.classname`` may be a single item or a list; they
    are normalised with ``ensurelist``. ``basedir`` is accepted but not used.

    Returns
    -------
    list
        The normalised list of supplied classes.

    Raises
    ------
    ValueError
        If an element matches none of the allowed classes.
    """
    allowed = ensurelist(self.classname)
    supplied = ensurelist(object)
    for item in supplied:
        matches = (issubclass(item, base) for base in allowed)
        if not any(matches):
            raise ValueError('Value must be subclass of {}'.format(self.classname))
    return supplied
def __init__(self, plumed_inp):
    """Set up the metric from a PLUMED input given as text or as statement objects.

    Parameters
    ----------
    plumed_inp : str or object or list
        If a string, it is stored unchanged as the PLUMED input. Otherwise it is
        wrapped in a ``PlumedStatement`` as its prerequisites and the input text is
        produced by joining the lines returned by ``_printDFS``.

    Raises
    ------
    Exception
        If locating the PLUMED installation fails; the underlying error is
        attached as the cause.
    """
    # I am not sure at all about opening files here is good style
    self._precalculation_enabled = False
    self._plumed_exe = shutil.which("plumed")
    self.colvar = None
    self.cvnames = None
    self.stmt = None

    try:
        pp = _getPlumedRoot()
        logger.info("Plumed path is " + pp)
    except Exception as e:
        # Chain the original failure so the root cause stays visible in tracebacks
        raise Exception(
            "To use MetricPlumed2 please ensure PLUMED 2's executable is installed and in path"
        ) from e

    # Sanitize if single element
    if isinstance(plumed_inp, str):
        self._plumed_inp = plumed_inp
    else:
        # This should keep the CVs etc in scope
        self.stmt = PlumedStatement()
        self.stmt.prereq = ensurelist(plumed_inp)
        stmts = _printDFS(self.stmt)
        self._plumed_inp = "\n".join(stmts)
def _filterTopology(sim, outfolder, filtsel):
    """Write filtered copies of a simulation's topology into ``outfolder``.

    The topology in ``sim.molfile`` is loaded and written as ``filtered.<ext>``
    with the selection ``filtsel``, once for "pdb", once for "psf" and once for
    the extension of every file in ``sim.molfile``. Files that already exist are
    left alone. A failed write is logged as a warning and does not abort.

    Raises
    ------
    RuntimeError
        If the topology cannot be read (``IOError`` while loading).
    """
    from htmd.util import ensurelist

    try:
        from moleculekit.molecule import Molecule

        mol = Molecule(sim.molfile)
    except IOError as e:
        raise RuntimeError("simFilter: {}. Cannot read topology file {}".format(e, sim.molfile))

    # If we read for example psf or prmtop which have no coords, just add 0s everywhere
    if mol.coords.size == 0:
        mol.coords = np.zeros((mol.numAtoms, 3, 1), dtype=np.float32)

    # Adding pdb and psf to make sure they are always written
    extensions = ["pdb", "psf"] + [os.path.splitext(m)[1][1:] for m in ensurelist(sim.molfile)]

    for ext in set(extensions):
        filttopo = path.join(outfolder, "filtered.{}".format(ext))
        if path.isfile(filttopo):
            continue
        try:
            mol.write(filttopo, filtsel)
        except Exception as e:
            logger.warning("Filtering was not able to write {} due to error: {}".format(filttopo, e))
def _createJobScript(self, fname, workdir, runsh):
    """Write the Slurm batch script for one job and make it executable.

    Parameters
    ----------
    fname : str
        Path of the script file to create (overwritten if it exists).
    workdir : str
        Job working directory; converted to an absolute path before use.
    runsh : str
        Text written verbatim after the ``cd`` into ``workdir``.

    Notes
    -----
    ``self.partition`` may be a single name or a list of names; a list is written
    comma-separated, the same way ``nodelist`` and ``exclude`` are.
    When ``self.datadir`` is set, ``<datadir>/<basename of workdir>`` is created
    while the script is generated and a ``mv`` of ``*.<trajext>`` into it is
    appended to the script. An already existing directory is reused.
    """
    from htmd.util import ensurelist
    workdir = os.path.abspath(workdir)
    with open(fname, 'w') as f:
        f.write('#!/bin/bash\n')
        f.write('#\n')
        f.write('#SBATCH --job-name={}\n'.format(self.jobname))
        # Join explicitly: formatting a list directly would emit its Python repr
        f.write('#SBATCH --partition={}\n'.format(','.join(ensurelist(self.partition))))
        if self.ngpu != 0:
            # The gres line is assembled piecewise so gpu_mem can be appended to it
            f.write('#SBATCH --gres=gpu:{}'.format(self.ngpu))
            if self.gpumemory is not None:
                f.write(',gpu_mem:{}'.format(self.gpumemory))
            f.write('\n')
        f.write('#SBATCH --cpus-per-task={}\n'.format(self.ncpu))
        f.write('#SBATCH --mem={}\n'.format(self.memory))
        f.write('#SBATCH --priority={}\n'.format(self.priority))
        f.write('#SBATCH --workdir={}\n'.format(workdir))
        f.write('#SBATCH --output={}\n'.format(self.outputstream))
        f.write('#SBATCH --error={}\n'.format(self.errorstream))
        if self.environment is not None:
            f.write('#SBATCH --export={}\n'.format(self.environment))
        if self.walltime is not None:
            f.write('#SBATCH --time={}\n'.format(self.walltime))
        if self.mailtype is not None and self.mailuser is not None:
            f.write('#SBATCH --mail-type={}\n'.format(self.mailtype))
            f.write('#SBATCH --mail-user={}\n'.format(self.mailuser))
        if self.nodelist is not None:
            f.write('#SBATCH --nodelist={}\n'.format(','.join(ensurelist(self.nodelist))))
        if self.exclude is not None:
            f.write('#SBATCH --exclude={}\n'.format(','.join(ensurelist(self.exclude))))
        # Trap kill signals to create sentinel file
        f.write('\ntrap "touch {}" EXIT SIGTERM\n'.format(os.path.normpath(os.path.join(workdir, self._sentinel))))
        f.write('\ncd {}\n'.format(workdir))
        f.write('{}'.format(runsh))

        # Move completed trajectories
        if self.datadir is not None:
            datadir = os.path.abspath(self.datadir)
            simname = os.path.basename(os.path.normpath(workdir))
            # create directory for new file; tolerate re-generation of the same job
            odir = os.path.join(datadir, simname)
            os.makedirs(odir, exist_ok=True)
            f.write('\nmv *.{} {}'.format(self.trajext, odir))

    # Owner-only read/write/execute
    os.chmod(fname, 0o700)
def _createJobScript(self, fname, workdir, runsh):
    """Write the LSF batch script for one job and make it executable.

    Parameters
    ----------
    fname : str
        Path of the script file to create (overwritten if it exists).
    workdir : str
        Job working directory; converted to an absolute path before use.
    runsh : str
        Text written verbatim after the ``cd`` into ``workdir``.

    Raises
    ------
    AttributeError
        If GPUs are requested and ``self.version`` is neither 9 nor 10.

    Notes
    -----
    ``self.resources`` and ``self.prerun`` may each be a single string or a list
    of strings. For version 10 with GPUs requested and no ``gpu_options`` set,
    ``self.gpu_options`` is set to ``{'mode': 'exclusive_process'}`` as a side effect.
    When ``self.datadir`` is set, ``<datadir>/<basename of workdir>`` is created
    while the script is generated and a ``mv`` of ``*.<trajext>`` into it is
    appended to the script. An already existing directory is reused.
    """
    from htmd.util import ensurelist
    workdir = os.path.abspath(workdir)
    with open(fname, 'w') as f:
        f.write('#!/bin/bash\n')
        f.write('#\n')
        f.write('#BSUB -J {}\n'.format(self.jobname))
        f.write('#BSUB -q "{}"\n'.format(' '.join(ensurelist(self.queue))))
        f.write('#BSUB -n {}\n'.format(self.ncpu))
        if self.app is not None:
            f.write('#BSUB -app {}\n'.format(self.app))
        if self.ngpu != 0:
            if self.version == 9:
                if self.gpu_options is not None:
                    logger.warning('gpu_options argument was set while it is not needed for LSF version 9')
                f.write('#BSUB -R "select[ngpus>0] rusage[ngpus_excl_p={}]"\n'.format(self.ngpu))
            elif self.version == 10:
                if not self.gpu_options:
                    self.gpu_options = {'mode': 'exclusive_process'}
                gpu_requirements = ['num={}'.format(self.ngpu)]
                gpu_requirements.extend(
                    '{}={}'.format(key, value) for key, value in self.gpu_options.items()
                )
                f.write('#BSUB -gpu "{}"\n'.format(':'.join(gpu_requirements)))
            else:
                raise AttributeError('Version not supported')
        f.write('#BSUB -M {}\n'.format(self.memory))
        f.write('#BSUB -cwd {}\n'.format(workdir))
        f.write('#BSUB -outdir {}\n'.format(workdir))
        f.write('#BSUB -o {}\n'.format(self.outputstream))
        f.write('#BSUB -e {}\n'.format(self.errorstream))
        if self.envvars is not None:
            # NOTE(review): every other directive uses a single dash; confirm '--env' is what bsub expects
            f.write('#BSUB --env {}\n'.format(self.envvars))
        if self.walltime is not None:
            f.write('#BSUB -W {}\n'.format(self.walltime))
        if self.resources is not None:
            # ensurelist: a bare string must yield one directive, not one per character
            for resource in ensurelist(self.resources):
                f.write('#BSUB -R "{}"\n'.format(resource))
        # Trap kill signals to create sentinel file
        f.write('\ntrap "touch {}" EXIT SIGTERM\n'.format(os.path.normpath(os.path.join(workdir, self._sentinel))))
        f.write('\n')
        if self.prerun is not None:
            # ensurelist: a bare string must yield one line, not one per character
            for call in ensurelist(self.prerun):
                f.write('{}\n'.format(call))
        f.write('\ncd {}\n'.format(workdir))
        f.write('{}'.format(runsh))

        # Move completed trajectories
        if self.datadir is not None:
            datadir = os.path.abspath(self.datadir)
            simname = os.path.basename(os.path.normpath(workdir))
            # create directory for new file; tolerate re-generation of the same job
            odir = os.path.join(datadir, simname)
            os.makedirs(odir, exist_ok=True)
            f.write('\nmv *.{} {}'.format(self.trajext, odir))

    # Owner-only read/write/execute
    os.chmod(fname, 0o700)
def getSegIdx(m, mseg):
    """Return the indexes of the CA atoms belonging to the selected segments.

    Parameters
    ----------
    m
        Object providing ``atomselect``, ``numAtoms``, ``segid`` and ``name``.
    mseg : str or list
        The literal string 'protein' selects via ``m.atomselect('protein and name CA')``.
        Anything else is treated as one segment id or a list of segment ids.

    Returns
    -------
    np.ndarray
        Indexes where the resulting selection mask is true.
    """
    # Calculate the atoms which belong to the selected segments
    if isinstance(mseg, str) and mseg == 'protein':
        mask = m.atomselect('protein and name CA')
    else:
        isCA = m.name == 'CA'
        mask = np.zeros(m.numAtoms, dtype=bool)
        for segment in ensurelist(mseg):
            mask |= (m.segid == segment) & isCA
    return np.where(mask)[0]
def _dihedralAtomsPrecalc(self, mol, protsel):
    """Resolve the dihedrals of this metric to atom indexes of ``mol``.

    Uses ``self._dihedrals`` when set (normalising it to a list and storing the
    list back on ``self``); otherwise falls back to
    ``Dihedral.proteinDihedrals(mol, protsel)``. The conversion to indexes is
    restricted to the atoms selected by ``protsel``.

    Returns
    -------
    The value returned by ``Dihedral.dihedralsToIndexes``.
    """
    protatoms = mol.atomselect(protsel)

    if self._dihedrals is not None:
        from htmd.util import ensurelist
        self._dihedrals = ensurelist(self._dihedrals)
        dihedrals = self._dihedrals
    else:
        # Default phi psi dihedrals
        dihedrals = Dihedral.proteinDihedrals(mol, protsel)

    return Dihedral.dihedralsToIndexes(mol, dihedrals, protatoms)
def stop(self):
    """ Cancels all currently running and queued jobs

    The cancel command is run once per queue in ``self.queue``, restricted to
    jobs named ``self.jobname`` that belong to the current user.

    Raises
    ------
    ValueError
        If ``self.queue`` or ``self.jobname`` is not defined.
    """
    import getpass
    if self.queue is None:
        raise ValueError('The queue needs to be defined.')
    # Without a job name the command line would contain None and fail inside subprocess
    if self.jobname is None:
        raise ValueError('The jobname needs to be defined.')
    user = getpass.getuser()
    for q in ensurelist(self.queue):
        cmd = [self._qcancel, '-J', self.jobname, '-u', user, '-q', q]
        logger.debug(cmd)
        ret = check_output(cmd, stderr=DEVNULL)
        logger.debug(ret.decode("ascii"))
def writePRM(mol, parameters, filename):
    """Write the parameters used by ``mol`` to ``filename`` in sections.

    The file contains, in order, the sections BONDS, ANGLES, DIHEDRALS, IMPROPER
    and NONBONDED, each listing the unique atom-type tuples found in ``mol``
    together with the values looked up in ``parameters``.

    Parameters
    ----------
    mol
        Object providing ``atomtype``, ``bonds``, ``angles``, ``dihedrals`` and ``impropers``.
    parameters
        Parameter container providing ``bond_types``, ``angle_types``,
        ``dihedral_types``, ``atom_types`` and the improper type tables.
    filename : str
        Output path; overwritten if it exists.
    """
    from htmd.version import version as htmdversion
    from htmd.util import ensurelist

    # NOTE: a former check rejecting 1-4 electrostatic scaling is disabled; nothing validates it here.

    # The context manager guarantees the file is closed even if a lookup below raises
    with open(filename, "w") as f:
        print("* prm file built by HTMD parameterize version {}".format(htmdversion()), file=f)
        print("*\n", file=f)

        print("BONDS", file=f)
        types = getSortedAndUniqueTypes(mol.atomtype[mol.bonds], 'bond_types')
        for atype in types:
            val = getParameter(atype, parameters.bond_types)
            print("%-6s %-6s %8.2f %8.4f" % (atype[0], atype[1], val.k, val.req), file=f)

        print("\nANGLES", file=f)
        types = getSortedAndUniqueTypes(mol.atomtype[mol.angles], 'angle_types')
        for atype in types:
            val = getParameter(atype, parameters.angle_types)
            print("%-6s %-6s %-6s %8.2f %8.2f" % (atype[0], atype[1], atype[2], val.k, val.theteq), file=f)

        print("\nDIHEDRALS", file=f)
        types = getSortedAndUniqueTypes(mol.atomtype[mol.dihedrals], 'dihedral_types')
        for atype in types:
            val = getParameter(atype, parameters.dihedral_types)
            for term in val:
                print("%-6s %-6s %-6s %-6s %12.8f %d %12.8f"
                      % (atype[0], atype[1], atype[2], atype[3], term.phi_k, term.per, term.phase), file=f)

        print("\nIMPROPER", file=f)
        types = getSortedAndUniqueTypes(mol.atomtype[mol.impropers], 'improper_types')
        for atype in types:
            # The lookup returns the type tuple as stored in the parameters and the table it was found in
            atype, field = findImproperType(atype, parameters)
            val = parameters.__dict__[field][atype]
            if field == 'improper_periodic_types':
                for term in ensurelist(val):
                    print("%-6s %-6s %-6s %-6s %12.8f %d %12.8f"
                          % (atype[0], atype[1], atype[2], atype[3], term.phi_k, term.per, term.phase), file=f)
            elif field == 'improper_types':
                print("%-6s %-6s %-6s %-6s %12.8f %d %12.8f"
                      % (atype[0], atype[1], atype[2], atype[3], val.psi_k, 0, val.psi_eq), file=f)

        print("\nNONBONDED nbxmod 5 atom cdiel shift vatom vdistance vswitch -", file=f)
        print("cutnb 14.0 ctofnb 12.0 ctonnb 10.0 eps 1.0 e14fac 1.0 wmin 1.5", file=f)
        types = getSortedAndUniqueTypes(mol.atomtype, 'atom_types')
        for atype in types:
            val = parameters.atom_types[atype]
            if val.epsilon_14 != val.epsilon:
                # Separate 1-4 values are appended only when they differ from the regular ones
                print("%-6s 0.0000 %8.4f %8.4f 0.0000 %8.4f %8.4f"
                      % (atype, val.epsilon, val.rmin, val.epsilon_14, val.rmin_14), file=f)
            else:
                print("%-6s 0.0000 %8.4f %8.4f" % (atype, val.epsilon, val.rmin), file=f)
def writePRM(mol, parameters, filename):
    """Write the parameters used by ``mol`` to ``filename`` in sections.

    The file contains, in order, the sections BONDS, ANGLES, DIHEDRALS, IMPROPER
    and NONBONDED, each listing the unique atom-type tuples found in ``mol``
    together with the values looked up in ``parameters``.

    Parameters
    ----------
    mol
        Object providing ``atomtype``, ``bonds``, ``angles``, ``dihedrals`` and ``impropers``.
    parameters
        Parameter container providing ``bond_types``, ``angle_types``,
        ``dihedral_types`` and ``atom_types`` mappings keyed by atom-type tuple.
    filename : str
        Output path; overwritten if it exists.
    """
    from htmd.version import version as htmdversion
    from htmd.util import ensurelist

    # NOTE: a former check rejecting 1-4 electrostatic scaling is disabled; nothing validates it here.

    # The context manager guarantees the file is closed even if a lookup below raises
    with open(filename, "w") as f:
        print("* prm file built by HTMD parameterize version {}".format(htmdversion()), file=f)
        print("*\n", file=f)

        print("BONDS", file=f)
        types = getSortedAndUniqueTypes(mol.atomtype[mol.bonds], 'bond_types')
        for atype in types:
            val = parameters.bond_types[atype]
            print("%-6s %-6s %8.2f %8.4f" % (atype[0], atype[1], val.k, val.req), file=f)

        print("\nANGLES", file=f)
        types = getSortedAndUniqueTypes(mol.atomtype[mol.angles], 'angle_types')
        for atype in types:
            val = parameters.angle_types[atype]
            print("%-6s %-6s %-6s %8.2f %8.2f" % (atype[0], atype[1], atype[2], val.k, val.theteq), file=f)

        print("\nDIHEDRALS", file=f)
        types = getSortedAndUniqueTypes(mol.atomtype[mol.dihedrals], 'dihedral_types')
        for atype in types:
            val = parameters.dihedral_types[atype]
            for term in val:
                print("%-6s %-6s %-6s %-6s %12.8f %d %12.8f"
                      % (atype[0], atype[1], atype[2], atype[3], term.phi_k, term.per, term.phase), file=f)

        print("\nIMPROPER", file=f)
        types = getSortedAndUniqueTypes(mol.atomtype[mol.impropers], 'improper_types')
        for atype in types:
            # The lookup also reports which improper table the value came from
            val, field = getImproperParameter(atype, parameters)
            if field == 'improper_periodic_types':
                for term in ensurelist(val):
                    print("%-6s %-6s %-6s %-6s %12.8f %d %12.8f"
                          % (atype[0], atype[1], atype[2], atype[3], term.phi_k, term.per, term.phase), file=f)
            elif field == 'improper_types':
                print("%-6s %-6s %-6s %-6s %12.8f %d %12.8f"
                      % (atype[0], atype[1], atype[2], atype[3], val.psi_k, 0, val.psi_eq), file=f)

        print("\nNONBONDED nbxmod 5 atom cdiel shift vatom vdistance vswitch -", file=f)
        print("cutnb 14.0 ctofnb 12.0 ctonnb 10.0 eps 1.0 e14fac 1.0 wmin 1.5", file=f)
        types = getSortedAndUniqueTypes(mol.atomtype, 'atom_types')
        for atype in types:
            val = parameters.atom_types[atype]
            if val.epsilon_14 != val.epsilon:
                # Separate 1-4 values are appended only when they differ from the regular ones
                print("%-6s 0.0000 %8.4f %8.4f 0.0000 %8.4f %8.4f"
                      % (atype, val.epsilon, val.rmin, val.epsilon_14, val.rmin_14), file=f)
            else:
                print("%-6s 0.0000 %8.4f %8.4f" % (atype, val.epsilon, val.rmin), file=f)
def stop(self):
    """ Cancels all currently running and queued jobs

    The cancel command is invoked once for every partition in
    ``self.partition``, limited to jobs named ``self.jobname`` owned by the
    current user. The command output is logged at debug level.

    Raises
    ------
    ValueError
        If ``self.partition`` or ``self.jobname`` is not defined.
    """
    import getpass

    if self.partition is None:
        raise ValueError('The partition needs to be defined.')
    if self.jobname is None:
        raise ValueError('The jobname needs to be defined.')

    username = getpass.getuser()
    for partition in ensurelist(self.partition):
        command = [self._qcancel, '-n', self.jobname, '-u', username, '-p', partition]
        logger.debug(command)
        output = check_output(command)
        logger.debug(output.decode("ascii"))
def _createJobScript(self, fname, workdir, runsh):
    """Write the LSF batch script for one job and make it executable.

    Parameters
    ----------
    fname : str
        Path of the script file to create (overwritten if it exists).
    workdir : str
        Job working directory; converted to an absolute path before use.
    runsh : str
        Text written verbatim after the ``cd`` into ``workdir``.

    Notes
    -----
    ``self.resources`` and ``self.prerun`` may each be a single string or a list
    of strings.
    When ``self.datadir`` is set, ``<datadir>/<basename of workdir>`` is created
    while the script is generated and a ``mv`` of ``*.<trajext>`` into it is
    appended to the script. An already existing directory is reused.
    """
    from htmd.util import ensurelist
    workdir = os.path.abspath(workdir)
    with open(fname, 'w') as f:
        f.write('#!/bin/bash\n')
        f.write('#\n')
        f.write('#BSUB -J {}\n'.format(self.jobname))
        f.write('#BSUB -q "{}"\n'.format(' '.join(ensurelist(self.queue))))
        f.write('#BSUB -n {}\n'.format(self.ncpu))
        if self.app is not None:
            f.write('#BSUB -app {}\n'.format(self.app))
        if self.ngpu != 0:
            f.write('#BSUB -R "select[ngpus>0] rusage[ngpus_excl_p={}]"\n'.format(self.ngpu))
        f.write('#BSUB -M {}\n'.format(self.memory))
        f.write('#BSUB -cwd {}\n'.format(workdir))
        f.write('#BSUB -outdir {}\n'.format(workdir))
        f.write('#BSUB -o {}\n'.format(self.outputstream))
        f.write('#BSUB -e {}\n'.format(self.errorstream))
        if self.envvars is not None:
            # NOTE(review): every other directive uses a single dash; confirm '--env' is what bsub expects
            f.write('#BSUB --env {}\n'.format(self.envvars))
        if self.walltime is not None:
            f.write('#BSUB -W {}\n'.format(self.walltime))
        if self.resources is not None:
            # ensurelist: a bare string must yield one directive, not one per character
            for resource in ensurelist(self.resources):
                f.write('#BSUB -R "{}"\n'.format(resource))
        # Trap kill signals to create sentinel file
        f.write('\ntrap "touch {}" EXIT SIGTERM\n'.format(os.path.normpath(os.path.join(workdir, self._sentinel))))
        f.write('\n')
        if self.prerun is not None:
            # ensurelist: a bare string must yield one line, not one per character
            for call in ensurelist(self.prerun):
                f.write('{}\n'.format(call))
        f.write('\ncd {}\n'.format(workdir))
        f.write('{}'.format(runsh))

        # Move completed trajectories
        if self.datadir is not None:
            datadir = os.path.abspath(self.datadir)
            simname = os.path.basename(os.path.normpath(workdir))
            # create directory for new file; tolerate re-generation of the same job
            odir = os.path.join(datadir, simname)
            os.makedirs(odir, exist_ok=True)
            f.write('\nmv *.{} {}'.format(self.trajext, odir))

    # Owner-only read/write/execute
    os.chmod(fname, 0o700)
def show(self, quiet=False):
    """Builds the Acemd configuration file string

    The text starts with the ``TCL`` attribute (if set) followed by a ``#`` line,
    then one line per public attribute that is not None, in sorted order, with
    the restraints and finally the ``run`` command moved to the end.

    Parameters
    ----------
    quiet : bool
        If False (default) the string is printed to stdout and nothing is returned.
        If True nothing is printed and the string is returned.

    Returns
    -------
    conf : str or None
        The string of the configuration file when ``quiet`` is True, otherwise None
    """
    options = self.__dict__

    text = ""
    if "TCL" in options and options["TCL"] is not None:
        text = options["TCL"]
    text += "#\n"

    maxwidth = np.max([len(key) for key in options.keys()])

    ordered = sorted(list(options.keys()))
    if "restraints" in ordered:
        ordered.append(ordered.pop(ordered.index("restraints")))
    ordered.append(ordered.pop(ordered.index("run")))  # Move the run command to the end

    lines = []
    for cmd in ordered:
        if cmd == "restraints" and self.restraints is not None:
            for restraint in ensurelist(self.restraints):
                lines.append("{}\n".format(restraint.format(maxwidth)))
            continue
        if cmd.startswith("_") or options[cmd] is None or cmd == "TCL":
            continue
        val = self._outnames[cmd] if cmd in self._outnames else options[cmd]
        # variables cannot start with numbers. We need to rename it here for acemd
        name = "1-4scaling" if cmd == "scaling14" else cmd
        lines.append("{name: <{maxwidth}}\t{val:}\n".format(name=name, val=val, maxwidth=maxwidth))
    text += "".join(lines)

    if quiet:
        return text
    print(text)
def readConfig(self, configfile):
    """Load option values from a JSON configuration file onto this object.

    Parameters
    ----------
    configfile : str
        Path to a JSON file. If the path does not exist it is taken as the name of
        a bundled configuration and resolved to
        ``<home()>/mdengine/acemd/config/<configfile>.json``.

    Notes
    -----
    Every top-level key must map to an object with a ``value`` entry. The value is
    set as the attribute of the same name, except for ``restraints`` whose entries
    are converted with ``_Restraint._fromDict`` and stored as a list.
    """
    import json

    if not os.path.exists(configfile):
        from htmd.home import home
        bundled = '{}.json'.format(configfile)
        configfile = os.path.join(home(), 'mdengine', 'acemd', 'config', bundled)

    with open(configfile, 'r') as fh:
        config = json.load(fh)

    for key in config:
        value = config[key]['value']
        if key != 'restraints':
            setattr(self, key, value)
            continue
        self.restraints = []
        for restraintdict in ensurelist(value):
            self.restraints.append(_Restraint._fromDict(restraintdict))
def show(self, quiet=False):
    """ Builds the Acemd configuration file string

    The text starts with the ``TCL`` attribute (if set) followed by a ``#`` line,
    then one line per public attribute that is not None, in sorted order, with
    the restraints and finally the ``run`` command moved to the end.

    Parameters
    ----------
    quiet : bool
        If False (default) the string is printed to stdout and nothing is returned.
        If True nothing is printed and the string is returned.

    Returns
    -------
    conf : str or None
        The string of the configuration file when ``quiet`` is True, otherwise None
    """
    attrs = self.__dict__

    text = ''
    if 'TCL' in attrs and attrs['TCL'] is not None:
        text = attrs['TCL']
    text += '#\n'

    maxwidth = np.max([len(k) for k in attrs.keys()])

    keys = sorted(list(attrs.keys()))
    if 'restraints' in keys:
        keys.append(keys.pop(keys.index('restraints')))
    keys.append(keys.pop(keys.index('run')))  # Move the run command to the end

    chunks = []
    for cmd in keys:
        if cmd == 'restraints' and self.restraints is not None:
            chunks.extend('{}\n'.format(r.format(maxwidth)) for r in ensurelist(self.restraints))
            continue
        if cmd.startswith('_') or attrs[cmd] is None or cmd == 'TCL':
            continue
        if cmd in self._outnames:
            val = self._outnames[cmd]
        else:
            val = attrs[cmd]
        # variables cannot start with numbers. We need to rename it here for acemd
        name = '1-4scaling' if cmd == 'scaling14' else cmd
        chunks.append('{name: <{maxwidth}}\t{val:}\n'.format(name=name, val=val, maxwidth=maxwidth))
    text += ''.join(chunks)

    if quiet:
        return text
    print(text)
def inprogress(self):
    """ Returns the sum of the number of running and queued workunits of the specific group in the engine.

    The status command is run once per queue in ``self.queue`` for jobs named
    ``self.jobname`` owned by the current user. Each call is attempted up to three
    times, sleeping 3 seconds between attempts; the error of the third failure is
    re-raised. The count for a queue is the number of newline-separated pieces of
    the output minus two, floored at zero.

    Returns
    -------
    total : int
        Total running and queued workunits

    Raises
    ------
    ValueError
        If ``self.queue`` or ``self.jobname`` is not defined.
    """
    import time
    import getpass

    if self.queue is None:
        raise ValueError('The queue needs to be defined.')
    if self.jobname is None:
        raise ValueError('The jobname needs to be defined.')

    user = getpass.getuser()
    total = 0
    for queue in ensurelist(self.queue):
        cmd = [self._qstatus, '-J', self.jobname, '-u', user, '-q', queue]
        logger.debug(cmd)

        # This command randomly fails so I need to allow it to repeat or it crashes adaptive
        for attempt in range(3):
            try:
                ret = check_output(cmd, stderr=DEVNULL)
            except CalledProcessError:
                if attempt == 2:
                    raise
                time.sleep(3)
            else:
                break

        decoded = ret.decode("ascii")
        logger.debug(decoded)

        # TODO: check lines and handle errors
        count = len(decoded.split("\n")) - 2
        total += max(count, 0)  # a negative count means something odd happened
    return total
def load(self, path="."):
    """Loads all files required to run a simulation and applies the configured protocols to it

    For every command listed in ``self._defaultfnames`` that is set on this
    object, the first candidate file that exists under ``path`` is read as
    binary into ``self._file_data``, its output name is recorded in
    ``self._outnames`` and the attribute is replaced by the chosen file name.

    Parameters
    ----------
    path : str
        Working directory relative to which the configuration file is read

    Raises
    ------
    RuntimeError
        If none of the candidate files of a command exists under ``path``.
    """
    # load files and reset filenames
    for cmd in self._defaultfnames.keys():
        if cmd in self.__dict__ and self.__dict__[cmd] is not None:
            found = False
            for fname in ensurelist(self.__dict__[cmd]):
                fpath = os.path.join(path, fname)
                if not os.path.exists(fpath):
                    continue

                # read all as binary; the context manager closes the file even if read() fails
                with open(fpath, "rb") as f:
                    data = f.read()
                self._file_data[cmd] = data

                defaultname = self._defaultfnames[cmd]
                if defaultname.endswith("*"):
                    # A wildcard default name inherits the extension of the file actually found
                    defaultname = "{}.{}".format(
                        os.path.splitext(defaultname)[0],
                        os.path.splitext(fpath)[1][1:],
                    )

                self._outnames[cmd] = defaultname
                self.__dict__[cmd] = fname
                found = True
                break
            if not found:
                raise RuntimeError(
                    'Could not find any of the files "{}" specified for command "{}" '
                    "in path {}".format(self.__dict__[cmd], cmd, path)
                )

    self._amberConfig()  # Change stuff for AMBER
    if self.thermostattemperature is None:
        self.thermostattemperature = self.temperature
def show(self, quiet=False):
    """ Builds the Acemd configuration file string

    The text starts with the ``TCL`` attribute (if set) followed by a ``#`` line,
    then one line per public attribute that is not None, in sorted order, with
    the restraints and finally the ``run`` command moved to the end.

    Parameters
    ----------
    quiet : bool
        If False (default) the string is printed to stdout and nothing is returned.
        If True nothing is printed and the string is returned.

    Returns
    -------
    conf : str or None
        The string of the configuration file when ``quiet`` is True, otherwise None
    """
    settings = self.__dict__

    text = ''
    if 'TCL' in settings and settings['TCL'] is not None:
        text = settings['TCL']
    text += '#\n'

    maxwidth = np.max([len(k) for k in settings.keys()])

    names = sorted(list(settings.keys()))
    if 'restraints' in names:
        names.append(names.pop(names.index('restraints')))
    names.append(names.pop(names.index('run')))  # Move the run command to the end

    for cmd in names:
        if cmd == 'restraints' and self.restraints is not None:
            text += ''.join('{}\n'.format(r.format(maxwidth)) for r in ensurelist(self.restraints))
            continue

        skip = cmd.startswith('_') or settings[cmd] is None or cmd == 'TCL'
        if skip:
            continue

        val = settings[cmd]
        if cmd in self._outnames:
            val = self._outnames[cmd]
        if cmd == 'scaling14':
            # variables cannot start with numbers. We need to rename it here for acemd
            label = '1-4scaling'
        else:
            label = cmd
        text += '{name: <{maxwidth}}\t{val:}\n'.format(name=label, val=val, maxwidth=maxwidth)

    if quiet:
        return text
    print(text)
def dihedralsToIndexes(mol, dihedrals, sel='all'):
    """ Converts dihedral objects to atom indexes of a given Molecule

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A Molecule object from which to obtain atom information
    dihedrals : list
        A single dihedral or a list of Dihedral objects
    sel : str
        Atom selection string to restrict the application of the selections.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__

    Returns
    -------
    indexes : list of lists
        A list containing a list of atoms that correspond to each dihedral.

    Raises
    ------
    RuntimeError
        If an atom description of a dihedral does not match exactly one atom.

    Examples
    --------
    >>> dihs = []
    >>> dihs.append(Dihedral.phi(mol, 1, 2))
    >>> dihs.append(Dihedral.psi(mol, 2, 3))
    >>> indexes = Dihedral.dihedralsToIndexes(mol, dihs)
    """
    selatoms = mol.atomselect(sel)
    from htmd.util import ensurelist

    def _atomIndex(atom):
        # An atom is identified by name, resid, insertion, chain and segid together
        match = (mol.name == atom['name']) & (mol.resid == atom['resid'])
        match = match & (mol.insertion == atom['insertion'])
        match = match & (mol.chain == atom['chain']) & (mol.segid == atom['segid'])
        match = match & selatoms
        if np.sum(match) != 1:
            raise RuntimeError(
                'Expected one atom from atomselection {}. Got {} instead.'.format(atom, np.sum(match)))
        return np.where(match)[0][0]

    return [[_atomIndex(atom) for atom in dih.atoms] for dih in ensurelist(dihedrals)]
def dihedralsToIndexes(mol, dihedrals, sel='all'):
    """ Converts dihedral objects to atom indexes of a given Molecule

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A Molecule object from which to obtain atom information
    dihedrals : list
        A single dihedral or a list of Dihedral objects
    sel : str
        Atom selection string to restrict the application of the selections.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__

    Returns
    -------
    indexes : list of lists
        A list containing a list of atoms that correspond to each dihedral.

    Raises
    ------
    RuntimeError
        If an atom description of a dihedral does not match exactly one atom.

    Examples
    --------
    >>> dihs = []
    >>> dihs.append(Dihedral.phi(mol, 1, 2))
    >>> dihs.append(Dihedral.psi(mol, 2, 3))
    >>> indexes = Dihedral.dihedralsToIndexes(mol, dihs)
    """
    selatoms = mol.atomselect(sel)
    from htmd.util import ensurelist

    indexes = []
    for dihedral in ensurelist(dihedrals):
        atomidx = []
        for atom in dihedral.atoms:
            # All five identifying fields must match, and the atom must lie inside the selection
            sameName = mol.name == atom['name']
            sameResid = mol.resid == atom['resid']
            sameInsertion = mol.insertion == atom['insertion']
            sameChain = mol.chain == atom['chain']
            sameSegid = mol.segid == atom['segid']
            mask = sameName & sameResid & sameInsertion & sameChain & sameSegid
            mask = mask & selatoms
            if np.sum(mask) != 1:
                raise RuntimeError(
                    'Expected one atom from atomselection {}. Got {} instead.'.format(atom, np.sum(mask)))
            atomidx.append(np.where(mask)[0][0])
        indexes.append(atomidx)
    return indexes
def _filterTopology(sim, outfolder, filtsel):
    """Write filtered copies of a simulation's topology into ``outfolder``.

    The topology in ``sim.molfile`` is loaded and written as ``filtered.<ext>``
    with the selection ``filtsel``, once for 'pdb' and once for the extension of
    every file in ``sim.molfile``. Files that already exist are left alone. A
    failed write is logged as a warning and does not abort.

    Raises
    ------
    RuntimeError
        If the topology cannot be read (``IOError`` while loading).
    """
    from htmd.util import ensurelist

    try:
        from htmd.molecule.molecule import Molecule
        mol = Molecule(sim.molfile)
    except IOError as e:
        raise RuntimeError('simFilter: {}. Cannot read topology file {}'.format(e, sim.molfile))

    # If we read for example psf or prmtop which have no coords, just add 0s everywhere
    if mol.coords.size == 0:
        mol.coords = np.zeros((mol.numAtoms, 3, 1), dtype=np.float32)

    # Adding pdb to make sure it's always written
    extensions = ['pdb'] + [os.path.splitext(m)[1][1:] for m in ensurelist(sim.molfile)]

    for ext in set(extensions):
        filttopo = path.join(outfolder, 'filtered.{}'.format(ext))
        if path.isfile(filttopo):
            continue
        try:
            mol.write(filttopo, filtsel)
        except Exception as e:
            logger.warning('Filtering was not able to write {} due to error: {}'.format(filttopo, e))
def load(self, path='.'):
    """ Loads all files required to run a simulation and applies the configured protocols to it

    For every command listed in ``self._defaultfnames`` that is set on this
    object, the first candidate file that exists under ``path`` is read as
    binary into ``self._file_data``, its output name is recorded in
    ``self._outnames`` and the attribute is replaced by the chosen file name.

    Parameters
    ----------
    path : str
        Working directory relative to which the configuration file is read

    Raises
    ------
    RuntimeError
        If none of the candidate files of a command exists under ``path``.
    """
    # load files and reset filenames
    for cmd in self._defaultfnames.keys():
        if cmd in self.__dict__ and self.__dict__[cmd] is not None:
            found = False
            for fname in ensurelist(self.__dict__[cmd]):
                fpath = os.path.join(path, fname)
                if not os.path.exists(fpath):
                    continue

                # read all as binary; the context manager closes the file even if read() fails
                with open(fpath, 'rb') as f:
                    data = f.read()
                self._file_data[cmd] = data

                defaultname = self._defaultfnames[cmd]
                if defaultname.endswith('*'):
                    # A wildcard default name inherits the extension of the file actually found
                    defaultname = '{}.{}'.format(os.path.splitext(defaultname)[0], os.path.splitext(fpath)[1][1:])

                self._outnames[cmd] = defaultname
                self.__dict__[cmd] = fname
                found = True
                break
            if not found:
                raise RuntimeError('Could not find any of the files "{}" specified for command "{}" '
                                   'in path {}'.format(self.__dict__[cmd], cmd, path))

    self._amberConfig()  # Change stuff for AMBER
    if self._version == 3 and self.thermostattemp is None:
        self.thermostattemp = self.temperature
def __init__(self, plumed_inp):
    """Set up the metric from a PLUMED input given as text or as statement objects.

    Parameters
    ----------
    plumed_inp : str or object or list
        If a string, it is stored unchanged as the PLUMED input. Otherwise it is
        wrapped in a ``PlumedStatement`` as its prerequisites and the input text is
        produced by joining the lines returned by ``_printDFS``.

    Raises
    ------
    Exception
        If locating the PLUMED installation fails; the underlying error is
        attached as the cause.
    """
    # I am not sure at all about opening files here is good style
    self._precalculation_enabled = False
    self._plumed_exe = shutil.which("plumed")
    self.colvar = None
    self.cvnames = None
    self.stmt = None

    try:
        pp = _getPlumedRoot()
        logger.info("Plumed path is " + pp)
    except Exception as e:
        # Chain the original failure so the root cause stays visible in tracebacks
        raise Exception("To use MetricPlumed2 please ensure PLUMED 2's executable is installed and in path") from e

    # Sanitize if single element
    if isinstance(plumed_inp, str):
        self._plumed_inp = plumed_inp
    else:
        # This should keep the CVs etc in scope
        self.stmt = PlumedStatement()
        self.stmt.prereq = ensurelist(plumed_inp)
        stmts = _printDFS(self.stmt)
        self._plumed_inp = "\n".join(stmts)
def readConfig(self, configfile):
    """Set attributes of this object from a JSON configuration file.

    Every top-level key of the JSON document becomes an attribute whose value
    is taken from the entry's ``"value"`` field. The ``"restraints"`` key is
    special: each of its values is converted with ``_Restraint._fromDict`` and
    collected in ``self.restraints``.

    Parameters
    ----------
    configfile : str
        Path to a JSON file. If the path does not exist it is treated as the
        name of a configuration under ``mdengine/acemd/config`` in the HTMD
        share directory (``.json`` is appended).
    """
    import json

    if not os.path.exists(configfile):
        from htmd.home import home

        sharedir = home(shareDir=True)
        configfile = os.path.join(sharedir, "mdengine", "acemd", "config", "{}.json".format(configfile))

    with open(configfile, "r") as f:
        config = json.load(f)

    for key in config:
        value = config[key]["value"]
        if key != "restraints":
            setattr(self, key, value)
            continue

        # Restraints are stored as dictionaries and need converting back to objects
        self.restraints = []
        for restr in ensurelist(value):
            self.restraints.append(_Restraint._fromDict(restr))
def _singleMolfile(sims):
    """Decide whether all simulations of a simlist share one topology.

    Parameters
    ----------
    sims : np.ndarray or Molecule
        Array of simulation objects (their ``molfile`` attribute is read), or
        a Molecule.

    Returns
    -------
    single : bool
        True if only one distinct molfile is referenced, or if all distinct
        molfiles load into Molecules that are equal apart from coordinates.
    molfile : tuple or list
        The first distinct molfile (as a tuple) when `single` is True,
        otherwise an empty list.

    Raises
    ------
    RuntimeError
        If `sims` is an array containing no simulations.
    """
    from htmd.molecule.molecule import mol_equal
    from htmd.util import ensurelist

    if isinstance(sims, Molecule) or not isinstance(sims, np.ndarray):
        return False, []

    # Tuples make the molfile lists hashable so duplicates collapse in the set
    uqmolfiles = list({tuple(ensurelist(s.molfile)) for s in sims})
    if not uqmolfiles:
        raise RuntimeError('No molfiles found in simlist')

    if len(uqmolfiles) > 1:
        # If more than one molfile load them and see if they are different Molecules
        ref = Molecule(uqmolfiles[0], _logger=False)
        for candidate in uqmolfiles[1:]:
            mol = Molecule(candidate, _logger=False)
            if not mol_equal(ref, mol, exceptFields=['coords']):
                return False, []

    return True, uqmolfiles[0]
def simlist(datafolders, topologies, inputfolders=None):
    """Creates a list of simulations

    Data folders, topologies and input folders are matched by simulation name
    (as computed by ``_simName``). Data folders in which no trajectory is
    detected are skipped.

    Parameters
    ----------
    datafolders : str list
        A list of directories, each containing a single trajectory
    topologies : str list
        A list of topology files or folders containing a topology file corresponding to the trajectories in dataFolders.
        Can also be a single string to a single structure which corresponds to all trajectories.
    inputfolders : optional, str list
        A list of directories, each containing the input files used to produce the trajectories in dataFolders

    Returns
    -------
    sims : np.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A list of simulations

    Raises
    ------
    FileNotFoundError
        If no data folders or topologies are given, a topology does not exist, or no topology/input
        matches a simulation name.
    NotADirectoryError
        If a data or input folder is not a directory.
    RuntimeError
        If two data folders map to the same simulation name.

    Examples
    --------
    >>> simlist(glob('./test/data/*/'), glob('./test/input/*/'), glob('./test/input/*/'))
    >>> simlist(glob('./test/data/*/'), glob('./test/input/*/*.pdb'), glob('./test/input/*/'))
    """
    from htmd.util import ensurelist
    import natsort

    if not datafolders:
        raise FileNotFoundError(
            "No data folders were given, check your arguments.")
    if not topologies:
        raise FileNotFoundError(
            "No molecule files were given, check your arguments.")

    topologies = ensurelist(topologies)
    datafolders = ensurelist(datafolders)
    for folder in datafolders:
        if not os.path.isdir(folder):
            raise NotADirectoryError("{}".format(folder))
    if inputfolders:
        inputfolders = ensurelist(inputfolders)
        for folder in inputfolders:
            if not os.path.isdir(folder):
                raise NotADirectoryError("{}".format(folder))

    # The globs have to be matched by simulation name: there can be more input
    # folders than data folders (e.g. data not retrieved yet), so positional
    # matching would pair up the wrong directories.

    # Simulation name -> data folder
    datanames = dict()
    for folder in datafolders:
        simname = _simName(folder)
        if simname in datanames:
            raise RuntimeError(
                "Duplicate simulation name detected. Cannot name-match directories."
            )
        datanames[simname] = folder

    # Simulation name -> topology
    molnames = dict()
    for mol in topologies:
        if not os.path.exists(mol):
            raise FileNotFoundError("File {} does not exist".format(mol))
        molnames[_simName(mol)] = mol

    # Simulation name -> input folder
    if inputfolders:
        inputnames = {_simName(inputdir): inputdir for inputdir in inputfolders}

    logger.debug("Starting listing of simulations.")
    sims = []

    keys = natsort.natsorted(datanames.keys())
    from tqdm import tqdm

    for k in tqdm(keys, desc="Creating simlist"):
        trajectories = _autoDetectTrajectories(datanames[k])
        if not trajectories:
            continue

        # A single topology applies to every simulation, several are matched by name
        if len(topologies) > 1:
            if k not in molnames:
                raise FileNotFoundError(
                    "Did not find molfile with folder name " + k +
                    " in the given glob")
            molfile = molnames[k]
        else:
            molfile = topologies[0]

        if os.path.isdir(molfile):
            molfile = _autoDetectTopology(molfile)

        inputf = []
        if inputfolders:
            if k not in inputnames:
                raise FileNotFoundError(
                    "Did not find input with folder name " + k +
                    " in the given glob")
            inputf = inputnames[k]

        numframes = [_readNumFrames(traj) for traj in trajectories]
        # Ids are consecutive over the simulations actually kept
        sims.append(
            Sim(
                simid=len(sims),
                parent=None,
                input=inputf,
                trajectory=trajectories,
                molfile=molfile,
                numframes=numframes,
            ))

    logger.debug("Finished listing of simulations.")
    return np.array(sims, dtype=object)
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True, saltconc=0,
          saltanion=None, saltcation=None, disulfide=None, tleap=None, execute=True, atomtypes=None,
          offlibraries=None, gbsa=False, igb=2):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If the tleap executable is not found, tleap cannot be executed, or an input file cannot be written.
    RuntimeError
        If an atom type definition is not a triplet or an OFF library is missing.
    BuildError
        If tleap reports errors or produces no (or empty) output files.

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if tleap is None:
        tleap = _findTleap()
    else:
        if shutil.which(tleap) is None:
            raise NameError('Could not find executable: `{}` in the PATH. Cannot build for AMBER.'.format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)
    if isinstance(ff, str):
        ff = [ff]

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # The context manager guarantees tleap.in is closed even when one of the checks below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, 'ff', tleap)
                if force is None:
                    continue
            newname = 'ff{}_{}'.format(i, os.path.basename(force))
            shutil.copy(force, os.path.join(outdir, newname))
            f.write('source {}\n'.format(newname))
        f.write('\n')

        if gbsa:
            gbmodels = {1: 'mbondi', 2: 'mbondi2', 5: 'mbondi2', 7: 'bondi', 8: 'mbondi3'}
            f.write('set default PBradii {}\n\n'.format(gbmodels[igb]))

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError('Atom type definitions have to be triplets. Check the AMBER documentation.')
                f.write(' {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            # Number the libraries with their own index so that copied names cannot collide
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError('Could not find off-library in location {}'.format(off))
                newname = 'offlib{}_{}'.format(i, os.path.basename(off))
                shutil.copy(off, os.path.join(outdir, newname))
                f.write('loadoff {}\n'.format(newname))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, 'param', tleap)
                if p is None:
                    continue
            newname = 'param{}_{}'.format(i, os.path.basename(p))
            shutil.copy(p, os.path.join(outdir, newname))
            f.write('loadamberparams {}\n'.format(newname))
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, 'topo', tleap)
                if t is None:
                    continue
            newname = 'topo{}_{}'.format(i, os.path.basename(t))
            shutil.copy(t, os.path.join(outdir, newname))
            f.write('loadamberprep {}\n'.format(newname))
        f.write('\n')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        # Atoms which carry an atomtype are passed to tleap as one mol2 file per segment
        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from htmd.molecule.molecule import UniqueResidueID
            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0], DisulfideBridge):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid1, d.segid1))
                    r2 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid2, d.segid2))
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0][0], str):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                f.write('# Adding disulfide bonds\n')
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = 'CYX'
                    mol.resname[atoms2] = 'CYX'
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == 'HG')) | (atoms2 & (mol.name == 'HG'))
                    # Convert to stupid amber residue numbering
                    uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
                f.write('\n')
                mol.remove(torem, _logger=False)

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug('Writing PDB file for input to tleap.')
    pdbname = os.path.join(outdir, 'input.pdb')

    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == '')
    if not os.path.isfile(pdbname):
        raise NameError('Could not write a PDB file out of the given Molecule.')

    if not execute:
        # Only the tleap input was requested, there is no built system to post-process
        return None

    # Source paths of extra dirs (our dirs, not amber default)
    htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
    sourcepaths = [htmdamberdir]
    sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(fname))
                    for fname in ff if os.path.isfile(os.path.join(htmdamberdir, fname))]
    extrasource = []
    for p in sourcepaths:
        extrasource.append('-I')
        extrasource.append('{}'.format(p))

    # Absolute path, so it stays valid after changing into outdir
    logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
    logger.info('Starting the build.')
    currdir = os.getcwd()
    os.chdir(outdir)
    try:
        with open(logpath, 'w') as logfile:
            cmd = [tleap, '-f', './tleap.in']
            cmd[1:1] = extrasource
            try:
                call(cmd, stdout=logfile)
            except Exception as e:
                raise NameError('tleap failed at execution') from e
        errors = _logParser(logpath)
    finally:
        # Always return to the caller's working directory, also when tleap fails
        os.chdir(currdir)

    if errors:
        raise BuildError(errors + ['Check {} for further information on errors in building.'.format(logpath)])
    logger.info('Finished building.')

    # tleap names its outputs after `prefix` (see saveamberparm above)
    crdfile = os.path.join(outdir, prefix + '.crd')
    prmtopfile = os.path.join(outdir, prefix + '.prmtop')
    if os.path.exists(crdfile) and os.path.getsize(crdfile) != 0 and os.path.getsize(prmtopfile) != 0:
        molbuilt = Molecule(prmtopfile)
        molbuilt.read(crdfile)
    else:
        raise BuildError('No structure pdb/prmtop file was generated. Check {} for errors in building.'.format(logpath))

    if ionize:
        shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
        shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
        totalcharge = np.sum(molbuilt.charge)
        nwater = np.sum(molbuilt.atomselect('water and noh'))
        anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                        anion=saltanion, cation=saltcation)
        newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
        # Redo the whole build but now with ions included. gbsa/igb are forwarded so that the final
        # topology keeps the requested radii.
        return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                     execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap, atomtypes=atomtypes,
                     offlibraries=offlibraries, gbsa=gbsa, igb=igb)

    tmpbonds = molbuilt.bonds
    molbuilt.bonds = []  # Removing the bonds to speed up writing
    molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
    molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
def build(
        mol,
        ff=None,
        topo=None,
        param=None,
        prefix="structure",
        outdir="./build",
        caps=None,
        ionize=True,
        saltconc=0,
        saltanion=None,
        saltcation=None,
        disulfide=None,
        teleap=None,
        teleapimports=None,
        execute=True,
        atomtypes=None,
        offlibraries=None,
        gbsa=False,
        igb=2,
):
    """Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.

    Parameters
    ----------
    mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    teleap : str
        Path to teLeap executable used to build the system for AMBER
    teleapimports : list
        A list of paths to pass to teLeap '-I' flag, i.e. directories to be searched
        Default: determined from :func:`amber.defaultAmberHome <htmd.builder.amber.defaultAmberHome>` and
        :func:`amber.htmdAmberHome <htmd.builder.amber.htmdAmberHome>`
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <moleculekit.molecule.Molecule>` object
        The built system in a Molecule object. None if `execute` is False.

    Raises
    ------
    NameError
        If the teLeap executable is not found, teLeap cannot be executed, or an input file cannot be written.
    RuntimeError
        If an atom type definition is not a triplet, an OFF library is missing, no import paths are found or the
        built files cannot be read.
    RuntimeWarning
        If no default Amber search path is found.
    BuildError
        If teLeap reports errors or produces no (or empty) output files.

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if teleap is None:
        teleap = _findTeLeap()
    else:
        if shutil.which(teleap) is None:
            raise NameError(
                f"Could not find executable: `{teleap}` in the PATH. Cannot build for AMBER. Please install it with `conda install ambermini -c acellera`"
            )

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)
    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)
    # Normalise once so that a single forcefield string is never iterated character by character
    ff = ensurelist(ff)

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)

    _applyProteinCaps(mol, caps)

    # The context manager guarantees tleap.in is closed even when one of the checks below raises
    with open(os.path.join(outdir, "tleap.in"), "w") as f:
        f.write("# tleap file generated by amber.build\n")

        # Printing out the forcefields
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, "ff", teleap)
                if force is None:
                    continue
            newname = f"ff{i}_{os.path.basename(force)}"
            shutil.copy(force, os.path.join(outdir, newname))
            f.write(f"source {newname}\n")
        f.write("\n")

        if gbsa:
            gbmodels = {
                1: "mbondi",
                2: "mbondi2",
                5: "mbondi2",
                7: "bondi",
                8: "mbondi3",
            }
            f.write(f"set default PBradii {gbmodels[igb]}\n\n")

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write("addAtomTypes {\n")
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError(
                        "Atom type definitions have to be triplets. Check the AMBER documentation."
                    )
                f.write(f' {{ "{at[0]}" "{at[1]}" "{at[2]}" }}\n')
            f.write("}\n\n")

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError(
                        f"Could not find off-library in location {off}")
                newname = f"offlib{i}_{os.path.basename(off)}"
                shutil.copy(off, os.path.join(outdir, newname))
                f.write(f"loadoff {newname}\n")

        # Loading frcmod parameters
        f.write("# Loading parameter files\n")
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, "param", teleap)
                if p is None:
                    continue
            newname = f"param{i}_{os.path.basename(p)}"
            shutil.copy(p, os.path.join(outdir, newname))
            f.write(f"loadamberparams {newname}\n")
        f.write("\n")

        # Loading prepi topologies
        f.write("# Loading prepi topologies\n")
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, "topo", teleap)
                if t is None:
                    continue
            newname = f"topo{i}_{os.path.basename(t)}"
            shutil.copy(t, os.path.join(outdir, newname))
            f.write(f"loadamberprep {newname}\n")
        f.write("\n")

        f.write("# Loading the system\n")
        f.write("mol = loadpdb input.pdb\n\n")

        if np.sum(mol.atomtype != "") != 0:
            f.write("# Loading the ligands\n")
            segs = np.unique(mol.segid[mol.atomtype != ""])

            # teLeap crashes if you try to combine too many molecules in a single command so we will do them by 10s
            for k in range(0, len(segs), 10):
                segments_string = ""
                for seg in segs[k:min(k + 10, len(segs))]:
                    name = f"segment{seg}"
                    segments_string += f" {name}"

                    mol2name = os.path.join(outdir, f"{name}.mol2")
                    mol.write(mol2name, (mol.atomtype != "") & (mol.segid == seg))
                    if not os.path.isfile(mol2name):
                        raise NameError("Failed writing ligand mol2 file.")

                    f.write(f"{name} = loadmol2 {name}.mol2\n")
                f.write(f"mol = combine {{mol{segments_string}}}\n\n")

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from moleculekit.molecule import UniqueResidueID

            if (disulfide is not None and len(disulfide) != 0
                    and isinstance(disulfide[0], DisulfideBridge)):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(
                        mol, f"resid {d.resid1} and segname {d.segid1}")
                    r2 = UniqueResidueID.fromMolecule(
                        mol, f"resid {d.resid2} and segname {d.segid2}")
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if (disulfide is not None and len(disulfide) != 0
                    and isinstance(disulfide[0][0], str)):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info("Detecting disulfide bonds.")
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                f.write("# Adding disulfide bonds\n")
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = "CYX"
                    mol.resname[atoms2] = "CYX"
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == "HG")) | (atoms2 & (mol.name == "HG"))
                    # Convert to stupid amber residue numbering
                    uqseqid = (sequenceID(
                        (mol.resid, mol.insertion, mol.segid)) + mol.resid[0])
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write(f"bond mol.{uqres1}.SG mol.{uqres2}.SG\n")
                f.write("\n")
                mol.remove(torem, _logger=False)

        # Calculate the bounding box and store it in the CRD file
        f.write('setBox mol "vdw"\n\n')

        f.write("# Writing out the results\n")
        f.write(f"saveamberparm mol {prefix}.prmtop {prefix}.crd\n")
        f.write("quit")

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug("Writing PDB file for input to tleap.")
    pdbname = os.path.join(outdir, "input.pdb")

    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == "")
    if not os.path.isfile(pdbname):
        raise NameError(
            "Could not write a PDB file out of the given Molecule.")

    if not execute:
        # Only the teLeap input was requested, there is no built system to post-process
        return None

    if not teleapimports:
        teleapimports = []
        # Source default Amber (i.e. the same paths tleap imports)
        amberhome = defaultAmberHome(teleap=teleap)
        teleapimports += [
            os.path.join(amberhome, s)
            for s in _defaultAmberSearchPaths.values()
        ]
        if len(teleapimports) == 0:
            raise RuntimeWarning(
                f"No default Amber force-field found. Check teLeap location: {teleap}"
            )
        # Source HTMD Amber paths that contain ffs
        htmdamberdir = htmdAmberHome()
        teleapimports += [
            os.path.join(htmdamberdir, os.path.dirname(fname))
            for fname in ff
            if os.path.isfile(os.path.join(htmdamberdir, fname))
        ]
        if len(teleapimports) == 0:
            raise RuntimeError(
                "No default Amber force-field imports found. Check "
                "`htmd.builder.amber.defaultAmberHome()` and `htmd.builder.amber.htmdAmberHome()`"
            )

    # Set import flags for teLeap
    teleapimportflags = []
    for p in teleapimports:
        teleapimportflags.append("-I")
        teleapimportflags.append(str(p))

    # Absolute path, so it stays valid after changing into outdir
    logpath = os.path.abspath(os.path.join(outdir, "log.txt"))
    logger.info("Starting the build.")
    currdir = os.getcwd()
    os.chdir(outdir)
    try:
        with open(logpath, "w") as logfile:
            cmd = [teleap, "-f", "./tleap.in"]
            cmd[1:1] = teleapimportflags
            logger.debug(cmd)
            try:
                call(cmd, stdout=logfile)
            except Exception as e:
                raise NameError("teLeap failed at execution") from e
        errors = _logParser(logpath)
    finally:
        # Always return to the caller's working directory, also when teLeap fails
        os.chdir(currdir)

    if errors:
        raise BuildError(errors + [
            f"Check {logpath} for further information on errors in building."
        ])
    logger.info("Finished building.")

    # teLeap names its outputs after `prefix` (see saveamberparm above)
    crdfile = os.path.join(outdir, f"{prefix}.crd")
    prmtopfile = os.path.join(outdir, f"{prefix}.prmtop")
    if (os.path.exists(crdfile) and os.path.getsize(crdfile) != 0
            and os.path.getsize(prmtopfile) != 0):
        try:
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        except Exception as e:
            raise RuntimeError(
                f"Failed at reading structure.prmtop/structure.crd due to error: {e}"
            ) from e
    else:
        raise BuildError(
            f"No structure pdb/prmtop file was generated. Check {logpath} for errors in building."
        )

    if ionize:
        shutil.move(crdfile, os.path.join(outdir, f"{prefix}.noions.crd"))
        shutil.move(prmtopfile, os.path.join(outdir, f"{prefix}.noions.prmtop"))
        totalcharge = np.sum(molbuilt.charge)
        nwater = np.sum(molbuilt.atomselect("water and noh"))
        anion, cation, anionatom, cationatom, nanion, ncation = ionizef(
            totalcharge,
            nwater,
            saltconc=saltconc,
            anion=saltanion,
            cation=saltcation,
        )
        newmol = ionizePlace(mol, anion, cation, anionatom, cationatom,
                             nanion, ncation)
        # Redo the whole build but now with ions included. The import paths and GB settings are
        # forwarded so that the final system is built exactly like the first pass.
        return build(
            newmol,
            ff=ff,
            topo=topo,
            param=param,
            prefix=prefix,
            outdir=outdir,
            caps={},
            ionize=False,
            execute=execute,
            saltconc=saltconc,
            disulfide=disulfide,
            teleap=teleap,
            teleapimports=teleapimports,
            atomtypes=atomtypes,
            offlibraries=offlibraries,
            gbsa=gbsa,
            igb=igb,
        )

    tmpbonds = molbuilt.bonds
    molbuilt.bonds = []  # Removing the bonds to speed up writing
    molbuilt.write(os.path.join(outdir, f"{prefix}.pdb"))
    molbuilt.bonds = tmpbonds  # Restoring the bonds
    detectCisPeptideBonds(molbuilt)  # Warn in case of cis bonds
    return molbuilt
# Readers registered per file extension for trajectory and coordinate files
_TRAJECTORY_READERS = {'xtc': XTCread}
_COORDINATE_READERS = {'crd': CRDread, 'coor': BINCOORread}

# These extensions fall back to MDTRAJread unless a dedicated reader is already registered
_MDTRAJ_TRAJECTORY_EXTS = ('dcd', 'binpos', 'trr', 'nc', 'h5', 'lh5', 'netcdf')
for ext in _MDTRAJ_TRAJECTORY_EXTS:
    _TRAJECTORY_READERS.setdefault(ext, MDTRAJread)

from htmd.util import ensurelist

# Extension -> list of every reader registered for it, topology readers first,
# then trajectory readers, then coordinate readers
_ALL_READERS = {}
for _registry in (_TOPOLOGY_READERS, _TRAJECTORY_READERS, _COORDINATE_READERS):
    for k in _registry:
        _ALL_READERS.setdefault(k, []).extend(ensurelist(_registry[k]))

if __name__ == '__main__':
    from htmd.home import home
    from htmd.molecule.molecule import Molecule
def PDBwrite(mol, filename, frames=None, writebonds=True):
    """Write a molecule to a PDB file, one MODEL record per frame.

    Parameters
    ----------
    mol : Molecule
        Object providing the per-atom fields (record, name, altloc, resname, chain, resid, insertion,
        occupancy, beta, segid, element) as well as `coords`, `box`, `bonds` and `frame`.
    filename : str
        Path of the file to write.
    frames : int or list of int
        Frame indexes to write. Default: ``mol.frame``.
    writebonds : bool
        If True, CONECT records are written for the bonds in ``mol.bonds``.

    Raises
    ------
    RuntimeError
        If coordinates, occupancies or beta factors do not fit into their fixed-width PDB columns.
    """
    if frames is None:
        frames = mol.frame
    frames = ensurelist(frames)

    checkTruncations(mol)
    coords = np.atleast_3d(mol.coords[:, :, frames])
    numFrames = coords.shape[2]
    nAtoms = coords.shape[0]

    # The serial column holds 5 characters, larger serials are replaced by asterisks
    serial = np.arange(1, np.size(coords, 0) + 1).astype(object)
    serial[serial > 99999] = '*****'
    serial = serial.astype('U5')

    if nAtoms > 0:
        if coords.max() >= 1E8 or coords.min() <= -1E7:
            raise RuntimeError('Cannot write PDB coordinates with values smaller than -1E7 or larger than 1E8')
        if mol.occupancy.max() >= 1E6 or mol.occupancy.min() <= -1E5:
            raise RuntimeError('Cannot write PDB occupancy with values smaller than -1E5 or larger than 1E6')
        if mol.beta.max() >= 1E6 or mol.beta.min() <= -1E5:
            raise RuntimeError('Cannot write PDB beta/temperature with values smaller than -1E5 or larger than 1E6')

    numRecords = len(mol.record)

    # The context manager closes the file even if formatting one of the records fails
    with open(filename, 'w') as fh:
        # Check for a missing box before indexing it
        box = None if mol.box is None else mol.box[:, frames[0]]
        if box is not None and not np.all(mol.box == 0):
            # Space group "P 1" occupies columns 56-66 and the Z value columns 67-70
            fh.write("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1 \n" % (box[0], box[1], box[2], 90, 90, 90))

        for f in range(numFrames):
            fh.write("MODEL    %5d\n" % (frames[f] + 1))
            for i in range(numRecords):
                name = _deduce_PDB_atom_name(mol.name[i], mol.resname[i])
                # Fixed-column ATOM/HETATM layout: 3 blanks before x (columns 28-30),
                # 6 blanks before the segment id (columns 67-72), 2 trailing blanks for the charge
                fh.write(
                    "{!s:6.6}{!s:>5.5} {}{!s:>1.1}{!s:4.4}{!s:>1.1}{!s:>4.4}{!s:>1.1}   {}{}{}{}{}      {!s:4.4}{!s:>2.2}  \n".format(
                        mol.record[i],
                        serial[i], name, mol.altloc[i],
                        mol.resname[i], mol.chain[i],
                        mol.resid[i],
                        mol.insertion[i],
                        '{:8.3f}'.format(coords[i, 0, f])[:8],
                        '{:8.3f}'.format(coords[i, 1, f])[:8],
                        '{:8.3f}'.format(coords[i, 2, f])[:8],
                        '{:6.2f}'.format(mol.occupancy[i])[:6],
                        '{:6.2f}'.format(mol.beta[i])[:6],
                        mol.segid[i],
                        mol.element[i]
                    )
                )
                # TODO : convert charges to ints if we ever write them
                # Segments are separated by TER records
                if i < numRecords - 1 and mol.segid[i] != mol.segid[i + 1]:
                    fh.write("TER\n")

            if writebonds and mol.bonds is not None and len(mol.bonds) != 0:
                bondedatoms = np.unique(mol.bonds)
                bondedatoms = bondedatoms[bondedatoms < 99998]  # Don't print bonds over 99999 as it overflows the field

                for a in bondedatoms:
                    partners = mol.bonds[mol.bonds[:, 0] == a, 1]
                    partners = np.unique(np.append(partners, mol.bonds[mol.bonds[:, 1] == a, 0]))
                    partners = partners[partners < 99998] + 1  # Don't print bonds over 99999 as it overflows the field
                    # I need to support multi-line printing of atoms with more than 4 bonds
                    while len(partners) >= 3:  # Write bonds as long as they are more than 3 in fast more
                        fh.write("CONECT%5d%5d%5d%5d\n" % (a + 1, partners[0], partners[1], partners[2]))
                        partners = partners[3:]
                    if len(partners) > 0:  # Write the rest of the bonds
                        line = "CONECT%5d" % (a + 1)
                        for p in partners:
                            line = "%s%5d" % (line, p)
                        fh.write(line)
                        fh.write('\n')

            fh.write("ENDMDL\n")
        fh.write("END\n")
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True,
          saltconc=0, saltanion=None, saltcation=None, disulfide=None, tleap='tleap', execute=True, atomtypes=None,
          offlibraries=None):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.
    When `ionize` is True the system is first built without ions to obtain its total charge, ions are placed, and the
    function calls itself once more with `ionize=False` to produce the final system.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of :class:`DisulfideBridge <htmd.builder.builder.DisulfideBridge>` objects
        If None it will guess disulfide bonds. Otherwise provide a list of `DisulfideBridge` objects.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object, or None if `execute` is False

    Raises
    ------
    NameError
        If tleap cannot be found or executed, if the input files cannot be written, or if tleap produced no output.
    RuntimeError
        If an entry of `atomtypes` is not a triplet.

    Example
    -------
    >>> from htmd.ui import *
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [DisulfideBridge('P', 157, 'P', 13), DisulfideBridge('K', 1, 'K', 25)]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if shutil.which(tleap) is None:
        raise NameError('Could not find executable: `{}` in the PATH. Cannot build for AMBER.'.format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)

    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)
    if isinstance(ff, str):
        ff = [ff]

    _missingSegID(mol)
    _checkMixedSegment(mol)
    _checkResidueInsertions(mol)

    mol = _charmmLipid2Amber(mol)
    _applyProteinCaps(mol, caps)

    # The context manager guarantees the script is closed even if one of the checks below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields
        for force in ff:
            f.write('source ' + force + '\n')
        f.write('\n')

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError('Atom type definitions have to be triplets. Check the AMBER documentation.')
                f.write(' {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            if not isinstance(offlibraries, (list, tuple)):
                offlibraries = [offlibraries, ]
            for off in offlibraries:
                f.write('loadoff {}\n\n'.format(off))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for p in param:
            try:
                shutil.copy(p, outdir)
            except OSError:
                # The file could not be copied: reference it by name and let tleap look it up itself
                f.write('loadamberparams ' + p + '\n')
                logger.info("File {:s} not found, assuming its present on the standard Amber location".format(p))
            else:
                f.write('loadamberparams ' + os.path.basename(p) + '\n')
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for t in topo:
            shutil.copy(t, outdir)
            f.write('loadamberprep ' + os.path.basename(t) + '\n')
        f.write('\n')

        # Disulfide bridges are only handled in the final (non-ionizing) pass
        applydisulfide = False
        if not ionize:
            # Detect disulfide bridges if not defined by user
            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)
            applydisulfide = len(disulfide) != 0

        # Fix structure to match the disulfide patching
        if applydisulfide:
            for d in disulfide:
                # Rename the residues to CYX if there is a disulfide bond
                atoms1 = (mol.segid == d.segid1) & (mol.resid == d.resid1)
                atoms2 = (mol.segid == d.segid2) & (mol.resid == d.resid2)
                mol.resname[atoms1] = 'CYX'
                mol.resname[atoms2] = 'CYX'
                # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                mol.remove(atoms1 & (mol.name == 'HG'), _logger=False)
                mol.remove(atoms2 & (mol.name == 'HG'), _logger=False)

        # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
        logger.debug('Writing PDB file for input to tleap.')
        pdbname = os.path.join(outdir, 'input.pdb')

        # mol2 files have atomtype, here we only write parts not coming from mol2
        mol.write(pdbname, mol.atomtype == '')
        if not os.path.isfile(pdbname):
            raise NameError('Could not write a PDB file out of the given Molecule.')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if applydisulfide:
            f.write('# Adding disulfide bonds\n')
            # Convert to the sequential residue numbering used in the tleap bond commands.
            # The molecule is not modified inside this loop, so the mapping is computed once.
            uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
            for d in disulfide:
                uqres1 = int(np.unique(uqseqid[(mol.segid == d.segid1) & (mol.resid == d.resid1)]))
                uqres2 = int(np.unique(uqseqid[(mol.segid == d.segid2) & (mol.resid == d.resid2)]))
                f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
            f.write('\n')

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(fname))
                        for fname in ff if os.path.isfile(os.path.join(htmdamberdir, fname))]
        extrasource = []
        for p in sourcepaths:
            extrasource.append('-I')
            extrasource.append('{}'.format(p))

        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                cmd = [tleap] + extrasource + ['-f', './tleap.in']
                try:
                    call(cmd, stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
        finally:
            # Always restore the caller's working directory, even when tleap could not be run
            os.chdir(currdir)
        logger.info('Finished building.')

        # tleap writes files named after `prefix` (see the saveamberparm command above)
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if os.path.exists(crdfile) and os.path.getsize(crdfile) != 0 and os.path.getsize(prmtopfile) != 0:
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise NameError('No structure pdb/prmtop file was generated. Check {} for errors in building.'
                            .format(logpath))

        if ionize:
            shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            ff='amber', anion=saltanion,
                                                                            cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap, atomtypes=atomtypes,
                         offlibraries=offlibraries)

    # Nothing was built when execute is False, so there is no structure to write out
    if molbuilt is not None:
        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
def PDBwrite(mol, filename, frame=None):
    """Write a molecule to a PDB file, one MODEL section per requested frame.

    Parameters
    ----------
    mol : Molecule-like object
        Object exposing the per-atom arrays read below (`record`, `name`, `altloc`, `resname`, `chain`, `resid`,
        `insertion`, `occupancy`, `beta`, `segid`, `element`) together with `coords`, `box`, `bonds` and `frame`.
    filename : str
        Path of the file to write. It is opened in text write mode.
    frame : int or list of int, optional
        Frame indexes to write. Defaults to `mol.frame`.

    Raises
    ------
    NameError
        If a coordinate, occupancy or beta value cannot be represented in its fixed-width field.
    """
    if frame is None:
        frame = mol.frame
    frame = ensurelist(frame)

    def format83(f):
        """Format a single float into a string of width 8, with ideally 3 decimal places of precision. If the
        number is a little too large, we can gracefully degrade the precision by lopping off some of the decimal
        places. If it's much too large, we throw a NameError"""
        if -999.999 < f < 9999.999:
            return '%8.3f' % f
        if -9999999 < f < 99999999:
            return ('%8.3f' % f)[:8]
        raise NameError('coordinate "%s" could not be represented '
                        'in a width-8 field' % f)

    def format62(f):
        """Same as format83 but for the width-6, 2-decimal fields (used for occupancy and beta)."""
        if -9.999 < f < 99.999:
            return '%6.2f' % f
        if -99999 < f < 999999:
            return ('%6.2f' % f)[:6]
        raise NameError('value "%s" could not be represented '
                        'in a width-6 field' % f)

    checkTruncations(mol)
    # Indexing with the requested frames up-front makes an invalid frame index fail before the file is created
    coords = np.atleast_3d(mol.coords[:, :, frame])
    serial = np.arange(1, np.size(coords, 0) + 1)

    # CONECT records do not depend on the frame, so they are built once and re-emitted for every MODEL
    conectlines = []
    if mol.bonds is not None and len(mol.bonds) != 0:
        bondedatoms = np.unique(mol.bonds)
        bondedatoms = bondedatoms[bondedatoms < 99998]  # Don't print bonds over 99999 as it overflows the field
        for a in bondedatoms:
            partners = mol.bonds[mol.bonds[:, 0] == a, 1]
            partners = np.unique(np.append(partners, mol.bonds[mol.bonds[:, 1] == a, 0]))
            partners = partners[partners < 99998] + 1  # Don't print bonds over 99999 as it overflows the field
            # Atoms with many bonds are spread over several CONECT lines, three partners per line
            while len(partners) >= 3:
                conectlines.append("CONECT%5d%5d%5d%5d" % (a + 1, partners[0], partners[1], partners[2]))
                partners = partners[3:]
            if len(partners) > 0:  # Write the rest of the bonds
                conectlines.append("CONECT%5d" % (a + 1) + ''.join("%5d" % p for p in partners))

    # ATOM/HETATM layout: the three blanks after the insertion code and the six before the segid pad the record to
    # the fixed PDB columns (this assumes _deduce_PDB_atom_name returns a 4-character name field -- TODO confirm)
    atomfmt = "{!s:6.6}{!s:>5.5} {}{!s:>1.1}{!s:4.4}{!s:>1.1}{!s:>4.4}{!s:>1.1}   {}{}{}{}{}      {!s:4.4}{!s:>2.2}  "
    numAtoms = len(mol.record)

    with open(filename, 'w') as fh:
        # TODO FIXME -- should take box from traj frame
        box = mol.box
        if box is not None and not np.all(mol.box == 0):
            box = np.atleast_2d(np.atleast_2d(box)[:, mol.frame])
            print("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1 " % (box[0, 0], box[0, 1], box[0, 2],
                                                                              90, 90, 90), file=fh)

        for f in frame:
            print("MODEL    %5d" % (f + 1), file=fh)
            for i in range(numAtoms):
                name = _deduce_PDB_atom_name(mol.name[i], mol.resname[i])

                # Serials above 99999 do not fit the 5-character field and are replaced by asterisks
                if serial[i] < 100000:
                    ser = str(int(serial[i]))
                else:
                    ser = '*****'

                print(atomfmt.format(mol.record[i], ser, name, mol.altloc[i], mol.resname[i], mol.chain[i],
                                     mol.resid[i], mol.insertion[i],
                                     format83(mol.coords[i, 0, f]),
                                     format83(mol.coords[i, 1, f]),
                                     format83(mol.coords[i, 2, f]),
                                     format62(mol.occupancy[i]),
                                     format62(mol.beta[i]),
                                     mol.segid[i], mol.element[i]),
                      file=fh)
                # TODO : convert charges to ints if we ever write them
                # A TER record separates consecutive atoms belonging to different segments
                if i < numAtoms - 1 and mol.segid[i] != mol.segid[i + 1]:
                    print("TER", file=fh)

            for line in conectlines:
                print(line, file=fh)
            print("ENDMDL", file=fh)
        print("END", file=fh)
def build(mol, ff=None, topo=None, param=None, prefix='structure', outdir='./build', caps=None, ionize=True,
          saltconc=0, saltanion=None, saltcation=None, disulfide=None, tleap=None, execute=True, atomtypes=None,
          offlibraries=None, gbsa=False, igb=2):
    """ Builds a system for AMBER

    Uses tleap to build a system for AMBER. Additionally it allows the user to ionize and add disulfide bridges.
    When `ionize` is True the system is first built without ions to obtain its total charge, ions are placed, and the
    function calls itself once more with `ionize=False` to produce the final system.

    Parameters
    ----------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The Molecule object containing the system
    ff : list of str
        A list of leaprc forcefield files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available forcefield files.
        Default: :func:`amber.defaultFf <htmd.builder.amber.defaultFf>`
    topo : list of str
        A list of topology `prepi/prep/in` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available topology files.
        Default: :func:`amber.defaultTopo <htmd.builder.amber.defaultTopo>`
    param : list of str
        A list of parameter `frcmod` files.
        Use :func:`amber.listFiles <htmd.builder.amber.listFiles>` to get a list of available parameter files.
        Default: :func:`amber.defaultParam <htmd.builder.amber.defaultParam>`
    prefix : str
        The prefix for the generated prmtop, crd and pdb files
    outdir : str
        The path to the output directory
        Default: './build'
    caps : dict
        A dictionary with keys segids and values lists of strings describing the caps for a particular protein segment.
        e.g. caps['P'] = ['ACE', 'NME'] or caps['P'] = ['none', 'none']. Default: will apply ACE and NME caps to every
        protein segment.
    ionize : bool
        Enable or disable ionization
    saltconc : float
        Salt concentration to add to the system after neutralization.
    saltanion : {'Cl-'}
        The anion type. Please use only AMBER ion atom names.
    saltcation : {'Na+', 'K+', 'Cs+'}
        The cation type. Please use only AMBER ion atom names.
    disulfide : list of pairs of atomselection strings
        If None it will guess disulfide bonds. Otherwise provide a list pairs of atomselection strings for each pair of
        residues forming the disulfide bridge.
    tleap : str
        Path to tleap executable used to build the system for AMBER
    execute : bool
        Disable building. Will only write out the input script needed by tleap. Does not include ionization.
    atomtypes : list of triplets
        Custom atom types defined by the user as ('type', 'element', 'hybrid') triplets
        e.g. (('C1', 'C', 'sp2'), ('CI', 'C', 'sp3')). Check `addAtomTypes` in AmberTools docs.
    offlibraries : str or list
        A path or a list of paths to OFF library files. Check `loadOFF` in AmberTools docs.
    gbsa : bool
        Modify radii for GBSA implicit water model
    igb : int
        GB model. Select: 1 for mbondi, 2 and 5 for mbondi2, 7 for bondi and 8 for mbondi3.
        Check section 4. The Generalized Born/Surface Area Model of the AMBER manual.

    Returns
    -------
    molbuilt : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        The built system in a Molecule object, or None if `execute` is False

    Raises
    ------
    NameError
        If the given tleap executable cannot be found or executed, or if the input files cannot be written.
    RuntimeError
        If an entry of `atomtypes` is not a triplet or an OFF library file does not exist.
    BuildError
        If errors are found in the tleap log or tleap produced no output files.

    Example
    -------
    >>> from htmd.ui import *  # doctest: +SKIP
    >>> mol = Molecule("3PTB")
    >>> molbuilt = amber.build(mol, outdir='/tmp/build')  # doctest: +SKIP
    >>> # More complex example
    >>> disu = [['segid P and resid 157', 'segid P and resid 13'], ['segid K and resid 1', 'segid K and resid 25']]
    >>> molbuilt = amber.build(mol, outdir='/tmp/build', saltconc=0.15, disulfide=disu)  # doctest: +SKIP
    """
    # Remove pdb protein bonds as they can be regenerated by tleap. Keep non-protein bonds i.e. for ligands
    mol = mol.copy()
    _removeProteinBonds(mol)

    if tleap is None:
        tleap = _findTleap()
    elif shutil.which(tleap) is None:
        raise NameError('Could not find executable: `{}` in the PATH. Cannot build for AMBER.'.format(tleap))

    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    _cleanOutDir(outdir)

    if ff is None:
        ff = defaultFf()
    if topo is None:
        topo = defaultTopo()
    if param is None:
        param = defaultParam()
    if caps is None:
        caps = _defaultProteinCaps(mol)
    if isinstance(ff, str):
        ff = [ff]

    _missingSegID(mol)
    _checkMixedSegment(mol)

    mol = _charmmLipid2Amber(mol)
    _applyProteinCaps(mol, caps)

    # The context manager guarantees the script is closed even if one of the checks below raises
    with open(os.path.join(outdir, 'tleap.in'), 'w') as f:
        f.write('# tleap file generated by amber.build\n')

        # Printing out the forcefields. Files are copied into outdir under an index-prefixed name.
        for i, force in enumerate(ff):
            if not os.path.isfile(force):
                force = _locateFile(force, 'ff', tleap)
                if force is None:
                    continue
            newname = 'ff{}_{}'.format(i, os.path.basename(force))
            shutil.copy(force, os.path.join(outdir, newname))
            f.write('source {}\n'.format(newname))
        f.write('\n')

        if gbsa:
            gbmodels = {1: 'mbondi', 2: 'mbondi2', 5: 'mbondi2', 7: 'bondi', 8: 'mbondi3'}
            f.write('set default PBradii {}\n\n'.format(gbmodels[igb]))

        # Adding custom atom types
        if atomtypes is not None:
            atomtypes = ensurelist(tocheck=atomtypes[0], tomod=atomtypes)
            f.write('addAtomTypes {\n')
            for at in atomtypes:
                if len(at) != 3:
                    raise RuntimeError('Atom type definitions have to be triplets. Check the AMBER documentation.')
                f.write(' {{ "{}" "{}" "{}" }}\n'.format(at[0], at[1], at[2]))
            f.write('}\n\n')

        # Loading OFF libraries
        if offlibraries is not None:
            offlibraries = ensurelist(offlibraries)
            # Each library gets its own index so that equal basenames cannot overwrite each other in outdir
            for i, off in enumerate(offlibraries):
                if not os.path.isfile(off):
                    raise RuntimeError('Could not find off-library in location {}'.format(off))
                newname = 'offlib{}_{}'.format(i, os.path.basename(off))
                shutil.copy(off, os.path.join(outdir, newname))
                f.write('loadoff {}\n'.format(newname))

        # Loading frcmod parameters
        f.write('# Loading parameter files\n')
        for i, p in enumerate(param):
            if not os.path.isfile(p):
                p = _locateFile(p, 'param', tleap)
                if p is None:
                    continue
            newname = 'param{}_{}'.format(i, os.path.basename(p))
            shutil.copy(p, os.path.join(outdir, newname))
            f.write('loadamberparams {}\n'.format(newname))
        f.write('\n')

        # Loading prepi topologies
        f.write('# Loading prepi topologies\n')
        for i, t in enumerate(topo):
            if not os.path.isfile(t):
                t = _locateFile(t, 'topo', tleap)
                if t is None:
                    continue
            newname = 'topo{}_{}'.format(i, os.path.basename(t))
            shutil.copy(t, os.path.join(outdir, newname))
            f.write('loadamberprep {}\n'.format(newname))
        f.write('\n')

        f.write('# Loading the system\n')
        f.write('mol = loadpdb input.pdb\n\n')

        if np.sum(mol.atomtype != '') != 0:
            logger.debug('Writing mol2 files for input to tleap.')
            segs = np.unique(mol.segid[mol.atomtype != ''])
            combstr = 'mol = combine {mol'
            for s in segs:
                name = 'segment{}'.format(s)
                mol2name = os.path.join(outdir, '{}.mol2'.format(name))
                mol.write(mol2name, (mol.atomtype != '') & (mol.segid == s))
                if not os.path.isfile(mol2name):
                    raise NameError('Could not write a mol2 file out of the given Molecule.')
                f.write('# Loading the rest of the system\n')
                f.write('{} = loadmol2 {}.mol2\n\n'.format(name, name))
                combstr += ' {}'.format(name)
            combstr += '}\n\n'
            f.write(combstr)

        # Write patches for disulfide bonds (only after ionizing)
        if not ionize:
            # TODO: Remove this once we deprecate the class
            from htmd.builder.builder import DisulfideBridge
            from htmd.molecule.molecule import UniqueResidueID
            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0], DisulfideBridge):
                newdisu = []
                for d in disulfide:
                    r1 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid1, d.segid1))
                    r2 = UniqueResidueID.fromMolecule(mol, 'resid {} and segname {}'.format(d.resid2, d.segid2))
                    newdisu.append([r1, r2])
                disulfide = newdisu
            # TODO: Remove up to here ----------------------

            if disulfide is not None and len(disulfide) != 0 and isinstance(disulfide[0][0], str):
                disulfide = convertDisulfide(mol, disulfide)

            if disulfide is None:
                logger.info('Detecting disulfide bonds.')
                disulfide = detectDisulfideBonds(mol)

            # Fix structure to match the disulfide patching
            if len(disulfide) != 0:
                torem = np.zeros(mol.numAtoms, dtype=bool)
                f.write('# Adding disulfide bonds\n')
                # Convert to the sequential residue numbering used in the tleap bond commands.
                # Only resname changes inside the loop (atoms are removed afterwards), so this is computed once.
                uqseqid = sequenceID((mol.resid, mol.insertion, mol.segid)) + mol.resid[0]
                for d in disulfide:
                    # Rename the residues to CYX if there is a disulfide bond
                    atoms1 = d[0].selectAtoms(mol, indexes=False)
                    atoms2 = d[1].selectAtoms(mol, indexes=False)
                    mol.resname[atoms1] = 'CYX'
                    mol.resname[atoms2] = 'CYX'
                    # Remove (eventual) HG hydrogens on these CYS (from proteinPrepare)
                    torem |= (atoms1 & (mol.name == 'HG')) | (atoms2 & (mol.name == 'HG'))
                    uqres1 = int(np.unique(uqseqid[atoms1]))
                    uqres2 = int(np.unique(uqseqid[atoms2]))
                    f.write('bond mol.{}.SG mol.{}.SG\n'.format(uqres1, uqres2))
                f.write('\n')
                mol.remove(torem, _logger=False)

        f.write('# Writing out the results\n')
        f.write('saveamberparm mol ' + prefix + '.prmtop ' + prefix + '.crd\n')
        f.write('quit')

    # Printing and loading the PDB file. AMBER can work with a single PDB file if the segments are separate by TER
    logger.debug('Writing PDB file for input to tleap.')
    pdbname = os.path.join(outdir, 'input.pdb')

    # mol2 files have atomtype, here we only write parts not coming from mol2
    # We need to write the input.pdb at the end since we modify the resname for disulfide bridges in mol
    mol.write(pdbname, mol.atomtype == '')
    if not os.path.isfile(pdbname):
        raise NameError('Could not write a PDB file out of the given Molecule.')

    molbuilt = None
    if execute:
        # Source paths of extra dirs (our dirs, not amber default)
        htmdamberdir = os.path.abspath(os.path.join(home(), 'builder', 'amberfiles'))
        sourcepaths = [htmdamberdir]
        sourcepaths += [os.path.join(htmdamberdir, os.path.dirname(fname))
                        for fname in ff if os.path.isfile(os.path.join(htmdamberdir, fname))]
        extrasource = []
        for p in sourcepaths:
            extrasource.append('-I')
            extrasource.append('{}'.format(p))

        logpath = os.path.abspath(os.path.join(outdir, 'log.txt'))
        logger.info('Starting the build.')
        currdir = os.getcwd()
        os.chdir(outdir)
        try:
            with open(logpath, 'w') as logfile:
                cmd = [tleap] + extrasource + ['-f', './tleap.in']
                try:
                    call(cmd, stdout=logfile)
                except Exception as err:
                    raise NameError('tleap failed at execution') from err
            errors = _logParser(logpath)
        finally:
            # Always restore the caller's working directory, even when tleap could not be run
            os.chdir(currdir)
        if errors:
            raise BuildError(errors + ['Check {} for further information on errors in building.'.format(logpath)])
        logger.info('Finished building.')

        # tleap writes files named after `prefix` (see the saveamberparm command above)
        crdfile = os.path.join(outdir, prefix + '.crd')
        prmtopfile = os.path.join(outdir, prefix + '.prmtop')
        if os.path.exists(crdfile) and os.path.getsize(crdfile) != 0 and os.path.getsize(prmtopfile) != 0:
            molbuilt = Molecule(prmtopfile)
            molbuilt.read(crdfile)
        else:
            raise BuildError('No structure pdb/prmtop file was generated. Check {} for errors in building.'
                             .format(logpath))

        if ionize:
            shutil.move(crdfile, os.path.join(outdir, prefix + '.noions.crd'))
            shutil.move(prmtopfile, os.path.join(outdir, prefix + '.noions.prmtop'))
            totalcharge = np.sum(molbuilt.charge)
            nwater = np.sum(molbuilt.atomselect('water and noh'))
            anion, cation, anionatom, cationatom, nanion, ncation = ionizef(totalcharge, nwater, saltconc=saltconc,
                                                                            anion=saltanion, cation=saltcation)
            newmol = ionizePlace(mol, anion, cation, anionatom, cationatom, nanion, ncation)
            # Redo the whole build but now with ions included. The GB settings are forwarded so that the final
            # topology gets the same PBradii as the first pass.
            return build(newmol, ff=ff, topo=topo, param=param, prefix=prefix, outdir=outdir, caps={}, ionize=False,
                         execute=execute, saltconc=saltconc, disulfide=disulfide, tleap=tleap, atomtypes=atomtypes,
                         offlibraries=offlibraries, gbsa=gbsa, igb=igb)

    # Nothing was built when execute is False, so there is no structure to write out
    if molbuilt is not None:
        tmpbonds = molbuilt.bonds
        molbuilt.bonds = []  # Removing the bonds to speed up writing
        molbuilt.write(os.path.join(outdir, prefix + '.pdb'))
        molbuilt.bonds = tmpbonds  # Restoring the bonds
    return molbuilt
def simlist(datafolders, topologies, inputfolders=None):
    """Creates a list of simulations

    Data folders, topologies and input folders are matched to each other by the name that `_simName` derives from
    their paths. Data folders in which no trajectories are detected are skipped.

    Parameters
    ----------
    datafolders : str list
        A list of directories, each containing a single trajectory
    topologies : str list
        A list of topology files or folders containing a topology file corresponding to the trajectories in
        dataFolders. Can also be a single string to a single structure which corresponds to all trajectories.
    inputfolders : optional, str list
        A list of directories, each containing the input files used to produce the trajectories in dataFolders

    Returns
    -------
    sims : np.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A list of simulations

    Raises
    ------
    FileNotFoundError
        If no data folders or topologies are given, a topology path does not exist, or no topology/input matches
        the name of a data folder.
    NotADirectoryError
        If a data or input folder is not a directory.
    RuntimeError
        If two data folders map to the same simulation name.

    Examples
    --------
    >>> simlist(glob('./test/data/*/'), glob('./test/input/*/'), glob('./test/input/*/'))
    >>> simlist(glob('./test/data/*/'), glob('./test/input/*/*.pdb'), glob('./test/input/*/'))
    """
    from htmd.util import ensurelist
    import natsort

    def _require_dirs(paths):
        # Fail on the first entry that is not an existing directory
        for path in paths:
            if not os.path.isdir(path):
                raise NotADirectoryError('{}'.format(path))

    if not datafolders:
        raise FileNotFoundError('No data folders were given, check your arguments.')
    if not topologies:
        raise FileNotFoundError('No molecule files were given, check your arguments.')
    topologies = ensurelist(topologies)
    datafolders = ensurelist(datafolders)
    _require_dirs(datafolders)
    if inputfolders:
        inputfolders = ensurelist(inputfolders)
        _require_dirs(inputfolders)

    # The globs are matched by simulation name because the input glob may contain more folders than the data glob
    # (e.g. simulations whose data has not been retrieved yet).
    datanames = {}
    for datadir in datafolders:
        simname = _simName(datadir)
        if simname in datanames:
            raise RuntimeError('Duplicate simulation name detected. Cannot name-match directories.')
        datanames[simname] = datadir

    molnames = {}
    for topofile in topologies:
        if not os.path.exists(topofile):
            raise FileNotFoundError('File {} does not exist'.format(topofile))
        molnames[_simName(topofile)] = topofile

    if inputfolders:
        inputnames = {_simName(inputdir): inputdir for inputdir in inputfolders}

    logger.debug('Starting listing of simulations.')

    sims = []
    sortednames = natsort.natsorted(datanames.keys())
    from tqdm import tqdm
    for simname in tqdm(sortednames, desc='Creating simlist'):
        trajectories = _autoDetectTrajectories(datanames[simname])
        if not trajectories:
            continue

        # A single topology is shared by all simulations, several are matched by name
        if len(topologies) <= 1:
            molfile = topologies[0]
        elif simname in molnames:
            molfile = molnames[simname]
        else:
            raise FileNotFoundError('Did not find molfile with folder name ' + simname + ' in the given glob')

        if os.path.isdir(molfile):
            molfile = _autoDetectTopology(molfile)

        if not inputfolders:
            inputf = []
        elif simname in inputnames:
            inputf = inputnames[simname]
        else:
            raise FileNotFoundError('Did not find input with folder name ' + simname + ' in the given glob')

        numframes = [_readNumFrames(traj) for traj in trajectories]
        # Ids are consecutive over the simulations actually kept, so the current list length is the next id
        sims.append(Sim(simid=len(sims), parent=None, input=inputf, trajectory=trajectories, molfile=molfile,
                        numframes=numframes))

    logger.debug('Finished listing of simulations.')
    return np.array(sims, dtype=object)