def computePeriodicBoxVectors(a_length, b_length, c_length, alpha, beta, gamma):
    """Build periodic box vectors from unit cell edge lengths and angles.

    Every argument may be a plain number or a Quantity.  Plain lengths are
    taken to be in nanometers and plain angles in radians.

    Returns the three box vectors (a, b, c) as a Quantity in nanometers.  The
    first vector lies along x, the second has no z component, and the set is
    brought into the reduced form required by OpenMM.
    """
    # Strip units so the trigonometry below operates on plain numbers.
    a_length, b_length, c_length = [
        edge.value_in_unit(nanometers) if is_quantity(edge) else edge
        for edge in (a_length, b_length, c_length)
    ]
    alpha, beta, gamma = [
        angle.value_in_unit(radians) if is_quantity(angle) else angle
        for angle in (alpha, beta, gamma)
    ]

    cos_gamma = math.cos(gamma)
    sin_gamma = math.sin(gamma)
    cos_beta = math.cos(beta)
    cos_alpha = math.cos(alpha)

    # Components of the third vector.
    cx = c_length * cos_beta
    cy = c_length * (cos_alpha - cos_beta * cos_gamma) / sin_gamma
    cz = math.sqrt(c_length * c_length - cx * cx - cy * cy)

    rows = [
        [a_length, 0, 0],
        [b_length * cos_gamma, b_length * sin_gamma, 0],
        [cx, cy, cz],
    ]

    # Components with magnitude below 1e-6 are treated as numerical noise
    # and replaced by exactly 0.
    a, b, c = [
        Vec3(*[0.0 if abs(component) < 1e-6 else component
               for component in row])
        for row in rows
    ]

    # Subtract whole multiples of the earlier vectors to reach reduced form.
    c = c - b * round(c[1] / b[1])
    c = c - a * round(c[0] / a[0])
    b = b - a * round(b[0] / a[0])
    return (a, b, c) * nanometers
def getUnitCellDimensions(self, frame=0):
    """Return the diagonal of a frame's periodic box vectors.

    Parameters:
     - frame (int=0) the index of the frame for which to get the unit cell dimensions

    The result is a Vec3 in nanometers built from the x component of the
    first box vector, the y component of the second and the z component of
    the third.
    """
    box = self._periodicBoxVectors[frame]
    sizes = [box[axis][axis].value_in_unit(nanometers) for axis in range(3)]
    return Vec3(*sizes) * nanometers
def testReducePBCVectors(self):
    """Check reducePeriodicBoxVectors against computePeriodicBoxVectors.

    A hand-written, unreduced set of box vectors is reduced and compared
    component by component with the vectors computed from the matching edge
    length and angles.
    """
    unreduced = (
        Vec3(4.24388485, 0.0, 0.0),
        Vec3(-1.4146281691908937, 4.001173048368583, 0.0),
        Vec3(-1.4146281691908937, -2.0005862820516203, 3.4651176446201674),
    ) * nanometers
    edge = 4.24388485
    angle = 109.4712190 * degrees

    vecs = reducePeriodicBoxVectors(unreduced)
    vecs2 = computePeriodicBoxVectors(edge, edge, edge, angle, angle, angle)

    # Both results must hold exactly three vectors.
    a1, a2, a3 = vecs
    b1, b2, b3 = vecs2
    for reduced, computed in ((a1, b1), (a2, b2), (a3, b3)):
        for x, y in zip(reduced, computed):
            self.assertAlmostEqual(strip_units(x), strip_units(y))
def add_solvent(pdb_filepath: str, ani_input: dict, pdb_output_filepath: str, box_length: unit.quantity.Quantity = (2.5 * unit.nanometer)):
    """Solvate a PDB structure in a cubic box and record the solvent.

    Parameters
    ----------
    pdb_filepath : str
        path of the PDB file handed to PDBFixer
    ani_input : dict
        updated in place with the keys 'solvent_atoms' (concatenated element
        symbols), 'solvent_coords' (coordinates in angstrom) and 'box_length'
    pdb_output_filepath : str
        path the solvated system is written to
    box_length : unit.Quantity
        edge length of the cubic box

    Only atoms that belong to residues named 'HOH' are recorded as solvent.
    """
    assert (type(box_length) == unit.Quantity)

    pdb = PDBFixer(filename=pdb_filepath)

    # NOTE: the solute is not translated to the box center and the
    # coordinates are not wrapped into the unit cell before solvation.
    edge_nm = box_length.value_in_unit(unit.nanometer)
    pdb.addSolvent(boxVectors=(Vec3(edge_nm, 0.0, 0.0),
                               Vec3(0.0, edge_nm, 0.0),
                               Vec3(0.0, 0.0, edge_nm)))

    from simtk.openmm.app import PDBFile
    # Close the handle explicitly so the file is flushed when we return.
    with open(pdb_output_filepath, 'w') as pdb_file:
        PDBFile.writeFile(pdb.topology, pdb.positions, pdb_file)

    atom_list = []
    coord_list = []
    for atom, coor in zip(pdb.topology.atoms(), pdb.positions):
        if atom.residue.name != 'HOH':
            continue
        atom_list.append(atom.element.symbol)
        coor = coor.value_in_unit(unit.angstrom)
        coord_list.append([coor[0], coor[1], coor[2]])

    ani_input['solvent_atoms'] = ''.join(atom_list)
    ani_input['solvent_coords'] = np.array(coord_list) * unit.angstrom
    ani_input['box_length'] = box_length
def calc_new(self, coords, dirname):
    """Evaluate energy and gradient for a new set of coordinates.

    coords is reshaped to (-1, 3), scaled by bohr2ang and stored as the first
    frame of self.M; the same values divided by 10 are passed to the OpenMM
    context as nanometers.  dirname is not used by this method.

    Returns (energy, gradient): the potential energy in kJ/mol divided by
    eqcgmx, and the flattened forces divided by fqcgmx.
    NOTE(review): presumably these factors convert to atomic units and turn
    forces into a gradient -- confirm against their definitions.

    Raises OpenMMEngineError if any step of the evaluation fails.
    """
    from simtk.openmm import Vec3
    import simtk.unit as u
    try:
        self.M.xyzs[0] = coords.reshape(-1, 3) * bohr2ang
        frame = self.M.xyzs[0]
        pos = [Vec3(frame[i, 0] / 10, frame[i, 1] / 10, frame[i, 2] / 10)
               for i in range(self.M.na)] * u.nanometer
        self.simulation.context.setPositions(pos)
        state = self.simulation.context.getState(getEnergy=True, getForces=True)
        energy = state.getPotentialEnergy().value_in_unit(u.kilojoule_per_mole) / eqcgmx
        gradient = state.getForces(asNumpy=True).flatten() / fqcgmx
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit are not disguised as engine failures.
        raise OpenMMEngineError
    return energy, gradient
def test_create_calibration_system_with_boxspecs(self):
    """Create a solvated imidazole system."""
    data_dir = "testsystems/imidazole_explicit"

    # A protonated imidazole molecule in vacuum
    vacuum_file = get_test_data("imidazole.pdb", data_dir)
    input_xml = get_test_data("protons-imidazole-ph-feature.xml", data_dir)

    # Random names keep the generated files from colliding between runs.
    output_basename = str(uuid4())
    hxml = "{}-h.xml".format(str(uuid4()))

    protons_app.ligands.create_hydrogen_definitions(input_xml, hxml)

    box = unit.Quantity(Vec3(1.2, 1.2, 1.2), unit.nanometer)
    protons_app.ligands.prepare_calibration_systems(
        vacuum_file,
        output_basename,
        ffxml=input_xml,
        hxml=hxml,
        delete_old_H=True,
        minimize=False,
        box_size=box,
    )
def _construct_box_vectors(line):
    """Create the periodic box vectors based on the values stored in the file.

    @param[in] line The line containing the description

    A line with exactly three numbers yields a rectangular box.  Otherwise
    the first nine numbers are used: the first three are the diagonal
    elements and the next six fill the off-diagonal components.
    """
    fields = [float(token) for token in line.split()]
    if len(fields) == 3:
        x, y, z = fields
        return (Vec3(x, 0, 0), Vec3(0, y, 0), Vec3(0, 0, z)) * nanometers
    return (
        Vec3(fields[0], fields[3], fields[4]),
        Vec3(fields[5], fields[1], fields[6]),
        Vec3(fields[7], fields[8], fields[2]),
    ) * nanometers
def build_water_system(box_width):
    """Build a TIP3P water box with a cubic edge of box_width nanometers.

    Starts from an empty topology, lets Modeller fill the box with solvent
    and creates a system without cutoff, constraints or rigid water.

    Returns (system, positions, box, topology), where positions is a
    Quantity wrapping a numpy array and box is np.eye(3) * box_width.
    """
    forcefield = app.ForceField("tip3p.xml")

    # Start from an empty topology with no coordinates.
    modeller = app.Modeller(app.Topology(), unit.Quantity((), unit.angstroms))
    box_size = Vec3(box_width, box_width, box_width) * unit.nanometers
    modeller.addSolvent(forcefield, boxSize=box_size, model="tip3p")

    system = forcefield.createSystem(
        modeller.getTopology(),
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        rigidWater=False,
    )

    # Re-wrap the positions as a numpy array carrying the same unit.
    raw_positions = modeller.getPositions()
    positions = unit.Quantity(
        np.array(raw_positions / raw_positions.unit), raw_positions.unit)

    assert modeller.getTopology().getNumAtoms() == positions.shape[0]

    # TODO: minimize the water box (BFGS or scipy.optimize)
    return system, positions, np.eye(3) * box_width, modeller.getTopology()
def testVec3Multiplication(self):
    """Check that a Vec3 can be scaled from the left and from the right."""
    vector = Vec3(1, 2, 3)
    scale = 2
    expected = Vec3(2, 4, 6)
    self.assertEqual(vector * scale, expected)
    self.assertEqual(scale * vector, expected)
def testVec3Equality(self):
    """Check that two Vec3 built from the same components compare equal."""
    first = Vec3(1, 2, 3)
    second = Vec3(1, 2, 3)
    self.assertEqual(first, second)
def testNegation(self):
    """Check that unary minus negates every component of a Vec3."""
    vector = Vec3(1, 2, 3)
    expected = Vec3(-1, -2, -3)
    self.assertEqual(-vector, expected)
def oemol_to_openmmTop(mol):
    """
    This function converts an OEMol to an openmm topology
    The OEMol coordinates are assumed to be in Angstrom unit

    Parameters:
    -----------
    mol: OEMol molecule
        The molecule to convert. Bonds without a type string get their type
        set on the molecule as a side effect.

    Return:
    -------
    topology : OpenMM Topology
        The generated OpenMM topology
    positions : OpenMM Quantity
        The molecule atom positions associated with the
        generated topology in Angstrom units

    Raises:
    -------
    ValueError
        If the number of atoms or bonds of the generated topology does not
        match the OEMol
    """
    # OE Hierarchical molecule view
    hv = oechem.OEHierView(
        mol,
        oechem.OEAssumption_BondedResidue +
        oechem.OEAssumption_ResPerceived +
        oechem.OEAssumption_PDBOrder)

    # Create empty OpenMM Topology
    topology = app.Topology()
    # Dictionary used to map oe atoms to openmm atoms
    oe_atom_to_openmm_at = {}

    for chain in hv.GetChains():
        # Create empty OpenMM Chain
        openmm_chain = topology.addChain(chain.GetChainID())

        for frag in chain.GetFragments():
            for hres in frag.GetResidues():
                # Get OE residue
                oe_res = hres.GetOEResidue()
                # Create OpenMM residue
                openmm_res = topology.addResidue(oe_res.GetName(), openmm_chain)

                for oe_at in hres.GetAtoms():
                    # Select atom element based on the atomic number
                    element = app.element.Element.getByAtomicNumber(
                        oe_at.GetAtomicNum())
                    # Add atom OpenMM atom to the topology
                    openmm_at = topology.addAtom(oe_at.GetName(), element, openmm_res)
                    openmm_at.index = oe_at.GetIdx()
                    # Add atom to the mapping dictionary
                    oe_atom_to_openmm_at[oe_at] = openmm_at

    if topology.getNumAtoms() != mol.NumAtoms():
        raise ValueError(
            "OpenMM topology and OEMol number of atoms mismatching: "
            "OpenMM = {} vs OEMol  = {}".format(topology.getNumAtoms(),
                                               mol.NumAtoms()))

    # Count the number of bonds in the openmm topology
    omm_bond_count = 0

    def IsAmideBond(oe_bond):
        # This supporting function checks if the passed bond is an amide bond or not.
        # Our definition of amide bond C-N between a Carbon and a Nitrogen atom is:
        #          O
        #          ║
        #  CA or O-C-N-
        #            |

        # The amide bond C-N is a single bond
        if oe_bond.GetOrder() != 1:
            return False

        atomB = oe_bond.GetBgn()
        atomE = oe_bond.GetEnd()

        # The amide bond is made by Carbon and Nitrogen atoms
        if not (atomB.IsCarbon() and atomE.IsNitrogen() or
                (atomB.IsNitrogen() and atomE.IsCarbon())):
            return False

        # Select Carbon and Nitrogen atoms
        if atomB.IsCarbon():
            C_atom = atomB
            N_atom = atomE
        else:
            C_atom = atomE
            N_atom = atomB

        # Carbon and Nitrogen atoms must have 3 neighbour atoms
        if not (C_atom.GetDegree() == 3 and N_atom.GetDegree() == 3):
            return False

        double_bonds = 0
        single_bonds = 0

        for bond in C_atom.GetBonds():
            # The C-O bond can be single or double.
            if (bond.GetBgn() == C_atom and bond.GetEnd().IsOxygen()) or \
                    (bond.GetBgn().IsOxygen() and bond.GetEnd() == C_atom):
                if bond.GetOrder() == 2:
                    double_bonds += 1
                if bond.GetOrder() == 1:
                    single_bonds += 1
            # The CA-C bond is single
            if (bond.GetBgn() == C_atom and bond.GetEnd().IsCarbon()) or \
                    (bond.GetBgn().IsCarbon() and bond.GetEnd() == C_atom):
                if bond.GetOrder() == 1:
                    single_bonds += 1

        # Just one double and one single bonds are connected to C
        # In this case the bond is an amide bond
        return double_bonds == 1 and single_bonds == 1

    known_bond_types = ['Single', 'Double', 'Triple', 'Aromatic', 'Amide']

    # Creating bonds
    for oe_bond in mol.GetBonds():
        omm_bond_count += 1

        # Set the bond type.  The type string is compared by value: an
        # identity test against "" does not reliably detect an empty string.
        if oe_bond.GetType() != "":
            if oe_bond.GetType() in known_bond_types:
                omm_bond_type = oe_bond.GetType()
            else:
                omm_bond_type = None
        else:
            # No type stored on the bond: perceive it and store it back.
            if oe_bond.IsAromatic():
                oe_bond.SetType("Aromatic")
                omm_bond_type = "Aromatic"
            elif oe_bond.GetOrder() == 2:
                oe_bond.SetType("Double")
                omm_bond_type = "Double"
            elif oe_bond.GetOrder() == 3:
                oe_bond.SetType("Triple")
                omm_bond_type = "Triple"
            elif IsAmideBond(oe_bond):
                oe_bond.SetType("Amide")
                omm_bond_type = "Amide"
            elif oe_bond.GetOrder() == 1:
                oe_bond.SetType("Single")
                omm_bond_type = "Single"
            else:
                omm_bond_type = None

        topology.addBond(oe_atom_to_openmm_at[oe_bond.GetBgn()],
                         oe_atom_to_openmm_at[oe_bond.GetEnd()],
                         type=omm_bond_type,
                         order=oe_bond.GetOrder())

    if omm_bond_count != mol.NumBonds():
        raise ValueError(
            "OpenMM topology and OEMol number of bonds mismatching: "
            "OpenMM = {} vs OEMol  = {}".format(omm_bond_count,
                                               mol.NumBonds()))

    dic = mol.GetCoords()
    positions = [Vec3(v[0], v[1], v[2]) for v in dic.values()] * unit.angstrom

    return topology, positions
def randompos(self):
    """Draw a random position as a Vec3 in nanometers.

    x and y are np.random.rand() samples scaled by the first two entries of
    self.boxsize; z is a sample mapped onto the range from self.zmin to
    self.zmax.
    """
    x = np.random.rand() * self.boxsize[0]
    y = np.random.rand() * self.boxsize[1]
    z = np.random.rand() * (self.zmax - self.zmin) + self.zmin
    return Vec3(x, y, z) * nanometer
def __init__(self, file):
    """Load a .gro file.

    The atom positions can be retrieved by calling getPositions().

    Parameters:
     - file (string) the name of the file to load

    Atom names, residue ids/names and elements are taken from the first
    frame only; positions and box vectors are stored for every frame.
    Raises Exception on a line that is neither a header, an atom record nor
    the box line expected after the last atom of a frame.
    """
    xyzs = []
    elements = []  # The element, most useful for quantum chemistry calculations
    atomname = []  # The atom name, for instance 'HW1'
    comms = []
    resid = []
    resname = []
    boxes = []
    xyz = []
    ln = 0
    frame = 0
    # The with block closes the file even if parsing raises.
    with open(file) as groFile:
        for line in groFile:
            if ln == 0:
                # First line of a frame: the comment/title.
                comms.append(line.strip())
            elif ln == 1:
                # Second line of a frame: the number of atoms.
                na = int(line.strip())
            elif _is_gro_coord(line):
                if frame == 0:
                    # Create the list of residues, atom names etc. only if it's the first frame.
                    (thisresnum, thisresname, thisatomname) = [
                        line[i * 5:i * 5 + 5].strip() for i in range(3)
                    ]
                    resname.append(thisresname)
                    resid.append(int(thisresnum))
                    atomname.append(thisatomname)
                    # Guess the element symbol: keep the first character and
                    # drop upper-case letters and digits from the rest.
                    thiselem = thisatomname
                    if len(thiselem) > 1:
                        thiselem = thiselem[0] + sub('[A-Z0-9]', '', thiselem[1:])
                    try:
                        elements.append(elem.get_by_symbol(thiselem))
                    except KeyError:
                        elements.append(None)
                # The spacing between the first two decimal points after
                # column 20 gives the width of each coordinate field.
                firstDecimalPos = line.index('.', 20)
                secondDecimalPos = line.index('.', firstDecimalPos + 1)
                digits = secondDecimalPos - firstDecimalPos
                pos = [
                    float(line[20 + i * digits:20 + (i + 1) * digits])
                    for i in range(3)
                ]
                xyz.append(Vec3(pos[0], pos[1], pos[2]))
            elif _is_gro_box(line) and ln == na + 2:
                # Box line closes the frame; reset the counters for the next one.
                boxes.append(_construct_box_vectors(line))
                xyzs.append(xyz * nanometers)
                xyz = []
                ln = -1
                frame += 1
            else:
                raise Exception("Unexpected line in .gro file: " + line)
            ln += 1

    ## The atom positions read from the file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = xyzs[0]
    ## A list containing the element of each atom stored in the file
    self.elements = elements
    ## A list containing the name of each atom stored in the file
    self.atomNames = atomname
    ## A list containing the ID of the residue that each atom belongs to
    self.residueIds = resid
    ## A list containing the name of the residue that each atom belongs to
    self.residueNames = resname
    self._positions = xyzs
    self._periodicBoxVectors = boxes
    self._numpyPositions = None
def __init__(self, file, extraParticleIdentifier='EP'):
    """Load a PDB file.

    The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

    Parameters
    ----------
    file : string
        the name of the file to load.  An open file-like object or an
        already parsed PdbStructure is accepted as well.
    extraParticleIdentifier : string='EP'
        if this value appears in the element column for an ATOM record, the Atom's element will be set to None to mark it as an extra particle
    """
    metalElements = [
        'Al', 'As', 'Ba', 'Ca', 'Cd', 'Ce', 'Co', 'Cs', 'Cu', 'Dy', 'Fe',
        'Gd', 'Hg', 'Ho', 'In', 'Ir', 'K', 'Li', 'Mg', 'Mn', 'Mo', 'Na',
        'Ni', 'Pb', 'Pd', 'Pt', 'Rb', 'Rh', 'Sm', 'Sr', 'Te', 'Tl', 'V',
        'W', 'Yb', 'Zn'
    ]

    top = Topology()
    ## The Topology read from the PDB file
    self.topology = top

    # Load the PDB file
    if isinstance(file, PdbStructure):
        pdb = file
    elif isinstance(file, str):
        # We opened the file, so we close it -- also when parsing fails.
        with open(file) as inputfile:
            pdb = PdbStructure(inputfile, load_all_models=True,
                               extraParticleIdentifier=extraParticleIdentifier)
    else:
        # A handle owned by the caller is left open.
        pdb = PdbStructure(file, load_all_models=True,
                           extraParticleIdentifier=extraParticleIdentifier)
    PDBFile._loadNameReplacementTables()

    # Build the topology
    atomByNumber = {}
    for chain in pdb.iter_chains():
        c = top.addChain(chain.chain_id)
        for residue in chain.iter_residues():
            resName = residue.get_name()
            if resName in PDBFile._residueNameReplacements:
                resName = PDBFile._residueNameReplacements[resName]
            r = top.addResidue(resName, c, str(residue.number),
                               residue.insertion_code)
            if resName in PDBFile._atomNameReplacements:
                atomReplacements = PDBFile._atomNameReplacements[resName]
            else:
                atomReplacements = {}
            for atom in residue.iter_atoms():
                atomName = atom.get_name()
                if atomName in atomReplacements:
                    atomName = atomReplacements[atomName]
                atomName = atomName.strip()
                element = atom.element
                if element == 'EP':
                    element = None
                elif element is None:
                    # Try to guess the element.
                    upper = atomName.upper()
                    # Leading digits of the atom name are ignored.
                    while len(upper) > 1 and upper[0].isdigit():
                        upper = upper[1:]
                    if upper.startswith('CL'):
                        element = elem.chlorine
                    elif upper.startswith('NA'):
                        element = elem.sodium
                    elif upper.startswith('MG'):
                        element = elem.magnesium
                    elif upper.startswith('BE'):
                        element = elem.beryllium
                    elif upper.startswith('LI'):
                        element = elem.lithium
                    elif upper.startswith('K'):
                        element = elem.potassium
                    elif upper.startswith('ZN'):
                        element = elem.zinc
                    elif (len(residue) == 1 and upper.startswith('CA')):
                        # 'CA' is read as calcium only when len(residue) is 1.
                        element = elem.calcium
                    else:
                        try:
                            element = elem.get_by_symbol(upper[0])
                        except KeyError:
                            pass
                newAtom = top.addAtom(atomName, element, r,
                                      str(atom.serial_number))
                atomByNumber[atom.serial_number] = newAtom

    self._positions = []
    for model in pdb.iter_models(True):
        coords = []
        for chain in model.iter_chains():
            for residue in chain.iter_residues():
                for atom in residue.iter_atoms():
                    pos = atom.get_position().value_in_unit(nanometers)
                    coords.append(Vec3(pos[0], pos[1], pos[2]))
        self._positions.append(coords * nanometers)
    ## The atom positions read from the PDB file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.setPeriodicBoxVectors(pdb.get_periodic_box_vectors())
    self.topology.createStandardBonds()
    self.topology.createDisulfideBonds(self.positions)
    self._numpyPositions = None

    # Add bonds based on CONECT records. Bonds between metals of elements specified in metalElements and residues in standardResidues are not added.
    connectBonds = []
    for connect in pdb.models[-1].connects:
        i = connect[0]
        for j in connect[1:]:
            if i in atomByNumber and j in atomByNumber:
                atomI = atomByNumber[i]
                atomJ = atomByNumber[j]
                if atomI.element is not None and atomJ.element is not None:
                    if atomI.element.symbol not in metalElements and atomJ.element.symbol not in metalElements:
                        connectBonds.append((atomI, atomJ))
                    elif atomI.element.symbol in metalElements and atomJ.residue.name not in PDBFile._standardResidues:
                        connectBonds.append((atomI, atomJ))
                    elif atomJ.element.symbol in metalElements and atomI.residue.name not in PDBFile._standardResidues:
                        connectBonds.append((atomI, atomJ))
                else:
                    # At least one atom has no element; the bond is always kept.
                    connectBonds.append((atomI, atomJ))
    if len(connectBonds) > 0:
        # Only add bonds that don't already exist.
        existingBonds = set(top.bonds())
        for bond in connectBonds:
            if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                top.addBond(bond[0], bond[1])
                existingBonds.add(bond)
def add_droplet(
    self,
    topology: md.Topology,
    coordinates: unit.quantity.Quantity,
    diameter: unit.quantity.Quantity = (30.0 * unit.angstrom),
    restrain_hydrogen_bonds: bool = True,
    restrain_hydrogen_angles: bool = False,
    top_file: str = "",
) -> md.Trajectory:
    """
    Adding a droplet with a given diameter around a small molecule.

    Parameters
    ----------
    topology: md.Topology
        topology of the molecule
    coordinates: np.array, unit'd
    diameter: float, unit'd
    restrain_hydrogen_bonds: bool
        add a bond restraint between the oxygen and each of the first two
        hydrogens of every water
    restrain_hydrogen_angles: bool
        add an angle restraint on the H-O-H angle of every water
    top_file: str
        if top_file is provided the final droplet pdb is either kept and can be reused or
        if top_file already exists it will be used to create the same droplet.

    Returns
    ----------
    A mdtraj.Trajectory object with the ligand centered in the solvent for inspection.

    Side effects: sets self._ligand_in_water_coordinates,
    self.ligand_in_water_atoms, self.ligand_in_water_topology and
    self.solvent_restraints.
    """
    assert type(diameter) == unit.Quantity
    assert type(topology) == md.Topology
    assert type(coordinates) == unit.Quantity

    if restrain_hydrogen_bonds:
        logger.debug("Hydrogen bonds are restraint.")

    if restrain_hydrogen_angles:
        logger.warning("HOH angles are restraint.")

    # get topology from mdtraj to PDBfixer via pdb file
    radius = diameter.value_in_unit(unit.angstrom) / 2
    center = np.array([radius, radius, radius])

    # if no solvated pdb file is provided generate one
    if top_file:
        # read in the file with the defined droplet
        pdb_filepath = top_file
    else:
        # generate a one time droplet
        pdb_filepath = f"tmp{random.randint(1,10000000)}.pdb"

    if not os.path.exists(pdb_filepath):
        logger.info(f"Generating droplet for {pdb_filepath}...")

        # mdtraj works with nanometer
        md.Trajectory(coordinates.value_in_unit(unit.nanometer),
                      topology).save_pdb(pdb_filepath)
        pdb = PDBFixer(filename=pdb_filepath)
        os.remove(pdb_filepath)

        # put the ligand in the center
        l_in_nanometer = diameter.value_in_unit(unit.nanometer)
        pdb.positions = np.array(
            pdb.positions.value_in_unit(
                unit.nanometer)) + (l_in_nanometer / 2)
        # add water
        pdb.addSolvent(boxVectors=(
            Vec3(l_in_nanometer, 0.0, 0.0),
            Vec3(0.0, l_in_nanometer, 0.0),
            Vec3(0.0, 0.0, l_in_nanometer),
        ))
        # get topology from PDBFixer to mdtraj
        # NOTE: a second tmpfile - not happy about this
        from simtk.openmm.app import PDBFile

        # Close the handle before parmed reads the file back, so the
        # content is guaranteed to be flushed to disk.
        with open(pdb_filepath, "w") as pdb_file:
            PDBFile.writeFile(pdb.topology, pdb.positions, pdb_file)
        # load pdb in parmed
        logger.debug("Load with parmed ...")
        structure = pm.load_file(pdb_filepath)
        os.remove(pdb_filepath)

        # search for residues that are outside of the cutoff and delete them
        to_delete = []
        logger.debug("Flag residues ...")

        for residue in structure.residues:
            for atom in residue:
                p1 = np.array([atom.xx, atom.xy, atom.xz])
                p2 = center

                squared_dist = np.sum((p1 - p2)**2, axis=0)
                dist = np.sqrt(squared_dist)
                if (
                        dist > radius + 1
                ):  # NOTE: distance must be greater than radius + 1 Angstrom
                    to_delete.append(residue)

        # only delete water molecules; a residue flagged by several atoms is
        # handled once thanks to the set
        for residue in list(set(to_delete)):
            if residue.name == "HOH":
                logger.debug(f"Remove: {residue}")
                structure.residues.remove(residue)
            else:
                logger.warning(
                    f"Residue {residue} reaches outside the droplet")
                print(f"Residue {residue} reaches outside the droplet")

        structure.write_pdb(pdb_filepath)

    # load pdb with mdtraj
    traj = md.load(pdb_filepath)
    if not top_file:
        # the droplet file was only temporary
        os.remove(pdb_filepath)

    # set coordinates #NOTE: note the xyz[0]
    self._ligand_in_water_coordinates = traj.xyz[0] * unit.nanometer

    # generate atom string
    atom_list = [atom.element.symbol for atom in traj.topology.atoms]
    # set atom string
    self.ligand_in_water_atoms = "".join(atom_list)
    # set mdtraj topology
    self.ligand_in_water_topology = traj.topology

    # set FlattBottomRestraintToCenter on each oxygen
    self.solvent_restraints = []
    for residue in traj.topology.residues:
        if residue.is_water:
            for atom in residue.atoms:
                if str(atom.element.symbol) == "O":
                    self.solvent_restraints.append(
                        CenterFlatBottomRestraint(
                            sigma=0.1 * unit.angstrom,
                            point=center * unit.angstrom,
                            radius=(diameter / 2),
                            atom_idx=atom.index,
                            active_at=-1,
                        ))
                    logger.debug("Adding restraint to center to {}".format(
                        atom.index))

    if restrain_hydrogen_bonds or restrain_hydrogen_angles:
        for residue in traj.topology.residues:
            if residue.is_water:
                oxygen_idx = -1
                hydrogen_idxs = []
                for atom in residue.atoms:
                    if str(atom.element.symbol) == "O":
                        oxygen_idx = atom.index
                    elif str(atom.element.symbol) == "H":
                        hydrogen_idxs.append(atom.index)
                    else:
                        raise RuntimeError(
                            "Water should only consist of O and H atoms.")
                if restrain_hydrogen_bonds:
                    self.solvent_restraints.append(
                        BondFlatBottomRestraint(
                            sigma=0.2 * unit.angstrom,
                            atom_i_idx=oxygen_idx,
                            atom_j_idx=hydrogen_idxs[0],
                            atoms=self.ligand_in_water_atoms,
                        ))
                    self.solvent_restraints.append(
                        BondFlatBottomRestraint(
                            sigma=0.2 * unit.angstrom,
                            atom_i_idx=oxygen_idx,
                            atom_j_idx=hydrogen_idxs[1],
                            atoms=self.ligand_in_water_atoms,
                        ))
                if restrain_hydrogen_angles:
                    self.solvent_restraints.append(
                        AngleHarmonicRestraint(
                            sigma=0.1 * unit.radian,
                            atom_i_idx=hydrogen_idxs[0],
                            atom_j_idx=oxygen_idx,
                            atom_k_idx=hydrogen_idxs[1],
                        ))

    # return a mdtraj object for visual check
    return md.Trajectory(
        self._ligand_in_water_coordinates.value_in_unit(unit.nanometer),
        self.ligand_in_water_topology,
    )
def __init__(self, file):
    """Load a PDBx/mmCIF file.

    The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

    Parameters:
     - file (string) the name of the file to load.  Alternatively you can pass an open file object.

    Raises ValueError if a later model references an unknown atom or lists
    its atoms in a different order than the first model.
    """
    top = Topology()
    ## The Topology read from the PDBx/mmCIF file
    self.topology = top
    self._positions = []

    # Load the file.
    data = []
    if isinstance(file, str):
        # We opened the file, so we close it once it has been read.
        with open(file) as inputFile:
            PdbxReader(inputFile).read(data)
    else:
        # A handle owned by the caller is left open.
        PdbxReader(file).read(data)
    block = data[0]

    # Build the topology.
    atomData = block.getObj('atom_site')
    atomNameCol = atomData.getAttributeIndex('label_atom_id')
    atomIdCol = atomData.getAttributeIndex('id')
    resNameCol = atomData.getAttributeIndex('label_comp_id')
    resIdCol = atomData.getAttributeIndex('label_seq_id')
    resNumCol = atomData.getAttributeIndex('auth_seq_id')
    asymIdCol = atomData.getAttributeIndex('label_asym_id')
    chainIdCol = atomData.getAttributeIndex('label_entity_id')
    elementCol = atomData.getAttributeIndex('type_symbol')
    altIdCol = atomData.getAttributeIndex('label_alt_id')
    modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
    xCol = atomData.getAttributeIndex('Cartn_x')
    yCol = atomData.getAttributeIndex('Cartn_y')
    zCol = atomData.getAttributeIndex('Cartn_z')
    lastChainId = None
    lastResId = None
    lastAsymId = None
    atomTable = {}
    atomsInResidue = set()
    models = []
    for row in atomData.getRowList():
        atomKey = ((row[resIdCol], row[asymIdCol], row[atomNameCol]))
        # A column index of -1 means there is no model column; use model '1'.
        model = ('1' if modelCol == -1 else row[modelCol])
        if model not in models:
            models.append(model)
            self._positions.append([])
        modelIndex = models.index(model)
        if row[altIdCol] != '.' and atomKey in atomTable and len(
                self._positions[modelIndex]) > atomTable[atomKey].index:
            # This row is an alternate position for an existing atom, so ignore it.
            continue
        if modelIndex == 0:
            # This row defines a new atom.
            if lastChainId != row[chainIdCol]:
                # The start of a new chain.
                chain = top.addChain(row[asymIdCol])
                lastChainId = row[chainIdCol]
                lastResId = None
                lastAsymId = None
            if lastResId != row[resIdCol] or lastAsymId != row[
                    asymIdCol] or (lastResId == '.' and
                                   row[atomNameCol] in atomsInResidue):
                # The start of a new residue.
                res = top.addResidue(
                    row[resNameCol], chain,
                    None if resNumCol == -1 else row[resNumCol])
                lastResId = row[resIdCol]
                lastAsymId = row[asymIdCol]
                atomsInResidue.clear()
            element = None
            try:
                element = elem.get_by_symbol(row[elementCol])
            except KeyError:
                pass
            atom = top.addAtom(row[atomNameCol], element, res,
                               row[atomIdCol])
            atomTable[atomKey] = atom
            atomsInResidue.add(row[atomNameCol])
        else:
            # This row defines coordinates for an existing atom in one of the later models.
            try:
                atom = atomTable[atomKey]
            except KeyError:
                raise ValueError(
                    'Unknown atom %s in residue %s %s for model %s' %
                    (row[atomNameCol], row[resNameCol], row[resIdCol],
                     model))
            if atom.index != len(self._positions[modelIndex]):
                raise ValueError(
                    'Atom %s for model %s does not match the order of atoms for model %s'
                    % (row[atomIdCol], model, models[0]))
        # Coordinates are scaled by 0.1 and tagged as nanometers below
        # (presumably the file stores angstroms -- confirm).
        self._positions[modelIndex].append(
            Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol])) *
            0.1)
    for i in range(len(self._positions)):
        self._positions[i] = self._positions[i] * nanometers
    ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.createStandardBonds()
    self._numpyPositions = None

    # Record unit cell information, if present.
    cell = block.getObj('cell')
    if cell is not None and cell.getRowCount() > 0:
        row = cell.getRow(0)
        (a, b, c) = [
            float(row[cell.getAttributeIndex(attribute)]) * 0.1
            for attribute in ('length_a', 'length_b', 'length_c')
        ]
        # Angles are converted from degrees to radians.
        (alpha, beta, gamma) = [
            float(row[cell.getAttributeIndex(attribute)]) * math.pi / 180.0
            for attribute in ('angle_alpha', 'angle_beta', 'angle_gamma')
        ]
        self.topology.setPeriodicBoxVectors(
            computePeriodicBoxVectors(a, b, c, alpha, beta, gamma))

    # Add bonds based on struct_conn records.
    connectData = block.getObj('struct_conn')
    if connectData is not None:
        res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
        res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
        atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
        atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
        asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
        asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
        typeCol = connectData.getAttributeIndex('conn_type_id')
        connectBonds = []
        for row in connectData.getRowList():
            # Only the first six characters of the type are compared.
            connType = row[typeCol][:6]
            if connType in ('covale', 'disulf', 'modres'):
                key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                if key1 in atomTable and key2 in atomTable:
                    connectBonds.append((atomTable[key1], atomTable[key2]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (
                        bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
def copyToLocalCoords(self, coords):
    """Copy a flat coordinate sequence into self.localCoords.

    coords is read three consecutive entries at a time; entry i of
    self.localCoords becomes the Vec3 built from coords[3*i : 3*i + 3] for
    each of the self.natoms atoms.
    """
    for atom_index in range(self.natoms):
        offset = 3 * atom_index
        x = coords[offset]
        y = coords[offset + 1]
        z = coords[offset + 2]
        self.localCoords[atom_index] = Vec3(x, y, z)
def oesolvate(solute, density=1.0, padding_distance=10.0,
              distance_between_atoms=2.5,
              solvents='tip3p',
              molar_fractions='1.0',
              geometry='box',
              close_solvent=True,
              salt='[Na+], [Cl-]', salt_concentration=0.0,
              neutralize_solute=True, verbose=False, return_components=False, **kargs):
    """
    Solvate the passed solute in a cubic box or a sphere by using Packmol.

    Packmol creates an initial point for molecular dynamics simulations by
    packing molecules in defined regions of space. For additional info:
    http://www.ime.unicamp.br/~martinez/packmol/home.shtml

    The geometry volume is estimated from the padding parameter and the
    solute size. The number of solvent molecules is calculated from the
    specified density and that volume. Solvent molecules are specified as
    comma separated smiles strings, and their molar fractions as a comma
    separated string in the same order. If the solute is charged and
    neutralize_solute is True, counter ions are added to neutralize it.

    Temporary .pdb/.inp files are written using relative file names and are
    removed (best effort) once the Packmol output has been read.

    Parameters:
    -----------
    solute: OEMol molecule
        The solute to solvate
    density: float
        The solution density in g/ml
    padding_distance: float
        The largest dimension of the solute (along the x, y, or z axis) is
        determined (in A), and a cubic box of size
        (largest dimension)+2*padding is used
    distance_between_atoms: float
        The minimum distance between atoms in A (Packmol tolerance)
    solvents: python string
        A comma separated smiles string or keywords for the solvent molecules.
        Special water models can be selected by using the keywords:
        tip3p for TIP3P water model geometry
    molar_fractions: python string
        A comma separated molar fraction string of the solvent molecules
    geometry: python string
        Either 'box' or 'sphere'
    close_solvent: boolean
        If True solvent molecules will be placed very close to the solute
        (Packmol is run with 'avoid_overlap no')
    salt: python string
        A comma separated string of the dissociated salt in solution
    salt_concentration: float
        Salt concentration in millimolar
    neutralize_solute: boolean
        If True counter-ions will be added to the solution to neutralize
        the solute
    verbose: Bool
        If True the Packmol output is not silenced
    return_components: Bool
        If True the added solvent molecules are also returned as OEMol
    **kargs:
        Accepted and ignored

    Return:
    -------
    oe_mol: OEMol
        The solvated system. If the selected geometry is a box, a SD tag with
        name 'box_vectors' is attached to the output molecule containing the
        system box vectors.
    oe_mol_components: OEMol
        If the return_components flag is True the added solvent molecules are
        returned as an additional OEMol

    Raises:
    -------
    IOError
        If the packmol executable is not found or the tip3p file cannot be read
    ValueError
        On inconsistent solvent/fraction input, unsupported geometry, failed
        SMILES parsing or conformer generation, a non-positive number of
        solvent molecules, a multi-conformer solute, or a final density that
        differs from the requested one by 0.1 g/ml or more
    """

    def BoundingBox(molecule):
        """
        Calculate the bounding box of the passed molecule.

        molecule: OEMol

        return: bb (numpy array)
            the calculated bounding box as
            [(xmin,ymin,zmin), (xmax,ymax,zmax)]
        """
        np_coords = np.array(list(molecule.GetCoords().values()))
        return np.array([np_coords.min(axis=0), np_coords.max(axis=0)])

    def write_pdb(file_name, molecule):
        """Write molecule to file_name and close the stream so the data is on disk."""
        ofs = oechem.oemolostream(file_name)
        oechem.OEWriteConstMolecule(ofs, molecule)
        ofs.close()

    if shutil.which("packmol") is None:
        raise IOError("Packmol executable not found")

    # Extract solvent smiles strings and mole fractions
    solvents = [sm.strip() for sm in solvents.split(',')]
    fractions = [float(mf) for mf in molar_fractions.split(',')]

    # If the smiles string and mole fractions lists have different lengths raise an error
    if len(solvents) != len(fractions):
        raise ValueError(
            "Selected solvent number and selected molar fraction number mismatch: {} vs {}"
            .format(len(solvents), len(fractions)))

    # Remove smiles string with 0.0 mole fraction
    solvent_smiles = [sm for sm, mf in zip(solvents, fractions) if mf]
    mol_fractions = [mf for mf in fractions if mf]

    # Mole fractions are non-negative numbers
    if any(mf < 0.0 for mf in mol_fractions):
        raise ValueError("Error: Mole fractions are non-negative real numbers")

    # Mole fractions must sum up to 1.0
    # NOTE(review): unlike the sibling checks this reports through OEThrow
    # rather than raising ValueError - confirm that it stops execution.
    if abs(sum(mol_fractions) - 1.0) > 0.001:
        oechem.OEThrow.Error("Error: Mole fractions do not sum up to 1.0")

    if geometry not in ['box', 'sphere']:
        raise ValueError(
            "Error geometry: the supported geometries are box and sphere not {}"
            .format(geometry))

    # Set Units
    density = density * unit.grams / unit.milliliter
    padding_distance = padding_distance * unit.angstrom
    salt_concentration = salt_concentration * unit.millimolar

    # Calculate the Solute Bounding Box
    BB_solute = BoundingBox(solute)

    # Estimate of the box cube length
    box_edge = 2.0 * padding_distance + np.max(BB_solute[1] - BB_solute[0]) * unit.angstrom

    if geometry == 'box':
        # Box Volume
        Volume = box_edge**3
    if geometry == 'sphere':
        Volume = (4.0 / 3.0) * 3.14159265 * (0.5 * box_edge)**3

    # Omega engine is used to generate conformations
    omegaOpts = oeomega.OEOmegaOptions()
    omegaOpts.SetMaxConfs(1)
    omegaOpts.SetStrictStereo(False)
    omega = oeomega.OEOmega(omegaOpts)

    # Create a string code to identify the solute residues. The code ID used is based
    # on the residue number id, the residue name and the chain id:
    # id+resname+chainID
    hv_solute = oechem.OEHierView(
        solute,
        oechem.OEAssumption_BondedResidue + oechem.OEAssumption_ResPerceived)
    solute_resid_list = []
    for chain in hv_solute.GetChains():
        for frag in chain.GetFragments():
            for hres in frag.GetResidues():
                oe_res = hres.GetOEResidue()
                solute_resid_list.append(
                    str(oe_res.GetResidueNumber()) + oe_res.GetName() + chain.GetChainID())

    # Maps a residue name to the template molecule of that solvent/salt/ion component
    solvent_resid_dic_names = dict()

    # Neutralize solute
    ion_sum_wgt_n_ions = 0.0 * unit.grams / unit.mole
    if neutralize_solute:
        # Container for the counter-ions
        oe_ions = []
        # Container for the ion smiles strings
        ions_smiles = []
        solute_formal_charge = 0
        for at in solute.GetAtoms():
            solute_formal_charge += at.GetFormalCharge()
        if solute_formal_charge > 0:
            ions_smiles.append("[Cl-]")
        elif solute_formal_charge < 0:
            ions_smiles.append("[Na+]")

        # Total number of counter-ions to neutralize the solute
        n_ions = abs(solute_formal_charge)

        # Ions
        if n_ions >= 1:
            for sm in ions_smiles:
                mol = oechem.OEMol()
                if not oechem.OESmilesToMol(mol, sm):
                    raise ValueError(
                        "Error counter ions: SMILES string parsing fails for the string: {}"
                        .format(sm))

                # Generate conformer
                if not omega(mol):
                    raise ValueError(
                        "Error counter ions: Conformer generation fails for the molecule with "
                        "smiles string: {}".format(sm))

                oe_ions.append(mol)

                if sm == '[Na+]':
                    solvent_resid_dic_names[' NA'] = mol
                else:
                    solvent_resid_dic_names[' CL'] = mol

            ion_sum_wgt = 0.0 * unit.grams / unit.mole
            for ion in oe_ions:
                # Molecular weight
                ion_sum_wgt += oechem.OECalculateMolecularWeight(ion) * unit.grams / unit.mole

            ion_sum_wgt_n_ions = ion_sum_wgt * n_ions

            # Create ions .pdb files
            ions_smiles_pdbs = []
            for i in range(0, len(ions_smiles)):
                pdb_name = os.path.basename(tempfile.mktemp(suffix='.pdb'))
                pdb_name = ions_smiles[i] + '_' + pdb_name
                ions_smiles_pdbs.append(pdb_name)

            for i in range(0, len(ions_smiles)):
                write_pdb(ions_smiles_pdbs[i], oe_ions[i])

    # Add salts to the solution

    # Letters used to build random 3-letter residue names
    char_set = string.ascii_uppercase
    salt_sum_wgt_n_salt = 0.0 * unit.grams / unit.mole
    if salt_concentration > 0.0 * unit.millimolar:

        salt_smiles = [sm.strip() for sm in salt.split(',')]

        # Container list of oemol salt molecules generated by using smiles strings
        oe_salt = []

        for sm in salt_smiles:
            mol_salt = oechem.OEMol()
            if not oechem.OESmilesToMol(mol_salt, sm):
                raise ValueError(
                    "Error salt: SMILES string parsing fails for the string: {}"
                    .format(sm))

            # Generate conformer
            if not omega(mol_salt):
                raise ValueError(
                    "Error salt: Conformer generation fails for the "
                    "molecule with smiles string: {}".format(sm))

            # Unique 3 code letter are set as solvent residue names
            solv_id = ''.join(random.sample(char_set * 3, 3))

            # Try to recognize the residue name
            oechem.OEPerceiveResidues(mol_salt)

            for atmol in mol_salt.GetAtoms():
                res = oechem.OEAtomGetResidue(atmol)
                if res.GetName() == 'UNL':
                    res.SetName(solv_id)
                    oechem.OEAtomSetResidue(atmol, res)
                    if solv_id not in solvent_resid_dic_names:
                        solvent_resid_dic_names[solv_id] = mol_salt
                else:
                    if res.GetName() not in solvent_resid_dic_names:
                        solvent_resid_dic_names[res.GetName()] = mol_salt
                    break

            oe_salt.append(mol_salt)

        n_salt = int(
            round(unit.AVOGADRO_CONSTANT_NA * salt_concentration *
                  Volume.in_units_of(unit.liter)))

        salt_sum_wgt = 0.0 * unit.grams / unit.mole
        # The loop variable is deliberately not called 'salt' so the
        # 'salt' argument is not overwritten.
        for salt_mol in oe_salt:
            # Molecular weight
            salt_sum_wgt += oechem.OECalculateMolecularWeight(salt_mol) * unit.grams / unit.mole

        salt_sum_wgt_n_salt = salt_sum_wgt * n_salt

        # Create salt .pdb files
        if n_salt >= 1:
            salt_pdbs = []
            for i in range(0, len(salt_smiles)):
                pdb_name = os.path.basename(tempfile.mktemp(suffix='.pdb'))
                salt_pdbs.append(pdb_name)

            for i in range(0, len(salt_smiles)):
                write_pdb(salt_pdbs[i], oe_salt[i])

    # Container list of oemol solvent molecules generated by using smiles strings
    oe_solvents = []

    for sm in solvent_smiles:

        if sm == 'tip3p':
            tip3p_fn = os.path.join(PACKAGE_DIR, 'oeommtools', 'data', 'tip3p.pdb')
            ifs = oechem.oemolistream(tip3p_fn)
            mol_sol = oechem.OEMol()

            if not oechem.OEReadMolecule(ifs, mol_sol):
                raise IOError(
                    "It was not possible to read the tip3p molecule file")
            ifs.close()
        else:
            mol_sol = oechem.OEMol()

            if not oechem.OESmilesToMol(mol_sol, sm):
                raise ValueError(
                    "Error solvent: SMILES string parsing fails for the string: {}"
                    .format(sm))

            # Generate conformer
            if not omega(mol_sol):
                raise ValueError(
                    "Error solvent: Conformer generation fails for "
                    "the molecule with smiles string: {}".format(sm))

        # Unique 3 code letter are set as solvent residue names
        solv_id = ''.join(random.sample(char_set * 3, 3))

        # Try to recognize the residue name
        oechem.OEPerceiveResidues(mol_sol)

        for atmol in mol_sol.GetAtoms():
            res = oechem.OEAtomGetResidue(atmol)
            if res.GetName() == 'UNL':
                res.SetName(solv_id)
                oechem.OEAtomSetResidue(atmol, res)
                if solv_id not in solvent_resid_dic_names:
                    solvent_resid_dic_names[solv_id] = mol_sol
            else:
                if res.GetName() not in solvent_resid_dic_names:
                    solvent_resid_dic_names[res.GetName()] = mol_sol
                break

        oe_solvents.append(mol_sol)

    # Sum of the solvent molecular weights
    solvent_sum_wgt_frac = 0.0 * unit.grams / unit.mole

    for idx in range(0, len(oe_solvents)):
        # Molecular weight
        wgt = oechem.OECalculateMolecularWeight(oe_solvents[idx]) * unit.grams / unit.mole
        solvent_sum_wgt_frac += wgt * mol_fractions[idx]

    # Solute molecular weight
    solute_wgt = oechem.OECalculateMolecularWeight(solute) * unit.gram / unit.mole

    # Estimate of the number of each molecular species present in the solution accordingly
    # to their molar fraction fi:
    #
    # ni = fi*(density*volume*NA - wgt_solute - sum_k(wgt_salt_k*nk) - wgt_ion*n_ion)/sum_j(wgt_nj * fj)
    #
    # where ni is the number of molecule of specie i, density the mixture density, volume the
    # mixture volume, wgt_solute the molecular weight of the solute, wgt_salt_k the molecular
    # weight of the salt component k, nk the number of molecule of salt component k, wgt_ion
    # the counter ion molecular weight, n_ions the number of counter ions and wgt_nj the molecular
    # weight of the molecule specie j with molar fraction fj

    div = (unit.AVOGADRO_CONSTANT_NA * density * Volume -
           (solute_wgt + salt_sum_wgt_n_salt + ion_sum_wgt_n_ions)) / solvent_sum_wgt_frac

    # Solvent number of monomers
    n_monomers = [int(round(mf * div)) for mf in mol_fractions]

    if not all(nm > 0 for nm in n_monomers):
        raise ValueError(
            "Error negative number of solvent components: the density could be too low"
        )

    # Packmol Configuration file setting
    if close_solvent:
        header_template = """\n# Mixture\ntolerance {}\nfiletype pdb\noutput {}\nadd_amber_ter\navoid_overlap no"""
    else:
        header_template = """\n# Mixture\ntolerance {}\nfiletype pdb\noutput {}\nadd_amber_ter\navoid_overlap yes"""

    # Templates strings
    solute_template = """\n\n# Solute\nstructure {}\nnumber 1\nfixed 0. 0. 0. 0. 0. 0.\nresnumbers 1\nend structure"""

    # The 'inside ...' line keeps the trailing blank that the original
    # line-continued template left before the line break.
    if geometry == 'box':
        solvent_template = ("\nstructure {}\nnumber {}\ninside box {:0.3f} {:0.3f} {:0.3f} {:0.3f} {:0.3f} {:0.3f} "
                            "\nchain !\nresnumbers 3\nend structure")
    if geometry == 'sphere':
        solvent_template = ("\nstructure {}\nnumber {}\ninside sphere {:0.3f} {:0.3f} {:0.3f} {:0.3f} "
                            "\nchain !\nresnumbers 3\nend structure")

    # Create solvents .pdb files
    solvent_pdbs = []
    for i in range(0, len(solvent_smiles)):
        pdb_name = os.path.basename(tempfile.mktemp(suffix='.pdb'))
        solvent_pdbs.append(pdb_name)

    for i in range(0, len(solvent_smiles)):
        write_pdb(solvent_pdbs[i], oe_solvents[i])

    solute_pdb = 'solute' + '_' + os.path.basename(tempfile.mktemp(suffix='.pdb'))

    # Validate before opening the output stream so a rejected solute does
    # not leave an empty solute file behind.
    if solute.GetMaxConfIdx() > 1:
        raise ValueError("Solutes with multiple conformers are not supported")
    write_pdb(solute_pdb, solute)

    # Write Packmol header section
    mixture_pdb = 'mixture' + '_' + os.path.basename(tempfile.mktemp(suffix='.pdb'))
    body = header_template.format(distance_between_atoms, mixture_pdb)
    # Write Packmol configuration file solute section
    body += solute_template.format(solute_pdb)

    # The solute is centered inside the box
    xc = (BB_solute[0][0] + BB_solute[1][0]) / 2.
    yc = (BB_solute[0][1] + BB_solute[1][1]) / 2.
    zc = (BB_solute[0][2] + BB_solute[1][2]) / 2.

    # Correct for periodic box conditions to avoid
    # steric clashes at the box edges
    pbc_correction = 1.0 * unit.angstrom
    half_edge = ((box_edge - pbc_correction) / 2.) / unit.angstrom

    xmin = xc - half_edge
    xmax = xc + half_edge
    ymin = yc - half_edge
    ymax = yc + half_edge
    zmin = zc - half_edge
    zmax = zc + half_edge

    # Sphere radius (unitless, in A)
    radius = 0.5 * box_edge / unit.angstrom

    # Packmol setting for the solvent section
    body += '\n\n# Solvent'
    for i in range(0, len(solvent_smiles)):
        if geometry == 'box':
            body += solvent_template.format(solvent_pdbs[i], n_monomers[i],
                                            xmin, ymin, zmin, xmax, ymax, zmax)
        if geometry == 'sphere':
            body += solvent_template.format(solvent_pdbs[i], n_monomers[i],
                                            xc, yc, zc, radius)

    # Packmol setting for the salt section
    if salt_concentration > 0.0 * unit.millimolar and n_salt >= 1:
        body += '\n\n# Salt'
        for i in range(0, len(salt_smiles)):
            if geometry == 'box':
                body += solvent_template.format(salt_pdbs[i], int(round(n_salt)),
                                                xmin, ymin, zmin, xmax, ymax, zmax)
            if geometry == 'sphere':
                body += solvent_template.format(salt_pdbs[i], int(round(n_salt)),
                                                xc, yc, zc, radius)

    # Packmol setting for the ions section
    if neutralize_solute and n_ions >= 1:
        body += '\n\n# Counter Ions'
        for i in range(0, len(ions_smiles)):
            if geometry == 'box':
                body += solvent_template.format(ions_smiles_pdbs[i], n_ions,
                                                xmin, ymin, zmin, xmax, ymax, zmax)
            if geometry == 'sphere':
                body += solvent_template.format(ions_smiles_pdbs[i], n_ions,
                                                xc, yc, zc, radius)

    # Packmol configuration file
    packmol_filename = os.path.basename(tempfile.mktemp(suffix='.inp'))

    with open(packmol_filename, 'w') as file_handle:
        file_handle.write(body)

    # Call Packmol; its output is discarded unless verbose is set
    with open(packmol_filename, 'r') as file_handle:
        if verbose:
            subprocess.check_call(['packmol'], stdin=file_handle)
        else:
            with open(os.devnull, 'w') as mute_output:
                subprocess.check_call(['packmol'], stdin=file_handle,
                                      stdout=mute_output, stderr=mute_output)

    # Read in the Packmol solvated system
    solvated = oechem.OEMol()

    if os.path.exists(mixture_pdb + '_FORCED'):
        os.rename(mixture_pdb + '_FORCED', mixture_pdb)
        print("Warning: Packing solution is not optimal")

    ifs = oechem.oemolistream(mixture_pdb)
    oechem.OEReadMolecule(ifs, solvated)
    ifs.close()

    # To avoid to change the user oemol starting solute by reading in
    # the generated mixture pdb file and loosing molecule info, the
    # solvent molecules are extracted from the mixture system and
    # added back to the starting solute

    # Extract from the solution system the solvent molecules
    # by checking the previous solute generated ID: id+resname+chainID
    hv_solvated = oechem.OEHierView(
        solvated,
        oechem.OEAssumption_BondedResidue + oechem.OEAssumption_ResPerceived)

    # This molecule will hold the solvent molecules generated directly from
    # the omega conformers. This is useful to avoid problems related to read in
    # the solvent molecules from pdb files and triggering unwanted perceiving actions
    new_components = oechem.OEMol()

    bv = oechem.OEBitVector(solvated.GetMaxAtomIdx())
    for chain in hv_solvated.GetChains():
        for frag in chain.GetFragments():
            for hres in frag.GetResidues():
                oe_res = hres.GetOEResidue()
                res_code = str(oe_res.GetResidueNumber()) + oe_res.GetName() + chain.GetChainID()
                if res_code not in solute_resid_list:
                    oechem.OEAddMols(new_components,
                                     solvent_resid_dic_names[oe_res.GetName()])
                    for at in hres.GetAtoms():
                        bv.SetBitOn(at.GetIdx())

    pred = oechem.OEAtomIdxSelected(bv)
    components = oechem.OEMol()
    oechem.OESubsetMol(components, solvated, pred)

    new_components.SetCoords(components.GetCoords())

    # This is necessary otherwise just one big residue is created
    oechem.OEPerceiveResidues(new_components)

    # Add the solvent molecules to the solute copy
    solvated_system = solute.CreateCopy()
    oechem.OEAddMols(solvated_system, new_components)

    # Set Title
    solvated_system.SetTitle(solute.GetTitle())

    # Set ions resname to Na+ and Cl-. The renamed residue has to be written
    # back to the atom being visited ('at'), not to a leftover loop variable.
    for at in solvated_system.GetAtoms():
        res = oechem.OEAtomGetResidue(at)
        if res.GetName() == ' NA':
            res.SetName("Na+")
            oechem.OEAtomSetResidue(at, res)
        elif res.GetName() == ' CL':
            res.SetName("Cl-")
            oechem.OEAtomSetResidue(at, res)

    # Cleaning
    to_delete = solvent_pdbs + [packmol_filename, solute_pdb, mixture_pdb]

    if salt_concentration > 0.0 * unit.millimolar and n_salt >= 1:
        to_delete += salt_pdbs
    if neutralize_solute and n_ions >= 1:
        to_delete += ions_smiles_pdbs

    for fn in to_delete:
        try:
            os.remove(fn)
        except OSError:
            # Best effort: a temporary file that cannot be removed is not fatal
            pass

    # Calculate the solution total density
    total_wgt = oechem.OECalculateMolecularWeight(solvated_system) * unit.gram / unit.mole
    density_mix = (1 / unit.AVOGADRO_CONSTANT_NA) * total_wgt / Volume
    print("Computed Solution Density = {}".format(
        density_mix.in_units_of(unit.gram / unit.milliliter)))

    # Threshold checking
    ths = 0.1 * unit.gram / unit.milliliter
    if not abs(density - density_mix.in_units_of(unit.gram / unit.milliliter)) < ths:
        raise ValueError(
            "Error: the computed density for the solute {} does not match the selected density {} vs {}"
            .format(solute.GetTitle(), density_mix, density))

    if geometry == 'box':
        # Define the box vector and attached it as SD tag to the solvated system
        # with ID tag: 'box_vectors'
        box_vectors = (Vec3(box_edge / unit.angstrom, 0.0, 0.0),
                       Vec3(0.0, box_edge / unit.angstrom, 0.0),
                       Vec3(0.0, 0.0, box_edge / unit.angstrom)) * unit.angstrom

        box_vectors = data_utils.encodePyObj(box_vectors)
        solvated_system.SetData(oechem.OEGetTag('box_vectors'), box_vectors)

    if return_components:
        new_components.SetTitle(solute.GetTitle() + '_solvent_comp')
        return solvated_system, new_components
    else:
        return solvated_system
def randomsurfpos(self, zval):
    """Return a random point at height ``zval`` as a Vec3 times ``nanometer``.

    The x and y components are each a fresh ``np.random.rand()`` draw scaled
    by ``self.boxsize[0]`` and ``self.boxsize[1]`` respectively; ``zval`` is
    used unchanged as the z component.
    """
    # x is drawn before y so the random stream is consumed in the same order.
    x = np.random.rand() * self.boxsize[0]
    y = np.random.rand() * self.boxsize[1]
    return Vec3(x, y, zval) * nanometer
def testVec3Attributes(self):
    """ Checks that a Vec3 exposes its components as x, y and z """
    vec = Vec3(1, 2, 3)
    for attribute, expected in (('x', 1), ('y', 2), ('z', 3)):
        self.assertEqual(getattr(vec, attribute), expected)
def __init__(self, file):
    """Load a PDB file.

    The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

    Parameters:
     - file (string) the name of the file to load.  A PdbStructure is used
       as is, and any other object is handed to PdbStructure as the input
       stream.  A file opened here from a name is closed again once it has
       been parsed; a stream supplied by the caller is left open.
    """
    top = Topology()
    ## The Topology read from the PDB file
    self.topology = top

    # Load the PDB file

    if isinstance(file, PdbStructure):
        pdb = file
    else:
        inputfile = file
        own_handle = False
        if isinstance(file, str):
            inputfile = open(file)
            own_handle = True
        try:
            pdb = PdbStructure(inputfile, load_all_models=True)
        finally:
            # Only close what was opened here, even if parsing fails.
            if own_handle:
                inputfile.close()
    PDBFile._loadNameReplacementTables()

    # Build the topology

    atomByNumber = {}
    for chain in pdb.iter_chains():
        c = top.addChain()
        for residue in chain.iter_residues():
            resName = residue.get_name()
            if resName in PDBFile._residueNameReplacements:
                resName = PDBFile._residueNameReplacements[resName]
            r = top.addResidue(resName, c)
            if resName in PDBFile._atomNameReplacements:
                atomReplacements = PDBFile._atomNameReplacements[resName]
            else:
                atomReplacements = {}
            for atom in residue.atoms:
                atomName = atom.get_name()
                if atomName in atomReplacements:
                    atomName = atomReplacements[atomName]
                atomName = atomName.strip()
                element = atom.element
                if element is None:
                    # Try to guess the element.

                    upper = atomName.upper()
                    if upper.startswith('CL'):
                        element = elem.chlorine
                    elif upper.startswith('NA'):
                        element = elem.sodium
                    elif upper.startswith('MG'):
                        element = elem.magnesium
                    elif upper.startswith('BE'):
                        element = elem.beryllium
                    elif upper.startswith('LI'):
                        element = elem.lithium
                    elif upper.startswith('K'):
                        element = elem.potassium
                    elif (len(residue) == 1 and upper.startswith('CA')):
                        # 'CA' is only read as calcium when len(residue) is 1
                        element = elem.calcium
                    else:
                        try:
                            element = elem.get_by_symbol(atomName[0])
                        except KeyError:
                            pass
                newAtom = top.addAtom(atomName, element, r)
                atomByNumber[atom.serial_number] = newAtom
    self._positions = []
    for model in pdb.iter_models(True):
        coords = []
        for chain in model.iter_chains():
            for residue in chain.iter_residues():
                for atom in residue.atoms:
                    pos = atom.get_position().value_in_unit(nanometers)
                    coords.append(Vec3(pos[0], pos[1], pos[2]))
        self._positions.append(coords * nanometers)
    ## The atom positions read from the PDB file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.setUnitCellDimensions(pdb.get_unit_cell_dimensions())
    self.topology.createStandardBonds()
    self.topology.createDisulfideBonds(self.positions)
    self._numpyPositions = None

    # Add bonds based on CONECT records.

    connectBonds = []
    for connect in pdb.models[0].connects:
        i = connect[0]
        for j in connect[1:]:
            if i in atomByNumber and j in atomByNumber:
                connectBonds.append((atomByNumber[i], atomByNumber[j]))
    if len(connectBonds) > 0:
        # Only add bonds that don't already exist.
        existingBonds = set(top.bonds())
        for bond in connectBonds:
            if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                top.addBond(bond[0], bond[1])
                existingBonds.add(bond)
def prep_system(box_width):
    """Build a TIP3P water box and dump it to ``debug.pdb``.

    An empty Modeller is filled with 'tip3p' solvent in a cubic box whose
    edge is ``box_width`` multiplied by ``unit.nanometers``, and a System is
    created from it with NoCutoff, no constraints and flexible water.

    Parameters:
     - box_width (number) the box edge length, used as a bare number

    Returns a tuple ``(system, positions, box, fname)`` where ``positions``
    is a Quantity wrapping a numpy array, ``box`` is ``np.eye(3) * box_width``
    and ``fname`` is the name of the PDB file that was written.
    """
    ff = app.ForceField('tip3p.xml')

    # Create empty topology and coordinates.
    top = app.Topology()
    pos = unit.Quantity((), unit.angstroms)

    # Create new Modeller instance.
    m = app.Modeller(top, pos)

    boxSize = Vec3(box_width, box_width, box_width) * unit.nanometers
    m.addSolvent(ff, boxSize=boxSize, model='tip3p')

    system = ff.createSystem(m.getTopology(),
                             nonbondedMethod=app.NoCutoff,
                             constraints=None,
                             rigidWater=False)

    # Convert positions to numpy.
    positions = m.getPositions()
    positions = unit.Quantity(np.array(positions / positions.unit), positions.unit)

    # Close the file before handing its name back, so that the caller
    # never reads a partially flushed PDB.
    fname = "debug.pdb"
    with open(fname, "w") as fhandle:
        PDBFile.writeHeader(m.getTopology(), fhandle)
        PDBFile.writeModel(m.getTopology(), positions, fhandle, 0)
        PDBFile.writeFooter(m.getTopology(), fhandle)

    return system, positions, np.eye(3) * box_width, fname
def writeModel(self, positions, unitCellDimensions=None, periodicBoxVectors=None):
    """Write out a model to the DCD file.

    The periodic box can be specified either by the unit cell dimensions
    (for a rectangular box), or the full set of box vectors (for an
    arbitrary triclinic box).  If neither is specified, the box vectors
    specified in the Topology will be used. Regardless of the value
    specified, no dimensions will be written if the Topology does not
    represent a periodic system.

    If the step number written to the header would no longer fit in a
    signed 32 bit integer and the interval is larger than 1, the header is
    rewritten to describe the same trajectory as a smaller number of larger
    steps.

    Parameters
    ----------
    positions : list
        The list of atomic positions to write
    unitCellDimensions : Vec3=None
        The dimensions of the crystallographic unit cell.
    periodicBoxVectors : tuple of Vec3=None
        The vectors defining the periodic box.

    Raises
    ------
    ValueError
        If any position is NaN or infinite
    """
    if is_quantity(positions):
        positions = positions.value_in_unit(nanometers)
    if any(math.isnan(norm(pos)) for pos in positions):
        raise ValueError('Particle position is NaN')
    if any(math.isinf(norm(pos)) for pos in positions):
        raise ValueError('Particle position is infinite')
    file = self._file

    self._modelCount += 1
    # The largest value struct's 'i' format accepts is (1<<31)-1, so a step
    # number of exactly 1<<31 must already trigger the rescaling.
    if self._interval > 1 and self._firstStep+self._modelCount*self._interval >= 1<<31:
        # This will exceed the range of a 32 bit integer.  To avoid crashing or producing a corrupt file,
        # update the header to say the trajectory consisted of a smaller number of larger steps (so the
        # total trajectory length remains correct).
        self._firstStep //= self._interval
        self._dt *= self._interval
        self._interval = 1
        file.seek(0, os.SEEK_SET)
        file.write(struct.pack('<i4c9if', 84, b'C', b'O', b'R', b'D', 0, self._firstStep, self._interval, 0, 0, 0, 0, 0, 0, self._dt))

    # Update the header.

    file.seek(8, os.SEEK_SET)
    file.write(struct.pack('<i', self._modelCount))
    file.seek(20, os.SEEK_SET)
    file.write(struct.pack('<i', self._firstStep+self._modelCount*self._interval))

    # Write the data.

    file.seek(0, os.SEEK_END)
    boxVectors = self._topology.getPeriodicBoxVectors()
    if boxVectors is not None:
        # Explicit box vectors win over unit cell dimensions, which win
        # over the vectors stored in the Topology.
        if periodicBoxVectors is not None:
            boxVectors = periodicBoxVectors
        elif unitCellDimensions is not None:
            if is_quantity(unitCellDimensions):
                unitCellDimensions = unitCellDimensions.value_in_unit(nanometers)
            boxVectors = (Vec3(unitCellDimensions[0], 0, 0), Vec3(0, unitCellDimensions[1], 0), Vec3(0, 0, unitCellDimensions[2]))*nanometers
        (a_length, b_length, c_length, alpha, beta, gamma) = computeLengthsAndAngles(boxVectors)
        a_length = a_length * 10.  # computeLengthsAndAngles returns unitless nanometers, but need angstroms here.
        b_length = b_length * 10.  # computeLengthsAndAngles returns unitless nanometers, but need angstroms here.
        c_length = c_length * 10.  # computeLengthsAndAngles returns unitless nanometers, but need angstroms here.
        angle1 = math.sin(math.pi/2-gamma)
        angle2 = math.sin(math.pi/2-beta)
        angle3 = math.sin(math.pi/2-alpha)
        file.write(struct.pack('<i6di', 48, a_length, angle1, b_length, angle2, angle3, c_length, 48))
    # Each coordinate axis is written as its own record of 32 bit floats,
    # framed before and after by the record size in bytes.
    length = struct.pack('<i', 4*len(positions))
    for i in range(3):
        file.write(length)
        data = array.array('f', (10*x[i] for x in positions))
        data.tofile(file)
        file.write(length)
    try:
        file.flush()
    except AttributeError:
        # The output object is not required to provide flush()
        pass
def run(opts):
    """Run one simulation segment and write restart data.

    The System and Integrator are deserialized from the XML files named in
    ``opts``, starting coordinates (and velocities, when the 'veloc' entry
    is present) are loaded from the ``opts.coords`` file, ``opts.nsteps``
    steps are run, and the final coordinates and velocities are saved to
    ``<opts.output>_restart.npz``.

    Parameters:
     - opts (object) options holder providing the attributes system,
       integrator, coords, platform, device, write_freq, output and nsteps
    """
    system_xml_file = opts.system
    integrator_xml_file = opts.integrator
    coords_f = opts.coords
    platform_name = opts.platform
    deviceid = opts.device
    write_freq = opts.write_freq
    output = opts.output
    nsteps = opts.nsteps

    platform_properties = {
        'OpenCLPrecision': 'mixed',
        'OpenCLPlatformIndex': '0',
        'OpenCLDeviceIndex': '0',
        'CudaPrecision': 'mixed',
        'CudaDeviceIndex': '0',
        'CpuThreads': '1'
    }

    platform_properties['CudaDeviceIndex'] = deviceid
    platform_properties['OpenCLDeviceIndex'] = deviceid

    with open(system_xml_file, 'r') as f:
        system = openmm.XmlSerializer.deserialize(f.read())

    with open(integrator_xml_file, 'r') as f:
        integrator = openmm.XmlSerializer.deserialize(f.read())
    integrator.setRandomNumberSeed(random.randint(0, 2**16))

    platform = openmm.Platform.getPlatformByName(platform_name)

    # Keep only the properties whose name starts with the platform name
    properties = {
        key: platform_properties[key]
        for key in platform_properties
        if key.lower().startswith(platform_name.lower())
    }
    if platform_name == 'CPU':
        properties = {'CpuThreads': '1'}

    # Function-call form prints the same text on Python 2 and also parses on Python 3
    print(properties)

    # Create dummy topology to satisfy Simulation object
    topology = app.Topology()
    volume = wcadimer.natoms / wcadimer.density
    length = volume**(1.0 / 3.0)
    L = length.value_in_unit(units.nanometer)
    topology.setUnitCellDimensions(Vec3(L, L, L) * units.nanometer)
    simulation = app.Simulation(topology, system, integrator, platform, properties)

    init_data = np.load(coords_f)
    coords = units.Quantity(init_data['coord'], units.nanometer)
    simulation.context.setPositions(coords)

    if 'veloc' in init_data:
        velocs = units.Quantity(init_data['veloc'], units.nanometer / units.picosecond)
        simulation.context.setVelocities(velocs)
    else:
        simulation.context.setVelocitiesToTemperature(wcadimer.temperature)

    # Attach reporters
    simulation.reporters.append(
        HDF5Reporter(output + '.h5', write_freq, atomSubset=[0, 1]))
    simulation.reporters.append(
        app.StateDataReporter(stdout, 20 * write_freq,
                              step=True,
                              potentialEnergy=True,
                              temperature=True,
                              progress=True,
                              remainingTime=True,
                              speed=True,
                              totalSteps=nsteps,
                              separator='\t'))

    # Run segment
    simulation.step(nsteps)

    # Write restart data
    state = simulation.context.getState(getPositions=True, getVelocities=True)

    coords = state.getPositions(asNumpy=True)
    velocs = state.getVelocities(asNumpy=True)

    # coords is passed both positionally and as 'coord'; kept as is so the
    # content of the restart file does not change.
    np.savez_compressed(output + '_restart.npz', coords, coord=coords, veloc=velocs)
def rdmol_to_openmmTop(mol, confId=0):
    """
    This function converts an rdmol to an openmm topology
    The rdmol coordinates are assumed to be in Angstrom unit

    The molecule is first written to a PDB block and read back so that every
    atom carries PDB residue information. Atoms are then grouped into
    residues by consecutive runs of equal (chain id, residue number,
    residue name). One OpenMM chain is created per distinct chain id and is
    reused whenever that id shows up again later in the atom list.

    Parameters:
    -----------
    mol: rdmol molecule
        The molecule to convert
    confId: int
        The id of the conformer from which coordinates will be taken.
        NOTE(review): it is looked up on the PDB round-tripped molecule, not
        on the molecule that was passed in - confirm the ids still match.

    Return:
    -------
    topology : OpenMM Topology
        The generated OpenMM topology
    positions : OpenMM Quantity
        The molecule atom positions associated with the
        generated topology in Angstrom units

    Raises:
    -------
    ValueError
        If the number of atoms or bonds in the generated topology does not
        match the molecule
    """
    # hacky way to get all atom have PDB file relevant fields
    mol = Chem.MolFromPDBBlock(Chem.MolToPDBBlock(mol))
    topology = app.Topology()
    rdk_atom_to_openmm = {}
    # chain id -> OpenMM chain, so a chain id seen before maps back to its own chain
    chains_by_id = {}

    # NOTE: the key function fails for atoms without PDB residue info
    # (i.e. when the molecule was not read from PDB)
    atoms_grouped = groupby(mol.GetAtoms(),
                            lambda atm: (atm.GetPDBResidueInfo().GetChainId(),
                                         atm.GetPDBResidueInfo().GetResidueNumber(),
                                         atm.GetPDBResidueInfo().GetResidueName()))
    for key, residue in atoms_grouped:
        chainId, resNum, resName = key
        openmm_chain = chains_by_id.get(chainId)
        if openmm_chain is None:
            openmm_chain = topology.addChain(chainId)
            chains_by_id[chainId] = openmm_chain

        openmm_res = topology.addResidue(resName, openmm_chain)
        for atm in residue:
            element = app.element.Element.getByAtomicNumber(atm.GetAtomicNum())
            openmm_at = topology.addAtom(atm.GetPDBResidueInfo().GetName().strip(), element, openmm_res)
            openmm_at.index = atm.GetIdx()
            rdk_atom_to_openmm[atm.GetIdx()] = openmm_at

    if topology.getNumAtoms() != mol.GetNumAtoms():
        raise ValueError("OpenMM topology and RDMol number of atoms mismatching: "
                         "OpenMM = {} vs RDMol  = {}".format(topology.getNumAtoms(), mol.GetNumAtoms()))

    # Count the number of bonds in the openmm topology
    omm_bond_count = 0

    def IsAmideBond(rdk_bond):
        # This supporting function checks if the passed bond is an amide bond or not.
        # Our definition of amide bond C-N between a Carbon and a Nitrogen atom is:
        #          O
        #          ║
        #  CA or O-C-N-
        #            |

        # The amide bond C-N is a single bond
        if str(rdk_bond.GetBondType()) != "SINGLE":
            return False

        atomB, atomE = rdk_bond.GetBeginAtom(), rdk_bond.GetEndAtom()

        # The amide bond is made by Carbon and Nitrogen atoms
        if not (atomB.GetAtomicNum() == 6 and atomE.GetAtomicNum() == 7 or
                (atomB.GetAtomicNum() == 7 and atomE.GetAtomicNum() == 6)):
            return False

        # Select Carbon and Nitrogen atoms
        if atomB.GetAtomicNum() == 6:
            C_atom = atomB
            N_atom = atomE
        else:
            C_atom = atomE
            N_atom = atomB

        # Carbon and Nitrogen atoms must have 3 neighbour atoms
        if not (C_atom.GetDegree() == 3 and N_atom.GetDegree() == 3):
            return False

        double_bonds, single_bonds = 0, 0

        for bond in C_atom.GetBonds():
            # The C-O bond can be single or double.
            if (bond.GetBeginAtom().GetAtomicNum() == 6 and bond.GetEndAtom().GetAtomicNum() == 8) or \
                    (bond.GetBeginAtom().GetAtomicNum() == 8 and bond.GetEndAtom().GetAtomicNum() == 6):
                if str(bond.GetBondType()) == "DOUBLE":
                    double_bonds += 1
                if str(bond.GetBondType()) == "SINGLE":
                    single_bonds += 1
            # The CA-C bond is single
            if (bond.GetBeginAtom().GetAtomicNum() == 6 and bond.GetEndAtom().GetAtomicNum() == 6):
                if str(bond.GetBondType()) == "SINGLE":
                    single_bonds += 1
        # Just one double and one single bonds are connected to C
        # In this case the bond is an amide bond
        if double_bonds == 1 and single_bonds == 1:
            return True
        else:
            return False

    # Creating bonds
    for bond in mol.GetBonds():
        omm_bond_count += 1

        # Set the bond type
        bond_order = bond.GetBondTypeAsDouble()
        if IsAmideBond(bond):
            omm_bond_type = "Amide"
        elif bond_order == 1.0:
            omm_bond_type = "Single"
        elif bond_order == 2.0:
            omm_bond_type = "Double"
        elif bond_order == 3.0:
            omm_bond_type = "Triple"
        elif bond_order == 1.5:
            omm_bond_type = "Aromatic"
        else:
            omm_bond_type = None

        # NOTE: the bond order calculated is a double, supposedly OpenMM takes an int value
        topology.addBond(
            rdk_atom_to_openmm[bond.GetBeginAtom().GetIdx()],
            rdk_atom_to_openmm[bond.GetEndAtom().GetIdx()],
            type=omm_bond_type,
            order=bond_order)

    if omm_bond_count != mol.GetNumBonds():
        raise ValueError("OpenMM topology and RDMol number of bonds mismatching: "
                         "OpenMM = {} vs RDMol  = {}".format(omm_bond_count, mol.GetNumBonds()))

    coords = mol.GetConformer(confId).GetPositions()
    positions = [Vec3(v[0], v[1], v[2]) for v in coords] * unit.angstroms

    return topology, positions
def writeModel(self, positions, unitCellDimensions=None, periodicBoxVectors=None):
    """Append one frame (model) to the DCD file.

    The model counter is incremented and two header fields (at byte
    offsets 8 and 20) are rewritten in place; the frame itself is then
    appended at the end of the file.

    The periodic box may be supplied either as unit cell dimensions (a
    rectangular box) or as a full set of box vectors (an arbitrary
    triclinic box); the box vectors win when both are given.  When neither
    is given, the box vectors stored in the Topology are used.  Whatever
    is passed, no box record is written if the Topology does not represent
    a periodic system.

    Parameters
    ----------
    positions : list
        The list of atomic positions to write
    unitCellDimensions : Vec3=None
        The dimensions of the crystallographic unit cell.
    periodicBoxVectors : tuple of Vec3=None
        The vectors defining the periodic box.

    Raises
    ------
    ValueError
        If the number of positions differs from the number of atoms in the
        Topology, or if any position has a NaN or infinite norm.
    """
    atom_count = len(list(self._topology.atoms()))
    if atom_count != len(positions):
        raise ValueError(
            'The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(nanometers)

    # Two separate passes on purpose: a NaN anywhere in the list is reported
    # in preference to an infinity that happens to appear earlier.
    for pos in positions:
        if math.isnan(norm(pos)):
            raise ValueError('Particle position is NaN')
    for pos in positions:
        if math.isinf(norm(pos)):
            raise ValueError('Particle position is infinite')

    dcd = self._file

    # Patch the header: model count at offset 8, step value at offset 20.
    self._modelCount += 1
    dcd.seek(8, os.SEEK_SET)
    dcd.write(struct.pack('<i', self._modelCount))
    dcd.seek(20, os.SEEK_SET)
    step_value = self._firstStep + self._modelCount * self._interval
    dcd.write(struct.pack('<i', step_value))

    # The frame itself goes at the end of the file.
    dcd.seek(0, os.SEEK_END)
    box = self._topology.getPeriodicBoxVectors()
    if box is not None:
        # Only a periodic Topology gets a box record; explicit arguments
        # merely override which box is written.
        if periodicBoxVectors is not None:
            box = periodicBoxVectors
        elif unitCellDimensions is not None:
            dims = unitCellDimensions
            if is_quantity(dims):
                dims = dims.value_in_unit(nanometers)
            box = (
                Vec3(dims[0], 0, 0),
                Vec3(0, dims[1], 0),
                Vec3(0, 0, dims[2]),
            ) * nanometers

        a_nm, b_nm, c_nm, alpha, beta, gamma = computeLengthsAndAngles(box)

        # computeLengthsAndAngles returns unitless nanometers, but the file
        # needs angstroms.
        a_ang, b_ang, c_ang = (edge * 10. for edge in (a_nm, b_nm, c_nm))

        # sin(pi/2 - x) is cos(x); this exact form is kept so the stored
        # doubles do not change.
        cos_gamma, cos_beta, cos_alpha = (
            math.sin(math.pi / 2 - angle) for angle in (gamma, beta, alpha)
        )

        # 48 = six 8-byte doubles, written before and after the payload.
        dcd.write(
            struct.pack('<i6di', 48, a_ang, cos_gamma, b_ang, cos_beta,
                        cos_alpha, c_ang, 48))

    # One block per axis (x, y, z); each is framed front and back by its
    # byte length (4 bytes per value).
    block_marker = struct.pack('<i', 4 * len(positions))
    for axis in range(3):
        dcd.write(block_marker)
        # Same nanometer -> angstrom scaling as the box lengths.
        axis_values = array.array('f', (10 * pos[axis] for pos in positions))
        axis_values.tofile(dcd)
        dcd.write(block_marker)