def testReadingMassDifferenceInMolfiles(self):
    """Check the isotope assigned when a molfile atom line has a mass difference.

    Previously we were rounding incorrectly when reading the mass diff.
    """
    # Single-atom V2000 molfile.  The atom line is fixed-column: the symbol
    # (%2s) and the mass-difference field (%2d) must stay in these columns,
    # and every record must be on its own line.
    template = """
 OpenBabel02181811152D

  1  0  0  0  0  0  0  0  0  0999 V2000
    0.0000    0.0000    0.0000 %2s %2d  0  0  0  0  0  0  0  0  0  0  0
M  END
"""

    # Positive test cases:
    # These are the BIOVIA Draw answers for a mass diff of 1, one entry per
    # element starting at atomic number 1.
    answers = [2, 5, 8, 10, 12, 13, 15, 17, 20, 21, 24, 25, 28, 29, 32, 33,
               36, 41, 40, 41, 46, 49, 52, 53, 56, 57, 60, 60, 65, 66, 71,
               74, 76, 80, 81, 85, 86, 89, 90, 92, 94, 97, 99, 102, 104, 107,
               109, 113, 116, 120, 123]
    for elem, answer in enumerate(answers, 1):
        molfile = template % (ob.GetSymbol(elem), 1)
        mol = pybel.readstring("mol", molfile).OBMol
        iso = mol.GetAtom(1).GetIsotope()
        # the message identifies which element failed
        self.assertEqual(answer, iso, "element %d" % elem)

    # Also test D and T - BIOVIA Draw ignores the mass diff
    for elem, answer in zip("DT", [2, 3]):
        molfile = template % (elem, 1)
        mol = pybel.readstring("mol", molfile).OBMol
        iso = mol.GetAtom(1).GetIsotope()
        self.assertEqual(answer, iso, "symbol %s" % elem)

    # Negative test cases:
    # Out-of-range mass differences must leave the isotope unset (0)
    for value in [5, -4]:
        molfile = template % ("C", value)
        mol = pybel.readstring("mol", molfile).OBMol
        iso = mol.GetAtom(1).GetIsotope()
        self.assertEqual(0, iso, "mass diff %d" % value)
def testWhetherAllElementsAreSupported(self):
    """Check whether a new element has been correctly added.

    Walks the atomic numbers upward from 1 until ``ob.GetSymbol`` returns
    an empty value, verifying each element on the way, and finally
    requires that more than 100 elements were found.
    """
    highest = 0
    while True:
        candidate = highest + 1
        symbol = ob.GetSymbol(candidate)
        if not symbol:
            # first atomic number without a symbol: end of the table
            break
        highest = candidate

        # The symbol must map back to the same atomic number
        self.assertEqual(highest, ob.GetAtomicNum(symbol))

        # An exact mass must have been set
        self.assertNotEqual(0.0, ob.GetExactMass(highest))

        # The SMILES parser must accept the bracketed symbol as one atom
        smiles = "[%s]" % symbol
        parsed = pybel.readstring("smi", smiles).OBMol
        numatoms = parsed.NumAtoms()
        self.assertEqual(numatoms, 1)

        # The element must be available as a module-level constant
        constant_name = ob.GetName(highest)
        self.assertEqual(highest, getattr(ob, constant_name))

    self.assertTrue(highest > 100)
def read_txt_to_mol(txtfile):
    """Read a parameter .txt file into a pybel molecule plus per-atom parameters.

    The coordinates are copied to a temporary .xyz file so that pybel can
    read them and perceive the bonds, angles, dihedrals, etc.

    File layout, as consumed here:
      * line 1: ignored by this function;
      * line 2: number of molecules, which must be 1;
      * line 3: first field is the number of atoms;
      * one line per atom: field 1 is the atomic number, fields 2-4 are
        x, y, z, and the remaining three fields are stored in ``q``,
        ``eps`` and ``sig`` respectively.

    Parameters:
        txtfile: path of the .txt file.

    Returns:
        Tuple ``(mol, q, eps, sig)``: the pybel molecule and three lists of
        floats (charges and LJ parameters), one entry per atom.

    The process exits through ``sys.exit`` when the file does not declare
    exactly one molecule.
    """
    # lists to store the charges and LJ parameters
    q = []
    eps = []
    sig = []

    # pick the call that converts atomic number to symbol
    if ob3:
        get_symbol = openbabel.GetSymbol
    else:
        etab = openbabel.OBElementTable()
        get_symbol = etab.GetSymbol

    fd, temp_path = tempfile.mkstemp(suffix=".xyz")
    try:
        # both handles are closed even if parsing fails half-way
        with os.fdopen(fd, 'w') as fxyz, open(txtfile, 'r') as f:
            f.readline()  # first line is not needed here

            if int(f.readline().strip()) != 1:
                print(
                    "Your .txt should have only one molecule, the one present in the .dfr"
                )
                sys.exit(0)

            natoms = int(f.readline().split()[0])

            # write .xyz header
            fxyz.write("%d\nGenerated from %s\n" % (natoms, txtfile))

            for _ in range(natoms):
                fields = f.readline().split()

                # write the atomic symbol and coordinates
                atnum = int(fields[1])
                x, y, z = [float(v) for v in fields[2:5]]
                fxyz.write("%s\t%f\t%f\t%f\n" % (get_symbol(atnum), x, y, z))

                # store the parameters
                qv, epsv, sigv = [float(v) for v in fields[5:]]
                q.append(qv)
                eps.append(epsv)
                sig.append(sigv)

        # read the file into a mol
        mol = next(pybel.readfile("xyz", temp_path))
    finally:
        # the temporary file is removed on every path, including errors
        os.remove(temp_path)

    return mol, q, eps, sig
def get_mol_info(mol):
    """Collect the element symbol and coordinates of every atom in *mol*.

    Parameters:
        mol: iterable of atoms exposing ``atomicnum`` and ``coords``.

    Returns:
        Tuple of two NumPy arrays: the element symbols and the coordinates,
        both in iteration order.
    """
    # when ob3 is false the symbol lookup goes through an element table
    if ob3:
        lookup = openbabel.GetSymbol
    else:
        table = openbabel.OBElementTable()
        lookup = table.GetSymbol

    members = list(mol)
    symbols = [lookup(member.atomicnum) for member in members]
    positions = [member.coords for member in members]

    return np.asarray(symbols), np.asarray(positions)
def itp_from_params(mol, q, eps, sig, dfrBonds, dfrAngles, dfrDihedrals,
                    dfrImpDih):
    """Build the text of a GROMACS-style .itp topology from the given parameters.

    Parameters:
        mol: pybel molecule; ``mol.atoms`` supplies atomic numbers and
            masses and ``mol.OBMol`` supplies the bond list.
        q, eps, sig: per-atom sequences indexed like ``mol.atoms``; ``q`` is
            written in the charge column, ``sig``/``eps`` go through
            ``a2nm``/``cal2j`` into the sigma/epsilon columns.
        dfrBonds: mapping ``"ai aj"`` -> sequence; item 0 is written (after
            ``cal2j`` and a factor 200) in the ``k`` column, item 1 (after
            ``a2nm``) in the ``r`` column.
        dfrAngles: mapping ``"ai aj ak"`` -> sequence; item 1 is written as
            theta, item 0 (after ``cal2j`` and a factor 2) as cth.
        dfrDihedrals: mapping ``"ai aj ak al"`` -> sequence whose item 0 is
            the type (``"amber"`` or ``"opls"``, case-insensitive), items
            1-3 are the three coefficients and, for AMBER, items 4-6 are
            written next to each coefficient.
        dfrImpDih: mapping ``"ai aj ak al"`` -> sequence; item 1 and item 0
            are written as the two parameters of a type-4 dihedral.

    Returns:
        The whole .itp content as a single string.

    The process exits through ``sys.exit`` on an unknown dihedral type.
    """
    # pick the call that converts atomic number to symbol
    if ob3:
        get_symbol = openbabel.GetSymbol
    else:
        etab = openbabel.OBElementTable()
        get_symbol = etab.GetSymbol

    # Bonded atom pairs (1-based, stored low-high), collected once so that
    # classifying a dihedral does not rescan the whole bond list.
    bonded = set()
    for bond in openbabel.OBMolBondIter(mol.OBMol):
        b1 = bond.GetBeginAtom().GetId() + 1
        b2 = bond.GetEndAtom().GetId() + 1
        bonded.add((min(b1, b2), max(b1, b2)))

    # !!! units are converted as the reverse of: http://chembytes.wikidot.com/oplsaagro2tnk and based on GROMACS manual

    # pieces of the output, joined once at the end
    out = []

    # write header
    out.append("""
;
; Generated by dice2gromacs
; https://github.com/hmcezar/dicetools
;
[ atomtypes ]
;name bond_type mass charge ptype sigma epsilon
""")

    # write the atomtypes
    for i, atom in enumerate(mol.atoms):
        out.append("att_%03d %s%03d %7.4f 0.000 A %.5e %.5e\n" % (
            i + 1, get_symbol(atom.atomicnum), i + 1, atom.atomicmass,
            a2nm(sig[i]), cal2j(eps[i])))

    out.append("""
[ moleculetype ]
;name nrexcl
UNL 3

[ atoms ]
; nr type resi res atom cgnr charge mass
""")

    # write the atoms
    for i, atom in enumerate(mol.atoms):
        out.append("%6d att_%03d 1 UNL %s%03d 1 %.4f %7.4f\n" % (
            i + 1, i + 1, get_symbol(atom.atomicnum), i + 1, q[i],
            atom.atomicmass))

    # write the bonds
    out.append("""
[ bonds ]
; ai aj funct r k
""")
    for bnd in dfrBonds:
        ai, aj = [int(x) for x in bnd.split()]
        out.append("%6d %6d 1 %.4f %.4f\n" % (
            ai, aj, a2nm(dfrBonds[bnd][1]), cal2j(dfrBonds[bnd][0]) * 200.0))

    # write the angles
    out.append("""
[ angles ]
; ai aj ak funct theta cth
""")
    for ang in dfrAngles:
        ai, aj, ak = [int(x) for x in ang.split()]
        out.append("%6d %6d %6d 1 %.4f %.4f\n" % (
            ai, aj, ak, dfrAngles[ang][1], cal2j(dfrAngles[ang][0]) * 2.0))

    # write the proper dihedrals
    out.append("""
[ dihedrals ]
; proper dihedrals - converted to the RB form from Fourier type if OPLS
; ai aj ak al func params
""")

    # AMBER entries that turn out to be impropers are emitted later
    fimp = []
    for dih in dfrDihedrals:
        ai, aj, ak, al = [int(x) for x in dih.split()]
        params = dfrDihedrals[dih]
        dihtype = params[0].lower()

        if dihtype == "amber":
            # proper dihedral only if ai-aj, aj-ak and ak-al are all bonded
            chain = {
                (min(ai, aj), max(ai, aj)),
                (min(aj, ak), max(aj, ak)),
                (min(ak, al), max(ak, al)),
            }
            cnt = len(chain & bonded)

            if cnt == 3:
                func, target = 9, out
            else:
                func, target = 4, fimp

            fparam = [cal2j(float(x)) / 2.0 for x in params[1:4]]
            for i, term in enumerate(fparam, 1):
                # zero terms are not written
                if term != 0.0:
                    target.append("%6d %6d %6d %6d %1d %6.2f %9.5f %d\n" % (
                        ai, aj, ak, al, func, float(params[i + 3]), term, i))
        elif dihtype == "opls":
            # Fourier coefficients -> Ryckaert-Bellemans coefficients
            fparam = [cal2j(float(x)) for x in params[1:4]]
            c0 = fparam[1] + 0.5 * (fparam[0] + fparam[2])
            c1 = 0.5 * (-fparam[0] + 3.0 * fparam[2])
            c2 = -fparam[1]
            c3 = -2.0 * fparam[2]
            c4 = 0.0
            c5 = 0.0
            out.append(
                "%6d %6d %6d %6d 3 %9.5f %9.5f %9.5f %9.5f %9.5f %9.5f\n" % (
                    ai, aj, ak, al, c0, c1, c2, c3, c4, c5))
        else:
            print(
                "Error: Dihedral type (%s) found for dihedral %s in .dfr is not valid."
                % (params[0], dih))
            sys.exit(0)

    # write the improper dihedrals
    if fimp or dfrImpDih:
        out.append("""
[ dihedrals ]
; improper dihedrals
; ai aj ak al func params
""")
        out.extend(fimp)

        for idih in dfrImpDih:
            ai, aj, ak, al = [int(x) for x in idih.split()]
            out.append("%6d %6d %6d %6d 4 %6.2f %9.5f %d\n" % (
                ai, aj, ak, al, float(dfrImpDih[idih][1]),
                cal2j(float(dfrImpDih[idih][0])), 2))

    # write the pairs
    out.append("""
[ pairs ]
""")
    for pair in get_pairs(mol):
        ai, aj = [int(x) for x in pair.split()]
        out.append("%6d %6d 1\n" % (ai, aj))

    return "".join(out)
def parse_mol_info(fname, fcharges, axis, buffa, buffo, pbcbonds, printdih,
                   ignorebonds, ignoreimproper):
    """Build the text of a LAMMPS topology (data) file from a structure file.

    Parameters:
        fname: structure file; its extension selects the Open Babel input
            format, and ``pdb`` files additionally supply the atom labels.
        fcharges: optional path to a file with ``label charge`` per line;
            when falsy the charge column is written as ``X.XXXXXX``.
        axis: ``"x"``, ``"y"`` or ``"z"``; the axis that receives ``buffa``.
        buffa: buffer added to the box along ``axis``.
        buffo: buffer added to the box along the two other axes.
        pbcbonds: when true the cell is replicated so bonds crossing the
            periodic boundaries are detected, and ``replicated.xyz`` is
            written to the working directory.
        printdih: when true dihedrals (and, unless ``ignoreimproper``,
            impropers) are detected and written.
        ignorebonds: when true bonds/angles/dihedrals/impropers are taken
            only from the separated fragments after the first one.
        ignoreimproper: when true improper detection is skipped.

    Returns:
        The complete file content as a string.

    The process exits through ``sys.exit`` when ``axis`` is invalid.
    """
    iaxis = {"x": 0, "y": 1, "z": 2}
    if axis in iaxis:
        repaxis = iaxis[axis]
    else:
        print("Error: invalid axis")
        sys.exit(0)

    if fcharges:
        chargesLabel = {}
        with open(fcharges, "r") as f:
            for line in f:
                chargesLabel[line.split()[0]] = float(line.split()[1])

    # set openbabel file format
    base, ext = os.path.splitext(fname)
    obConversion = openbabel.OBConversion()
    obConversion.SetInAndOutFormats(ext[1:], "xyz")
    # trick to disable ring perception and make the ReadFile waaaay faster
    # Source: https://sourceforge.net/p/openbabel/mailman/openbabel-discuss/thread/56e1812d-396a-db7c-096d-d378a077853f%40ipcms.unistra.fr/#msg36225392
    obConversion.AddOption("b", openbabel.OBConversion.INOPTIONS)

    # read molecule to OBMol object
    mol = openbabel.OBMol()
    obConversion.ReadFile(mol, fname)
    mol.ConnectTheDots()  # necessary because of the 'b' INOPTION

    # split the molecules
    molecules = mol.Separate()

    # detect the molecules types: two fragments share a type when their
    # atomic-number lists are identical
    mTypes = {}
    mapmTypes = {}
    atomIdToMol = {}
    nty = 0
    for i, submol in enumerate(molecules, start=1):
        atomiter = openbabel.OBMolAtomIter(submol)
        atlist = []
        for at in atomiter:
            atlist.append(at.GetAtomicNum())
            atomIdToMol[at.GetId()] = i

        foundType = None
        for ty in mTypes:
            # check if there's already a molecule of this type
            if atlist == mTypes[ty]:
                foundType = ty

        # if not, create a new type
        if not foundType:
            nty += 1
            foundType = nty
            mTypes[nty] = atlist

        mapmTypes[i] = foundType

    # get atomic labels from pdb, otherwise fall back to element symbols;
    # atoms outside the first fragment get the fragment type appended when
    # more than one fragment type exists
    idToAtomicLabel = {}
    if ext[1:] == "pdb":
        for res in openbabel.OBResidueIter(mol):
            for atom in openbabel.OBResidueAtomIter(res):
                label = res.GetAtomID(atom).strip()
                if (atomIdToMol[atom.GetId()] > 1) and (len(mTypes) > 1):
                    label += str(mapmTypes[atomIdToMol[atom.GetId()]])
                idToAtomicLabel[atom.GetId()] = label
    else:
        if ob3:
            get_symbol = openbabel.GetSymbol
        else:
            etab = openbabel.OBElementTable()
            get_symbol = etab.GetSymbol

        for atom in openbabel.OBMolAtomIter(mol):
            label = get_symbol(atom.GetAtomicNum())
            if (atomIdToMol[atom.GetId()] > 1) and (len(mTypes) > 1):
                label += str(mapmTypes[atomIdToMol[atom.GetId()]])
            idToAtomicLabel[atom.GetId()] = label

    # identify atom types and get masses
    outMasses = "Masses\n\n"
    massTypes = {}
    mapTypes = {}
    nmassTypes = 0
    atomIterator = openbabel.OBMolAtomIter(mol)
    for atom in atomIterator:
        i = atom.GetId()
        if idToAtomicLabel[i] not in massTypes:
            nmassTypes += 1
            mapTypes[nmassTypes] = idToAtomicLabel[i]
            massTypes[idToAtomicLabel[i]] = nmassTypes
            outMasses += "\t%d\t%.3f\t# %s\n" % (
                nmassTypes, atom.GetAtomicMass(), idToAtomicLabel[i])

    # create atoms list
    outAtoms = "Atoms # full\n\n"

    xmin = float("inf")
    xmax = float("-inf")
    ymin = float("inf")
    ymax = float("-inf")
    zmin = float("inf")
    zmax = float("-inf")
    natoms = 0
    acoords = []
    for mnum, imol in enumerate(molecules, start=1):
        atomIterator = openbabel.OBMolAtomIter(imol)
        for atom in sorted(atomIterator, key=lambda x: x.GetId()):
            natoms += 1
            i = atom.GetId()
            apos = (atom.GetX(), atom.GetY(), atom.GetZ())
            acoords.append(Atom(atom.GetAtomicNum(), apos))

            # look for the maximum and minimum x for the box (improve later with numpy and all coordinates)
            if apos[0] > xmax:
                xmax = apos[0]
            if apos[0] < xmin:
                xmin = apos[0]
            if apos[1] > ymax:
                ymax = apos[1]
            if apos[1] < ymin:
                ymin = apos[1]
            if apos[2] > zmax:
                zmax = apos[2]
            if apos[2] < zmin:
                zmin = apos[2]

            if fcharges:
                outAtoms += "\t%d\t%d\t%d\t%.6f\t%.4f\t%.4f\t%.4f\t# %s\n" % (
                    i + 1, mnum, massTypes[idToAtomicLabel[i]],
                    chargesLabel[idToAtomicLabel[i]], atom.GetX(),
                    atom.GetY(), atom.GetZ(), idToAtomicLabel[i])
            else:
                outAtoms += "\t%d\t%d\t%d\tX.XXXXXX\t%.4f\t%.4f\t%.4f\t# %s\n" % (
                    i + 1, mnum, massTypes[idToAtomicLabel[i]], atom.GetX(),
                    atom.GetY(), atom.GetZ(), idToAtomicLabel[i])

    # define box shape and size: use the unit cell stored in the file when
    # there is one, otherwise fall back to the coordinate bounds
    try:
        fromBounds = False
        rcell = mol.GetData(12)
        cell = openbabel.toUnitCell(rcell)
        v1 = [
            cell.GetCellVectors()[0].GetX(),
            cell.GetCellVectors()[0].GetY(),
            cell.GetCellVectors()[0].GetZ()
        ]
        v2 = [
            cell.GetCellVectors()[1].GetX(),
            cell.GetCellVectors()[1].GetY(),
            cell.GetCellVectors()[1].GetZ()
        ]
        v3 = [
            cell.GetCellVectors()[2].GetX(),
            cell.GetCellVectors()[2].GetY(),
            cell.GetCellVectors()[2].GetZ()
        ]
        boxinfo = [v1, v2, v3]
        orthogonal = True
        for i, array in enumerate(boxinfo):
            for j in range(3):
                if i == j:
                    continue
                if not math.isclose(0., array[j], abs_tol=1e-6):
                    orthogonal = False
    except Exception:
        # any failure to obtain the cell means "no usable cell"; catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit through
        fromBounds = True
        v1 = [xmax - xmin, 0., 0.]
        v2 = [0., ymax - ymin, 0.]
        v3 = [0., 0., zmax - zmin]
        orthogonal = True

    # add buffer (only the diagonal element of each vector is enlarged)
    if orthogonal:
        buf = []
        boxinfo = [v1, v2, v3]
        for i, val in enumerate(boxinfo[repaxis]):
            if i == repaxis:
                buf.append(val + buffa)
            else:
                buf.append(val)
        boxinfo[repaxis] = buf
        for i in range(3):
            if i == repaxis:
                continue
            buf = []
            for j, val in enumerate(boxinfo[i]):
                if j == i:
                    buf.append(val + buffo)
                else:
                    buf.append(val)
            boxinfo[i] = buf

    # Duplicate to get the bonds in the PBC. Taken from (method _crd2bond):
    # https://github.com/tongzhugroup/mddatasetbuilder/blob/66eb0f15e972be0f5534dcda27af253cd8891ff2/mddatasetbuilder/detect.py#L213
    if pbcbonds:
        acoords = Atoms(acoords, cell=boxinfo, pbc=True)
        # repeat the unit cell in each direction (len(repatoms) = 7*natoms)
        repatoms = acoords.repeat(2)[natoms:]
        tree = cKDTree(acoords.get_positions())
        d = tree.query(repatoms.get_positions(), k=1)[0]
        nearest = d < 8.
        ghost_atoms = repatoms[nearest]
        # index of the real atom each kept ghost atom is an image of
        realnumber = np.where(nearest)[0] % natoms
        acoords += ghost_atoms

        # write the structure with the replicated atoms
        write("replicated.xyz", acoords)
    else:
        acoords = Atoms(acoords, cell=boxinfo, pbc=False)

    # write new mol with new bonds
    nmol = openbabel.OBMol()
    nmol.BeginModify()
    for idx, (num, position) in enumerate(
            zip(acoords.get_atomic_numbers(), acoords.positions)):
        a = nmol.NewAtom(idx)
        a.SetAtomicNum(int(num))
        a.SetVector(*position)
    nmol.ConnectTheDots()
    # nmol.PerceiveBondOrders() # super slow becauses it looks for rings
    nmol.EndModify()

    # identify bond types and create bond list
    outBonds = "Bonds # harmonic\n\n"

    bondTypes = {}
    mapbTypes = {}
    nbondTypes = 0
    nbonds = 0
    bondsToDelete = []
    bondIterators = []
    if ignorebonds:
        sepmols = nmol.Separate()
        for smol in sepmols[1:]:
            bondIterators.append(openbabel.OBMolBondIter(smol))
    else:
        bondIterators.append(openbabel.OBMolBondIter(nmol))

    lastidx = 1
    for iterator in bondIterators:
        for i, bond in enumerate(iterator, lastidx):
            b1 = bond.GetBeginAtom().GetId()
            b2 = bond.GetEndAtom().GetId()

            # check if its a bond of the replica only
            if (b1 >= natoms) and (b2 >= natoms):
                bondsToDelete.append(bond)
                continue

            # remap to a real atom if needed
            if b1 >= natoms:
                b1 = realnumber[b1 - natoms]
            if b2 >= natoms:
                b2 = realnumber[b2 - natoms]

            # identify bond type (A - B and B - A are the same type)
            btype1 = "%s - %s" % (idToAtomicLabel[b1], idToAtomicLabel[b2])
            btype2 = "%s - %s" % (idToAtomicLabel[b2], idToAtomicLabel[b1])

            if btype1 in bondTypes:
                bondid = bondTypes[btype1]
                bstring = btype1
            elif btype2 in bondTypes:
                bondid = bondTypes[btype2]
                bstring = btype2
            else:
                nbondTypes += 1
                mapbTypes[nbondTypes] = btype1
                bondid = nbondTypes
                bondTypes[btype1] = nbondTypes
                bstring = btype1

            nbonds += 1
            outBonds += "\t%d\t%d\t%d\t%d\t# %s\n" % (nbonds, bondid, b1 + 1,
                                                       b2 + 1, bstring)
        lastidx = i

    # delete the bonds of atoms from other replicas
    for bond in bondsToDelete:
        nmol.DeleteBond(bond)

    # identify angle types and create angle list
    angleTypes = {}
    mapaTypes = {}
    nangleTypes = 0
    nangles = 0
    angleIterators = []

    if ignorebonds:
        sepmols = nmol.Separate()
        for smol in sepmols[1:]:
            smol.FindAngles()
            angleIterators.append(openbabel.OBMolAngleIter(smol))
        # offset applied to the per-fragment angle indices
        prevnumatoms = sepmols[0].NumAtoms()
    else:
        nmol.FindAngles()
        angleIterators.append(openbabel.OBMolAngleIter(nmol))

    outAngles = "Angles # harmonic\n\n"

    lastidx = 1
    for j, iterator in enumerate(angleIterators, 1):
        for i, angle in enumerate(iterator, lastidx):
            # element 0 of the angle tuple is written as the middle atom
            if ignorebonds:
                a1 = angle[1] + prevnumatoms
                a2 = angle[0] + prevnumatoms
                a3 = angle[2] + prevnumatoms
            else:
                a1 = angle[1]
                a2 = angle[0]
                a3 = angle[2]

            # remap to a real atom if needed
            if a1 >= natoms:
                a1 = realnumber[a1 - natoms]
            if a2 >= natoms:
                a2 = realnumber[a2 - natoms]
            if a3 >= natoms:
                a3 = realnumber[a3 - natoms]

            atype1 = "%s - %s - %s" % (
                idToAtomicLabel[a1], idToAtomicLabel[a2], idToAtomicLabel[a3])
            atype2 = "%s - %s - %s" % (
                idToAtomicLabel[a3], idToAtomicLabel[a2], idToAtomicLabel[a1])

            if atype1 in angleTypes:
                angleid = angleTypes[atype1]
                astring = atype1
            elif atype2 in angleTypes:
                angleid = angleTypes[atype2]
                astring = atype2
            else:
                nangleTypes += 1
                mapaTypes[nangleTypes] = atype1
                angleid = nangleTypes
                angleTypes[atype1] = nangleTypes
                astring = atype1

            nangles += 1
            outAngles += "\t%d\t%d\t%d\t%d\t%d\t# %s\n" % (
                nangles, angleid, a1 + 1, a2 + 1, a3 + 1, astring)

        lastidx = i
        if ignorebonds:
            prevnumatoms += sepmols[j].NumAtoms()

    # identify dihedral types and create dihedral list
    if printdih:
        dihedralTypes = {}
        mapdTypes = {}
        ndihedralTypes = 0
        ndihedrals = 0
        dihedralIterators = []

        if ignorebonds:
            sepmols = nmol.Separate()
            for smol in sepmols[1:]:
                smol.FindTorsions()
                dihedralIterators.append(openbabel.OBMolTorsionIter(smol))
        else:
            nmol.FindTorsions()
            dihedralIterators.append(openbabel.OBMolTorsionIter(nmol))

        outDihedrals = "Dihedrals # charmmfsw\n\n"

        lastidx = 1
        for iterator in dihedralIterators:
            for i, dihedral in enumerate(iterator, lastidx):
                a1 = dihedral[0]
                a2 = dihedral[1]
                a3 = dihedral[2]
                a4 = dihedral[3]

                # remap to a real atom if needed
                if a1 >= natoms:
                    a1 = realnumber[a1 - natoms]
                if a2 >= natoms:
                    a2 = realnumber[a2 - natoms]
                if a3 >= natoms:
                    a3 = realnumber[a3 - natoms]
                if a4 >= natoms:
                    a4 = realnumber[a4 - natoms]

                dtype1 = "%s - %s - %s - %s" % (
                    idToAtomicLabel[a1], idToAtomicLabel[a2],
                    idToAtomicLabel[a3], idToAtomicLabel[a4])
                dtype2 = "%s - %s - %s - %s" % (
                    idToAtomicLabel[a4], idToAtomicLabel[a3],
                    idToAtomicLabel[a2], idToAtomicLabel[a1])

                if dtype1 in dihedralTypes:
                    dihedralid = dihedralTypes[dtype1]
                    dstring = dtype1
                elif dtype2 in dihedralTypes:
                    dihedralid = dihedralTypes[dtype2]
                    dstring = dtype2
                else:
                    ndihedralTypes += 1
                    mapdTypes[ndihedralTypes] = dtype1
                    dihedralid = ndihedralTypes
                    dihedralTypes[dtype1] = ndihedralTypes
                    dstring = dtype1

                ndihedrals += 1
                outDihedrals += "\t%d\t%d\t%d\t%d\t%d\t%d\t# %s\n" % (
                    ndihedrals, dihedralid, a1 + 1, a2 + 1, a3 + 1, a4 + 1,
                    dstring)
            lastidx = i

        if not ignoreimproper:
            # look for the improper dihedrals
            improperDihedralTypes = {}
            mapiDTypes = {}
            niDihedralTypes = 0
            niDihedrals = 0
            mollist = []

            if ignorebonds:
                sepmols = nmol.Separate()
                for smol in sepmols[1:]:
                    smol.PerceiveBondOrders()
                    mollist.append(smol)
            else:
                nmol.PerceiveBondOrders()
                mollist.append(nmol)

            outImpropers = "Impropers # harmonic\n\n"

            for imol in mollist:
                atomIterator = openbabel.OBMolAtomIter(imol)
                for atom in atomIterator:
                    try:
                        expDegree = atom.GetValence()
                    except AttributeError:
                        # the binding in use has no GetValence on atoms
                        expDegree = atom.GetExplicitDegree()

                    # returns impropers for atoms with connected to other 3 atoms and SP2 hybridization
                    if atom.GetHyb() == 2 and expDegree == 3:
                        connectedAtoms = []
                        for atom2, depth in openbabel.OBMolAtomBFSIter(
                                imol, atom.GetId() + 1):
                            if depth == 2:
                                connectedAtoms.append(atom2)

                        torsional = [
                            atom.GetId() + 1,
                            connectedAtoms[0].GetId() + 1,
                            connectedAtoms[1].GetId() + 1,
                            connectedAtoms[2].GetId() + 1
                        ]

                        a1 = torsional[0] - 1
                        a2 = torsional[1] - 1
                        a3 = torsional[2] - 1
                        a4 = torsional[3] - 1

                        # remap to a real atom if needed
                        if a1 >= natoms:
                            a1 = realnumber[a1 - natoms]
                        if a2 >= natoms:
                            a2 = realnumber[a2 - natoms]
                        if a3 >= natoms:
                            a3 = realnumber[a3 - natoms]
                        if a4 >= natoms:
                            a4 = realnumber[a4 - natoms]

                        dtype1 = "%s - %s - %s - %s" % (
                            idToAtomicLabel[a1], idToAtomicLabel[a2],
                            idToAtomicLabel[a3], idToAtomicLabel[a4])
                        dtype2 = "%s - %s - %s - %s" % (
                            idToAtomicLabel[a4], idToAtomicLabel[a3],
                            idToAtomicLabel[a2], idToAtomicLabel[a1])

                        if dtype1 in improperDihedralTypes:
                            idihedralid = improperDihedralTypes[dtype1]
                            dstring = dtype1
                        elif dtype2 in improperDihedralTypes:
                            idihedralid = improperDihedralTypes[dtype2]
                            dstring = dtype2
                        else:
                            niDihedralTypes += 1
                            mapiDTypes[niDihedralTypes] = dtype1
                            idihedralid = niDihedralTypes
                            improperDihedralTypes[dtype1] = niDihedralTypes
                            dstring = dtype1

                        niDihedrals += 1
                        outImpropers += "\t%d\t%d\t%d\t%d\t%d\t%d\t# %s\n" % (
                            niDihedrals, idihedralid, a1 + 1, a2 + 1, a3 + 1,
                            a4 + 1, dstring)

    # print header
    if printdih and (ndihedrals > 0):
        if ignoreimproper or (niDihedrals == 0):
            header = (
                "LAMMPS topology created from %s using pdb2lmp.py - By Henrique Musseli Cezar, 2020\n\n"
                "\t%d atoms\n\t%d bonds\n\t%d angles\n\t%d dihedrals\n\n"
                "\t%d atom types\n\t%d bond types\n\t%d angle types\n\t%d dihedral types\n\n"
            ) % (fname, natoms, nbonds, nangles, ndihedrals, nmassTypes,
                 nbondTypes, nangleTypes, ndihedralTypes)
        else:
            header = (
                "LAMMPS topology created from %s using pdb2lmp.py - By Henrique Musseli Cezar, 2020\n\n"
                "\t%d atoms\n\t%d bonds\n\t%d angles\n\t%d dihedrals\n\t%d impropers\n\n"
                "\t%d atom types\n\t%d bond types\n\t%d angle types\n\t%d dihedral types\n\t%d improper types\n\n"
            ) % (fname, natoms, nbonds, nangles, ndihedrals, niDihedrals,
                 nmassTypes, nbondTypes, nangleTypes, ndihedralTypes,
                 niDihedralTypes)
    else:
        header = (
            "LAMMPS topology created from %s using pdb2lmp.py - By Henrique Musseli Cezar, 2020\n\n"
            "\t%d atoms\n\t%d bonds\n\t%d angles\n\n"
            "\t%d atom types\n\t%d bond types\n\t%d angle types\n\n"
        ) % (fname, natoms, nbonds, nangles, nmassTypes, nbondTypes,
             nangleTypes)

    # add box info
    if fromBounds:
        # box from coordinate bounds: half of each buffer goes on each side
        boxsize = [(xmin, xmax), (ymin, ymax), (zmin, zmax)]
        boxsize[repaxis] = (boxsize[repaxis][0] - buffa / 2.,
                            boxsize[repaxis][1] + buffa / 2.)
        for i in range(3):
            if i == repaxis:
                continue
            boxsize[i] = (boxsize[i][0] - buffo / 2.,
                          boxsize[i][1] + buffo / 2.)
        header += "\t%.8f\t%.8f\t xlo xhi\n\t%.8f\t%.8f\t ylo yhi\n\t%.8f\t%.8f\t zlo zhi\n" % (
            boxsize[0][0], boxsize[0][1], boxsize[1][0], boxsize[1][1],
            boxsize[2][0], boxsize[2][1])
    else:
        if orthogonal:
            header += "\t%.8f\t%.8f\t xlo xhi\n\t%.8f\t%.8f\t ylo yhi\n\t%.8f\t%.8f\t zlo zhi\n" % (
                0., boxinfo[0][0], 0., boxinfo[1][1], 0., boxinfo[2][2])
        else:
            header += "\t%.8f\t%.8f\t xlo xhi\n\t%.8f\t%.8f\t ylo yhi\n\t%.8f\t%.8f\t zlo zhi\n\t%.8f\t%.8f\t%.8f\t xy xz yz\n" % (
                0., boxinfo[0][0], 0., boxinfo[1][1], 0., boxinfo[2][2],
                boxinfo[1][0], boxinfo[2][0], boxinfo[2][1])

    # print Coeffs (placeholders to be filled in by the user)
    outCoeffs = "Pair Coeffs\n\n"

    for i in range(1, nmassTypes + 1):
        outCoeffs += "\t%d\teps\tsig\t# %s\n" % (i, mapTypes[i])

    outCoeffs += "\nBond Coeffs\n\n"

    for i in range(1, nbondTypes + 1):
        outCoeffs += "\t%d\tK\tr_0\t# %s\n" % (i, mapbTypes[i])

    outCoeffs += "\nAngle Coeffs\n\n"

    for i in range(1, nangleTypes + 1):
        outCoeffs += "\t%d\tK\ttetha_0 (deg)\t# %s\n" % (i, mapaTypes[i])

    if printdih and (ndihedrals > 0):
        outCoeffs += "\nDihedral Coeffs\n\n"

        for i in range(1, ndihedralTypes + 1):
            outCoeffs += "\t%d\tK\tn\tphi_0 (deg)\tw\t# %s\n" % (i,
                                                                  mapdTypes[i])

        if not ignoreimproper and (niDihedralTypes > 0):
            outCoeffs += "\nImproper Coeffs\n\n"

            for i in range(1, niDihedralTypes + 1):
                outCoeffs += "\t%d\tK\txi_0 (deg)\t# %s\n" % (i,
                                                               mapiDTypes[i])

    # assemble the sections, separated by a newline
    sections = [header, outMasses, outCoeffs, outAtoms, outBonds, outAngles]
    if printdih and (ndihedrals > 0):
        sections.append(outDihedrals)
        if not (ignoreimproper or (niDihedralTypes == 0)):
            sections.append(outImpropers)

    return "\n".join(sections)
def top2dfr(topfile, geomfile, flexfrag, eqgeom, savefrags, topcharges, ffname, path): if "amber" in ffname: potname = "AMBER" else: potname = "OPLS" # get the atomic positions from the geometry file base, ext = os.path.splitext(geomfile) if not ob3: etab = openbabel.OBElementTable() mol = pybel.readfile(ext[1:], geomfile).__next__() molxyzinfo = {} for i, atom in enumerate(mol, 1): if ob3: molxyzinfo[i] = [openbabel.GetSymbol(atom.atomicnum), atom.coords] else: molxyzinfo[i] = [etab.GetSymbol(atom.atomicnum), atom.coords] # first pass through the topology file to get the data into a dictionary tdata = {} tdata["[ improper ]"] = [] with open(topfile) as f: while True: line = f.readline() if not line: break if line.strip().startswith(";") or line.strip().startswith( "#") or len(line.strip()) == 0: continue if strip_comment(line).startswith("[ "): if strip_comment(line) not in tdata: key = strip_comment(line) tdata[key] = [] else: # remove comments from the line if "improper" not in line: line = strip_comment(line) if "improper" in line: tdata["[ improper ]"].append(line) else: try: tdata[key].append(line) except: print( "You have a line (%s) of data before assigning a type of entry (such as [ atoms ], [ bonds ] ...)" % (line)) sys.exit(0) # check if atoms were found if "[ atoms ]" not in tdata: print( "The [ atoms ] section was not found in your topology, make sure you're using a single file without #include" ) sys.exit(0) # get the atoms and its positions and parameters in a dictionary atoms = OrderedDict() rdfs = {} atom_num = 1 rdf_label = 1 for line in tdata["[ atoms ]"]: atomlbl = line.split()[0] atoms[atomlbl] = [] fromatomtype = True # if atomtypes are in the topology, use them. 
Otherwise get from ffnonbonded.itp ffline = "" if "[ atomtypes ]" in tdata: ffline = lookup_ljparam_ifile(line.split()[1], tdata["[ atomtypes ]"]) # get the data from the ffnonbonded.itp if not ffline: fromatomtype = False ffline = lookup_ljparam(line.split()[1], path) # if the atomtype was not found, stop if not ffline: print( "Atom type %s was not found neither in the .itp or the force field directory" % (line.split()[1])) sys.exit(0) # append the data to the list in the same order it will be written in the .dfr atoms[atomlbl].append(str(atom_num)) atom_num += 1 if "amber" in ffname: atomsp = ffline.split()[1] else: atomsp = ffline.split()[2] if atomsp not in rdfs: rdfs[atomsp] = str(rdf_label) rdf_label += 1 atoms[atomlbl].append(rdfs[atomsp]) if "[ atomtypes ]" in tdata: atoms[atomlbl].append(mol.atoms[int(atomlbl) - 1].atomicnum) else: atoms[atomlbl].append(atomsp) x, y, z = molxyzinfo[int(atomlbl)][1] atoms[atomlbl].append(str(x)) atoms[atomlbl].append(str(y)) atoms[atomlbl].append(str(z)) if ("amber" in ffname) or fromatomtype: if topcharges: atoms[atomlbl].append(line.split()[6]) else: atoms[atomlbl].append(ffline.split()[-4]) atoms[atomlbl].append(str(j2cal(float(ffline.split()[-1])))) atoms[atomlbl].append(str(nm2a(float(ffline.split()[-2])))) # the last one will not be printed but is needed to retrieve the force constants if ("[ atomtypes ]" in tdata) and ("opls" in ffname): atoms[atomlbl].append(ffline.split()[1]) else: atoms[atomlbl].append(ffline.split()[0]) else: if topcharges: atoms[atomlbl].append(line.split()[6]) else: atoms[atomlbl].append(ffline.split()[4]) atoms[atomlbl].append(str(j2cal(float(ffline.split()[7])))) atoms[atomlbl].append(str(nm2a(float(ffline.split()[6])))) # the last one will not be printed but is needed to retrieve the opls-aa force constants atoms[atomlbl].append(ffline.split()[1]) # print(atoms[atomlbl]) # now create the fragment connection list from this file and store it in fraginfo # creates a temporary xyz using mkstemp 
(https://www.logilab.org/blogentry/17873) # this file will be used to get the fragment data fd, temp_path = tempfile.mkstemp(suffix=".xyz") fxyz = os.fdopen(fd, 'w') fxyz.write(mol.write("xyz")) fxyz.close() generate_fragfile(temp_path, "header") base, ext = os.path.splitext(temp_path) fraginfo = [] with open(base + ".dfr") as f: while True: line = f.readline() if line.strip() != "$atoms fragments": continue else: while line.strip() != "$end fragment connection": if flexfrag: fraginfo.append(line.replace("R", "M")) else: fraginfo.append(line) line = f.readline() fraginfo.append(line) break # remove the files os.remove(temp_path) os.remove(base + ".dfr") os.remove(base + ".txt") if savefrags: shutil.move( base + "_fragments", os.path.join( os.path.dirname(os.path.abspath(geomfile)), os.path.splitext(os.path.basename(topfile))[0] + "_fragments")) else: shutil.rmtree(base + "_fragments") # !!! units should be converted as in: http://chembytes.wikidot.com/oplsaagro2tnk !!! # get the bond info bonds = [] for line in tdata["[ bonds ]"]: # get parameters from user's .itp if (len(line.split()) == 5): if eqgeom: bonds.append( line.split()[0] + " " + line.split()[1] + " \t" + str(round(j2cal(float(line.split()[4])) / (200.0), 4)) + "\t" + str( round( mol.OBMol.GetBond(int(line.split( )[0]), int(line.split()[1])).GetLength(), 4)) + "\n") else: bonds.append( line.split()[0] + " " + line.split()[1] + " \t" + str(round(j2cal(float(line.split()[4])) / (200.0), 4)) + "\t" + str(nm2a(float(line.split()[3]))) + "\n") # get parameters from ffbonded.itp else: ffline = lookup_ffbond(atoms[line.split()[0]][9], atoms[line.split()[1]][9], path) if ffline == "not found": bonds.append( line.split()[0] + " " + line.split()[1] + " \tXXX\t" + str( round( mol.OBMol.GetBond(int(line.split( )[0]), int(line.split()[1])).GetLength(), 4)) + "\n") elif eqgeom: bonds.append( line.split()[0] + " " + line.split()[1] + " \t" + str(round(j2cal(float(ffline.split()[4])) / (200.0), 4)) + "\t" + str( round( 
mol.OBMol.GetBond(int(line.split( )[0]), int(line.split()[1])).GetLength(), 4)) + "\n") else: bonds.append( line.split()[0] + " " + line.split()[1] + " \t" + str(round(j2cal(float(ffline.split()[4])) / (200.0), 4)) + "\t" + str(nm2a(float(ffline.split()[3]))) + "\n") # get the angle info angles = [] for line in tdata["[ angles ]"]: # get parameters from user's .itp if (len(line.split()) == 6): if eqgeom: angles.append( line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " \tharmonic\t" + str(j2cal(float(line.split()[5])) / (2.0)) + "\t" + str( round( mol.OBMol.GetAngle( mol.OBMol.GetAtom(int(line.split()[0])), mol.OBMol.GetAtom(int(line.split()[1])), mol.OBMol.GetAtom(int(line.split()[2]))), 4)) + "\n") else: angles.append(line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " \tharmonic\t" + str(j2cal(float(line.split()[5])) / (2.0)) + "\t" + str(float(line.split()[4])) + "\n") # get parameters from ffbonded.itp else: ffline = lookup_ffangle(atoms[line.split()[0]][9], atoms[line.split()[1]][9], atoms[line.split()[2]][9], path) if ffline == "not found": angles.append( line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " \tharmonic\tXXX\t" + str( round( mol.OBMol.GetAngle( mol.OBMol.GetAtom(int(line.split()[0])), mol.OBMol.GetAtom(int(line.split()[1])), mol.OBMol.GetAtom(int(line.split()[2]))), 4)) + "\n") elif eqgeom: angles.append( line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " \tharmonic\t" + str(j2cal(float(ffline.split()[5])) / (2.0)) + "\t" + str( round( mol.OBMol.GetAngle( mol.OBMol.GetAtom(int(line.split()[0])), mol.OBMol.GetAtom(int(line.split()[1])), mol.OBMol.GetAtom(int(line.split()[2]))), 4)) + "\n") else: angles.append(line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " \tharmonic\t" + str(j2cal(float(ffline.split()[5])) / (2.0)) + "\t" + ffline.split()[4] + "\n") # get the dihedrals info dihedrals = [] pline = {} ipline = {} dih9 = False dih4 = False for rline in tdata["[ 
dihedrals ]"]: line = strip_comment(rline) ffline = "" # get parameters from user's .itp if (len(line.split()) == 11 and line.split()[4] == '3'): V3 = round(-j2cal(float(line.split()[8]) / 2.0), 3) V2 = round(-j2cal(float(line.split()[7])), 3) V1 = round(-2.0 * j2cal(float(line.split()[6])) + 3.0 * V3, 3) dihedrals.append(line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + potname + "\t" + str(V1) + "\t" + str(V2) + "\t" + str(V3) + "\t0.0\t0.0\t0.0\n") elif (len(line.split()) == 8 and line.split()[4] == '9'): dih9 = True dihline = "%s %s %s %s" % (line.split()[0], line.split()[1], line.split()[2], line.split()[3]) if dihline in pline: pline[dihline].append(line) else: pline[dihline] = [line] elif (len(line.split()) == 8 and (line.split()[4] == '4' or line.split()[4] == '1')): dih4 = True dihline = "%s %s %s %s" % (line.split()[0], line.split()[1], line.split()[2], line.split()[3]) if dihline in ipline: ipline[dihline].append(line) else: ipline[dihline] = [line] # get parameters from ffbonded.itp elif len(line.split()) == 5: ffline = lookup_ffdihedral(atoms[line.split()[0]][9], atoms[line.split()[1]][9], atoms[line.split()[2]][9], atoms[line.split()[3]][9], int(line.split()[4]), ffname, path) else: print( "Error: something is wrong in dihedral line (%s) maybe the number of parameters?" 
% (line)) sys.exit(0) # parameters from ffbonded.itp need to be converted and stored if ffline: if ffline == "not found": dihedrals.append(line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + potname + "\tXXX\tXXX\tXXX" + "\t0.0\t0.0\t0.0\n") continue if "amber" in ffname: # parameters are already of Fourier type, just need to convert to cal if float(ffline.split()[8]) == 0.: dihedrals.append( line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + potname + "\t" + str(j2cal(float(ffline.split()[5]))) + "\t" + str(j2cal(float(ffline.split()[6]))) + "\t" + str(j2cal(float(ffline.split()[7]))) + "\t" + str(round(float(ffline.split()[9]), 1)) + "\t" + str(round(float(ffline.split()[10]), 1)) + "\t" + str(round(float(ffline.split()[11]), 1)) + "\n") else: dihedrals.append( line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + potname + "\t" + str(j2cal(float(ffline.split()[5]))) + "\t" + str(j2cal(float(ffline.split()[6]))) + "\t" + str(j2cal(float(ffline.split()[7]))) + "\t" + str(j2cal(float(ffline.split()[8]))) + "\t" + str(round(float(ffline.split()[9]), 1)) + "\t" + str(round(float(ffline.split()[10]), 1)) + "\t" + str(round(float(ffline.split()[11]), 1)) + "\t" + str(round(float(ffline.split()[12]), 1)) + "\n") else: if float(ffline.split()[9]) != 0.0 or float( ffline.split()[10]) != 0.0: print( "Parameters for %s - %s - %s - %s dihedrals are undefined, please treat by hand!" 
% (atoms[line.split()[0]][9], atoms[line.split()[1]][9], atoms[line.split()[2]][9], atoms[line.split()[3]][9])) V3 = round(-j2cal(float(ffline.split()[8]) / 2.0), 3) V2 = round(-j2cal(float(ffline.split()[7])), 3) V1 = round(-2.0 * j2cal(float(ffline.split()[6])) + 3.0 * V3, 3) dihedrals.append(line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + potname + "\t" + str(V1) + "\t" + str(V2) + "\t" + str(V3) + "\t0.0\t0.0\t0.0\n") # after reading all the dihedrals, if type 9 was used, we need to convert if dih9: for kdih in pline: params = [0.0] * 6 for line in pline[kdih]: n = int(line.split()[7]) # have to multiply by 2.0 since I use the 0.5*(...) version of the AMBER definition params[n - 1] = 2.0 * float(line.split()[6]) params[n + 2] = float(line.split()[5]) retline = "%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f" % ( kdih, 9, params[0], params[1], params[2], params[3], params[4], params[5]) dihedrals.append(retline.split()[0] + " " + retline.split()[1] + " " + retline.split()[2] + " " + retline.split()[3] + " \t" + potname + "\t" + str(j2cal(float(retline.split()[5]))) + "\t" + str(j2cal(float(retline.split()[6]))) + "\t" + str(j2cal(float(retline.split()[7]))) + "\t" + retline.split()[8] + "\t" + retline.split()[9] + "\t" + retline.split()[10] + "\n") # finally, add the improper dihedrals described as proper dihedrals if dih4: for kdih in ipline: params = [0.0] * 6 for line in ipline[kdih]: n = int(line.split()[7]) # have to multiply by 2.0 since I use the 0.5*(...) 
version of the AMBER definition params[n - 1] = 2.0 * float(line.split()[6]) params[n + 2] = float(line.split()[5]) retline = "%s\t%d\t%.3f\t%.3f\t%.3f\t%.2f\t%.2f\t%.2f" % ( kdih, 9, params[0], params[1], params[2], params[3], params[4], params[5]) dihedrals.append(retline.split()[0] + " " + retline.split()[1] + " " + retline.split()[2] + " " + retline.split()[3] + " \t" + "AMBER" + "\t" + str(j2cal(float(retline.split()[5]))) + "\t" + str(j2cal(float(retline.split()[6]))) + "\t" + str(j2cal(float(retline.split()[7]))) + "\t" + retline.split()[8] + "\t" + retline.split()[9] + "\t" + retline.split()[10] + "\n") # get the improper dihedrals info improper = [] for line in tdata["[ improper ]"]: if len(strip_comment(line).split()) >= 7: improper.append(line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + str(round(j2cal(float(line.split()[6])), 3)) + "\t" + line.split()[5] + "\n") else: if "opls" in ffname: ffline = lookup_ffimproper(line.split()[5], path) improper.append( line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + str(round(j2cal(float(ffline.split()[3])), 3)) + "\t" + ffline.split()[2] + "\n") else: ffline = lookup_ffdihedral(atoms[line.split()[0]][9], atoms[line.split()[1]][9], atoms[line.split()[2]][9], atoms[line.split()[3]][9], 4, path) improper.append(line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + "OPLS" + "\t" + str(j2cal(float(ffline.split()[5]))) + "\t" + str(j2cal(float(ffline.split()[6]))) + "\t" + str(j2cal(float(ffline.split()[7]))) + "\t" + ffline.split()[8] + "\t" + ffline.split()[9] + "\t" + ffline.split()[10] + "\n") # print everything to the output file base, ext = os.path.splitext(geomfile) with open(base + ".txt", "w") as f: f.write("*\n1\n") f.write( str(len(atoms)) + " \t %s (generated with gromacs2dice)\n" % (os.path.basename(base))) for atom, data in atoms.items(): f.write( "%2d %2d \t %7.4f \t 
%7.4f \t %7.4f \t %7.4f \t %7.4f \t %7.4f\n" % (int(data[1]), int(data[2]), float(data[3]), float( data[4]), float(data[5]), float(data[6]), float( data[7]), float(data[8]))) f.write("$end\n") with open(base + ".dfr", "w") as f: for line in fraginfo: f.write(line) f.write("\n$bond\n") for line in bonds: f.write(line) f.write("$end bond\n\n$angle\n") for line in angles: f.write(line) f.write("$end angle\n\n$dihedral\n") for line in dihedrals: f.write(line) if dih4: for line in improper: f.write(line) f.write("$end dihedral\n") if improper: for line in improper: print("\n$improper dihedral\n") f.write(line) f.write("$end improper dihedral\n") if not flexfrag: withoutba = clean_dofs(base + ".dfr") with open(base + ".dfr", 'w') as f: f.write(withoutba) print("The files %s and %s were successfully generated." % (base + ".txt", base + ".dfr"))