def get_formula(self):
    """ Get the chemical formula

    Element counts are taken from the atoms stored in ``self.structure``. The
    number of additional hydrogens is estimated from the difference between the
    molecular weight reported by the structure and the summed element masses of
    the stored atoms, divided by the mass of hydrogen.

    Returns:
        :obj:`EmpiricalFormula`: chemical formula, or :obj:`None` if
        ``self.structure`` is not set
    """
    if not self.structure:
        return None

    el_table = openbabel.OBElementTable()
    counts = {}
    mass = 0
    # atoms are fetched with 1-based indices
    for i_atom in range(1, self.structure.NumAtoms() + 1):
        atomic_num = self.structure.GetAtom(i_atom).GetAtomicNum()
        el = el_table.GetSymbol(atomic_num)
        counts[el] = counts.get(el, 0) + 1
        mass += el_table.GetMass(atomic_num)

    formula = EmpiricalFormula(counts)

    # calc hydrogens because OpenBabel doesn't output this
    # (presumably these are the hydrogens OpenBabel keeps implicit -- confirm)
    n_extra_h = round((self.structure.GetMolWt() - mass) / el_table.GetMass(1))

    # Add to the hydrogens already counted as stored atoms instead of
    # overwriting them; otherwise a structure with explicit hydrogens would
    # lose them from the formula.
    formula['H'] = counts.get('H', 0) + n_extra_h
    return formula
def save_structfile(poltype,molstruct, structfname):
    """
    Intent: Output the data in the OBMol structure to a file (such as *.xyz)
    Input:
        poltype: object providing `symmetryclass` and `prmstartidx`
        molstruct: OBMol structure
        structfname: output file name
    Output:
        file is output to structfname
        Returns None when the hand-written xyz layout is used, otherwise the
        value returned by OBConversion.WriteFile.
    Referenced By: tor_opt_sp, compute_mm_tor_energy
    Description: -
    """
    strctext = os.path.splitext(structfname)[1]

    # NOTE(review): this is a substring test, not an equality test, so it is
    # also true for '', '.', '.x', '.xy', ... Kept as-is so existing callers
    # see the same branch selection; confirm whether `== '.xyz'` was intended.
    if strctext in '.xyz':
        maxidx = max(poltype.symmetryclass)
        etab = openbabel.OBElementTable()
        # `with` guarantees the handle is flushed and closed; the previous
        # code never closed it.
        with open(structfname, "w") as tmpfh:
            tmpfh.write('%6d %s\n' % (molstruct.NumAtoms(), molstruct.GetTitle()))
            for ia in openbabel.OBMolAtomIter(molstruct):
                # index, symbol, coordinates, then a type number derived from
                # the atom's symmetry class
                tmpfh.write(
                    '%6d %2s %13.6f %11.6f %11.6f %5d' %
                    (ia.GetIdx(), etab.GetSymbol(ia.GetAtomicNum()),
                     ia.x(), ia.y(), ia.z(),
                     poltype.prmstartidx +
                     (maxidx - poltype.symmetryclass[ia.GetIdx() - 1])))
                # neighbor indices are appended in ascending order
                neighbors = sorted(
                    iaa.GetIdx() for iaa in openbabel.OBAtomAtomIter(ia))
                for nbridx in neighbors:
                    tmpfh.write('%5d' % nbridx)
                tmpfh.write('\n')
    else:
        # any other extension: let OpenBabel choose the writer from the name
        tmpconv = openbabel.OBConversion()
        inFormat = openbabel.OBConversion.FormatFromExt(structfname)
        tmpconv.SetOutFormat(inFormat)
        return tmpconv.WriteFile(molstruct, structfname)
def _parse_atom_site_openbabel(parsed):
    """Parse _atom_site record to OBMolecule

    Atoms are added in file order. A new residue is started whenever the
    chain id, the residue id or the residue name differs from the previous
    atom's.

    Args:
        parsed (dict of str): Parsed mmcif file.

    Returns:
        [OBMol]: openbabel representation of the protein structure.
    """
    perceived_atom_site = parsed["_atom_site"]
    atom_site = _trim_models(perceived_atom_site)

    table = ob.OBElementTable()
    last_res_id = None
    last_res_name = None
    last_chain_id = None
    chain_num = 0
    res = None

    mol = ob.OBMol()
    mol.SetChainsPerceived()
    mol.BeginModify()

    for i in range(len(atom_site["id"])):
        current_res_id = _get_res_id(atom_site, i)
        ins_code = _get_ins_code(atom_site, i)
        chain_id = atom_site["auth_asym_id"][i]
        res_name = atom_site["label_comp_id"][i]

        # Evaluate the chain change BEFORE last_chain_id is overwritten.
        # Comparing after the update is always False, so a new chain whose
        # first residue repeats the previous residue's id and name would be
        # merged into the previous residue.
        chain_changed = chain_id != last_chain_id
        if chain_changed:
            chain_num += 1
            last_chain_id = chain_id

        if (chain_changed
                or current_res_id != last_res_id
                or res_name != last_res_name):
            last_res_id = current_res_id
            last_res_name = res_name
            res = mol.NewResidue()
            res.SetChainNum(chain_num)
            res.SetNum(str(last_res_id))
            res.SetName(res_name)
            res.SetInsertionCode(ins_code)

        _init_openbabel_atom(table, mol, res, atom_site, i)

    resdat = ob.OBResidueData()
    resdat.AssignBonds(mol, ob.OBBitVec())
    mol.ConnectTheDots()
    mol.PerceiveBondOrders()

    if "_struct_conn" in parsed:
        parse_struct_conn_bonds(mol, parsed)

    mol.EndModify()
    return mol
def get_mass(types):
    """Return one mass per entry of ``types``, in input order.

    Each entry is converted to an atomic number with
    ``OBElementTable.GetAtomicNum`` (presumably entries are element symbols --
    confirm with callers) and that number is passed to
    ``OBElementTable.GetMass``.
    """
    table = ob.OBElementTable()
    return [table.GetMass(table.GetAtomicNum(entry)) for entry in types]
def get_mol_species(mol):
    """Return the element symbol of every atom in ``mol``, in atom order.

    ``mol`` must provide ``NumAtoms()`` and ``GetAtom(i)``; atoms are fetched
    with indices 1..NumAtoms().
    """
    atom_count = mol.NumAtoms()
    table = ob.OBElementTable()
    return [
        table.GetSymbol(mol.GetAtom(index).GetAtomicNum())
        for index in range(1, atom_count + 1)
    ]
def read_txt_to_mol(txtfile):
    """Read a .txt parameter file and return the molecule and its parameters.

    The atoms are written to a temporary .xyz file which is then read with
    pybel, so that bonds, angles and dihedrals can be perceived from the
    geometry. The temporary file is always removed, including when parsing
    fails or the function exits early.

    Expected layout, as consumed below:
      line 1: read and ignored here
      line 2: number of molecules (must be 1, otherwise the process exits)
      line 3: number of atoms (first field)
      then one line per atom: field 1 is the atomic number, fields 2-4 the
      coordinates and fields 5+ exactly three values (charge, epsilon, sigma)

    Returns:
        tuple: (pybel molecule, charges, epsilons, sigmas)
    """
    # lists to store the charges and LJ parameters
    q = []
    eps = []
    sig = []

    # pick the atomic number -> symbol lookup once instead of once per atom
    if ob3:
        get_symbol = openbabel.GetSymbol
    else:
        etab = openbabel.OBElementTable()
        get_symbol = etab.GetSymbol

    # convert the .txt to a .xyz to read in pybel mol to perceive all the bonds, angles, dihedrals..
    fd, temp_path = tempfile.mkstemp(suffix=".xyz")
    try:
        with os.fdopen(fd, 'w') as fxyz, open(txtfile, 'r') as f:
            f.readline()  # first line is not needed here

            if int(f.readline().strip()) != 1:
                print(
                    "Your .txt should have only one molecule, the one present in the .dfr"
                )
                # NOTE(review): exit status 0 on an error path is kept for
                # compatibility with existing callers.
                sys.exit(0)

            natoms = int(f.readline().split()[0])

            # write .xyz header
            fxyz.write("%d\nGenerated from %s\n" % (natoms, txtfile))

            for _ in range(natoms):
                fields = f.readline().split()

                # write the atomic symbol and coordinates
                atnum = int(fields[1])
                x, y, z = [float(v) for v in fields[2:5]]
                fxyz.write("%s\t%f\t%f\t%f\n" % (get_symbol(atnum), x, y, z))

                # store the parameters
                qv, epsv, sigv = [float(v) for v in fields[5:]]
                q.append(qv)
                eps.append(epsv)
                sig.append(sigv)

        # the .xyz handle is closed (and flushed) before pybel reads it
        mol = next(pybel.readfile("xyz", temp_path))
    finally:
        # runs on success, on exceptions and on the sys.exit above
        os.remove(temp_path)

    return mol, q, eps, sig
def get_mol_info(mol):
    """Return the element symbols and coordinates of the atoms in ``mol``.

    ``mol`` is iterated once; every item must expose ``atomicnum`` and
    ``coords`` (assumes a pybel molecule -- TODO confirm).

    Returns:
        tuple: (numpy array of symbols, numpy array of coordinates), both in
        iteration order
    """
    # table to convert atomic number to symbols
    table = openbabel.OBElementTable()
    atoms = list(mol)
    symbols = [table.GetSymbol(atom.atomicnum) for atom in atoms]
    positions = [atom.coords for atom in atoms]
    return np.asarray(symbols), np.asarray(positions)
def to_openbabel_Mol(mol, CalcBondmap = False):
    """Build an OBMol from ``mol``.

    Args:
        mol: object with ``atoms`` (entries are passed to
            ``OBElementTable.GetAtomicNum``), ``coords`` (``None`` or one
            3-component coordinate per atom) and ``bondmap`` (used only when
            ``CalcBondmap`` is false).
        CalcBondmap: when true, bonds are perceived by OpenBabel
            (``ConnectTheDots`` + ``PerceiveBondOrders``); otherwise each
            ``bondmap`` entry ``(i, j, order)`` is added with ``i`` and ``j``
            shifted by one.

    Returns:
        The populated ``ob.OBMol``.
    """
    obmol = ob.OBMol()
    # one element table for all atoms instead of a new one per atom
    table = ob.OBElementTable()

    # `is not None` rather than `== None`: comparing a numpy array with `==`
    # is element-wise and makes the truth test raise ValueError.
    if mol.coords is not None:
        for symbol, coord in zip(mol.atoms, mol.coords):
            obatom = ob.OBAtom()
            obatom.SetAtomicNum(table.GetAtomicNum(symbol))
            obatom.SetVector(ob.vector3(coord[0], coord[1], coord[2]))
            obmol.InsertAtom(obatom)
    else:
        for symbol in mol.atoms:
            obatom = ob.OBAtom()
            obatom.SetAtomicNum(table.GetAtomicNum(symbol))
            obmol.InsertAtom(obatom)

    if CalcBondmap:
        obmol.ConnectTheDots()
        obmol.PerceiveBondOrders()
    else:
        for bond in mol.bondmap:
            # +1: bondmap indices are shifted before being given to AddBond
            obmol.AddBond(bond[0] + 1, bond[1] + 1, bond[2])

    return obmol
def CreatePsi4TorESPInputFile(poltype,finalstruct,torxyzfname,optmol,molecprefix,a,b,c,d,torang,phaseangle,makecube=None):
    """
    Intent: Write the Psi4 single-point input file for one torsion angle
    Input:
        poltype: object providing the torsion SP settings (torsppcm,
                 torspbasisset, torsmem, numproc, scratchdir, torspmethod)
        finalstruct: not used by this function
        torxyzfname: xyz file whose coordinates are written to the input;
                     if it does not exist the molecule block has no atoms
        optmol: OBMol providing the total charge and the element of each atom
        molecprefix: prefix of the generated file names
        a,b,c,d: the four atom indices of the torsion (used in the file name)
        torang, phaseangle: their sum modulo 360 is used in the file name
        makecube: not used by this function
    Output:
        the *_psi4.dat input file is written
        returns (inputname, outputname) where outputname is inputname with
        its extension replaced by '.log'
    """
    inputname= '%s-sp-%d-%d-%d-%d-%03d_psi4.dat' % (molecprefix,a,b,c,d,round((torang+phaseangle)%360))
    etab = openbabel.OBElementTable()

    # `with` closes the input file even if a write or a parse below raises
    with open(inputname,'w') as temp:
        temp.write('molecule { '+'\n')
        # the multiplicity is always written as 1
        temp.write('%d %d\n' % (optmol.GetTotalCharge(), 1))

        if os.path.isfile(torxyzfname):
            with open(torxyzfname,'r') as xyzstr:
                xyzstrl = xyzstr.readlines()
            # line 0 of the xyz file is skipped; atom k is read from line k
            for i, atm in enumerate(openbabel.OBMolAtomIter(optmol), start=1):
                fields = xyzstrl[i].split()
                temp.write('%2s %11.6f %11.6f %11.6f\n' % (etab.GetSymbol(atm.GetAtomicNum()), float(fields[2]),float(fields[3]),float(fields[4])))
        temp.write('}'+'\n')

        # explicit comparison preserved: only a value equal to True enables PCM
        if poltype.torsppcm==True:
            temp.write('set {'+'\n')
            temp.write(' basis '+poltype.torspbasisset+'\n')
            temp.write(' e_convergence 10 '+'\n')
            temp.write(' d_convergence 10 '+'\n')
            temp.write(' scf_type pk'+'\n')
            temp.write(' pcm true'+'\n')
            temp.write(' pcm_scf_type total '+'\n')
            temp.write('}'+'\n')
            temp.write('pcm = {'+'\n')
            temp.write(' Units = Angstrom'+'\n')
            temp.write(' Medium {'+'\n')
            temp.write(' SolverType = IEFPCM'+'\n')
            temp.write(' Solvent = Water'+'\n')
            temp.write(' }'+'\n')
            temp.write(' Cavity {'+'\n')
            temp.write(' RadiiSet = UFF'+'\n')
            temp.write(' Type = GePol'+'\n')
            temp.write(' Scaling = False'+'\n')
            temp.write(' Area = 0.3'+'\n')
            temp.write(' Mode = Implicit'+'\n')
            temp.write(' }'+'\n')
            temp.write('}'+'\n')

        temp.write('memory '+poltype.torsmem+'\n')
        temp.write('set_num_threads(%s)'%(poltype.numproc)+'\n')
        temp.write('psi4_io.set_default_path("%s")'%(poltype.scratchdir)+'\n')
        temp.write('set freeze_core True'+'\n')
        temp.write("E, wfn = energy('%s/%s',return_wfn=True)" % (poltype.torspmethod.lower(),poltype.torspbasisset)+'\n')
        temp.write('oeprop("WIBERG_LOWDIN_INDICES")'+'\n')
        temp.write('clean()'+'\n')

    outputname=os.path.splitext(inputname)[0] + '.log'
    return inputname,outputname
def _add_atom_collection(self, zorder=100, **kwargs):
    """Draw atoms as colored circles on the axes.

    One circle per atom of ``self.molecule`` is created at the atom's 2D
    position, with the element's covalent radius and RGB color taken from the
    OpenBabel element table. ``facecolors`` and ``edgecolors`` passed by the
    caller are replaced by the element colors. The resulting collection is
    stored in ``self._mol_atoms`` and added to ``self.axes``.
    """
    table = ob.OBElementTable()
    circles = []
    element_colors = []
    for atom in ob.OBMolAtomIter(self.molecule):
        atomic_num = atom.GetAtomicNum()
        element_colors.append(table.GetRGB(atomic_num))
        covalent_radius = table.GetCovalentRad(atomic_num)
        circles.append(Circle(self._2Dcoords(atom), covalent_radius))

    kwargs.update(facecolors=element_colors, edgecolors=element_colors)
    self._mol_atoms = PatchCollection(circles, zorder=zorder, **kwargs)
    self.axes.add_collection(self._mol_atoms)
def gen_optcomfile(poltype,comfname,numproc,maxmem,maxdisk,chkname,mol):
    """
    Intent: Create *.com file for qm opt
    Input:
        poltype: object providing the optimization settings (optmaxcycle,
                 freq, optpcm, optmethod, optbasisset, molecprefix)
        comfname: com file name
        numproc: number of processors
        maxmem: max memory size
        maxdisk: max disk size
        chkname: chk file name
        mol: OBMol object
    Output:
        *opt-*.com is written
    Referenced By: run_gaussian
    Description:
        The header is written by write_com_header; the route line, comment
        line, charge/multiplicity line and one coordinate line per atom are
        then appended, followed by a blank line.
    """
    write_com_header(poltype,comfname,chkname,maxdisk,maxmem,numproc)

    optstr = gen_opt_str(poltype, ["maxcycle=%s"%(poltype.optmaxcycle)])

    if 'I ' in mol.GetSpacedFormula():
        # formulas containing 'I ' (presumably iodine -- confirm) always use
        # HF/Gen with a frequency calculation
        optstring = "%s HF/Gen freq Guess=INDO MaxDisk=%s\n" % (optstr,maxdisk)
    else:
        # the optional keywords carry their own leading space so the line is
        # identical to the previously hard-coded variants
        freqkw = " freq" if poltype.freq==True else ""
        pcmkw = " SCRF=(PCM)" if poltype.optpcm==True else ""
        optstring = "%s %s/%s%s Guess=INDO MaxDisk=%s%s\n" % (optstr,poltype.optmethod,poltype.optbasisset,freqkw,maxdisk,pcmkw)

    commentstr = poltype.molecprefix + " Gaussian SP Calculation on " + gethostname()
    etab = openbabel.OBElementTable()

    # single append session; `with` closes the file even if a write fails
    with open(comfname, "a") as tmpfh:
        tmpfh.write(optstring)
        tmpfh.write('\n%s\n\n' % commentstr)
        tmpfh.write('%d %d\n' % (mol.GetTotalCharge(), mol.GetTotalSpinMultiplicity()))
        print('atomnumber',mol.NumAtoms())
        for atm in openbabel.OBMolAtomIter(mol):
            tmpfh.write('%2s %11.6f %11.6f %11.6f\n' % (etab.GetSymbol(atm.GetAtomicNum()), atm.x(), atm.y(), atm.z()))
        tmpfh.write('\n')
def get_radius(self, mol):
    """Return a scaled radius for every atom of ``mol``, in atom order.

    For each atom the reference value ``CovalentRadius.radius[symbol]`` is
    multiplied by ``self.angstrom2au`` and by ``self.metal_radius_scale`` when
    the symbol is in ``self.metals``, otherwise by
    ``self.covalent_radius_scale``.
    """
    reference = CovalentRadius.radius
    atom_count = mol.NumAtoms()
    table = ob.OBElementTable()

    radii = []
    for index in range(1, atom_count + 1):
        symbol = table.GetSymbol(mol.GetAtom(index).GetAtomicNum())
        scale = (self.metal_radius_scale if symbol in self.metals
                 else self.covalent_radius_scale)
        radii.append(reference[symbol] * self.angstrom2au * scale)
    return radii
def __sortElements(elts):
    """
    Sorts elements in list according to atomic number

    The list is modified in place. For every entry the first 3, then 2, then
    1 characters are looked up in the element table; the first prefix with a
    positive atomic number decides the sort key. Entries for which no prefix
    is recognized are dropped from the list.
    """
    table = openbabel.OBElementTable()
    keyed = []
    for name in elts:
        # longest prefix first
        for width in (3, 2, 1):
            number = table.GetAtomicNum(name[:width])
            if number > 0:
                # zero-padded number in front so a plain string sort orders
                # by atomic number first, then by name
                keyed.append("%03d%s" % (number, name))
                break

    # strip the 3-digit key again and write the result back into the same list
    elts[:] = [entry[3:] for entry in sorted(keyed)]
def _add_atom_labels(self, zorder=100, **kwargs):
    """Draw atom labels on the axes.

    One ``AtomText`` per atom of ``self.molecule`` is created at the atom's 2D
    position, from the element symbol, the atom index and the element's RGB
    color. Alignment and ``bbox`` passed by the caller are replaced by
    centered alignment and a white rounded box. Each label is appended to
    ``self._mol_labels`` and added to ``self.axes``.
    """
    # the same settings apply to every label, so they are merged once
    kwargs.update(
        horizontalalignment="center",
        verticalalignment="center",
        bbox=dict(boxstyle='round', facecolor='white', edgecolor='none'))

    table = ob.OBElementTable()
    for atom in ob.OBMolAtomIter(self.molecule):
        x, y = self._2Dcoords(atom)
        atomic_num = atom.GetAtomicNum()
        symbol = table.GetSymbol(atomic_num)
        label = AtomText(x, y, symbol, atom.GetIdx(),
                         table.GetRGB(atomic_num),
                         zorder=zorder, **kwargs)
        self._mol_labels.append(label)
        self.axes.add_artist(label)
def atom_groups(self):
    """Group the atoms of ``self.obmol`` by element symbol.

    Returns:
        tuple: ``(groups, groupnames)`` where ``groupnames[k]`` is an element
        symbol and ``groups[k]`` the list of atom indices (OpenBabel index
        minus one) carrying that symbol.
    """
    table = openbabel.OBElementTable()
    members_by_symbol = {}
    for atom in openbabel.OBMolAtomIter(self.obmol):
        symbol = table.GetSymbol(atom.GetAtomicNum())
        members_by_symbol.setdefault(symbol, []).append(atom.GetIdx() - 1)

    groupnames = list(members_by_symbol)
    groups = [members_by_symbol[symbol] for symbol in groupnames]
    return groups, groupnames
def iter_bonds( self ) :
    """bonds iterator

    Yields one dict per bond of ``self._mol.OBMol`` with the keys
    ``id`` (bond index plus one), ``type`` ("amide", "ester", "carbonyl" or
    "covalent", tested in that order) and ``atom1`` / ``atom2`` (element
    symbol followed by the atom index; the symbol "Xx", in any case, is
    written as "x").
    """
    if self._verbose :
        sys.stdout.write( "%s.iter_bonds()\n" % (self.__class__.__name__,) )

    table = openbabel.OBElementTable()

    def _atom_label( atom ) :
        # symbol + index, with the "Xx" placeholder symbol shortened to "x"
        symbol = table.GetSymbol( atom.GetAtomicNum() )
        if str( symbol ).lower() == "xx" :
            symbol = "x"
        return "%s%d" % (symbol, atom.GetIdx())

    for index in xrange( self._mol.OBMol.NumBonds() ) :
        bond = self._mol.OBMol.GetBond( index )

        if bond.IsAmide() :
            kind = "amide"
        elif bond.IsEster() :
            kind = "ester"
        elif bond.IsCarbonyl() :
            kind = "carbonyl"
        else :
            kind = "covalent"

        record = { "id" : (bond.GetIdx() + 1) }
        record["type"] = kind
        record["atom1"] = _atom_label( bond.GetBeginAtom() )
        record["atom2"] = _atom_label( bond.GetEndAtom() )
        yield record
def parse_mol_info(fname, fcharges, axis, buffa, buffo, pbcbonds, printdih, ignorebonds, ignoreimproper):
    """Build the text of a LAMMPS topology (data) file from a structure file.

    The structure is read with OpenBabel, split into molecules, and the
    Masses, Coeffs, Atoms, Bonds, Angles and (optionally) Dihedrals and
    Impropers sections are assembled as strings and concatenated.

    Args:
        fname: structure file; the input format is taken from its extension.
            Atom labels come from the residue atom names when the extension
            is "pdb", otherwise from the element symbols.
        fcharges: falsy, or path of a text file whose lines hold a label in
            the first field and a charge in the second. Without it the charge
            column is written as "X.XXXXXX".
        axis: "x", "y" or "z"; any other value prints an error and exits.
        buffa: length added to the box along ``axis``.
        buffo: length added to the box along the other two axes.
        pbcbonds: when true, the cell is replicated and replica atoms closer
            than 8. to a real atom are added so that bonds crossing the
            periodic boundary are found; "replicated.xyz" is written as a
            side effect.
        printdih: when true, dihedrals are listed.
        ignorebonds: when true, the first separated molecule is left out of
            the bond, angle, dihedral and improper searches.
        ignoreimproper: when true, impropers are not searched.

    Returns:
        str: header, Masses, Coeffs, Atoms, Bonds and Angles sections, plus
        Dihedrals (and Impropers) when requested and found.

    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-collapsed source. In particular the improper search is assumed
    to sit inside ``if printdih:`` and the Improper Coeffs inside the
    dihedral-coeffs branch -- confirm against the upstream file.
    NOTE(review): ``realnumber`` is only assigned when ``pbcbonds`` is true;
    the remapping branches that read it are only reached for atom ids
    >= natoms.
    """
    iaxis = {"x": 0, "y": 1, "z": 2}
    if axis in iaxis:
        repaxis = iaxis[axis]
    else:
        print("Error: invalid axis")
        # NOTE(review): exits with status 0 although this is an error path
        sys.exit(0)

    # optional charges file: label -> charge
    if fcharges:
        chargesLabel = {}
        with open(fcharges, "r") as f:
            for line in f:
                chargesLabel[line.split()[0]] = float(line.split()[1])

    # set openbabel file format
    base, ext = os.path.splitext(fname)
    obConversion = openbabel.OBConversion()
    obConversion.SetInAndOutFormats(ext[1:], "xyz")
    # trick to disable ring perception and make the ReadFile waaaay faster
    # Source: https://sourceforge.net/p/openbabel/mailman/openbabel-discuss/thread/56e1812d-396a-db7c-096d-d378a077853f%40ipcms.unistra.fr/#msg36225392
    obConversion.AddOption("b", openbabel.OBConversion.INOPTIONS)

    # read molecule to OBMol object
    mol = openbabel.OBMol()
    obConversion.ReadFile(mol, fname)
    mol.ConnectTheDots()  # necessary because of the 'b' INOPTION

    # split the molecules
    molecules = mol.Separate()

    # detect the molecules types
    # mTypes: type number -> list of atomic numbers of that molecule type
    # mapmTypes: molecule number (1-based) -> type number
    # atomIdToMol: atom id -> molecule number (1-based)
    mTypes = {}
    mapmTypes = {}
    atomIdToMol = {}
    nty = 0
    for i, submol in enumerate(molecules, start=1):
        atomiter = openbabel.OBMolAtomIter(submol)
        atlist = []
        for at in atomiter:
            atlist.append(at.GetAtomicNum())
            atomIdToMol[at.GetId()] = i

        foundType = None
        for ty in mTypes:
            # check if there's already a molecule of this type
            # (two molecules match when their atomic-number lists are equal)
            if atlist == mTypes[ty]:
                foundType = ty

        # if not, create a new type
        if not foundType:
            nty += 1
            foundType = nty
            mTypes[nty] = atlist

        mapmTypes[i] = foundType

    # get atomic labels from pdb
    # atoms of any molecule after the first get the molecule type number
    # appended to their label, but only when more than one type exists
    idToAtomicLabel = {}
    if ext[1:] == "pdb":
        for res in openbabel.OBResidueIter(mol):
            for atom in openbabel.OBResidueAtomIter(res):
                if (atomIdToMol[atom.GetId()] > 1) and (len(mTypes) > 1):
                    idToAtomicLabel[
                        atom.GetId()] = res.GetAtomID(atom).strip() + str(
                            mapmTypes[atomIdToMol[atom.GetId()]])
                else:
                    idToAtomicLabel[atom.GetId()] = res.GetAtomID(atom).strip()
    else:
        # the element table is only created when `ob3` is false; otherwise
        # the module-level openbabel.GetSymbol is used
        if not ob3:
            etab = openbabel.OBElementTable()
        for atom in openbabel.OBMolAtomIter(mol):
            if (atomIdToMol[atom.GetId()] > 1) and (len(mTypes) > 1):
                if ob3:
                    idToAtomicLabel[atom.GetId()] = openbabel.GetSymbol(
                        atom.GetAtomicNum()) + str(
                            mapmTypes[atomIdToMol[atom.GetId()]])
                else:
                    idToAtomicLabel[atom.GetId()] = etab.GetSymbol(
                        atom.GetAtomicNum()) + str(
                            mapmTypes[atomIdToMol[atom.GetId()]])
            else:
                if ob3:
                    idToAtomicLabel[atom.GetId()] = openbabel.GetSymbol(
                        atom.GetAtomicNum())
                else:
                    idToAtomicLabel[atom.GetId()] = etab.GetSymbol(
                        atom.GetAtomicNum())

    # print(idToAtomicLabel)

    # identify atom types and get masses
    # one atom type per distinct label, numbered in order of first appearance
    outMasses = "Masses\n\n"
    massTypes = {}
    mapTypes = {}
    nmassTypes = 0
    atomIterator = openbabel.OBMolAtomIter(mol)
    for atom in atomIterator:
        i = atom.GetId()
        if idToAtomicLabel[i] not in massTypes:
            nmassTypes += 1
            mapTypes[nmassTypes] = idToAtomicLabel[i]
            massTypes[idToAtomicLabel[i]] = nmassTypes
            outMasses += "\t%d\t%.3f\t# %s\n" % (
                nmassTypes, atom.GetAtomicMass(), idToAtomicLabel[i])

    # create atoms list
    # the coordinate extremes are tracked for the fallback bounding box
    outAtoms = "Atoms # full\n\n"
    xmin = float("inf")
    xmax = float("-inf")
    ymin = float("inf")
    ymax = float("-inf")
    zmin = float("inf")
    zmax = float("-inf")
    natoms = 0
    acoords = []
    for mnum, imol in enumerate(molecules, start=1):
        atomIterator = openbabel.OBMolAtomIter(imol)
        for atom in sorted(atomIterator, key=lambda x: x.GetId()):
            natoms += 1
            i = atom.GetId()
            apos = (atom.GetX(), atom.GetY(), atom.GetZ())
            acoords.append(Atom(atom.GetAtomicNum(), apos))

            # look for the maximum and minimum x for the box (improve later with numpy and all coordinates)
            if apos[0] > xmax:
                xmax = apos[0]
            if apos[0] < xmin:
                xmin = apos[0]
            if apos[1] > ymax:
                ymax = apos[1]
            if apos[1] < ymin:
                ymin = apos[1]
            if apos[2] > zmax:
                zmax = apos[2]
            if apos[2] < zmin:
                zmin = apos[2]

            # atom ids are written 1-based (i + 1)
            if fcharges:
                outAtoms += "\t%d\t%d\t%d\t%.6f\t%.4f\t%.4f\t%.4f\t# %s\n" % (
                    i + 1, mnum, massTypes[idToAtomicLabel[i]],
                    chargesLabel[idToAtomicLabel[i]], atom.GetX(), atom.GetY(),
                    atom.GetZ(), idToAtomicLabel[i])
            else:
                outAtoms += "\t%d\t%d\t%d\tX.XXXXXX\t%.4f\t%.4f\t%.4f\t# %s\n" % (
                    i + 1, mnum,
                    massTypes[idToAtomicLabel[i]], atom.GetX(), atom.GetY(),
                    atom.GetZ(), idToAtomicLabel[i])

    # define box shape and size
    # first try the unit cell stored with the molecule; if anything in that
    # path raises, fall back to the bounding box of the coordinates
    # NOTE(review): the bare `except` also hides unrelated errors
    try:
        fromBounds = False
        rcell = mol.GetData(12)
        cell = openbabel.toUnitCell(rcell)
        v1 = [
            cell.GetCellVectors()[0].GetX(),
            cell.GetCellVectors()[0].GetY(),
            cell.GetCellVectors()[0].GetZ()
        ]
        v2 = [
            cell.GetCellVectors()[1].GetX(),
            cell.GetCellVectors()[1].GetY(),
            cell.GetCellVectors()[1].GetZ()
        ]
        v3 = [
            cell.GetCellVectors()[2].GetX(),
            cell.GetCellVectors()[2].GetY(),
            cell.GetCellVectors()[2].GetZ()
        ]
        boxinfo = [v1, v2, v3]
        # the cell is orthogonal when every off-diagonal component is ~0
        orthogonal = True
        for i, array in enumerate(boxinfo):
            for j in range(3):
                if i == j:
                    continue
                if not math.isclose(0., array[j], abs_tol=1e-6):
                    orthogonal = False
    except:
        fromBounds = True
        v1 = [xmax - xmin, 0., 0.]
        v2 = [0., ymax - ymin, 0.]
        v3 = [0., 0., zmax - zmin]
        orthogonal = True

    # add buffer
    # buffa goes on the diagonal entry of the chosen axis, buffo on the
    # diagonal entries of the other two axes; only done for orthogonal boxes
    if orthogonal:
        buf = []
        boxinfo = [v1, v2, v3]
        for i, val in enumerate(boxinfo[repaxis]):
            if i == repaxis:
                buf.append(val + buffa)
            else:
                buf.append(val)
        boxinfo[repaxis] = buf
        for i in range(3):
            if i == repaxis:
                continue
            buf = []
            for j, val in enumerate(boxinfo[i]):
                if j == i:
                    buf.append(val + buffo)
                else:
                    buf.append(val)
            boxinfo[i] = buf

    # print(boxinfo)

    # Duplicate to get the bonds in the PBC. Taken from (method _crd2bond):
    # https://github.com/tongzhugroup/mddatasetbuilder/blob/66eb0f15e972be0f5534dcda27af253cd8891ff2/mddatasetbuilder/detect.py#L213
    if pbcbonds:
        acoords = Atoms(acoords, cell=boxinfo, pbc=True)
        repatoms = acoords.repeat(
            2
        )[natoms:]  # repeat the unit cell in each direction (len(repatoms) = 7*natoms)
        tree = cKDTree(acoords.get_positions())
        d = tree.query(repatoms.get_positions(), k=1)[0]
        # keep only replica atoms within 8. of some real atom
        nearest = d < 8.
        ghost_atoms = repatoms[nearest]
        # realnumber[k]: index of the real atom that ghost atom k replicates
        realnumber = np.where(nearest)[0] % natoms
        acoords += ghost_atoms

        write("replicated.xyz",
              acoords)  # write the structure with the replicated atoms

        # write new mol with new bonds
        nmol = openbabel.OBMol()
        nmol.BeginModify()
        for idx, (num, position) in enumerate(
                zip(acoords.get_atomic_numbers(), acoords.positions)):
            a = nmol.NewAtom(idx)
            a.SetAtomicNum(int(num))
            a.SetVector(*position)
        nmol.ConnectTheDots()
        # nmol.PerceiveBondOrders() # super slow becauses it looks for rings
        nmol.EndModify()
    else:
        # same rebuild without replicas
        acoords = Atoms(acoords, cell=boxinfo, pbc=False)
        nmol = openbabel.OBMol()
        nmol.BeginModify()
        for idx, (num, position) in enumerate(
                zip(acoords.get_atomic_numbers(), acoords.positions)):
            a = nmol.NewAtom(idx)
            a.SetAtomicNum(int(num))
            a.SetVector(*position)
        nmol.ConnectTheDots()
        # nmol.PerceiveBondOrders() # super slow becauses it looks for rings
        nmol.EndModify()

    # identify bond types and create bond list
    # a bond type is the pair of atom labels, in either order
    outBonds = "Bonds # harmonic\n\n"
    bondTypes = {}
    mapbTypes = {}
    nbondTypes = 0
    nbonds = 0
    bondsToDelete = []
    bondIterators = []
    if ignorebonds:
        # the first separated molecule is skipped
        sepmols = nmol.Separate()
        for smol in sepmols[1:]:
            bondIterators.append(openbabel.OBMolBondIter(smol))
    else:
        bondIterators.append(openbabel.OBMolBondIter(nmol))

    lastidx = 1
    for iterator in bondIterators:
        for i, bond in enumerate(iterator, lastidx):
            b1 = bond.GetBeginAtom().GetId()
            b2 = bond.GetEndAtom().GetId()

            # check if its a bond of the replica only
            if (b1 >= natoms) and (b2 >= natoms):
                bondsToDelete.append(bond)
                continue

            # remap to a real atom if needed
            if b1 >= natoms:
                b1 = realnumber[b1 - natoms]
            if b2 >= natoms:
                b2 = realnumber[b2 - natoms]

            # identify bond type
            btype1 = "%s - %s" % (idToAtomicLabel[b1], idToAtomicLabel[b2])
            btype2 = "%s - %s" % (idToAtomicLabel[b2], idToAtomicLabel[b1])

            if btype1 in bondTypes:
                bondid = bondTypes[btype1]
                bstring = btype1
            elif btype2 in bondTypes:
                bondid = bondTypes[btype2]
                bstring = btype2
            else:
                nbondTypes += 1
                mapbTypes[nbondTypes] = btype1
                bondid = nbondTypes
                bondTypes[btype1] = nbondTypes
                bstring = btype1

            nbonds += 1
            outBonds += "\t%d\t%d\t%d\t%d\t# %s\n" % (nbonds, bondid, b1 + 1,
                                                      b2 + 1, bstring)
        # the enumerate counter is only used to seed the next iterator;
        # the written bond numbers come from nbonds
        lastidx = i

    # delete the bonds of atoms from other replicas
    for bond in bondsToDelete:
        nmol.DeleteBond(bond)

    # identify angle types and create angle list
    angleTypes = {}
    mapaTypes = {}
    nangleTypes = 0
    nangles = 0
    angleIterators = []
    if ignorebonds:
        sepmols = nmol.Separate()
        for smol in sepmols[1:]:
            smol.FindAngles()
            angleIterators.append(openbabel.OBMolAngleIter(smol))
        # index offset for the angles of the second molecule: the atom count
        # of the skipped first molecule
        prevnumatoms = sepmols[0].NumAtoms()
    else:
        nmol.FindAngles()
        angleIterators.append(openbabel.OBMolAngleIter(nmol))

    outAngles = "Angles # harmonic\n\n"

    lastidx = 1
    for j, iterator in enumerate(angleIterators, 1):
        for i, angle in enumerate(iterator, lastidx):
            # entries 1 and 0 of the angle tuple are swapped when read
            # (presumably the vertex comes first in OpenBabel -- confirm)
            if ignorebonds:
                a1 = angle[1] + prevnumatoms
                a2 = angle[0] + prevnumatoms
                a3 = angle[2] + prevnumatoms
            else:
                a1 = angle[1]
                a2 = angle[0]
                a3 = angle[2]

            # remap to a real atom if needed
            if a1 >= natoms:
                a1 = realnumber[a1 - natoms]
            if a2 >= natoms:
                a2 = realnumber[a2 - natoms]
            if a3 >= natoms:
                a3 = realnumber[a3 - natoms]

            atype1 = "%s - %s - %s" % (
                idToAtomicLabel[a1], idToAtomicLabel[a2], idToAtomicLabel[a3])
            atype2 = "%s - %s - %s" % (
                idToAtomicLabel[a3], idToAtomicLabel[a2], idToAtomicLabel[a1])

            if atype1 in angleTypes:
                angleid = angleTypes[atype1]
                astring = atype1
            elif atype2 in angleTypes:
                angleid = angleTypes[atype2]
                astring = atype2
            else:
                nangleTypes += 1
                mapaTypes[nangleTypes] = atype1
                angleid = nangleTypes
                angleTypes[atype1] = nangleTypes
                astring = atype1

            nangles += 1
            outAngles += "\t%d\t%d\t%d\t%d\t%d\t# %s\n" % (
                nangles, angleid, a1 + 1, a2 + 1, a3 + 1, astring)
        lastidx = i
        # iterator j was built from sepmols[j]; advance the offset past it
        if ignorebonds:
            prevnumatoms += sepmols[j].NumAtoms()

    # identify dihedral types and create dihedral list
    if printdih:
        dihedralTypes = {}
        mapdTypes = {}
        ndihedralTypes = 0
        ndihedrals = 0
        dihedralIterators = []
        if ignorebonds:
            sepmols = nmol.Separate()
            for smol in sepmols[1:]:
                smol.FindTorsions()
                dihedralIterators.append(openbabel.OBMolTorsionIter(smol))
        else:
            nmol.FindTorsions()
            dihedralIterators.append(openbabel.OBMolTorsionIter(nmol))

        outDihedrals = "Dihedrals # charmmfsw\n\n"

        lastidx = 1
        for iterator in dihedralIterators:
            for i, dihedral in enumerate(iterator, lastidx):
                # NOTE(review): unlike the angles, no per-molecule offset is
                # added here when ignorebonds is set -- confirm this is intended
                a1 = dihedral[0]
                a2 = dihedral[1]
                a3 = dihedral[2]
                a4 = dihedral[3]

                # remap to a real atom if needed
                if a1 >= natoms:
                    a1 = realnumber[a1 - natoms]
                if a2 >= natoms:
                    a2 = realnumber[a2 - natoms]
                if a3 >= natoms:
                    a3 = realnumber[a3 - natoms]
                if a4 >= natoms:
                    a4 = realnumber[a4 - natoms]

                dtype1 = "%s - %s - %s - %s" % (
                    idToAtomicLabel[a1], idToAtomicLabel[a2],
                    idToAtomicLabel[a3], idToAtomicLabel[a4])
                dtype2 = "%s - %s - %s - %s" % (
                    idToAtomicLabel[a4], idToAtomicLabel[a3],
                    idToAtomicLabel[a2], idToAtomicLabel[a1])

                if dtype1 in dihedralTypes:
                    dihedralid = dihedralTypes[dtype1]
                    dstring = dtype1
                elif dtype2 in dihedralTypes:
                    dihedralid = dihedralTypes[dtype2]
                    dstring = dtype2
                else:
                    ndihedralTypes += 1
                    mapdTypes[ndihedralTypes] = dtype1
                    dihedralid = ndihedralTypes
                    dihedralTypes[dtype1] = ndihedralTypes
                    dstring = dtype1

                ndihedrals += 1
                outDihedrals += "\t%d\t%d\t%d\t%d\t%d\t%d\t# %s\n" % (
                    ndihedrals, dihedralid, a1 + 1, a2 + 1, a3 + 1, a4 + 1,
                    dstring)
            lastidx = i

        if not ignoreimproper:
            # look for the improper dihedrals
            improperDihedralTypes = {}
            mapiDTypes = {}
            niDihedralTypes = 0
            niDihedrals = 0
            mollist = []
            if ignorebonds:
                sepmols = nmol.Separate()
                for smol in sepmols[1:]:
                    smol.PerceiveBondOrders()
                    mollist.append(smol)
            else:
                nmol.PerceiveBondOrders()
                mollist.append(nmol)

            outImpropers = "Impropers # harmonic\n\n"

            for imol in mollist:
                atomIterator = openbabel.OBMolAtomIter(imol)
                for atom in atomIterator:
                    # GetValence is tried first and GetExplicitDegree is the
                    # fallback when it raises (presumably an OpenBabel 2 vs 3
                    # API difference -- confirm)
                    try:
                        # print(atom.GetHyb(), atom.GetAtomicNum(), atom.GetValence())
                        expDegree = atom.GetValence()
                    except:
                        # print(atom.GetHyb(), atom.GetAtomicNum(), atom.GetExplicitDegree())
                        expDegree = atom.GetExplicitDegree()

                    # returns impropers for atoms with connected to other 3 atoms and SP2 hybridization
                    if atom.GetHyb() == 2 and expDegree == 3:
                        # atoms reported at BFS depth 2 from the center are
                        # taken as its neighbors
                        connectedAtoms = []
                        for atom2, depth in openbabel.OBMolAtomBFSIter(
                                imol,
                                atom.GetId() + 1):
                            if depth == 2:
                                connectedAtoms.append(atom2)

                        # central atom first, then its three neighbors
                        torsional = [
                            atom.GetId() + 1, connectedAtoms[0].GetId() + 1,
                            connectedAtoms[1].GetId() + 1,
                            connectedAtoms[2].GetId() + 1
                        ]

                        a1 = torsional[0] - 1
                        a2 = torsional[1] - 1
                        a3 = torsional[2] - 1
                        a4 = torsional[3] - 1

                        # remap to a real atom if needed
                        if a1 >= natoms:
                            a1 = realnumber[a1 - natoms]
                        if a2 >= natoms:
                            a2 = realnumber[a2 - natoms]
                        if a3 >= natoms:
                            a3 = realnumber[a3 - natoms]
                        if a4 >= natoms:
                            a4 = realnumber[a4 - natoms]

                        dtype1 = "%s - %s - %s - %s" % (
                            idToAtomicLabel[a1], idToAtomicLabel[a2],
                            idToAtomicLabel[a3], idToAtomicLabel[a4])
                        dtype2 = "%s - %s - %s - %s" % (
                            idToAtomicLabel[a4], idToAtomicLabel[a3],
                            idToAtomicLabel[a2], idToAtomicLabel[a1])

                        if dtype1 in improperDihedralTypes:
                            idihedralid = improperDihedralTypes[dtype1]
                            dstring = dtype1
                        elif dtype2 in improperDihedralTypes:
                            idihedralid = improperDihedralTypes[dtype2]
                            dstring = dtype2
                        else:
                            niDihedralTypes += 1
                            mapiDTypes[niDihedralTypes] = dtype1
                            idihedralid = niDihedralTypes
                            improperDihedralTypes[dtype1] = niDihedralTypes
                            dstring = dtype1

                        niDihedrals += 1
                        outImpropers += "\t%d\t%d\t%d\t%d\t%d\t%d\t# %s\n" % (
                            niDihedrals, idihedralid, a1 + 1, a2 + 1, a3 + 1,
                            a4 + 1, dstring)

    # print header
    # three variants: with dihedrals only, with dihedrals and impropers,
    # or with neither
    if printdih and (ndihedrals > 0):
        if ignoreimproper or (niDihedrals == 0):
            header = "LAMMPS topology created from %s using pdb2lmp.py - By Henrique Musseli Cezar, 2020\n\n\t%d atoms\n\t%d bonds\n\t%d angles\n\t%d dihedrals\n\n\t%d atom types\n\t%d bond types\n\t%d angle types\n\t%d dihedral types\n\n" % (
                fname, natoms, nbonds, nangles, ndihedrals, nmassTypes,
                nbondTypes, nangleTypes, ndihedralTypes)
        else:
            header = "LAMMPS topology created from %s using pdb2lmp.py - By Henrique Musseli Cezar, 2020\n\n\t%d atoms\n\t%d bonds\n\t%d angles\n\t%d dihedrals\n\t%d impropers\n\n\t%d atom types\n\t%d bond types\n\t%d angle types\n\t%d dihedral types\n\t%d improper types\n\n" % (
                fname, natoms, nbonds, nangles, ndihedrals, niDihedrals,
                nmassTypes, nbondTypes, nangleTypes, ndihedralTypes,
                niDihedralTypes)
    else:
        header = "LAMMPS topology created from %s using pdb2lmp.py - By Henrique Musseli Cezar, 2020\n\n\t%d atoms\n\t%d bonds\n\t%d angles\n\n\t%d atom types\n\t%d bond types\n\t%d angle types\n\n" % (
            fname, natoms, nbonds, nangles, nmassTypes, nbondTypes,
            nangleTypes)

    # add box info
    if fromBounds:
        # bounding box: half of each buffer is added on either side
        boxsize = [(xmin, xmax), (ymin, ymax), (zmin, zmax)]
        boxsize[repaxis] = (boxsize[repaxis][0] - buffa / 2.,
                            boxsize[repaxis][1] + buffa / 2.)
        for i in range(3):
            if i == repaxis:
                continue
            boxsize[i] = (boxsize[i][0] - buffo / 2.,
                          boxsize[i][1] + buffo / 2.)
        header += "\t%.8f\t%.8f\t xlo xhi\n\t%.8f\t%.8f\t ylo yhi\n\t%.8f\t%.8f\t zlo zhi\n" % (
            boxsize[0][0], boxsize[0][1], boxsize[1][0], boxsize[1][1],
            boxsize[2][0], boxsize[2][1])
    else:
        # cell read from the file: the box starts at the origin
        if orthogonal:
            header += "\t%.8f\t%.8f\t xlo xhi\n\t%.8f\t%.8f\t ylo yhi\n\t%.8f\t%.8f\t zlo zhi\n" % (
                0., boxinfo[0][0], 0., boxinfo[1][1], 0., boxinfo[2][2])
        else:
            # non-orthogonal cell: the tilt factors are appended
            header += "\t%.8f\t%.8f\t xlo xhi\n\t%.8f\t%.8f\t ylo yhi\n\t%.8f\t%.8f\t zlo zhi\n\t%.8f\t%.8f\t%.8f\t xy xz yz\n" % (
                0., boxinfo[0][0], 0., boxinfo[1][1], 0., boxinfo[2][2],
                boxinfo[1][0], boxinfo[2][0], boxinfo[2][1])

    # print Coeffs
    # placeholder parameters (eps, sig, K, ...) are written as literal text
    outCoeffs = "Pair Coeffs\n\n"

    for i in range(1, nmassTypes + 1):
        outCoeffs += "\t%d\teps\tsig\t# %s\n" % (i, mapTypes[i])

    outCoeffs += "\nBond Coeffs\n\n"

    for i in range(1, nbondTypes + 1):
        outCoeffs += "\t%d\tK\tr_0\t# %s\n" % (i, mapbTypes[i])

    outCoeffs += "\nAngle Coeffs\n\n"

    for i in range(1, nangleTypes + 1):
        outCoeffs += "\t%d\tK\ttetha_0 (deg)\t# %s\n" % (i, mapaTypes[i])

    if printdih and (ndihedrals > 0):
        outCoeffs += "\nDihedral Coeffs\n\n"

        for i in range(1, ndihedralTypes + 1):
            outCoeffs += "\t%d\tK\tn\tphi_0 (deg)\tw\t# %s\n" % (i,
                                                                 mapdTypes[i])

        if not ignoreimproper and (niDihedralTypes > 0):
            outCoeffs += "\nImproper Coeffs\n\n"

            for i in range(1, niDihedralTypes + 1):
                outCoeffs += "\t%d\tK\txi_0 (deg)\t# %s\n" % (i, mapiDTypes[i])

    # assemble the sections in the order they appear in the returned text
    if printdih and (ndihedrals > 0):
        if ignoreimproper or (niDihedralTypes == 0):
            return header + "\n" + outMasses + "\n" + outCoeffs + "\n" + outAtoms + "\n" + outBonds + "\n" + outAngles + "\n" + outDihedrals
        else:
            return header + "\n" + outMasses + "\n" + outCoeffs + "\n" + outAtoms + "\n" + outBonds + "\n" + outAngles + "\n" + outDihedrals + "\n" + outImpropers
    else:
        return header + "\n" + outMasses + "\n" + outCoeffs + "\n" + outAtoms + "\n" + outBonds + "\n" + outAngles
import openbabel import sys import pickle #import plpy def notice(msg): print msg plpy.notice(msg) # functions used in atoms, bonds and fragments functions tbl = openbabel.OBElementTable() def nbr_smarts(a1, a2=None): # construct smarts of each neighbor, possible excluding a2 smarts = [] # each neighbor as array element for later sort for nbr in openbabel.OBAtomAtomIter(a1): if nbr.GetAtomicNum() == 1: continue #if a2 and nbr.GetIdx() == a2.GetIdx(): # continue # include bond order if necessary (not single or aromatic) b = a1.GetBond(nbr) if b.IsSingle() and a1.IsAromatic(): bnd = "-" elif b.IsDouble(): bnd = "=" elif b.IsTriple(): bnd = "#" else:
class Molecule(object):
    """Open Babel molecule wrapper with cached SMILES / InChI strings.

    An instance holds an ``openbabel.OBMol`` (``obmol``), a ``pybel.Molecule``
    built from it (``pybel_mol``), an optional ``title`` and lazily computed
    ``smiles`` / ``inchi`` strings.  Rendering goes through a single Indigo
    renderer that is shared by all instances and configured at class level.

    The class targets Python 2 (it relies on ``xrange``, ``dict.iteritems``
    and ``types.UnicodeType`` / ``types.StringType``).
    """

    # for more rendering options visit:
    # http://www.ggasoftware.com/opensource/indigo/api/options#rendering
    _indigo = indigo.Indigo()
    _renderer = indigo_renderer.IndigoRenderer(_indigo)
    _indigo.setOption('render-output-format', 'svg')
    _indigo.setOption('render-margins', 10, 10)
    _indigo.setOption('render-stereo-style', 'none')
    _indigo.setOption('render-implicit-hydrogens-visible', False)
    _indigo.setOption('render-coloring', True)
    _indigo.setOption('render-bond-length', 20.0)
    _indigo.setOption('render-label-mode', 'hetero')
    _obElements = openbabel.OBElementTable()

    @staticmethod
    def GetNumberOfElements():
        """Return the number of elements known to the Open Babel table."""
        return Molecule._obElements.GetNumberOfElements()

    @staticmethod
    def GetAllElements():
        """Return the element symbols for indices 0 .. GetNumberOfElements()-1."""
        return [
            Molecule._obElements.GetSymbol(i)
            for i in xrange(Molecule.GetNumberOfElements())
        ]

    @staticmethod
    def GetSymbol(atomic_num):
        """Return the element symbol for an atomic number."""
        return Molecule._obElements.GetSymbol(atomic_num)

    @staticmethod
    def GetAtomicNum(elem):
        """Return the atomic number for an element symbol.

        Unicode symbols are converted to ``str`` before the lookup.
        """
        if isinstance(elem, types.UnicodeType):
            elem = str(elem)
        return Molecule._obElements.GetAtomicNum(elem)

    @staticmethod
    def SetBondLength(l):
        """Set the Indigo 'render-bond-length' option (shared by all molecules)."""
        Molecule._indigo.setOption('render-bond-length', l)

    @staticmethod
    def VerifySmarts(smarts):
        """Return True if pybel accepts the SMARTS string, False on IOError."""
        try:
            pybel.Smarts(smarts)
            return True
        except IOError:
            return False

    def __init__(self):
        self.title = None
        self.obmol = openbabel.OBMol()
        self.pybel_mol = None
        self.smiles = None
        self.inchi = None

    def __str__(self):
        return self.title or self.smiles or self.inchi or ""

    def __len__(self):
        return self.GetNumAtoms()

    def Clone(self):
        """Return a copy with its own OBMol and a pybel wrapper around it."""
        tmp = Molecule()
        tmp.title = self.title
        tmp.obmol = openbabel.OBMol(self.obmol)
        tmp.pybel_mol = pybel.Molecule(tmp.obmol)
        tmp.smiles = self.smiles
        tmp.inchi = self.inchi
        return tmp

    def SetTitle(self, title):
        self.title = title

    @staticmethod
    def FromSmiles(smiles):
        """Create a Molecule from a SMILES string.

        Raises:
            OpenBabelError: if the string cannot be read or converted.
        """
        m = Molecule()
        m.smiles = smiles
        obConversion = openbabel.OBConversion()
        obConversion.SetInFormat("smiles")
        if not obConversion.ReadString(m.obmol, m.smiles):
            raise OpenBabelError("Cannot read the SMILES string: " + smiles)
        try:
            m.UpdateInChI()
            m.UpdatePybelMol()
        except OpenBabelError:
            raise OpenBabelError("Failed to create Molecule from SMILES: " +
                                 smiles)
        m.SetTitle(smiles)
        return m

    @staticmethod
    def FromInChI(inchi):
        """Create a Molecule from an InChI string.

        Raises:
            OpenBabelError: if the string cannot be read or converted.
        """
        m = Molecule()
        m.inchi = inchi
        obConversion = openbabel.OBConversion()
        obConversion.SetInFormat("inchi")
        # Fail at the point of the read error, as FromSmiles does, instead of
        # carrying an empty OBMol into the conversions below.
        if not obConversion.ReadString(m.obmol, m.inchi):
            raise OpenBabelError("Cannot read the InChI string: " + inchi)
        try:
            m.UpdateSmiles()
            m.UpdatePybelMol()
        except OpenBabelError:
            raise OpenBabelError("Failed to create Molecule from InChI: " +
                                 inchi)
        m.SetTitle(inchi)
        return m

    @staticmethod
    def FromMol(mol):
        """Create a Molecule from the text of a MOL file.

        Raises:
            OpenBabelError: if the text cannot be read or converted.
        """
        m = Molecule()
        obConversion = openbabel.OBConversion()
        obConversion.SetInFormat("mol")
        # Same explicit read check as FromSmiles / FromInChI.
        if not obConversion.ReadString(m.obmol, mol):
            raise OpenBabelError("Cannot read the MOL file:\n" + mol)
        try:
            m.UpdateInChI()
            m.UpdateSmiles()
            m.UpdatePybelMol()
        except OpenBabelError:
            raise OpenBabelError("Failed to create Molecule from MOL file:\n" +
                                 mol)
        m.SetTitle("")
        return m

    @staticmethod
    def FromOBMol(obmol):
        """Wrap an existing OBMol (the object is stored, not copied).

        Raises:
            OpenBabelError: if the molecule cannot be converted.
        """
        m = Molecule()
        m.obmol = obmol
        try:
            m.UpdateInChI()
            m.UpdateSmiles()
            m.UpdatePybelMol()
        except OpenBabelError:
            raise OpenBabelError("Failed to create Molecule from OBMol")
        m.SetTitle("")
        return m

    @staticmethod
    def _FromFormat(s, fmt='inchi'):
        """Dispatch to the From* constructor matching ``fmt``.

        Recognised values are 'smiles', 'smi', 'inchi', 'mol' and 'obmol';
        any other value falls through and returns None.
        """
        if fmt == 'smiles' or fmt == 'smi':
            return Molecule.FromSmiles(s)
        if fmt == 'inchi':
            return Molecule.FromInChI(s)
        if fmt == 'mol':
            return Molecule.FromMol(s)
        if fmt == 'obmol':
            return Molecule.FromOBMol(s)

    @staticmethod
    def _ToFormat(obmol, fmt='inchi'):
        """Write ``obmol`` in the given Open Babel output format.

        For SMILES only the first whitespace-separated token is returned; for
        InChI the result is stripped; other formats are returned unchanged.

        Raises:
            OpenBabelError: if the conversion yields an empty result.
        """
        obConversion = openbabel.OBConversion()
        obConversion.SetOutFormat(fmt)
        res = obConversion.WriteString(obmol)
        if not res:
            raise OpenBabelError("Cannot convert OBMol to %s" % fmt)
        if fmt == 'smiles' or fmt == 'smi':
            res = res.split()
            if res == []:
                raise OpenBabelError("Cannot convert OBMol to %s" % fmt)
            else:
                return res[0]
        elif fmt == 'inchi':
            return res.strip()
        else:
            return res

    @staticmethod
    def Smiles2InChI(smiles):
        """Convert a SMILES string to InChI; raises OpenBabelError on read failure."""
        obConversion = openbabel.OBConversion()
        obConversion.SetInAndOutFormats("smiles", "inchi")
        obmol = openbabel.OBMol()
        if not obConversion.ReadString(obmol, smiles):
            raise OpenBabelError("Cannot read the SMILES string: " + smiles)
        return obConversion.WriteString(obmol).strip()

    @staticmethod
    def InChI2Smiles(inchi):
        """Convert an InChI string to SMILES; raises OpenBabelError on read failure."""
        obConversion = openbabel.OBConversion()
        obConversion.SetInAndOutFormats("inchi", "smiles")
        obmol = openbabel.OBMol()
        if not obConversion.ReadString(obmol, inchi):
            raise OpenBabelError("Cannot read the InChI string: " + inchi)
        return obConversion.WriteString(obmol).split()[0]

    def RemoveHydrogens(self):
        self.pybel_mol.removeh()

    def RemoveAtoms(self, indices):
        """Delete atoms by 0-based index and drop the cached SMILES / InChI."""
        self.obmol.BeginModify()
        # Highest index first, so earlier deletions do not shift later ones.
        for i in sorted(indices, reverse=True):
            self.obmol.DeleteAtom(self.obmol.GetAtom(i + 1))
        self.obmol.EndModify()
        self.smiles = None
        self.inchi = None

    def SetAtomicNum(self, index, new_atomic_num):
        """Change the element of the atom at 0-based ``index``; drops caches."""
        self.obmol.GetAtom(index + 1).SetAtomicNum(new_atomic_num)
        self.smiles = None
        self.inchi = None

    def ToOBMol(self):
        return self.obmol

    def ToPybelMol(self):
        return self.pybel_mol

    def ToFormat(self, fmt='inchi'):
        return Molecule._ToFormat(self.obmol, fmt=fmt)

    def ToMolfile(self):
        return self.ToFormat('mol')

    def UpdateInChI(self):
        self.inchi = Molecule._ToFormat(self.obmol, 'inchi')

    def ToInChI(self):
        """
            Lazy storage of the InChI identifier (calculate once only when
            asked for and store for later use).
        """
        if not self.inchi:
            self.UpdateInChI()
        return self.inchi

    def UpdateSmiles(self):
        self.smiles = Molecule._ToFormat(self.obmol, 'smiles')

    def ToSmiles(self):
        """
            Lazy storage of the SMILES identifier (calculate once only when
            asked for and store for later use).
        """
        if not self.smiles:
            self.UpdateSmiles()
        return self.smiles

    def UpdatePybelMol(self):
        self.pybel_mol = pybel.Molecule(self.obmol)

    @staticmethod
    def _GetFormulaFromInChI(inchi):
        """Extract the chemical formula layer from an InChI string.

        A formula following ``/f`` takes precedence; otherwise the formula
        right after the ``InChI=1/`` or ``InChI=1S/`` prefix is used.

        Returns:
            The formula string, or '' when none is found.

        Raises:
            ValueError: if more than one formula matches.
        """
        # The '/' that terminates the /f layer is matched but kept outside
        # the group, so it is not returned as part of the formula.
        tokens = re.findall(r'/f([0-9A-Za-z\.]+)/', inchi)
        if len(tokens) == 0:
            tokens = re.findall(r'InChI=1S?/([0-9A-Za-z\.]+)', inchi)

        if len(tokens) == 1:
            return tokens[0]
        elif len(tokens) > 1:
            raise ValueError('Bad InChI: ' + inchi)
        else:
            return ''

    @staticmethod
    def _GetAtomBagAndChargeFromInChI(inchi):
        """Parse an InChI into an element-count dict and a net charge.

        The charge is the sum of the ``/q`` entries plus the ``/p`` entries;
        the ``/p`` total is also added to the hydrogen count.

        Returns:
            (atom_bag, charge) where atom_bag maps element symbol -> count.
        """
        fixed_charge = 0
        for q in re.findall(r'/q([0-9\+\-\;]+)', inchi):
            for s in q.split(';'):
                if s:
                    fixed_charge += int(s)

        fixed_protons = 0
        for p in re.findall(r'/p([0-9\+\-\;]+)', inchi):
            for s in p.split(';'):
                if s:
                    fixed_protons += int(s)

        formula = Molecule._GetFormulaFromInChI(inchi)

        atom_bag = {}
        # '.' separates components; a leading integer multiplies a component.
        for mol_formula_times in formula.split('.'):
            for times, mol_formula in re.findall(r'^(\d+)?(\w+)',
                                                 mol_formula_times):
                if not times:
                    times = 1
                else:
                    times = int(times)
                for atom, count in re.findall(r"([A-Z][a-z]*)([0-9]*)",
                                              mol_formula):
                    if count == '':
                        count = 1
                    else:
                        count = int(count)
                    atom_bag[atom] = atom_bag.get(atom, 0) + count * times

        if fixed_protons:
            atom_bag['H'] = atom_bag.get('H', 0) + fixed_protons
            fixed_charge += fixed_protons
        return atom_bag, fixed_charge

    @staticmethod
    def _GetNumElectronsFromInChI(inchi):
        """Calculates the number of electrons in a given molecule."""
        atom_bag, fixed_charge = Molecule._GetAtomBagAndChargeFromInChI(inchi)
        n_protons = 0
        for elem, count in atom_bag.iteritems():
            n_protons += count * Molecule._obElements.GetAtomicNum(elem)
        return n_protons - fixed_charge

    def GetFormula(self):
        return Molecule._GetFormulaFromInChI(self.ToInChI())

    def GetExactMass(self):
        return self.obmol.GetExactMass()

    def GetAtomBagAndCharge(self):
        return Molecule._GetAtomBagAndChargeFromInChI(self.ToInChI())

    def GetHydrogensAndCharge(self):
        atom_bag, charge = self.GetAtomBagAndCharge()
        return atom_bag.get('H', 0), charge

    def GetNumElectrons(self):
        return Molecule._GetNumElectronsFromInChI(self.ToInChI())

    def GetNumAtoms(self):
        return self.obmol.NumAtoms()

    def GetAtoms(self):
        return self.pybel_mol.atoms

    def FindSmarts(self, smarts):
        """
            Corrects the pyBel version of Smarts.findall() which returns
            results as tuples, with 1-based indices even though
            Molecule.atoms is 0-based.

            Args:
                smarts: the SMARTS query, either a string or an object with a
                    findall() method (a string is compiled with pybel.Smarts).

            Returns:
                The re-mapped list of SMARTS matches (0-based index lists).
        """
        if isinstance(smarts, types.StringType):
            smarts = pybel.Smarts(smarts)
        shift_left = lambda m: [(n - 1) for n in m]
        return map(shift_left, smarts.findall(self.pybel_mol))

    def ToSVG(self, comment=None):
        """Render the molecule with Indigo and return the SVG text.

        Glyph ids ('glyph0-N') are prefixed with a fresh UUID so that several
        SVGs can be embedded in one document without id collisions.  On an
        Indigo error an HTML error snippet is returned instead of raising.
        """
        if comment:
            Molecule._indigo.setOption('render-comment', comment)
        else:
            Molecule._indigo.setOption('render-comment', '')
        try:
            indigo_mol = Molecule._indigo.loadMolecule(self.ToSmiles())
            indigo_mol.aromatize()
            indigo_mol.layout()
            svg_str = Molecule._renderer.renderToBuffer(indigo_mol).tostring()
            prefix = str(uuid.uuid4())
            i = 0
            # Glyphs are rewritten in order; stop at the first missing index.
            while True:
                symbol = 'glyph0-%d' % i
                if svg_str.find('id="' + symbol + '"') != -1:
                    svg_str = svg_str.replace(
                        'id="' + symbol + '"',
                        'id="' + prefix + "_" + symbol + '"')
                    svg_str = svg_str.replace(
                        'href="#' + symbol + '"',
                        'href="#' + prefix + "_" + symbol + '"')
                else:
                    break
                i += 1
            return svg_str
        except indigo.IndigoException as e:
            return "<b>Indigo error</b>: %s</br>\n" % str(e)

    def Draw(self, show_title=False):
        """Show the rendered molecule in a GTK window (blocks in gtk.main()).

        Returns silently if the SVG cannot be loaded (glib.GError).
        """
        def expose_cairo(win, event, svg):
            cr = win.window.cairo_create()
            svg.render_cairo(cr)
            return True

        try:
            if show_title:
                svg = rsvg.Handle(data=self.ToSVG(self.title))
            else:
                svg = rsvg.Handle(data=self.ToSVG())
        except glib.GError:  #@UndefinedVariable
            return
        _x, _y, w, h = svg.get_dimension_data()
        win = gtk.Window()
        win.resize(int(w), int(h))
        win.connect("delete-event", lambda w, e: gtk.main_quit())
        win.connect("expose-event", expose_cairo, svg)
        win.show_all()
        win.connect("destroy", lambda w: gtk.main_quit())
        gtk.main()

    def GetAtomCharges(self):
        """
            Returns:
                A list of charges, according to the number of atoms
                in the molecule
        """
        return [atom.formalcharge for atom in self.pybel_mol.atoms]

    @staticmethod
    def _GetDissociationTable(molstring, fmt='inchi', mid_pH=default_pH,
                              min_pKa=0, max_pKa=14, T=default_T,
                              transform_multiples=False):
        """
            Returns the relative potentials of pseudoisomers,
            relative to the most abundant one at pH 7.

            pKa values come from chemaxon.GetDissociationConstants(); only
            those strictly between min_pKa and max_pKa are added.  If that
            call raises ChemAxonError, the table holds only the molecule
            parsed from ``molstring`` using ``fmt``.
        """
        from pygibbs.dissociation_constants import DissociationTable
        from toolbox import chemaxon

        diss_table = DissociationTable()
        try:
            pKa_table, major_ms = chemaxon.GetDissociationConstants(
                molstring, mid_pH=mid_pH,
                transform_multiples=transform_multiples)

            mol = Molecule.FromSmiles(major_ms)
            nH, z = mol.GetHydrogensAndCharge()
            diss_table.SetMolString(nH, nMg=0, s=major_ms)
            diss_table.SetCharge(nH, z, nMg=0)

            pKa_higher = [x for x in pKa_table if mid_pH < x[0] < max_pKa]
            pKa_lower = [x for x in pKa_table if mid_pH > x[0] > min_pKa]
            # Walk away from mid_pH: each pKa above it removes one hydrogen...
            for i, (pKa, _, smiles_above) in enumerate(sorted(pKa_higher)):
                diss_table.AddpKa(pKa, nH_below=(nH - i),
                                  nH_above=(nH - i - 1),
                                  nMg=0, ref='ChemAxon', T=T)
                diss_table.SetMolString((nH - i - 1), nMg=0, s=smiles_above)

            # ...and each pKa below it adds one.
            for i, (pKa, smiles_below, _) in enumerate(
                    sorted(pKa_lower, reverse=True)):
                diss_table.AddpKa(pKa, nH_below=(nH + i + 1),
                                  nH_above=(nH + i),
                                  nMg=0, ref='ChemAxon', T=T)
                diss_table.SetMolString((nH + i + 1), nMg=0, s=smiles_below)
        except chemaxon.ChemAxonError:
            mol = Molecule._FromFormat(molstring, fmt)
            diss_table.SetOnlyPseudoisomerMolecule(mol)

        return diss_table

    def GetDissociationTable(self, fmt='inchi', mid_pH=default_pH,
                             min_pKa=0, max_pKa=14, T=default_T):
        """
            Returns the relative potentials of pseudoisomers,
            relative to the most abundant one at pH 7.

            ``fmt`` is accepted but not used: the molecule is always passed
            on as its InChI.
        """
        return Molecule._GetDissociationTable(self.ToInChI(), 'inchi',
                                              mid_pH, min_pKa, max_pKa, T)
def __init__(self):
    """Set up the element table and the atom-type parameter table.

    Attributes assigned here:
      * ``etab`` - an ``openbabel.OBElementTable``.
      * ``non_ad_metal_names`` - list of metal element symbols.
      * ``atom_equivalence_data`` - list of symbol pairs.
      * ``atom_type_data`` - one ``Info`` record per atom type.
      * ``atom_types`` - the ``sm`` field of every record, in table order.
    """
    self.etab = openbabel.OBElementTable()
    self.non_ad_metal_names = [
        "Cu", "Fe", "Na", "K", "Hg", "Co", "U", "Cd", "Ni", "Si",
    ]
    self.atom_equivalence_data = [("Se", "S")]

    # One row per atom type.  Each Info record takes the type name as both
    # its first and second argument, so the name is written once here and
    # repeated when the record is built; the remaining 12 values are passed
    # through positionally, unchanged.
    rows = (
        ("Hydrogen", "H", 1, 1.0, 0.02, 0.00051, 0.0, 0.37, 0.0,
         False, False, False, False),
        ("PolarHydrogen", "HD", 1, 1.0, 0.02, 0.00051, 0.0, 0.37, 0.0,
         False, False, False, False),
        ("AliphaticCarbonXSHydrophobe", "C", 6, 2.0, 0.15, -0.00143,
         33.5103, 0.77, 1.9, True, False, False, False),
        ("AliphaticCarbonXSNonHydrophobe", "C", 6, 2.0, 0.15, -0.00143,
         33.5103, 0.77, 1.9, False, False, False, False),
        ("AromaticCarbonXSHydrophobe", "A", 6, 2.0, 0.15, -0.00052,
         33.5103, 0.77, 1.9, True, False, False, False),
        ("AromaticCarbonXSNonHydrophobe", "A", 6, 2.0, 0.15, -0.00052,
         33.5103, 0.77, 1.9, False, False, False, False),
        ("Nitrogen", "N", 7, 1.75, 0.16, -0.00162, 22.4493, 0.75, 1.8,
         False, False, False, True),
        ("NitrogenXSDonor", "N", 7, 1.75, 0.16, -0.00162, 22.4493, 0.75,
         1.8, False, True, False, True),
        ("NitrogenXSDonorAcceptor", "NA", 7, 1.75, 0.16, -0.00162,
         22.4493, 0.75, 1.8, False, True, True, True),
        ("NitrogenXSAcceptor", "NA", 7, 1.75, 0.16, -0.00162, 22.4493,
         0.75, 1.8, False, False, True, True),
        ("Oxygen", "O", 8, 1.6, 0.2, -0.00251, 17.1573, 0.73, 1.7,
         False, False, False, True),
        ("OxygenXSDonor", "O", 8, 1.6, 0.2, -0.00251, 17.1573, 0.73, 1.7,
         False, True, False, True),
        ("OxygenXSDonorAcceptor", "OA", 8, 1.6, 0.2, -0.00251, 17.1573,
         0.73, 1.7, False, True, True, True),
        ("OxygenXSAcceptor", "OA", 8, 1.6, 0.2, -0.00251, 17.1573, 0.73,
         1.7, False, False, True, True),
        ("Sulfur", "S", 16, 2.0, 0.2, -0.00214, 33.5103, 1.02, 2.0,
         False, False, False, True),
        ("SulfurAcceptor", "SA", 16, 2.0, 0.2, -0.00214, 33.5103, 1.02,
         2.0, False, False, False, True),
        ("Phosphorus", "P", 15, 2.1, 0.2, -0.0011, 38.7924, 1.06, 2.1,
         False, False, False, True),
        ("Fluorine", "F", 9, 1.545, 0.08, -0.0011, 15.448, 0.71, 1.5,
         True, False, False, True),
        ("Chlorine", "Cl", 17, 2.045, 0.276, -0.0011, 35.8235, 0.99, 1.8,
         True, False, False, True),
        ("Bromine", "Br", 35, 2.165, 0.389, -0.0011, 42.5661, 1.14, 2.0,
         True, False, False, True),
        ("Iodine", "I", 53, 2.36, 0.55, -0.0011, 55.0585, 1.33, 2.2,
         True, False, False, True),
        ("Magnesium", "Mg", 12, 0.65, 0.875, -0.0011, 1.56, 1.3, 1.2,
         False, True, False, True),
        ("Manganese", "Mn", 25, 0.65, 0.875, -0.0011, 2.14, 1.39, 1.2,
         False, True, False, True),
        ("Zinc", "Zn", 30, 0.74, 0.55, -0.0011, 1.7, 1.31, 1.2,
         False, True, False, True),
        ("Calcium", "Ca", 20, 0.99, 0.55, -0.0011, 2.77, 1.74, 1.2,
         False, True, False, True),
        ("Iron", "Fe", 26, 0.65, 0.01, -0.0011, 1.84, 1.25, 1.2,
         False, True, False, True),
        ("GenericMetal", "M", 0, 1.2, 0.0, -0.0011, 22.4493, 1.75, 1.2,
         False, True, False, True),
        # note AD4 doesn't have boron, so copying from carbon
        ("Boron", "B", 5, 2.04, 0.18, -0.0011, 12.052, 0.90, 1.92,
         True, False, False, False),
    )
    self.atom_type_data = [Info(row[0], *row) for row in rows]
    self.atom_types = [record.sm for record in self.atom_type_data]
def validate_bpform_bonds(form_type):
    """ Validate bonds in alphabet

    Three groups of checks are run and every failure is collected before
    anything is raised:

    1. atoms of the backbone / inter-residue bond that point into the
       backbone structure (position in range, element matches),
    2. atoms of each monomer that point into the monomer structure,
    3. for each monomer, the one-residue form and (when the monomer can bond
       on both sides) the two-residue form: formula and charge must agree
       with the generated structure, and the structure must export to SMILES
       and InChI.

    Args:
        form_type (:obj:`type`): type of BpForm

    Raises:
        :obj:`ValueError`: if any of the bonds are invalid
    """
    form = form_type()
    symbols = openbabel.OBElementTable()
    problems = []

    # ---- 1. atoms that reference the backbone ----
    backbone_attrs = (
        ('backbone', 'monomer_bond_atoms'),
        ('backbone', 'monomer_displaced_atoms'),
        ('bond', 'l_bond_atoms'),
        ('bond', 'r_bond_atoms'),
        ('bond', 'l_displaced_atoms'),
        ('bond', 'r_displaced_atoms'),
    )
    for owner_name, attr_name in backbone_attrs:
        owner = getattr(form, owner_name)
        # hydrogens already handed out for this attribute list
        used_hydrogens = []
        for atom_md in getattr(owner, attr_name):
            if not (atom_md.molecule == core.Backbone):
                continue

            # a form without a backbone structure has no valid positions
            n_atoms = (form.backbone.structure.NumAtoms()
                       if form.backbone.structure else 0)
            if not (1 <= atom_md.position <= n_atoms):
                problems.append('Invalid position {} for {}.{}'.format(
                    atom_md.position, owner_name, attr_name))
                continue

            atom = form.backbone.structure.GetAtom(atom_md.position)
            # 'H' on a heavy atom means "a hydrogen attached to this atom"
            if atom_md.element == 'H' and atom.GetAtomicNum() != 1:
                atom = core.get_hydrogen_atom(atom, used_hydrogens, None)
                if atom is None:
                    continue

            if symbols.GetSymbol(atom.GetAtomicNum()) != atom_md.element:
                problems.append(
                    'Invalid element {} != {} at position {} for {}.{}'.
                    format(symbols.GetSymbol(atom.GetAtomicNum()),
                           atom_md.element, atom_md.position,
                           owner_name, attr_name))

    # ---- 2. atoms that reference the monomer ----
    monomer_attrs = (
        'backbone_bond_atoms',
        'backbone_displaced_atoms',
        'r_bond_atoms',
        'l_bond_atoms',
        'r_displaced_atoms',
        'l_displaced_atoms',
    )
    for i_monomer, monomer in enumerate(form.alphabet.monomers.values()):
        for attr_name in monomer_attrs:
            used_hydrogens = []
            for atom_md in getattr(monomer, attr_name):
                if not (atom_md.molecule == core.Monomer):
                    continue

                if not (1 <= atom_md.position <= monomer.structure.NumAtoms()):
                    problems.append(
                        'Invalid position {} for monomeric form:{} {}'.
                        format(atom_md.position, monomer.id, attr_name))
                    continue

                atom = monomer.structure.GetAtom(atom_md.position)
                if atom_md.element == 'H' and atom.GetAtomicNum() != 1:
                    atom = core.get_hydrogen_atom(atom, used_hydrogens,
                                                  i_monomer)
                    if atom is None:
                        continue

                if symbols.GetSymbol(atom.GetAtomicNum()) != atom_md.element:
                    problems.append(
                        'Invalid element {} != {} at position {} for monomeric form:{} {}'
                        .format(
                            symbols.GetSymbol(atom.GetAtomicNum()),
                            atom_md.element, atom_md.position,
                            monomer.id, attr_name))

    # ---- 3. monomeric forms and dimers ----
    for monomer in form.alphabet.monomers.values():
        single = form_type(seq=[monomer])
        try:
            single_mol = single.get_structure()[0]
            if single.get_formula() != OpenBabelUtils.get_formula(single_mol):
                problems.append(
                    'Monomeric form of {} has incorrect formula: {} != {}'.
                    format(monomer.id, str(single.get_formula()),
                           str(OpenBabelUtils.get_formula(single_mol))))
                # a mismatch here also skips the dimer check for this monomer
                continue
            if single.get_charge() != single_mol.GetTotalCharge():
                problems.append(
                    'Monomeric form of {} has incorrect charge: {} != {}'.
                    format(monomer.id, single.get_charge(),
                           single_mol.GetTotalCharge()))
                continue
            OpenBabelUtils.export(single_mol, 'smiles')
            OpenBabelUtils.export(single_mol, 'inchi')
        except Exception as error:
            # recorded, but the dimer check below still runs
            problems.append(
                'Unable to create monomeric form of {}:\n {}'.format(
                    monomer.id, str(error)))

        bonds_both_sides = (form.can_monomer_bond_left(monomer)
                            and form.can_monomer_bond_right(monomer))
        if not bonds_both_sides:
            continue

        pair = form_type(seq=[monomer, monomer])
        try:
            pair_mol = pair.get_structure()[0]
            if pair.get_formula() != OpenBabelUtils.get_formula(pair_mol):
                problems.append(
                    'Dimer of {} has incorrect formula: {} != {}'.format(
                        monomer.id, str(pair.get_formula()),
                        str(OpenBabelUtils.get_formula(pair_mol))))
                continue
            if pair.get_charge() != pair_mol.GetTotalCharge():
                problems.append(
                    'Dimer of {} has incorrect charge: {} != {}'.format(
                        monomer.id, pair.get_charge(),
                        pair_mol.GetTotalCharge()))
                continue
            OpenBabelUtils.export(pair_mol, 'smiles')
            OpenBabelUtils.export(pair_mol, 'inchi')
        except Exception as error:
            problems.append('Unable to form dimer of {}:\n {}'.format(
                monomer.id, str(error)))

    # ---- report ----
    if problems:
        raise ValueError('BpForm {} is invalid:\n {}'.format(
            form_type.__name__, '\n '.join(problems)))
def itp_from_params(mol, q, eps, sig, dfrBonds, dfrAngles, dfrDihedrals,
                    dfrImpDih):
    """Return the text of a GROMACS .itp topology built from the arguments.

    Sections are written in this order: ``[ atomtypes ]``,
    ``[ moleculetype ]``, ``[ atoms ]``, ``[ bonds ]``, ``[ angles ]``,
    ``[ dihedrals ]`` (proper), an optional second ``[ dihedrals ]``
    (improper) and ``[ pairs ]``.  Every section header and every ``;``
    comment is emitted on its own line.

    !!! units are converted as the reverse of:
    http://chembytes.wikidot.com/oplsaagro2tnk and based on GROMACS manual

    Args:
        mol: molecule object; ``mol.atoms`` (items with ``atomicnum`` and
            ``atomicmass``) and ``mol.OBMol`` are read.
        q: per-atom values written to the charge column, indexed like
            ``mol.atoms``.
        eps: per-atom values passed through ``cal2j`` for the epsilon column.
        sig: per-atom values passed through ``a2nm`` for the sigma column.
        dfrBonds: mapping ``"ai aj"`` -> sequence; item 0 goes through
            ``cal2j`` and is multiplied by 200, item 1 goes through ``a2nm``.
        dfrAngles: mapping ``"ai aj ak"`` -> sequence; item 1 is written
            as-is, item 0 goes through ``cal2j`` and is multiplied by 2.
        dfrDihedrals: mapping ``"ai aj ak al"`` -> sequence whose item 0 is
            the type (``"amber"`` or ``"opls"``, case-insensitive) and whose
            items 1-3 are coefficients; for AMBER items 4-6 are written next
            to the respective coefficient.
        dfrImpDih: mapping ``"ai aj ak al"`` -> sequence; item 1 is written
            as-is, item 0 goes through ``cal2j``; multiplicity is fixed at 2.

    Returns:
        str: the complete file content.

    Raises:
        SystemExit: with status 1 if a dihedral type is neither "amber" nor
            "opls" (an error message is printed first).
    """
    # table to convert atomic number to symbols: the module-level function is
    # used when the `ob3` flag is set, an OBElementTable otherwise
    if ob3:
        get_symbol = openbabel.GetSymbol
    else:
        etab = openbabel.OBElementTable()
        get_symbol = etab.GetSymbol

    # Pieces are collected in a list and joined once at the end.
    out = [
        "\n"
        ";\n"
        "; Generated by dice2gromacs\n"
        "; https://github.com/hmcezar/dicetools\n"
        ";\n"
        "[ atomtypes ]\n"
        ";name bond_type mass charge ptype sigma epsilon\n"
    ]

    # write the atomtypes
    for i, atom in enumerate(mol.atoms):
        out.append("att_%03d %s%03d %7.4f 0.000 A %.5e %.5e\n" % (
            i + 1, get_symbol(atom.atomicnum), i + 1, atom.atomicmass,
            a2nm(sig[i]), cal2j(eps[i])))

    out.append(
        "\n"
        "[ moleculetype ]\n"
        ";name nrexcl\n"
        "UNL 3\n"
        "\n"
        "[ atoms ]\n"
        "; nr type resi res atom cgnr charge mass\n"
    )

    # write the atoms
    for i, atom in enumerate(mol.atoms):
        out.append("%6d att_%03d 1 UNL %s%03d 1 %.4f %7.4f\n" % (
            i + 1, i + 1, get_symbol(atom.atomicnum), i + 1, q[i],
            atom.atomicmass))

    # write the bonds
    out.append(
        "\n"
        "[ bonds ]\n"
        "; ai aj funct r k\n"
    )
    for bnd in dfrBonds:
        ai, aj = [int(x) for x in bnd.split()]
        out.append("%6d %6d 1 %.4f %.4f\n" % (
            ai, aj, a2nm(dfrBonds[bnd][1]), cal2j(dfrBonds[bnd][0]) * 200.0))

    # write the angles
    out.append(
        "\n"
        "[ angles ]\n"
        "; ai aj ak funct theta cth\n"
    )
    for ang in dfrAngles:
        ai, aj, ak = [int(x) for x in ang.split()]
        out.append("%6d %6d %6d 1 %.4f %.4f\n" % (
            ai, aj, ak, dfrAngles[ang][1], cal2j(dfrAngles[ang][0]) * 2.0))

    # write the proper dihedrals
    out.append(
        "\n"
        "[ dihedrals ]\n"
        "; proper dihedrals - converted to the RB form from Fourier type if OPLS\n"
        "; ai aj ak al func params\n"
    )
    improper = []    # AMBER entries that turn out to be impropers
    bond_pairs = None  # 1-based (Id + 1) endpoints of every bond, built lazily
    for dih in dfrDihedrals:
        ai, aj, ak, al = [int(x) for x in dih.split()]
        dih_type = dfrDihedrals[dih][0].lower()

        if dih_type == "amber":
            # check if it's a proper or improper dihedral: it is proper only
            # when ai-aj, aj-ak and ak-al are all bonded
            if bond_pairs is None:
                bond_pairs = [
                    frozenset((bond.GetBeginAtom().GetId() + 1,
                               bond.GetEndAtom().GetId() + 1))
                    for bond in openbabel.OBMolBondIter(mol.OBMol)
                ]
            chain = {frozenset((ai, aj)), frozenset((aj, ak)),
                     frozenset((ak, al))}
            cnt = sum(1 for pair in bond_pairs if pair in chain)

            if cnt == 3:
                func, target = 9, out
            else:
                func, target = 4, improper

            fparam = [cal2j(float(x)) / 2.0 for x in dfrDihedrals[dih][1:4]]
            # one line per non-zero term; the loop index is written as the
            # last (multiplicity) column
            for i, term in enumerate(fparam, 1):
                if term != 0.0:
                    target.append(
                        "%6d %6d %6d %6d %1d %6.2f %9.5f %d\n" % (
                            ai, aj, ak, al, func,
                            float(dfrDihedrals[dih][i + 3]), term, i))
        elif dih_type == "opls":
            fparam = [cal2j(float(x)) for x in dfrDihedrals[dih][1:4]]
            # three Fourier coefficients -> six Ryckaert-Bellemans ones
            c0 = fparam[1] + 0.5 * (fparam[0] + fparam[2])
            c1 = 0.5 * (-fparam[0] + 3.0 * fparam[2])
            c2 = -fparam[1]
            c3 = -2.0 * fparam[2]
            c4 = 0.0
            c5 = 0.0
            out.append(
                "%6d %6d %6d %6d 3 %9.5f %9.5f %9.5f %9.5f %9.5f %9.5f\n" % (
                    ai, aj, ak, al, c0, c1, c2, c3, c4, c5))
        else:
            print(
                "Error: Dihedral type (%s) found for dihedral %s in .dfr is not valid."
                % (dfrDihedrals[dih][0], dih))
            # non-zero status: this is a fatal input error
            sys.exit(1)

    # write the improper dihedrals
    if improper or dfrImpDih:
        out.append(
            "\n"
            "[ dihedrals ]\n"
            "; improper dihedrals\n"
            "; ai aj ak al func params\n"
        )
        out.extend(improper)
        for idih in dfrImpDih:
            ai, aj, ak, al = [int(x) for x in idih.split()]
            out.append("%6d %6d %6d %6d 4 %6.2f %9.5f %d\n" % (
                ai, aj, ak, al, float(dfrImpDih[idih][1]),
                cal2j(float(dfrImpDih[idih][0])), 2))

    # write the pairs
    out.append(
        "\n"
        "[ pairs ]\n"
    )
    for pair in get_pairs(mol):
        ai, aj = [int(x) for x in pair.split()]
        out.append("%6d %6d 1\n" % (ai, aj))

    return "".join(out)
def top2dfr(topfile, geomfile, flexfrag, eqgeom, savefrags, topcharges, ffname, path): if "amber" in ffname: potname = "AMBER" else: potname = "OPLS" # get the atomic positions from the geometry file base, ext = os.path.splitext(geomfile) etab = openbabel.OBElementTable() mol = pybel.readfile(ext[1:], geomfile).__next__() molxyzinfo = {} for i, atom in enumerate(mol, 1): molxyzinfo[i] = [etab.GetSymbol(atom.atomicnum), atom.coords] # first pass through the topology file to get the data into a dictionary tdata = {} tdata["[ improper ]"] = [] with open(topfile) as f: while True: line = f.readline() if not line: break if line.strip().startswith(";") or line.strip().startswith( "#") or len(line.strip()) == 0: continue if strip_comment(line).startswith("[ "): if strip_comment(line) not in tdata: key = strip_comment(line) tdata[key] = [] else: # remove comments from the line if "improper" not in line: line = strip_comment(line) if "improper" in line: tdata["[ improper ]"].append(line) else: try: tdata[key].append(line) except: print( "You have a line (%s) of data before assigning a type of entry (such as [ atoms ], [ bonds ] ...)" % (line)) sys.exit(0) # check if atoms were found if "[ atoms ]" not in tdata: print( "The [ atoms ] section was not found in your topology, make sure you're using a single file without #include" ) sys.exit(0) # get the atoms and its positions and parameters in a dictionary atoms = OrderedDict() rdfs = {} atom_num = 1 rdf_label = 1 for line in tdata["[ atoms ]"]: atomlbl = line.split()[0] atoms[atomlbl] = [] fromatomtype = True # if atomtypes are in the topology, use them. 
Otherwise get from ffnonbonded.itp ffline = "" if "[ atomtypes ]" in tdata: ffline = lookup_ljparam_ifile(line.split()[1], tdata["[ atomtypes ]"]) # get the data from the ffnonbonded.itp if not ffline: fromatomtype = False ffline = lookup_ljparam(line.split()[1], path) # if the atomtype was not found, stop if not ffline: print( "Atom type %s was not found neither in the .itp or the force field directory" % (line.split()[1])) sys.exit(0) # append the data to the list in the same order it will be written in the .dfr atoms[atomlbl].append(str(atom_num)) atom_num += 1 if "amber" in ffname: atomsp = ffline.split()[1] else: atomsp = ffline.split()[2] if atomsp not in rdfs: rdfs[atomsp] = str(rdf_label) rdf_label += 1 atoms[atomlbl].append(rdfs[atomsp]) if "[ atomtypes ]" in tdata: atoms[atomlbl].append(mol.atoms[int(atomlbl) - 1].atomicnum) else: atoms[atomlbl].append(atomsp) x, y, z = molxyzinfo[int(atomlbl)][1] atoms[atomlbl].append(str(x)) atoms[atomlbl].append(str(y)) atoms[atomlbl].append(str(z)) if ("amber" in ffname) or fromatomtype: if topcharges: atoms[atomlbl].append(line.split()[6]) else: atoms[atomlbl].append(ffline.split()[-4]) atoms[atomlbl].append(str(j2cal(float(ffline.split()[-1])))) atoms[atomlbl].append(str(nm2a(float(ffline.split()[-2])))) # the last one will not be printed but is needed to retrieve the force constants if ("[ atomtypes ]" in tdata) and ("opls" in ffname): atoms[atomlbl].append(ffline.split()[1]) else: atoms[atomlbl].append(ffline.split()[0]) else: if topcharges: atoms[atomlbl].append(line.split()[6]) else: atoms[atomlbl].append(ffline.split()[4]) atoms[atomlbl].append(str(j2cal(float(ffline.split()[7])))) atoms[atomlbl].append(str(nm2a(float(ffline.split()[6])))) # the last one will not be printed but is needed to retrieve the opls-aa force constants atoms[atomlbl].append(ffline.split()[1]) # print(atoms[atomlbl]) # now create the fragment connection list from this file and store it in fraginfo # creates a temporary xyz using mkstemp 
(https://www.logilab.org/blogentry/17873) # this file will be used to get the fragment data fd, temp_path = tempfile.mkstemp(suffix=".xyz") fxyz = os.fdopen(fd, 'w') fxyz.write(mol.write("xyz")) fxyz.close() generate_fragfile(temp_path, "header") base, ext = os.path.splitext(temp_path) fraginfo = [] with open(base + ".dfr") as f: while True: line = f.readline() if line.strip() != "$atoms fragments": continue else: while line.strip() != "$end fragment connection": if flexfrag: fraginfo.append(line.replace("R", "M")) else: fraginfo.append(line) line = f.readline() fraginfo.append(line) break # remove the files os.remove(temp_path) os.remove(base + ".dfr") os.remove(base + ".txt") if savefrags: shutil.move( base + "_fragments", os.path.join( os.path.dirname(os.path.abspath(geomfile)), os.path.splitext(os.path.basename(topfile))[0] + "_fragments")) else: shutil.rmtree(base + "_fragments") # !!! units should be converted as in: http://chembytes.wikidot.com/oplsaagro2tnk !!! # get the bond info bonds = [] for line in tdata["[ bonds ]"]: # get parameters from user's .itp if (len(line.split()) == 5): if eqgeom: bonds.append( line.split()[0] + " " + line.split()[1] + " \t" + str(round(j2cal(float(line.split()[4])) / (200.0), 4)) + "\t" + str( round( mol.OBMol.GetBond(int(line.split( )[0]), int(line.split()[1])).GetLength(), 4)) + "\n") else: bonds.append( line.split()[0] + " " + line.split()[1] + " \t" + str(round(j2cal(float(line.split()[4])) / (200.0), 4)) + "\t" + str(nm2a(float(line.split()[3]))) + "\n") # get parameters from ffbonded.itp else: ffline = lookup_ffbond(atoms[line.split()[0]][9], atoms[line.split()[1]][9], path) if ffline == "not found": bonds.append( line.split()[0] + " " + line.split()[1] + " \tXXX\t" + str( round( mol.OBMol.GetBond(int(line.split( )[0]), int(line.split()[1])).GetLength(), 4)) + "\n") elif eqgeom: bonds.append( line.split()[0] + " " + line.split()[1] + " \t" + str(round(j2cal(float(ffline.split()[4])) / (200.0), 4)) + "\t" + str( round( 
mol.OBMol.GetBond(int(line.split( )[0]), int(line.split()[1])).GetLength(), 4)) + "\n") else: bonds.append( line.split()[0] + " " + line.split()[1] + " \t" + str(round(j2cal(float(ffline.split()[4])) / (200.0), 4)) + "\t" + str(nm2a(float(ffline.split()[3]))) + "\n") # get the angle info angles = [] for line in tdata["[ angles ]"]: # get parameters from user's .itp if (len(line.split()) == 6): if eqgeom: angles.append( line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " \tharmonic\t" + str(j2cal(float(line.split()[5])) / (2.0)) + "\t" + str( round( mol.OBMol.GetAngle( mol.OBMol.GetAtom(int(line.split()[0])), mol.OBMol.GetAtom(int(line.split()[1])), mol.OBMol.GetAtom(int(line.split()[2]))), 4)) + "\n") else: angles.append(line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " \tharmonic\t" + str(j2cal(float(line.split()[5])) / (2.0)) + "\t" + str(float(line.split()[4])) + "\n") # get parameters from ffbonded.itp else: ffline = lookup_ffangle(atoms[line.split()[0]][9], atoms[line.split()[1]][9], atoms[line.split()[2]][9], path) if ffline == "not found": angles.append( line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " \tharmonic\tXXX\t" + str( round( mol.OBMol.GetAngle( mol.OBMol.GetAtom(int(line.split()[0])), mol.OBMol.GetAtom(int(line.split()[1])), mol.OBMol.GetAtom(int(line.split()[2]))), 4)) + "\n") elif eqgeom: angles.append( line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " \tharmonic\t" + str(j2cal(float(ffline.split()[5])) / (2.0)) + "\t" + str( round( mol.OBMol.GetAngle( mol.OBMol.GetAtom(int(line.split()[0])), mol.OBMol.GetAtom(int(line.split()[1])), mol.OBMol.GetAtom(int(line.split()[2]))), 4)) + "\n") else: angles.append(line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " \tharmonic\t" + str(j2cal(float(ffline.split()[5])) / (2.0)) + "\t" + ffline.split()[4] + "\n") # get the dihedrals info dihedrals = [] pline = {} ipline = {} dih9 = False dih4 = False for rline in tdata["[ 
dihedrals ]"]: line = strip_comment(rline) ffline = "" # get parameters from user's .itp if (len(line.split()) == 11 and line.split()[4] == '3'): V3 = round(-j2cal(float(line.split()[8]) / 2.0), 3) V2 = round(-j2cal(float(line.split()[7])), 3) V1 = round(-2.0 * j2cal(float(line.split()[6])) + 3.0 * V3, 3) dihedrals.append(line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + potname + "\t" + str(V1) + "\t" + str(V2) + "\t" + str(V3) + "\t0.0\t0.0\t0.0\n") elif (len(line.split()) == 8 and line.split()[4] == '9'): dih9 = True dihline = "%s %s %s %s" % (line.split()[0], line.split()[1], line.split()[2], line.split()[3]) if dihline in pline: pline[dihline].append(line) else: pline[dihline] = [line] elif (len(line.split()) == 8 and (line.split()[4] == '4' or line.split()[4] == '1')): dih4 = True dihline = "%s %s %s %s" % (line.split()[0], line.split()[1], line.split()[2], line.split()[3]) if dihline in ipline: ipline[dihline].append(line) else: ipline[dihline] = [line] # get parameters from ffbonded.itp elif len(line.split()) == 5: ffline = lookup_ffdihedral(atoms[line.split()[0]][9], atoms[line.split()[1]][9], atoms[line.split()[2]][9], atoms[line.split()[3]][9], int(line.split()[4]), ffname, path) else: print( "Error: something is wrong in dihedral line (%s) maybe the number of parameters?" 
% (line)) sys.exit(0) # parameters from ffbonded.itp need to be converted and stored if ffline: if ffline == "not found": dihedrals.append(line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + potname + "\tXXX\tXXX\tXXX" + "\t0.0\t0.0\t0.0\n") continue if "amber" in ffname: # parameters are already of Fourier type, just need to convert to cal if float(ffline.split()[8]) == 0.: dihedrals.append( line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + potname + "\t" + str(j2cal(float(ffline.split()[5]))) + "\t" + str(j2cal(float(ffline.split()[6]))) + "\t" + str(j2cal(float(ffline.split()[7]))) + "\t" + str(round(float(ffline.split()[9]), 1)) + "\t" + str(round(float(ffline.split()[10]), 1)) + "\t" + str(round(float(ffline.split()[11]), 1)) + "\n") else: dihedrals.append( line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + potname + "\t" + str(j2cal(float(ffline.split()[5]))) + "\t" + str(j2cal(float(ffline.split()[6]))) + "\t" + str(j2cal(float(ffline.split()[7]))) + "\t" + str(j2cal(float(ffline.split()[8]))) + "\t" + str(round(float(ffline.split()[9]), 1)) + "\t" + str(round(float(ffline.split()[10]), 1)) + "\t" + str(round(float(ffline.split()[11]), 1)) + "\t" + str(round(float(ffline.split()[12]), 1)) + "\n") else: if float(ffline.split()[9]) != 0.0 or float( ffline.split()[10]) != 0.0: print( "Parameters for %s - %s - %s - %s dihedrals are undefined, please treat by hand!" 
% (atoms[line.split()[0]][9], atoms[line.split()[1]][9], atoms[line.split()[2]][9], atoms[line.split()[3]][9])) V3 = round(-j2cal(float(ffline.split()[8]) / 2.0), 3) V2 = round(-j2cal(float(ffline.split()[7])), 3) V1 = round(-2.0 * j2cal(float(ffline.split()[6])) + 3.0 * V3, 3) dihedrals.append(line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + potname + "\t" + str(V1) + "\t" + str(V2) + "\t" + str(V3) + "\t0.0\t0.0\t0.0\n") # after reading all the dihedrals, if type 9 was used, we need to convert if dih9: for kdih in pline: params = [0.0] * 6 for line in pline[kdih]: n = int(line.split()[7]) # have to multiply by 2.0 since I use the 0.5*(...) version of the AMBER definition params[n - 1] = 2.0 * float(line.split()[6]) params[n + 2] = float(line.split()[5]) retline = "%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f" % ( kdih, 9, params[0], params[1], params[2], params[3], params[4], params[5]) dihedrals.append(retline.split()[0] + " " + retline.split()[1] + " " + retline.split()[2] + " " + retline.split()[3] + " \t" + potname + "\t" + str(j2cal(float(retline.split()[5]))) + "\t" + str(j2cal(float(retline.split()[6]))) + "\t" + str(j2cal(float(retline.split()[7]))) + "\t" + retline.split()[8] + "\t" + retline.split()[9] + "\t" + retline.split()[10] + "\n") # finally, add the improper dihedrals described as proper dihedrals if dih4: for kdih in ipline: params = [0.0] * 6 for line in ipline[kdih]: n = int(line.split()[7]) # have to multiply by 2.0 since I use the 0.5*(...) 
version of the AMBER definition params[n - 1] = 2.0 * float(line.split()[6]) params[n + 2] = float(line.split()[5]) retline = "%s\t%d\t%.3f\t%.3f\t%.3f\t%.2f\t%.2f\t%.2f" % ( kdih, 9, params[0], params[1], params[2], params[3], params[4], params[5]) dihedrals.append(retline.split()[0] + " " + retline.split()[1] + " " + retline.split()[2] + " " + retline.split()[3] + " \t" + "AMBER" + "\t" + str(j2cal(float(retline.split()[5]))) + "\t" + str(j2cal(float(retline.split()[6]))) + "\t" + str(j2cal(float(retline.split()[7]))) + "\t" + retline.split()[8] + "\t" + retline.split()[9] + "\t" + retline.split()[10] + "\n") # get the improper dihedrals info improper = [] for line in tdata["[ improper ]"]: if len(strip_comment(line).split()) >= 7: improper.append(line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + str(round(j2cal(float(line.split()[6])), 3)) + "\t" + line.split()[5] + "\n") else: if "opls" in ffname: ffline = lookup_ffimproper(line.split()[5], path) improper.append( line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + str(round(j2cal(float(ffline.split()[3])), 3)) + "\t" + ffline.split()[2] + "\n") else: ffline = lookup_ffdihedral(atoms[line.split()[0]][9], atoms[line.split()[1]][9], atoms[line.split()[2]][9], atoms[line.split()[3]][9], 4, path) improper.append(line.split()[0] + " " + line.split()[1] + " " + line.split()[2] + " " + line.split()[3] + " \t" + "OPLS" + "\t" + str(j2cal(float(ffline.split()[5]))) + "\t" + str(j2cal(float(ffline.split()[6]))) + "\t" + str(j2cal(float(ffline.split()[7]))) + "\t" + ffline.split()[8] + "\t" + ffline.split()[9] + "\t" + ffline.split()[10] + "\n") # print everything to the output file base, ext = os.path.splitext(geomfile) with open(base + ".txt", "w") as f: f.write("*\n1\n") f.write( str(len(atoms)) + " \t %s (generated with gromacs2dice)\n" % (os.path.basename(base))) for atom, data in atoms.items(): f.write( "%2d %2d \t %7.4f \t 
%7.4f \t %7.4f \t %7.4f \t %7.4f \t %7.4f\n" % (int(data[1]), int(data[2]), float(data[3]), float( data[4]), float(data[5]), float(data[6]), float( data[7]), float(data[8]))) f.write("$end\n") with open(base + ".dfr", "w") as f: for line in fraginfo: f.write(line) f.write("\n$bond\n") for line in bonds: f.write(line) f.write("$end bond\n\n$angle\n") for line in angles: f.write(line) f.write("$end angle\n\n$dihedral\n") for line in dihedrals: f.write(line) if dih4: for line in improper: f.write(line) f.write("$end dihedral\n") if improper: for line in improper: print("\n$improper dihedral\n") f.write(line) f.write("$end improper dihedral\n") if not flexfrag: withoutba = clean_dofs(base + ".dfr") with open(base + ".dfr", 'w') as f: f.write(withoutba) print("The files %s and %s were successfully generated." % (base + ".txt", base + ".dfr"))
def draw(self, show=True, filename=None, update=False, usecoords=False,
         method="mcdl"):
    """Create a 2D depiction of the molecule.

    Optional parameters:
      show -- display on screen (default is True)
      filename -- write to file (default is None)
      update -- update the coordinates of the atoms to those
                determined by the structure diagram generator
                (default is False)
      usecoords -- don't calculate 2D coordinates, just use
                   the current coordinates (default is False)
      method -- two methods are available for calculating the
                2D coordinates: OpenBabel's "mcdl" (the default), or
                "oasa" (from the OASA toolkit)

    OASA is used for depiction. Tkinter and Python Imaging Library are
    required for image display.

    Raises:
      ValueError -- if method is neither "mcdl" nor "oasa"
      ImportError -- if OASA is unavailable, or if show is True and
                     Tkinter / Python Imaging Library are unavailable
    """
    # Validate the caller's arguments first so that a bad 'method' is
    # reported as such, independently of which optional toolkits are present.
    if method not in ["mcdl", "oasa"]:
        raise ValueError("Method '%s' not recognised. Should be either"
                         " 'mcdl' or 'oasa'." % method)
    if not oasa:
        errormessage = ("OASA not found, but is required for depiction. "
                        "OASA is part of BKChem. "
                        "See installation instructions for more "
                        "information.")
        raise ImportError(errormessage)

    etab = ob.OBElementTable()

    workingmol = self
    if method == "mcdl":
        if not update:
            # Call gen2D on a clone so that self keeps its coordinates
            workingmol = Molecule(self)
        if not usecoords:
            _operations['gen2D'].Do(workingmol.OBMol)
            usecoords = True  # Use the workingmol's coordinates

    # Build the OASA molecule: one vertex per atom, one edge per bond.
    mol = oasa.molecule()
    for atom in workingmol.atoms:
        v = mol.create_vertex()
        v.symbol = etab.GetSymbol(atom.atomicnum)
        v.charge = atom.formalcharge
        if usecoords:
            # Coordinates are scaled by 30 for drawing (divided back below)
            v.x, v.y, v.z = atom.coords[0] * 30., atom.coords[1] * 30., 0.0
        mol.add_vertex(v)

    for bond in ob.OBMolBondIter(workingmol.OBMol):
        e = mol.create_edge()
        e.order = bond.GetBO()
        if bond.IsHash():
            e.type = "h"
        elif bond.IsWedge():
            e.type = "w"
        # OpenBabel atom indices are 1-based, OASA vertices are 0-based
        mol.add_edge(bond.GetBeginAtomIdx() - 1,
                     bond.GetEndAtomIdx() - 1,
                     e)

    # I'm sure there's a more elegant way to do the following, but here goes...
    # let's set the stereochemistry around double bonds
    self.write("can")  # Perceive UP/DOWNness
    cis_trans = oasa.stereochemistry.cis_trans_stereochemistry
    for bond in ob.OBMolBondIter(workingmol.OBMol):
        if bond.GetBO() != 2:
            continue
        ends = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        # For each end of the double bond, the neighbouring bonds that
        # carry an up/down mark
        stereobonds = [
            [b for b in ob.OBAtomBondIter(workingmol.OBMol.GetAtom(x))
             if b.GetIdx() != bond.GetIdx() and (b.IsUp() or b.IsDown())]
            for x in ends]
        if not (stereobonds[0] and stereobonds[1]):
            continue  # Needs to be defined at either end
        if stereobonds[0][0].IsUp() == stereobonds[1][0].IsUp():
            # Either both up or both down
            stereo = cis_trans.SAME_SIDE
        else:
            stereo = cis_trans.OPPOSITE_SIDE
        atomids = [(b[0].GetBeginAtomIdx(), b[0].GetEndAtomIdx())
                   for b in stereobonds]
        # The atom at the far side of each marked bond
        extremes = []
        for atom_idx, end in zip(ends, atomids):
            if end[0] == atom_idx:
                extremes.append(end[1])
            else:
                extremes.append(end[0])
        center = mol.get_edge_between(mol.atoms[ends[0] - 1],
                                      mol.atoms[ends[1] - 1])
        st = cis_trans(
            center=center, value=stereo,
            references=(mol.atoms[extremes[0] - 1], mol.atoms[ends[0] - 1],
                        mol.atoms[ends[1] - 1], mol.atoms[extremes[1] - 1]))
        mol.add_stereochemistry(st)

    mol.remove_unimportant_hydrogens()
    if method == "oasa" and not usecoords:
        oasa.coords_generator.calculate_coords(mol, bond_length=30)
    if update:
        newcoords = [(v.x / 30., v.y / 30., 0.0) for v in mol.vertices]
        for atom, newcoord in zip(ob.OBMolAtomIter(self.OBMol), newcoords):
            atom.SetVector(*newcoord)

    if not (filename or show):
        return

    xs = [v.x for v in mol.vertices]
    ys = [v.y for v in mol.vertices]
    maxcoord = max(max(xs) - min(xs), max(ys) - min(ys))
    fontsize = 16
    bondwidth = 6
    linewidth = 2
    if maxcoord > 270:  # 300 - margin * 2
        scale = 270. / maxcoord
        for v in mol.vertices:
            v.x *= scale
            v.y *= scale
        fontsize *= math.sqrt(scale)
        bondwidth *= math.sqrt(scale)
        linewidth *= math.sqrt(scale)

    if filename:
        filedes = None
    else:
        # Display only: render into a temporary file that is removed below
        filedes, filename = tempfile.mkstemp()

    try:
        canvas = oasa.cairo_out.cairo_out()
        canvas.show_hydrogens_on_hetero = True
        canvas.font_size = fontsize
        canvas.bond_width = bondwidth
        canvas.line_width = linewidth
        canvas.mol_to_cairo(mol, filename)
        if show:
            if not tk:
                errormessage = ("Tkinter or Python Imaging "
                                "Library not found, but is required for image "
                                "display. See installation instructions for "
                                "more information.")
                raise ImportError(errormessage)
            root = tk.Tk()
            root.title((hasattr(self, "title") and self.title)
                       or self.__str__().rstrip())
            # pack() returns None, so the widget must be bound before packing
            # for the label to actually be placed inside the frame.
            frame = tk.Frame(root, colormap="new", visual='truecolor')
            frame.pack()
            image = PIL.open(filename)
            # Keep a reference to the PhotoImage for as long as the window
            # is shown.
            imagedata = piltk.PhotoImage(image)
            tk.Label(frame, image=imagedata).pack()
            tk.Button(root, text="Close",
                      command=root.destroy).pack(fill=tk.X)
            root.mainloop()
    finally:
        # 'is not None' because 0 is a valid file descriptor; the finally
        # clause removes the temporary file even when rendering fails.
        if filedes is not None:
            os.close(filedes)
            os.remove(filename)
#!/usr/bin/env python # python modules from omg import geom import numpy as np import openbabel as ob #create a periodic table object PERIODIC_TABLE = ob.OBElementTable() class Atom(ob.OBAtom): """A container for all the information relative to an atom Attributes: type (str) x (float) y (float) z (float) Other info is kept in its own object inside atom: - mm (molecular mechanics) - oniom (oniom info) - resinfo (information about the residue the atom belongs to) - pdbinfo (information for pdb files) """ def __init__(self, atype, xyz): """ Args:
def __init__(self, add_hydrogens: bool = False) -> None:
    """Initialise the instance with empty type registries.

    Args:
        add_hydrogens: Flag stored unchanged on the instance as
            ``_add_hydrogens``; how it is used is decided by code outside
            this constructor.
    """
    # Open Babel element table owned by this instance.
    self._element_tabel = openbabel.OBElementTable()
    # Both registries start out empty.
    self._node_types = set()
    self._edge_types = set()
    self._add_hydrogens = add_hydrogens
class Compound(object):
    """A compound identified by an InChI, with its protonation species.

    Attributes:
        database: name of the source database (e.g. 'KEGG')
        compound_id: identifier of the compound in that database
        inchi: InChI string, or None when none could be obtained
        pKas: list of pKa values (sorted in descending order when produced
            by get_species_pka)
        majorMSpH7: index into nHs / zs of the major species at pH 7
            (-1 when there is no InChI)
        nHs: number of hydrogens of each species
        zs: charge of each species
    """

    # Open Babel element table shared by all instances (symbol -> atomic
    # number lookups).
    _obElements = openbabel.OBElementTable()

    def __init__(self, database, compound_id, inchi,
                 pKas, majorMSpH7, nHs, zs):
        self.database = database
        self.compound_id = compound_id
        self.inchi = inchi
        self.pKas = pKas
        self.majorMSpH7 = majorMSpH7
        self.nHs = nHs
        self.zs = zs

    @staticmethod
    def from_kegg(cid):
        """Build a Compound from a numeric KEGG compound ID.

        When no InChI can be obtained the compound is still created, with
        empty species lists and majorMSpH7 = -1.
        """
        inchi = Compound.get_inchi(cid)
        if inchi is None:
            pKas = []
            majorMSpH7 = -1
            nHs = []
            zs = []
        else:
            pKas, majorMSpH7, nHs, zs = Compound.get_species_pka(inchi)
        return Compound('KEGG', 'C%05d' % cid, inchi,
                        pKas, majorMSpH7, nHs, zs)

    def to_json_dict(self):
        """Return the compound as a dict of plain, JSON-friendly values."""
        return {
            'database': self.database,
            'id': self.compound_id,
            'inchi': self.inchi,
            'pKas': self.pKas,
            'majorMSpH7': self.majorMSpH7,
            'nHs': self.nHs,
            'zs': self.zs
        }

    @staticmethod
    def from_json_dict(d):
        """Inverse of to_json_dict()."""
        return Compound(d['database'], d['id'], d['inchi'], d['pKas'],
                        d['majorMSpH7'], d['nHs'], d['zs'])

    @staticmethod
    def get_inchi(cid):
        """Download the MOL file of a KEGG compound and convert it to InChI.

        Returns None when the downloaded text cannot be converted.
        """
        # NOTE(review): urllib.urlopen is the Python 2 API; on Python 3 this
        # needs urllib.request.urlopen plus decoding of the returned bytes.
        s_mol = urllib.urlopen(
            'http://rest.kegg.jp/get/cpd:C%05d/mol' % cid).read()
        return Compound.mol2inchi(s_mol)

    @staticmethod
    def _string_to_inchi(s, in_format):
        """Convert `s`, given in Open Babel format `in_format`, to InChI.

        Returns None when the input cannot be read or the output is empty.
        """
        openbabel.obErrorLog.SetOutputLevel(-1)

        conv = openbabel.OBConversion()
        conv.SetInAndOutFormats(in_format, 'inchi')
        # Output options handed to the InChI writer
        conv.AddOption("F", conv.OUTOPTIONS)
        conv.AddOption("T", conv.OUTOPTIONS)
        conv.AddOption("x", conv.OUTOPTIONS, "noiso")
        conv.AddOption("w", conv.OUTOPTIONS)

        obmol = openbabel.OBMol()
        if not conv.ReadString(obmol, s):
            return None
        inchi = conv.WriteString(obmol, True)  # second argument is trimWhitespace
        if inchi == '':
            return None
        return inchi

    @staticmethod
    def mol2inchi(s):
        """Convert MOL-file text to an InChI string (None on failure)."""
        return Compound._string_to_inchi(s, 'mol')

    @staticmethod
    def smiles2inchi(smiles):
        """Convert a SMILES string to an InChI string (None on failure)."""
        return Compound._string_to_inchi(smiles, 'smiles')

    @staticmethod
    def get_atom_bag_and_charge_from_inchi(inchi):
        """Parse element counts and net charge out of an InChI string.

        Returns:
            (atom_bag, fixed_charge): a dict mapping element symbol to count,
            and the sum of the values in the /q layer. For inchi=None the
            result is ({}, 0).

        Raises:
            ValueError: if more than one formula is found in the InChI.
        """
        if inchi is None:
            return {}, 0

        fixed_charge = 0
        for q in re.findall(r'/q([0-9+\-;]+)', inchi):
            # One entry per component, separated by ';' (entries may be empty)
            for s in q.split(';'):
                if s:
                    fixed_charge += int(s)

        atom_bag = {}

        # the /f field gives the fixed-H structure
        tokens = re.findall(r'/f([0-9A-Za-z.]+/)', inchi)

        # if /f is not given, use the main formula and
        # adjust the number of protons according to the /p field
        if len(tokens) == 0:
            tokens = re.findall(r'InChI=1S?/([0-9A-Za-z.]+)', inchi)
            for p in re.findall(r'/p([0-9+\-;]+)', inchi):
                for s in p.split(';'):
                    if s:
                        atom_bag['H'] = atom_bag.get('H', 0) + int(s)

        if len(tokens) == 1:
            formula = tokens[0]
        elif len(tokens) > 1:
            raise ValueError('Bad InChI: ' + inchi)
        else:
            formula = ''

        # Components are separated by '.', each optionally prefixed by a
        # multiplier (e.g. "2C2H4O2.Ca").
        for mol_formula_times in formula.split('.'):
            for times, mol_formula in re.findall(r'^(\d+)?(\w+)',
                                                 mol_formula_times):
                times = int(times) if times else 1
                for atom, count in re.findall(r'([A-Z][a-z]*)([0-9]*)',
                                              mol_formula):
                    count = int(count) if count != '' else 1
                    atom_bag[atom] = atom_bag.get(atom, 0) + count * times

        return atom_bag, fixed_charge

    @staticmethod
    def get_species_pka(inchi):
        """Compute the pKas and the hydrogen count / charge of each species.

        Returns:
            (pKas, majorMSpH7, nHs, zs). For inchi=None this is
            ([], -1, [], []). If the pKa calculation raises ChemAxonError,
            the compound is treated as a single species described by
            `inchi` itself.
        """
        if inchi is None:
            return [], -1, [], []

        try:
            pKas, major_ms = GetDissociationConstants(inchi)
            pKas = sorted(
                [pka for pka in pKas if pka > MIN_PH and pka < MAX_PH],
                reverse=True)
            major_ms_inchi = Compound.smiles2inchi(major_ms)
        except ChemAxonError:
            logging.warning('chemaxon failed to find pKas for this inchi: '
                            + inchi)
            pKas = []
            major_ms_inchi = inchi

        atom_bag, major_ms_charge = \
            Compound.get_atom_bag_and_charge_from_inchi(major_ms_inchi)
        major_ms_nH = atom_bag.get('H', 0)

        n_species = len(pKas) + 1
        if pKas == []:
            majorMSpH7 = 0
        else:
            # pKas is sorted descending, so this count is the index of the
            # major species at pH 7
            majorMSpH7 = len([1 for pka in pKas if pka > 7])

        # Neighbouring species differ by one hydrogen and one unit of charge
        nHs = []
        zs = []
        for i in range(n_species):
            zs.append((i - majorMSpH7) + major_ms_charge)
            nHs.append((i - majorMSpH7) + major_ms_nH)

        return pKas, majorMSpH7, nHs, zs

    def __str__(self):
        pka_text = ', '.join(['%.2f' % p for p in self.pKas])
        if self.nHs and self.zs:
            major_ms = "nH = %d, charge = %d" % (self.nHs[self.majorMSpH7],
                                                 self.zs[self.majorMSpH7])
        else:
            # Compounds created without an InChI have no species data;
            # indexing the empty lists would raise IndexError.
            major_ms = "unknown"
        return "%s\nInChI: %s\npKas: %s\nmajor MS: %s" % \
            (self.compound_id, self.inchi, pka_text, major_ms)

    def get_atom_bag_with_electrons(self):
        """
            Calculates the number of electrons in a given molecule

            Returns:
                a dictionary of all element counts and also electron count
                ('e-'), or None when the compound has no InChI
        """
        if self.inchi is None:
            return None
        atom_bag, charge = Compound.get_atom_bag_and_charge_from_inchi(
            self.inchi)
        n_protons = sum([
            count * Compound._obElements.GetAtomicNum(str(elem))
            for (elem, count) in atom_bag.items()
        ])
        atom_bag['e-'] = n_protons - charge
        return atom_bag

    def transform(self, pH, I, T):
        """Combine the species into one value at the given pH, I and T.

        Returns 0 when the compound has no InChI. Otherwise each species i
        gets

            dG0'_i = dG0_i + nH_i * (R T ln(10) pH + DH) - z_i^2 * DH

        with DH = debye_huckel((I, T)), and the result is
        -R T * logsumexp(dG0' / (-R T)).
        """
        if self.inchi is None:
            return 0
        elif self.pKas == []:
            dG0s = np.zeros((1, 1))
        else:
            dG0s = -np.cumsum([0] + self.pKas) * R * T * np.log(10)
            # Express every species relative to the major species at pH 7
            dG0s = dG0s - dG0s[self.majorMSpH7]
        DH = debye_huckel((I, T))

        # dG0' = dG0 + nH * (R T ln(10) pH + DH) - charge^2 * DH
        pseudoisomers = np.vstack(
            [dG0s, np.array(self.nHs), np.array(self.zs)]).T
        dG0_prime_vector = pseudoisomers[:, 0] + \
            pseudoisomers[:, 1] * (R * T * np.log(10) * pH + DH) - \
            pseudoisomers[:, 2]**2 * DH

        return -R * T * logsumexp(dG0_prime_vector / (-R * T))
class Molecule(object):
    """Wrapper around an Open Babel OBMol with cached SMILES / InChI strings.

    Attributes:
        title: free-text title (None until SetTitle is called)
        obmol: the underlying openbabel.OBMol
        smiles: cached SMILES string, or None when not yet computed
        inchi: cached InChI string, or None when not yet computed
    """

    # for more rendering options visit:
    # http://www.ggasoftware.com/opensource/indigo/api/options#rendering
    _obElements = openbabel.OBElementTable()
    # NOTE: a single pattern object is shared by every instance and is
    # re-initialised on each VerifySmarts / FindSmarts call.
    _obSmarts = openbabel.OBSmartsPattern()

    @staticmethod
    def GetNumberOfElements():
        return Molecule._obElements.GetNumberOfElements()

    @staticmethod
    def GetAllElements():
        return [Molecule._obElements.GetSymbol(i)
                for i in range(Molecule.GetNumberOfElements())]

    @staticmethod
    def GetSymbol(atomic_num):
        return Molecule._obElements.GetSymbol(atomic_num)

    @staticmethod
    def GetAtomicNum(elem):
        # Python 2 unicode objects are converted to plain str before being
        # handed to Open Babel; type(u'') is str on Python 3, where the
        # conversion is a no-op.
        if isinstance(elem, type(u'')):
            elem = str(elem)
        return Molecule._obElements.GetAtomicNum(elem)

    @staticmethod
    def VerifySmarts(smarts):
        """Return the result of initialising a SMARTS pattern from `smarts`."""
        return Molecule._obSmarts.Init(smarts)

    def __init__(self):
        self.title = None
        self.obmol = openbabel.OBMol()
        self.smiles = None
        self.inchi = None

    def __str__(self):
        return self.title or self.smiles or self.inchi or ""

    def __len__(self):
        return self.GetNumAtoms()

    def Clone(self):
        """Return a new Molecule holding a copy of the underlying OBMol."""
        tmp = Molecule()
        tmp.title = self.title
        tmp.obmol = openbabel.OBMol(self.obmol)
        tmp.smiles = self.smiles
        tmp.inchi = self.inchi
        return tmp

    def SetTitle(self, title):
        self.title = title

    @staticmethod
    def _ReadInto(obmol, text, fmt):
        """Parse `text` (in Open Babel format `fmt`) into `obmol`.

        Returns whatever OBConversion.ReadString returns (false on failure).
        """
        obConversion = openbabel.OBConversion()
        obConversion.AddOption("w", obConversion.OUTOPTIONS)
        obConversion.SetInFormat(fmt)
        return obConversion.ReadString(obmol, text)

    @staticmethod
    def FromSmiles(smiles):
        """Create a Molecule from a SMILES string.

        Raises:
            OpenBabelError: if the string cannot be read or converted.
        """
        m = Molecule()
        m.smiles = smiles
        if not Molecule._ReadInto(m.obmol, m.smiles, "smiles"):
            raise OpenBabelError("Cannot read the SMILES string: " + smiles)
        try:
            m.UpdateInChI()
        except OpenBabelError:
            raise OpenBabelError("Failed to create Molecule from SMILES: "
                                 + smiles)
        m.SetTitle(smiles)
        return m

    @staticmethod
    def FromInChI(inchi):
        """Create a Molecule from an InChI string.

        Raises:
            OpenBabelError: if the string cannot be read or converted.
        """
        m = Molecule()
        m.inchi = inchi
        # Same read check as FromSmiles, so a bad input is reported directly
        if not Molecule._ReadInto(m.obmol, m.inchi, "inchi"):
            raise OpenBabelError("Cannot read the InChI string: " + inchi)
        try:
            m.UpdateSmiles()
        except OpenBabelError:
            raise OpenBabelError("Failed to create Molecule from InChI: "
                                 + inchi)
        m.SetTitle(inchi)
        return m

    @staticmethod
    def FromMol(mol):
        """Create a Molecule from the text of a MOL file.

        Raises:
            OpenBabelError: if the text cannot be read or converted.
        """
        m = Molecule()
        if not Molecule._ReadInto(m.obmol, mol, "mol"):
            raise OpenBabelError("Cannot read the MOL file:\n" + mol)
        try:
            m.UpdateInChI()
            m.UpdateSmiles()
        except OpenBabelError:
            raise OpenBabelError("Failed to create Molecule from MOL file:\n"
                                 + mol)
        m.SetTitle("")
        return m

    @staticmethod
    def FromOBMol(obmol):
        """Wrap an existing OBMol (the object is stored, not copied).

        Raises:
            OpenBabelError: if the molecule cannot be converted.
        """
        m = Molecule()
        m.obmol = obmol
        try:
            m.UpdateInChI()
            m.UpdateSmiles()
        except OpenBabelError:
            raise OpenBabelError("Failed to create Molecule from OBMol")
        m.SetTitle("")
        return m

    @staticmethod
    def _FromFormat(s, fmt='inchi'):
        """Dispatch to the From* constructor matching `fmt`.

        Returns None for a format that is not recognised.
        """
        if fmt == 'smiles' or fmt == 'smi':
            return Molecule.FromSmiles(s)
        if fmt == 'inchi':
            return Molecule.FromInChI(s)
        if fmt == 'mol':
            return Molecule.FromMol(s)
        if fmt == 'obmol':
            return Molecule.FromOBMol(s)
        return None

    @staticmethod
    def _ToFormat(obmol, fmt='inchi'):
        """Write `obmol` in Open Babel format `fmt`.

        For SMILES only the first whitespace-separated token is returned;
        for InChI the surrounding whitespace is stripped.

        Raises:
            OpenBabelError: if the conversion yields no output.
        """
        obConversion = openbabel.OBConversion()
        obConversion.AddOption("w", obConversion.OUTOPTIONS)
        obConversion.SetOutFormat(fmt)
        res = obConversion.WriteString(obmol)
        if not res:
            raise OpenBabelError("Cannot convert OBMol to %s" % fmt)
        if fmt == 'smiles' or fmt == 'smi':
            res = res.split()
            if res == []:
                raise OpenBabelError("Cannot convert OBMol to %s" % fmt)
            return res[0]
        elif fmt == 'inchi':
            return res.strip()
        else:
            return res

    @staticmethod
    def Smiles2InChI(smiles):
        """Convert a SMILES string to InChI.

        Raises:
            OpenBabelError: if the SMILES string cannot be read.
        """
        obConversion = openbabel.OBConversion()
        obConversion.AddOption("w", obConversion.OUTOPTIONS)
        obConversion.SetInAndOutFormats("smiles", "inchi")
        obmol = openbabel.OBMol()
        if not obConversion.ReadString(obmol, smiles):
            raise OpenBabelError("Cannot read the SMILES string: " + smiles)
        return obConversion.WriteString(obmol).strip()

    @staticmethod
    def InChI2Smiles(inchi):
        """Convert an InChI string to SMILES.

        Raises:
            OpenBabelError: if the InChI cannot be read, or if the
                conversion yields no output.
        """
        obConversion = openbabel.OBConversion()
        obConversion.AddOption("w", obConversion.OUTOPTIONS)
        obConversion.SetInAndOutFormats("inchi", "smiles")
        obmol = openbabel.OBMol()
        if not obConversion.ReadString(obmol, inchi):
            raise OpenBabelError("Cannot read the InChI string: " + inchi)
        tokens = obConversion.WriteString(obmol).split()
        if not tokens:
            # Report an empty result as a conversion error instead of a
            # bare IndexError.
            raise OpenBabelError("Cannot convert InChI to SMILES: " + inchi)
        return tokens[0]

    def RemoveHydrogens(self):
        self.obmol.DeleteHydrogens()

    def RemoveAtoms(self, indices):
        """Delete the atoms at the given 0-based indices."""
        self.obmol.BeginModify()
        # Highest index first, so earlier deletions do not shift the
        # indices that are still to be deleted
        for i in sorted(indices, reverse=True):
            self.obmol.DeleteAtom(self.obmol.GetAtom(i + 1))
        self.obmol.EndModify()
        # The cached identifiers no longer describe the molecule
        self.smiles = None
        self.inchi = None

    def SetAtomicNum(self, index, new_atomic_num):
        """Change the element of the atom at 0-based `index`."""
        self.obmol.GetAtom(index + 1).SetAtomicNum(new_atomic_num)
        self.smiles = None
        self.inchi = None

    def ToOBMol(self):
        return self.obmol

    def ToFormat(self, fmt='inchi'):
        return Molecule._ToFormat(self.obmol, fmt=fmt)

    def ToMolfile(self):
        return self.ToFormat('mol')

    def UpdateInChI(self):
        self.inchi = Molecule._ToFormat(self.obmol, 'inchi')

    def ToInChI(self):
        """
            Lazy storage of the InChI identifier (calculate once only when
            asked for and store for later use).
        """
        if not self.inchi:
            self.UpdateInChI()
        return self.inchi

    def UpdateSmiles(self):
        self.smiles = Molecule._ToFormat(self.obmol, 'smiles')

    def ToSmiles(self):
        """
            Lazy storage of the SMILES identifier (calculate once only when
            asked for and store for later use).
        """
        if not self.smiles:
            self.UpdateSmiles()
        return self.smiles

    def GetFormula(self):
        """Return the formula part of the InChI ('' if none is found).

        Raises:
            ValueError: if more than one formula is found.
        """
        tokens = re.findall(r'InChI=1S?/([0-9A-Za-z.]+)', self.ToInChI())
        if len(tokens) == 1:
            return tokens[0]
        elif len(tokens) > 1:
            raise ValueError('Bad InChI: ' + self.ToInChI())
        else:
            return ''

    def GetExactMass(self):
        return self.obmol.GetExactMass()

    def GetAtomBagAndCharge(self):
        """Return (atom_bag, charge) parsed from the InChI.

        atom_bag maps element symbol to count. The /p value is added both
        to the hydrogen count and to the charge taken from /q.
        """
        inchi = self.ToInChI()

        fixed_charge = 0
        for layer in re.findall(r'/q([0-9+\-;]+)', inchi):
            # Multi-component InChIs list one (possibly empty) charge per
            # component, separated by ';' (e.g. "/q;;+2").
            for s in layer.split(';'):
                if s:
                    fixed_charge += int(s)

        fixed_protons = 0
        for s in re.findall(r'/p([0-9+\-]+)', inchi):
            fixed_protons += int(s)

        formula = self.GetFormula()

        atom_bag = {}
        # Components are separated by '.', each optionally prefixed by a
        # multiplier (e.g. "2C2H4O2.Ca").
        for mol_formula_times in formula.split('.'):
            for times, mol_formula in re.findall(r'^(\d+)?(\w+)',
                                                 mol_formula_times):
                times = int(times) if times else 1
                for atom, count in re.findall(r'([A-Z][a-z]*)([0-9]*)',
                                              mol_formula):
                    count = int(count) if count != '' else 1
                    atom_bag[atom] = atom_bag.get(atom, 0) + count * times

        if fixed_protons:
            atom_bag['H'] = atom_bag.get('H', 0) + fixed_protons
            fixed_charge += fixed_protons
        return atom_bag, fixed_charge

    def GetHydrogensAndCharge(self):
        atom_bag, charge = self.GetAtomBagAndCharge()
        return atom_bag.get('H', 0), charge

    def GetNumElectrons(self):
        """Calculates the number of electrons in a given molecule."""
        atom_bag, fixed_charge = self.GetAtomBagAndCharge()
        n_protons = 0
        for elem, count in atom_bag.items():
            n_protons += count * self._obElements.GetAtomicNum(elem)
        return n_protons - fixed_charge

    def GetNumAtoms(self):
        return self.obmol.NumAtoms()

    def GetAtoms(self):
        # OBMol.GetAtom takes 1-based indices
        return [self.obmol.GetAtom(i + 1)
                for i in range(self.obmol.NumAtoms())]

    def FindSmarts(self, smarts):
        """
        Corrects the pyBel version of Smarts.findall() which returns results
        as tuples, with 1-based indices even though Molecule.atoms is 0-based.

        Args:
            smarts: the SMARTS query to search for.

        Returns:
            The re-mapped list of SMARTS matches (a list of lists of 0-based
            atom indices), or [] when there is no match.
        """
        Molecule._obSmarts.Init(smarts)
        if Molecule._obSmarts.Match(self.obmol):
            match_list = Molecule._obSmarts.GetMapList()
            # Always a real list, on Python 2 and Python 3 alike
            return [[n - 1 for n in m] for m in match_list]
        else:
            return []

    def GetAtomCharges(self):
        """
            Returns:
                A list of charges, according to the number of atoms
                in the molecule
        """
        return [atom.GetFormalCharge() for atom in self.GetAtoms()]
else: ipython_notebook = False except NameError: ipython_notebook = False try: __version__ = check_output(['obabel', '-V']).split()[2].decode('ascii') except Exception as e: __version__ = '' # setup typetable to translate atom types typetable = OBTypeTable() typetable.SetFromType('INT') typetable.SetToType('SYB') # setup ElementTable elementtable = ob.OBElementTable() # hash OB! ob.obErrorLog.StopLogging() def _filereader_mol2(filename, opt=None): block = '' data = '' n = 0 with gzip.open(filename) if filename.split('.')[-1] == 'gz' else open( filename) as f: for line in f: if line[:1] == '#': data += line elif line[:17] == '@<TRIPOS>MOLECULE':
def pic(self, filename, picformat='svg'):
    """
    Generates a graphical file with 2D-representation of the resonance structure

    Args:
        filename: path of the file to write
        picformat: Open Babel output format name (default 'svg')

    Reads self.symbols (element symbols, one per atom) and self.data (a
    square matrix: the diagonal entry [i][i] is doubled and counted towards
    atom i's electrons, the off-diagonal entry [i][j] is the bond order
    between atoms i and j).

    If openbabel cannot be imported a message is printed and nothing is
    written.
    """
    try:
        import openbabel as ob
    except ImportError:
        # Best effort: the picture is optional, so only report the problem
        print("Cannot import openbabel")
        return

    # Import Element Numbers
    Sym2Num = ob.OBElementTable()
    ati = [Sym2Num.GetAtomicNum(a) for a in self.symbols]

    # Import connections
    conn = self.data
    n_atoms = len(conn)
    mol = ob.OBMol()

    # Create atoms
    for a in ati:
        at = ob.OBAtom()
        at.SetAtomicNum(a)
        mol.AddAtom(at)

    # Create connections; val[i] accumulates the electrons assigned to
    # atom i (twice the diagonal entry plus the order of each of its bonds)
    val = []
    for i in range(n_atoms):
        val.append(conn[i][i] * 2)
        # Only the lower triangle is visited, so each bond is added once
        for j in range(i):
            if conn[i][j] == 0:
                continue
            val[i] += conn[i][j]
            val[j] += conn[i][j]
            atA = mol.GetAtomById(i)
            atB = mol.GetAtomById(j)

            b = ob.OBBond()
            b.SetBegin(atA)
            b.SetEnd(atB)
            b.SetBO(int(conn[i][j]))
            mol.AddBond(b)

    # Formal charge = valence electrons of the element minus the electrons
    # assigned to the atom above
    for i in range(n_atoms):
        atA = mol.GetAtomById(i)
        FormValEl = CountValenceEl(atA.GetAtomicNum())
        atA.SetFormalCharge(int(FormValEl - val[i]))

    # Export file
    mol.DeleteNonPolarHydrogens()
    conv = ob.OBConversion()
    conv.SetOutFormat(picformat)
    conv.AddOption('C')
    conv.WriteFile(mol, filename)