def remove_labile_protons(sdffile, lbls, shifts):
    """Drop oxygen-bound protons from a label list and its shift lists.

    Args:
        sdffile: path of the structure file. Everything after the first
            ``.sdf`` is discarded and ``.sdf`` is re-appended, so a name
            without the extension is accepted as well.
        lbls: list of atom labels; modified in place.
        shifts: iterable of lists aligned with ``lbls``; each list is
            modified in place.

    Returns:
        The same ``lbls`` and ``shifts`` objects, after removal.

    Raises:
        ValueError: if a labile proton label is not present in ``lbls``
            (raised by ``list.index``).
    """
    path = sdffile.split('.sdf')[0] + '.sdf'

    reader = OBConversion()
    reader.SetInFormat("sdf")
    mol = OBMol()
    reader.ReadFile(mol, path)

    # One label per (hydrogen, oxygen neighbour) pair; labels are built from
    # the hydrogen's 0-based index shifted by one.
    labile = [
        'H' + str(hydrogen.GetIndex() + 1)
        for hydrogen in OBMolAtomIter(mol)
        if hydrogen.GetAtomicNum() == 1
        for neighbour in OBAtomAtomIter(hydrogen)
        if neighbour.GetAtomicNum() == 8
    ]

    # Remove each labile proton from the labels and from the same position
    # in every shift list.
    for label in labile:
        position = lbls.index(label)
        del lbls[position]
        for shift_list in shifts:
            shift_list.pop(position)

    return lbls, shifts
def methyl_protons(file):
    """Return the proton labels of every carbon that carries three hydrogens.

    Args:
        file: path of an SDF file.

    Returns:
        A list with one entry per matching carbon; each entry is the list of
        its three hydrogen labels, built as ``'H'`` plus the hydrogen's
        0-based index shifted by one.
    """
    reader = OBConversion()
    reader.SetInFormat("sdf")
    mol = OBMol()
    reader.ReadFile(mol, file)

    groups = []
    for atom in OBMolAtomIter(mol):
        # Only carbons can contribute; other atoms never collect any protons.
        if atom.GetAtomicNum() != 6:
            continue
        hydrogens = [
            'H' + str(neighbour.GetIndex() + 1)
            for neighbour in OBAtomAtomIter(atom)
            if neighbour.GetAtomicNum() == 1
        ]
        if len(hydrogens) == 3:
            groups.append(hydrogens)
    return groups
def convert_babel_to_system(mol):
    """
    Convert an open babel molecule to a BigDFT system.

    The molecule is written to mol2 text in memory, and that text is then
    parsed with BigDFT's mol2 reader.

    Args:
      mol (openbabel.OBMol): the molecule to convert.

    Returns:
      (BigDFT.Systems.System): bigdft system.
    """
    from BigDFT.IO import read_mol2
    from openbabel.openbabel import OBConversion
    from sys import version_info

    # py2 workaround: choose the in-memory buffer type by interpreter version.
    if version_info[0] >= 3:
        try:
            from io import StringIO
        except ImportError:
            from StringIO import StringIO
    else:
        from io import BytesIO as StringIO

    writer = OBConversion()
    writer.SetOutFormat("mol2")
    mol2_text = writer.WriteString(mol)
    return read_mol2(StringIO(mol2_text))
def convert_system_to_babel(sys):
    """
    Convert a BigDFT system to an open babel molecule.

    The conversion goes by way of pdb text held in memory.

    Args:
      sys (BigDFT.Systems.System): the system to convert.

    Returns:
      (openbabel.OBMol): an open babel type molecule.
    """
    from BigDFT.IO import write_pdb
    from openbabel.openbabel import OBMol, OBConversion
    # ``from sys import ...`` is used on purpose: the parameter is also
    # called ``sys`` and must not be rebound to the module.
    from sys import version_info

    # py2 workaround: choose the in-memory buffer type by interpreter version.
    if version_info[0] >= 3:
        try:
            from io import StringIO
        except ImportError:
            from StringIO import StringIO
    else:
        from io import BytesIO as StringIO

    # Serialize the system as pdb text.
    pdb_buffer = StringIO()
    write_pdb(sys, pdb_buffer)

    # Parse that text back into an open babel molecule.
    reader = OBConversion()
    reader.SetInFormat("pdb")
    babel_mol = OBMol()
    reader.ReadString(babel_mol, pdb_buffer.getvalue())
    return babel_mol
def Karplus(f, inputformat):
    """Build the J matrix and its labels for a structure file.

    Args:
        f: path of the structure file.
        inputformat: Open Babel format name used to read ``f``.

    Returns:
        ``(Jmatrix, Jlabels)`` as produced by ``CalcJMatrix``, with every
        label converted to ``str``.

    Exits the interpreter via ``quit()`` after printing a message when no
    hydrogen yields dihedral neighbours.
    """
    reader = OBConversion()
    reader.SetInFormat(inputformat)
    mol = OBMol()
    reader.ReadFile(mol, f)
    mol.ConnectTheDots()
    mol.Kekulize()

    # One row per hydrogen: its own Idx followed by whatever GetDihedralHs
    # returned for it. A return value of 0 is treated as "nothing found".
    dihedral_rows = []
    for atom in OBMolAtomIter(mol):
        if atom.GetAtomicNum() != 1:
            continue
        neighbours = GetDihedralHs(atom)
        if neighbours != 0:
            dihedral_rows.append([atom.GetIdx()] + neighbours)

    if not dihedral_rows:
        print("No dihedral protons found, Karplus J value prediction "
              "impossible, quitting.")
        quit()

    Jmatrix, raw_labels = CalcJMatrix(mol, dihedral_rows)
    return Jmatrix, [str(label) for label in raw_labels]
def GetHcons(f):
    """List the connections of every hydrogen in an SDF file.

    Args:
        f: path of the SDF file.

    Returns:
        A list of ``[hydrogen_idx, neighbour_idx]`` pairs (values from
        ``GetIdx``), one pair per neighbour of each hydrogen, in molecule
        iteration order.
    """
    reader = OBConversion()
    reader.SetInFormat("sdf")
    mol = OBMol()
    reader.ReadFile(mol, f)

    return [
        [hydrogen.GetIdx(), neighbour.GetIdx()]
        for hydrogen in OBMolAtomIter(mol)
        if hydrogen.GetAtomicNum() == 1
        for neighbour in OBAtomAtomIter(hydrogen)
    ]
def labile_protons(file):
    """Count the hydrogens attached to oxygen atoms in an SDF file.

    Args:
        file: path of the SDF file.

    Returns:
        The number of (oxygen, hydrogen neighbour) pairs in the molecule.
    """
    reader = OBConversion()
    reader.SetInFormat("sdf")
    mol = OBMol()
    reader.ReadFile(mol, file)

    total = 0
    for atom in OBMolAtomIter(mol):
        if atom.GetAtomicNum() != 8:
            continue
        total += sum(
            1 for neighbour in OBAtomAtomIter(atom)
            if neighbour.GetAtomicNum() == 1
        )
    return total
def FixTautProtons(f, inchi, AuxInfo):
    """Append a fixed-hydrogen suffix (``/f/h...``) to an InChI string.

    Args:
        f: path of the SDF file holding the molecule.
        inchi: the InChI string to extend.
        AuxInfo: auxiliary information passed to ``GetInchiRenumMap``.

    Returns:
        ``inchi`` followed by ``/f/h`` and a comma-separated list of
        ``<n>H`` (one hydrogen) or ``<n>H<count>`` entries, with the last
        character of the suffix stripped. When no entry is produced that
        stripped character is the ``h`` itself, so the suffix is ``/f/``.
    """
    # Tautomeric groups from the InChI and the InChI -> source numbering map.
    taut_groups = GetTautProtons(inchi)
    renum_map = GetInchiRenumMap(AuxInfo)

    # Pair each InChI atom number with its source atom number. Element 0 of
    # every group is skipped (presumably it is not an atom number -- confirm
    # against GetTautProtons).
    atom_pairs = [
        [int(inchi_num), renum_map[int(inchi_num) - 1]]
        for group in taut_groups
        for inchi_num in group[1:]
    ]

    # Read molecule from file
    reader = OBConversion()
    reader.SetInFormat("sdf")
    mol = OBMol()
    reader.ReadFile(mol, f)

    # Record the InChI number once per hydrogen found on the source atom.
    fixed_positions = []
    for inchi_num, source_num in atom_pairs:
        heavy_atom = mol.GetAtom(source_num)
        for neighbour in OBAtomAtomIter(heavy_atom):
            if neighbour.GetAtomicNum() == 1:
                fixed_positions.append(inchi_num)

    # Tally hydrogens per position, keeping first-occurrence order.
    first_seen = []
    tally = {}
    for position in fixed_positions:
        if position not in tally:
            tally[position] = 0
            first_seen.append(position)
        tally[position] += 1

    fixedlayer = '/f/h'
    for position in first_seen:
        entry = str(position) + 'H'
        if tally[position] != 1:
            entry += str(tally[position])
        fixedlayer += entry + ','

    # Strip the final character (the trailing comma when entries exist).
    return inchi + fixedlayer[:-1]
def compute_smiles(sys):
    """
    Computes the SMILES representation of a given system.

    The system is converted to an open babel molecule, written out in the
    "SMI" format, and all newline and tab characters are removed.

    Args:
      sys (BigDFT.Systems.System): the system to compute the
        representation of.

    Return:
      (str): the smiles representation of this molecule.
    """
    from openbabel.openbabel import OBConversion

    babel_mol = convert_system_to_babel(sys)

    writer = OBConversion()
    writer.SetOutFormat("SMI")
    smiles = writer.WriteString(babel_mol)

    for separator in ("\n", "\t"):
        smiles = smiles.replace(separator, "")
    return smiles
def RestoreNumsSDF(f, fold, AuxInfo):
    """Rewrite an SDF file with its atoms reordered according to a renumbering map.

    Args:
        f: path of the SDF file to reorder; it is overwritten in place.
        fold: path of the SDF file whose hydrogen connections (from
            ``GetHcons``) supply the hydrogen numbering.
        AuxInfo: auxiliary information passed to ``GetInchiRenumMap``.

    Returns:
        None. The reordered molecule is written back to ``f``.
    """
    # Read molecule from file
    converter = OBConversion()
    converter.SetInFormat("sdf")
    mol = OBMol()
    converter.ReadFile(mol, f)

    # Hydrogen connections of the old file, as [hydrogen, attached atom].
    old_h_bonds = GetHcons(fold)

    # Translate the attached-atom numbers into the new numbering system.
    renum_map = GetInchiRenumMap(AuxInfo)
    for bond in old_h_bonds:
        bond[1] = renum_map.index(bond[1]) + 1

    # Collect every atom of the new molecule, and the connections of its
    # hydrogens.
    atoms = []
    new_h_bonds = []
    for atom in OBMolAtomIter(mol):
        if atom.GetAtomicNum() == 1:
            hydrogen_idx = atom.GetIdx()
            for neighbour in OBAtomAtomIter(atom):
                new_h_bonds.append([hydrogen_idx, neighbour.GetIdx()])
        atoms.append(atom)

    # Extend the map with the old hydrogen numbers whose attached atom
    # matches. A matched entry is zeroed so it doesn't get added twice.
    for _, attached_idx in new_h_bonds:
        for bond in old_h_bonds:
            if bond[1] == attached_idx:
                renum_map.append(bond[0])
                bond[1] = 0

    # Add atoms in the order given by the map: number 1 first, then 2, ...
    newmol = OBMol()
    placed = []
    for number in range(1, len(renum_map) + 1):
        position = renum_map.index(number)
        newmol.AddAtom(atoms[position])
        placed.append(position)

    # Final runthrough to check that all atoms have been added,
    # tautomeric protons can be missed. If tautomeric proton tracking
    # is implemented this can be removed
    for position, atom in enumerate(atoms):
        if position not in placed:
            newmol.AddAtom(atom)

    # Restore the bonds
    newmol.ConnectTheDots()
    newmol.PerceiveBondOrders()

    # Write renumbered molecule to file
    converter.SetOutFormat("sdf")
    converter.WriteFile(newmol, f)
def runvina(infile, outfile, receptor, tmp_file='test.pdbqt', vina=None):
    """Score every molecule of an SDF file with ``vina --score_only``.

    Each molecule is exported to ``tmp_file`` as pdbqt, and the vina
    executable is run on it against ``receptor``. The value returned by
    ``get_aff`` for vina's output is stored on the molecule under the data
    key ``AutodockVinaRescoreOnly``, and the molecule is written to
    ``outfile``. When vina fails, or ``get_aff`` raises ``ValueError``, the
    error is printed and the molecule is written without a score.

    Args:
        infile: path of the input SDF file.
        outfile: path of the output SDF file (overwritten).
        receptor: receptor file passed to vina via ``--receptor``.
        tmp_file: scratch pdbqt file, rewritten for every molecule.
        vina: path of the vina executable.

    Returns:
        None.
    """
    obconversion = OBConversion()
    obconversion.SetInFormat("sdf")
    obconversion.SetOutFormat("pdbqt")

    obmol = OBMol()
    notatend = obconversion.ReadFile(obmol, infile)
    # Copy taken before the pdbqt export; this is the molecule that gets
    # annotated and written on success.
    obmol2 = OBMol(obmol)

    ofs = pybel.Outputfile("sdf", outfile, overwrite=True)
    pbar = tqdm()
    try:
        while notatend:
            pbar.update(1)
            if obconversion.WriteFile(obmol, tmp_file):
                try:
                    x = subprocess.check_output(
                        [vina, "--score_only", "--receptor", receptor,
                         "--ligand", tmp_file],
                        shell=False)
                    mol2 = pybel.Molecule(obmol2)
                    mol2.data.update(
                        {'AutodockVinaRescoreOnly': str(get_aff(x))})
                    ofs.write(mol2)
                except (subprocess.CalledProcessError, ValueError) as e:
                    print(e)
                    # Outputfile.write() expects a pybel.Molecule, so the
                    # raw OBMol must be wrapped before it is written.
                    ofs.write(pybel.Molecule(obmol))
            else:
                print("error writing")

            # Advance to the next record of the input file.
            obmol = OBMol()
            notatend = obconversion.Read(obmol)
            obmol2 = OBMol(obmol)
    finally:
        # Release the progress bar and the output file even when a molecule
        # raises an unexpected error.
        pbar.close()
        ofs.close()

    # NOTE(review): this is printed after every complete run, including
    # successful ones -- confirm whether that is intended.
    print("FAILED")
def mol_fragments(mole, outfile, radius=3):
    """Write one XYZ fragment file per atom of ``mole``.

    For every atom a breadth-first search over bonded neighbours collects
    all atoms within ``radius`` bonds. The fragment is written to
    ``outfile + "frag" + <index zero-padded to 3> + ".xyz"``, where the
    index is the central atom's ``GetIndex()``. The central atom is always
    the first atom in the file; the remaining atoms follow in ascending
    index order.

    Args:
        mole: the Open Babel molecule to fragment.
        outfile: filename prefix. It is concatenated as-is, so pass a
            trailing path separator to write into a directory.
        radius: number of bond steps to expand from the central atom.

    Returns:
        None.
    """
    atoms = list(OBMolAtomIter(mole))
    # Lookup table so fragment members need not be searched for in the
    # whole molecule one index at a time.
    atoms_by_index = {atom.GetIndex(): atom for atom in atoms}

    for centre in atoms:
        centre_index = centre.GetIndex()

        # Breadth-first expansion, one bond shell per iteration.
        seen = {centre_index}
        shell_members = []
        frontier = [centre]
        for _ in range(radius):
            next_frontier = []
            for current in frontier:
                for neighbour in OBAtomAtomIter(current):
                    neighbour_index = neighbour.GetIndex()
                    if neighbour_index not in seen:
                        seen.add(neighbour_index)
                        shell_members.append(neighbour_index)
                        next_frontier.append(neighbour)
            frontier = next_frontier

        # Central atom first, everything else sorted by index.
        new_mol = OBMol()
        for index in [centre_index] + sorted(shell_members):
            new_mol.AddAtom(atoms_by_index[index])

        path = outfile + "frag" + str(centre_index).zfill(3) + ".xyz"
        with open(path, "w") as xyz_file:
            xyz_file.write(str(new_mol.NumAtoms()) + "\n\n")
            for frag_atom in OBMolAtomIter(new_mol):
                # NOTE(review): only the first character of the atom type is
                # written, so two-letter element symbols would be truncated
                # -- confirm this is acceptable for the inputs used.
                xyz_file.write(
                    frag_atom.GetType()[0] + " " + str(frag_atom.GetX())
                    + " " + str(frag_atom.GetY()) + " "
                    + str(frag_atom.GetZ()) + "\n")
def _write_conformer_xyz(path, atoms, geom):
    """Write one conformer geometry to ``path`` in XYZ format."""
    with open(path, "w") as xyz_file:
        xyz_file.write(str(len(atoms)) + "\n" + "\n")
        for atom, coords in zip(atoms, geom):
            xyz_file.write(atom + " " + str(coords[0]) + " "
                           + str(coords[1]) + " " + str(coords[2]) + "\n")


def ProcessIsomers(dp5Data, Isomers, Settings):
    """Fill ``dp5Data`` with carbon shifts and atomic representations.

    For every isomer, the carbons with an experimental value (``exp != ''``)
    are copied into ``dp5Data.Cshifts``, ``Cexp``, ``Clabels``, ``Cinds``
    and, per conformer, ``ConfCshifts``. Every DFT conformer is then written
    to ``<OutputFolder>/dp5/<input stem>_<NNN>.xyz``, and an FCHL
    representation per carbon label is appended to ``dp5Data.AtomReps``.

    Molecules with fewer than 86 atoms are represented whole. Otherwise the
    structure read from ``iso.InputFile`` is split into per-atom fragments
    by ``mol_fragments``, and the representation of each fragment's first
    atom is used.

    Args:
        dp5Data: accumulator object; its list attributes are appended to.
        Isomers: iterable of isomer objects.
        Settings: object providing ``OutputFolder``.

    Returns:
        The same ``dp5Data`` object.

    Exits the interpreter via ``quit()`` when no isomer has experimental
    carbon data.
    """
    OutputFolder = Path(Settings.OutputFolder)

    # Stop early if no isomer carries experimental data.
    if all(len(iso.Cexp) == 0 for iso in Isomers):
        print("no experimental NMR data provided... quitting")
        quit()

    # Carbon: keep only the shifts that have an experimental counterpart.
    for iso in Isomers:
        dp5Data.Cexp.append([])
        dp5Data.Cshifts.append([])
        dp5Data.Clabels.append([])
        dp5Data.Cinds.append([])
        dp5Data.ConfCshifts.append([])

        exp_inds = []
        assigned = zip(iso.Cshifts, iso.Cexp, iso.Clabels)
        for a_ind, (shift, exp, label) in enumerate(assigned):
            if exp != '':
                dp5Data.Cshifts[-1].append(shift)
                dp5Data.Cexp[-1].append(exp)
                dp5Data.Clabels[-1].append(label)
                dp5Data.Cinds[-1].append(int(label[1:]) - 1)
                exp_inds.append(a_ind)

        for conf_shifts in iso.ConformerCShifts:
            dp5Data.ConfCshifts[-1].append(
                [conf_shifts[e] for e in exp_inds])

    # Write qml compound objects and atomic representations.
    # 86 is the max number of atoms in a molecule in the training set.
    if dp5Data.Atom_number < 86:
        for iso in Isomers:
            InputFile = Path(iso.InputFile)
            dp5Data.AtomReps.append([])

            for i, geom in enumerate(iso.DFTConformers):
                # The Path built above is used so that a plain-string
                # Settings.OutputFolder works here too.
                xyz_path = (str(OutputFolder / "dp5" / InputFile.stem)
                            + "_" + str(i).zfill(3) + ".xyz")
                _write_conformer_xyz(xyz_path, iso.Atoms, geom)

                dp5Data.Compounds.append(qml.Compound(xyz=xyz_path))
                dp5Data.Compounds[-1].generate_fchl_representation(
                    max_size=86, cut_distance=c_distance)

                dp5Data.AtomReps[-1].append([])
                for C_l in iso.Clabels:
                    # NOTE(review): the label number is used directly as an
                    # index here, while Cinds above uses label number - 1
                    # -- confirm which convention is intended.
                    ind = int(C_l.split("C")[1])
                    dp5Data.AtomReps[-1][-1].append(
                        dp5Data.Compounds[-1].representation[ind])

    # Otherwise we need to fragment the molecule to radius of 3.
    else:
        for iso in Isomers:
            InputFile = Path(iso.InputFile)
            dp5Data.AtomReps.append([])

            for i, geom in enumerate(iso.DFTConformers):
                base = (str(OutputFolder / "dp5" / InputFile.stem)
                        + "_" + str(i).zfill(3))
                _write_conformer_xyz(base + ".xyz", iso.Atoms, geom)

                # Build the ob mol that gets fragmented.
                obconversion = OBConversion()
                obconversion.SetInFormat("sdf")
                m = OBMol()
                obconversion.ReadFile(m, iso.InputFile)

                frag_dir = base + "_fragments"
                # exist_ok so that rerunning on the same output folder does
                # not fail on the already-created directory.
                os.makedirs(frag_dir, exist_ok=True)
                # mol_fragments treats its argument as a filename prefix;
                # the trailing separator makes the files land inside
                # frag_dir, which is the directory listed below.
                mol_fragments(m, frag_dir + "/")

                conf_rep = []
                for xyz_frag in sorted(os.listdir(frag_dir)):
                    c = qml.Compound(xyz=frag_dir + "/" + xyz_frag)
                    c.generate_fchl_representation(
                        max_size=54, cut_distance=c_distance)
                    # Keep the representation of the fragment's first atom.
                    conf_rep.append(c.representation[0])

                dp5Data.AtomReps[-1].append([])
                for C_l in iso.Clabels:
                    # NOTE(review): same indexing convention question as in
                    # the whole-molecule branch above.
                    ind = int(C_l.split("C")[1])
                    dp5Data.AtomReps[-1][-1].append(conf_rep[ind])

    return dp5Data
def main(f, settings):
    """
    Find the axis atoms
    Find all the atoms to be rotated
    Rotate it and the substituents to the other side of the plane

    The molecule is read from the SDF file ``f``. A five-membered ring is
    chosen, its out-of-plane atom (as returned by ``FindFuranPlane``) and
    that atom's non-ring substituents are rotated about the axis through
    its two ring neighbours, and the result is written to
    ``f[:-4] + 'rot.sdf'``.

    Args:
        f: path of the input SDF file; the last four characters are
            replaced by ``rot.sdf`` to form the output name.
        settings: object providing ``RingAtoms``. When it has exactly five
            entries, the first ring whose ``_path`` contains all of them is
            used; otherwise the first non-aromatic ring of size 5 is used.

    Returns:
        None.

    Exits the interpreter via ``quit()`` after printing a message when no
    suitable ring is found.
    """
    obconversion = OBConversion()
    obconversion.SetInFormat("sdf")
    obmol = OBMol()
    obconversion.ReadFile(obmol, f)
    obmol.ConnectTheDots()

    # Find the atoms composing furan ring
    furan = None
    for ring in obmol.GetSSSR():
        if len(settings.RingAtoms) == 5:
            if all(x in ring._path for x in settings.RingAtoms):
                furan = ring
                break
        elif ring.Size() == 5 and not ring.IsAromatic():
            furan = ring
            break

    if furan is None:
        # The message used to be a bare string expression and was never
        # shown; print it before quitting.
        print("No five membered rings to rotate. Quitting...")
        quit()

    # Find the plane of the 5-membered ring and the outlying atom
    norm, d, outAtom = FindFuranPlane(obmol, furan)

    # Find the atoms connected to the outlying atom and sort them
    # as either part of the ring (axis atoms) or as atoms to be rotated
    AxisAtoms = []
    RotAtoms = []
    for NbrAtom in OBAtomAtomIter(outAtom):
        if furan.IsInRing(NbrAtom.GetIdx()):
            AxisAtoms.append(NbrAtom)
        else:
            RotAtoms.append(NbrAtom)
            FindSubstAtoms(NbrAtom, outAtom, RotAtoms)

    # Remember which side of 90 degrees the atom starts on, to help detect
    # if the atoms are rotated the right way.
    angle = FindRotAngle(AxisAtoms[0], AxisAtoms[1], outAtom, norm)
    WasAbove90 = angle > 0.5 * pi
    if WasAbove90:
        rotangle = 2 * (angle - 0.5 * pi)
    else:
        rotangle = 2 * (0.5 * pi - angle)

    OldAtomCoords = outAtom.GetVector()
    # 57.3 approximates 180/pi (radians to degrees) for the message only.
    print("Atom " + str(outAtom.GetAtomicNum()) + " will be rotated by "
          + str(rotangle * 57.3) + ' degrees')
    RotateAtom(outAtom, AxisAtoms[0], AxisAtoms[1], rotangle)
    angle2 = FindRotAngle(AxisAtoms[0], AxisAtoms[1], outAtom, norm)

    # If the atom is on the same side of the plane as it was,
    # it has been rotated in the wrong direction
    if ((angle2 > 0.5 * pi) and WasAbove90) or \
            ((angle2 < 0.5 * pi) and not WasAbove90):
        # Flip the sign of the rotation angle, restore the coords
        # and rotate the atom in the opposite direction
        print("Atom was rotated the wrong way, switching the direction")
        rotangle = -rotangle
        outAtom.SetVector(OldAtomCoords)
        RotateAtom(outAtom, AxisAtoms[0], AxisAtoms[1], rotangle)

    # Index to make sure that atoms are not rotated twice
    RotatedAtoms = []
    for atom in RotAtoms:
        if atom not in RotatedAtoms:
            RotateAtom(atom, AxisAtoms[0], AxisAtoms[1], rotangle)
            RotatedAtoms.append(atom)
        else:
            print("Atom already rotated, skipping")

    obconversion.SetOutFormat("sdf")
    obconversion.WriteFile(obmol, f[:-4] + 'rot.sdf')