def __init__(self, rec_typer, lig_typer, data_file, data_root, verbose=False):
    """Load the data index file and prepare OpenBabel file readers.

    Args:
        rec_typer: receptor typer object, stored as ``self.rec_typer``.
        lig_typer: ligand typer object, stored as ``self.lig_typer``.
        data_file: path of the space-separated, header-less index file.
        data_root: root directory, stored as ``self.root_dir``.
        verbose: verbosity flag, stored as ``self.verbose``.
    """
    super().__init__()

    def make_reader(in_format):
        # Each reader gets its own converter; the bound ReadFile method
        # is what gets stored on the instance.
        converter = ob.OBConversion()
        converter.SetInFormat(in_format)
        return converter.ReadFile

    # NOTE (carried over from the original author): one column of the data
    # file was not understood - it is positive for low_rmsd rows, negative
    # for ~low_rmsd rows, but otherwise has the same absolute distribution.
    # Which column this refers to is not evident from this code - TODO confirm.
    column_names = [
        'low_rmsd',
        'true_aff',
        'xtal_rmsd',
        'rec_src',
        'lig_src',
        'vina_aff',
    ]
    self.data = pd.read_csv(
        data_file, sep=' ', names=column_names, index_col=False
    )
    self.root_dir = data_root

    self.read_pdb = make_reader('pdb')
    self.read_sdf = make_reader('sdf')

    self.mol_cache = dict()
    self.verbose = verbose
    self.rec_typer = rec_typer
    self.lig_typer = lig_typer
def test_perception_and_canonicalization(self):
    """Check the 'smi' and 'can' writers on the same parsed molecule."""
    mol = parse_smiles("C1=CC=C(O)C=C1")

    expected_by_format = (
        # Input does perception. Output is not canonical.
        ("smi", "c1ccc(O)cc1\t\n"),
        # Perception and canonical generation.
        ("can", "Oc1ccccc1\t\n"),
    )
    for out_format, expected in expected_by_format:
        conv = ob.OBConversion()
        conv.SetOutFormat(out_format)
        self.assertEqual(conv.WriteString(mol), expected)
def to_smiles(self, canonical=False, hydrogens=False):
    """Create the SMILES string from the system.

    Parameters
    ----------
    canonical : bool = False
        Use the "can" (canonical) writer instead of the plain "smi" writer.
    hydrogens : bool = False
        Pass the "h" option to the writer so that H's are kept in the
        SMILES string.

    Returns
    -------
    str
        The SMILES string written by Open Babel, with surrounding
        whitespace stripped.
    """
    logger.info("to_smiles")

    writer = openbabel.OBConversion()
    writer.SetOutFormat("can" if canonical else "smi")

    mol = self.to_OBMol()
    if hydrogens:
        writer.AddOption("h")

    smiles = writer.WriteString(mol)
    logger.info(f"smiles = '{smiles}'")
    return smiles.strip()
def __init__(self, verbose=False, debug=False):
    """
    TERMS:
        d_i : bonds with different chemical groups
        e_i : unique list of non-H chemical elements involved in bonds
        s_i : chirality bit
        v_i : valence electrons (calculated as octet(8) - max number of bonds)
        b_i : sum of all bond orders
    """
    self.verbose = verbose
    self.debug = debug

    # SMILES writer kept on the instance.
    smiles_writer = ob.OBConversion()
    smiles_writer.SetOutFormat('smi')
    self.converter = smiles_writer

    # SMARTS patterns used to assign mesomeric properties to groups.
    # Layout: SMARTS_pattern -> [equivalent atoms idx list, contribution]
    carboxylate_or_nitrate = '[$([#8;X1])]=*-[$([#8;X1])]'
    azete_ring = '[$([#7;X2](=*))](=*)(-*=*)'
    self._mesomery_patterns = {
        carboxylate_or_nitrate: [[[0, 2]], 1.5],
        azete_ring: [[[2, 1]], 1.5],
    }
    # NOTE:
    # tautomeric forms of histidine, guanidine, and others are not
    # considered due to uncertainty in the implementation. Disabled drafts:
    #   guanidine/guanidinium
    #     '[NHX3][CH0X3](=[NH2X3+,NHX2+0])[NH2X3]': [[[0,2],[0,3],[2,3]], 1.3]
    #     '[NHX3][CH0X3](=[NH2X3+,NHX2+0])[NH2X3]': [[[2,3]], 1.5]
    #     '[$([NHX3](C)(C))][CH0X3](=[NH2X3+,NHX2+0])[NH2X3]': [[[0,2],[0,3],[2,3]], 1.3]
    #   histidine / imidazole
    #     '[CH2X4]'
    #     '[#6X3]1:'
    #     '[$([#7X3H+,#7X2H0+0]:[#6X3H]:[#7X3H]),$([#7X3H])]:'
    #     '[#6X3H]:'
    #     '[$([#7X3H+,#7X2H0+0]:[#6X3H]:[#7X3H]),$([#7X3H])]:'
    #     '[#6X3H]1' : [[[1,3]], 2.5]
def export_pdbqt(file_name):
    """Convert ``<file_name>.pdb`` to ``<file_name>.pdbqt`` with Open Babel.

    :param file_name: path of the structure without its extension
    :return: ``Path`` of the written ``.pdbqt`` file
    """
    pdb_path = Path(f'{file_name}.pdb')
    pdbqt_path = Path(f'{file_name}.pdbqt')

    converter = ob.OBConversion()
    converter.SetInAndOutFormats('pdb', 'pdbqt')

    molecule = ob.OBMol()
    converter.ReadFile(molecule, pdb_path.as_posix())
    converter.WriteFile(molecule, pdbqt_path.as_posix())
    return pdbqt_path
def buildMoleculeFromSMILE(smileStr):
    """
    Create molecular structure by using a SMILES string.

    The SMILES is parsed with Open Babel, 3D coordinates are generated with
    the ``gen3D`` operation (``--best``), and the resulting XYZ text is read
    back with ASE.

    :param smileStr: The string of the SMILES
    :type smileStr: Python string
    :return: A molecule structure
    :rtype: ase.Atoms object
    """
    import os
    import tempfile

    from ase.io import read
    from openbabel import openbabel

    mol = openbabel.OBMol()
    obConversion = openbabel.OBConversion()
    obConversion.SetInAndOutFormats('smi', 'xyz')
    obConversion.ReadString(mol, smileStr)

    gen3d = openbabel.OBOp.FindType('gen3D')
    gen3d.Do(mol, '--best')
    xyz_text = obConversion.WriteString(mol)

    # Use a private scratch directory instead of a fixed 'babel.xyz' in the
    # current working directory: concurrent calls no longer clobber each
    # other, the file handle is always closed, and cleanup is portable
    # (no shell 'rm') and happens even if reading fails.
    with tempfile.TemporaryDirectory() as scratch_dir:
        xyz_path = os.path.join(scratch_dir, 'babel.xyz')
        with open(xyz_path, 'w') as handle:
            handle.write(xyz_text)
        atoms = read(xyz_path)
    return atoms
def inchi_to_key(inchi, engine="openbabel"):
    """Convert InChI representation to InChIKey hash.

    Parameters
    ----------
    inchi : str
        InChI representation.
    engine : str (default: "openbabel")
        Molecular conversion engine ("openbabel" or "rdkit").

    Returns
    -------
    str
        InChIKey hash.

    Raises
    ------
    AttributeError
        If `engine` is neither "openbabel" nor "rdkit".
    """
    if engine == "rdkit":
        return Chem.MolToInchiKey(Chem.MolFromInchi(inchi))
    if engine != "openbabel":
        raise AttributeError("Engine must be either 'openbabel' or 'rdkit'.")

    converter = openbabel.OBConversion()
    converter.SetInAndOutFormats("inchi", "inchi")
    molecule = openbabel.OBMol()
    converter.ReadString(molecule, inchi)
    # The "K" output option is set so the InChI writer emits the key.
    converter.SetOptions("K", converter.OUTOPTIONS)
    return converter.WriteString(molecule).rstrip()
def test_residue_class(hippos_config):
    """
    Simple test on Residue class
    """
    # Arrange
    protein_name = "tests/data/direct_ifp/mol2_vina/" + "protein_vina.mol2"
    custom_settings = dict(
        omit_interaction=hippos_config.omit_interaction,
        backbone=hippos_config.use_backbone,
        res_weight=hippos_config.res_weight,
        output_mode=hippos_config.output_mode,
    )

    mol2_reader = ob.OBConversion()
    mol2_reader.SetInFormat("mol2")
    protein_mol = ob.OBMol()
    mol2_reader.ReadFile(protein_mol, protein_name)

    # Act: one Residue per configured (name, number) pair, keyed by name
    residue_ids = zip(hippos_config.residue_name, hippos_config.residue_number)
    residues = {
        res_name: Residue(protein_mol, res_name, res_num, custom_settings)
        for res_name, res_num in residue_ids
    }

    # Assert
    assert residues["ARG116"].AA_name == "ARG"
def _obtain_n_benz(compound_smiles):
    """
    Count the benzene-like rings of a compound given as SMILES.

    A ring from the SSSR is counted when it is aromatic and has more than
    five members.

    Parameters
    -----------
    compound_smiles: str
        smiles of the compound

    Return
    ------
    n_aromatic_rings: int
        Number of aromatic rings with more than five members
    """
    mol = openbabel.OBMol()
    reader = openbabel.OBConversion()
    reader.SetInAndOutFormats("smi", "mdl")
    reader.ReadString(mol, compound_smiles)

    return sum(
        1
        for ring in mol.GetSSSR()
        if ring.IsAromatic() and ring.Size() > 5
    )
def test_write_inchi(self):
    """The InChI writer output for phenol ends with a newline."""
    phenol = parse_smiles("c1ccccc1O")

    writer = ob.OBConversion()
    writer.SetOutFormat("inchi")

    # Note the newline!
    expected = "InChI=1S/C6H6O/c7-6-4-2-1-3-5-6/h1-5,7H\n"
    self.assertEqual(writer.WriteString(phenol), expected)
def inchi_to_can(inchi, engine="openbabel"):
    """Convert InChI to canonical SMILES.

    Parameters
    ----------
    inchi : str
        InChI string.
    engine : str (default: "openbabel")
        Molecular conversion engine ("openbabel" or "rdkit").

    Returns
    -------
    str
        Canonical SMILES.

    Raises
    ------
    AttributeError
        If `engine` is neither "openbabel" nor "rdkit".
    """
    if engine not in ("openbabel", "rdkit"):
        raise AttributeError(
            "Engine must be either 'openbabel' or 'rdkit'."
        )

    if engine == "rdkit":
        return Chem.MolToSmiles(Chem.MolFromInchi(inchi))

    converter = openbabel.OBConversion()
    converter.SetInAndOutFormats("inchi", "can")
    molecule = openbabel.OBMol()
    converter.ReadString(molecule, inchi)
    written = converter.WriteString(molecule)
    return written.rstrip()
def qcmiles(jsmol, toolkit='rdkit'):
    """Return the cmiles molecule identifiers for ``jsmol``.

    ``jsmol`` is turned into XYZ text with ``dict_to_xyz_string``, converted
    to SDF by Open Babel, loaded as an openforcefield ``Molecule`` and
    exported as a QC schema dict, which is then handed to
    ``cmiles.generator.get_molecule_ids``.

    :param jsmol: molecule description accepted by ``dict_to_xyz_string``
    :param toolkit: toolkit name forwarded to cmiles
    :return: the value returned by ``cmiles.generator.get_molecule_ids``
    """
    import cmiles
    from openbabel import openbabel
    from openforcefield.topology.molecule import Molecule

    xyz_to_sdf = openbabel.OBConversion()
    xyz_to_sdf.SetInAndOutFormats("xyz", "sdf")
    obmol = openbabel.OBMol()
    xyz_to_sdf.ReadString(obmol, dict_to_xyz_string(jsmol))
    sdf_text = xyz_to_sdf.WriteString(obmol)

    # The SDF text is fed through an in-memory stream rather than a
    # temporary 'mol.sdf' file on disk.
    with io.StringIO(sdf_text) as sdf_stream:
        off_molecule = Molecule.from_file(sdf_stream, file_format='SDF')
        qcmol = off_molecule.to_qcschema().dict()

    # cmiles wants the schema with a flat xyz
    geometry = qcmol['geometry']
    if len(geometry.shape) > 1:
        qcmol['geometry'] = np.reshape(geometry, (-1, ))

    return cmiles.generator.get_molecule_ids(qcmol, toolkit=toolkit)
def test_write_sdf(self):
    """Write two molecules to one SD file and read them back."""
    conv = ob.OBConversion()
    conv.SetOutFormat("sdf")

    with TempDir() as tempdir:
        ethanol = parse_smiles("CCO")
        ethanol.SetTitle("#1")
        with SuppressLogging():
            # XXX For some reason, this generates the warning
            #   Warning in WriteMolecule No 2D or 3D coordinates exist.
            #   Any stereochemical information will be lost. To generate
            #   2D or 3D coordinates use --gen2D or --gen3d.
            # Since not all users of the API will have a --gen2D/--gen3d
            # option, that's not always going to be useful. Plus, my test
            # cases have no stereochemical information. Oh, and hey - I
            # don't even call WriteMolecule directly
            conv.WriteFile(ethanol, tempdir("blah.sdf"))

            ammonium = parse_smiles("[NH4+]")
            ammonium.SetTitle("mol2")
            conv.Write(ammonium)
            conv.CloseOutFile()

        # Pull out the per-record values while iterating; the reader's
        # molecule objects are not kept beyond each step.
        records = [
            (record.GetTitle(), record.NumAtoms())
            for record in readfile(tempdir("blah.sdf"), "sdf")
        ]

    titles = [title for title, _ in records]
    atom_counts = [count for _, count in records]
    self.assertEqual(titles, ["#1", "mol2"])
    self.assertEqual(atom_counts, [3, 5])
def can_to_inchi(can, engine="openbabel"):
    """Convert canonicalized SMILES to InChI.

    Parameters
    ----------
    can : str
        Canonical SMILES.
    engine : str (default: "openbabel")
        Molecular conversion engine ("openbabel" or "rdkit").

    Returns
    -------
    str
        InChI string.

    Raises
    ------
    AttributeError
        If `engine` is neither "openbabel" nor "rdkit".
    """
    if engine == "openbabel":
        converter = openbabel.OBConversion()
        converter.SetInAndOutFormats("can", "inchi")
        molecule = openbabel.OBMol()
        converter.ReadString(molecule, can)
        return converter.WriteString(molecule).rstrip()

    if engine == "rdkit":
        return Chem.MolToInchi(Chem.MolFromSmiles(can))

    raise AttributeError("Engine must be either 'openbabel' or 'rdkit'.")
def runGaussianFromType(self, filename, fileformat):
    """Convert a structure file to Gaussian input ("gjf") text and run it.

    :param filename: path of the structure file to read
    :param fileformat: Open Babel format id of ``filename``
    :return: the result of ``self.runGaussianWithOpenBabel``
    """
    converter = openbabel.OBConversion()
    converter.SetInAndOutFormats(fileformat, "gjf")

    molecule = openbabel.OBMol()
    converter.ReadFile(molecule, filename)

    gaussian_input = converter.WriteString(molecule)
    return self.runGaussianWithOpenBabel(gaussian_input)
def __init__(self, filename, ext):
    """Read ``filename`` (Open Babel format ``ext``) into ``self.mol``.

    ``self.data`` starts with every field empty; only ``name`` is filled
    here, through ``self.update``, from the cleaned file name.

    :param filename: path of the structure file
    :param ext: Open Babel format id used to read the file
    """
    self.data = {
        'name': '',
        'index': '',
        'metal': '',
        'topology': '',
        'parent': '',
        'atomic_info': '',
        'bond_table': '',
        'connectivity': '',
        'connect_flag': '',
        'connect_sym': '',
    }
    name = os.path.split(filename)[-1]
    self.name = clean(name, ext)
    self.update(name=self.name)

    # may be a source of error.. untested
    obConversion = ob.OBConversion()
    obConversion.SetInAndOutFormats(ext, 'pdb')
    self.mol = ob.OBMol()
    # The former Python 2 / Python 3 branches were identical, so the
    # interpreter version check has been dropped.
    obConversion.ReadFile(self.mol, filename)
    self._reset_formal_charges()
def get_formatted_geometry(geometry_file: str, output_format: str, geometry_format: str = None) -> str:
    """
    Returns the formatted molecular geometry from the given geometry file.

    The format of the geometry file is assumed based on the filename
    extension but can be specified with the ``geometry_format`` keyword
    argument. The *last* extension is used (``a.b.xyz`` -> ``xyz``); a
    trailing ``.gz`` is skipped when another extension precedes it
    (``a.xyz.gz`` -> ``xyz``). For a list of supported geometry formats,
    refer to the `Open Babel documentation
    <https://open-babel.readthedocs.io/en/latest/FileFormats/Overview.html>`_.

    :param geometry_file: the path to the input geometry file
    :param output_format: the desired output format
    :param geometry_format: the format of the input geometry file
    :return: formatted geometry
    :raises AttributeError: if the input format cannot be determined, a
        format is not supported, the file cannot be read, or the conversion
        produces no output
    """
    if geometry_format is None:
        name_parts = os.path.basename(geometry_file).split(".")
        suffixes = [part for part in name_parts[1:] if part]
        if len(suffixes) > 1 and suffixes[-1].lower() == "gz":
            suffixes.pop()
        if not suffixes:
            # Previously this surfaced as an unrelated IndexError.
            message = "Cannot determine the geometry format from file name: {}".format(geometry_file)
            raise AttributeError(message)
        geometry_format = suffixes[-1]

    obConversion = openbabel.OBConversion()
    # Set the formats separately so the error can name the offending one.
    if not obConversion.SetInFormat(geometry_format):
        message = "Unsupported input geometry format: {}".format(geometry_format)
        raise AttributeError(message)
    if not obConversion.SetOutFormat(output_format):
        message = "Unsupported output geometry format: {}".format(output_format)
        raise AttributeError(message)

    mol = openbabel.OBMol()
    if not obConversion.ReadFile(mol, geometry_file):
        message = "Could not read geometry file: {}".format(geometry_file)
        raise AttributeError(message)

    formatted_output = obConversion.WriteString(mol)
    if formatted_output == "":
        message = "Unsupported input geometry format: {}".format(geometry_format)
        raise AttributeError(message)
    return formatted_output
def _inchi_labels(mol):
    """
    Get the inchi canonical labels of the heavy atoms in the molecule

    The molecule is written as InChI with the "a" and "X DoNotAddH" output
    options, and the ``/N:`` and optional ``/E:`` layers of the ``AuxInfo``
    line are parsed.

    Args:
        mol: The molecule. OpenBabel OBMol object

    Returns:
        A 3-tuple ``(heavy_atom_labels, eq_atoms, inchi)``:
        the integers of the ``/N:`` layer as a tuple; the groups of the
        ``/E:`` layer as a tuple of int tuples (an empty list when the
        layer is absent); and the InChI text after the ``InChI=`` prefix.
    """
    writer = ob.OBConversion()
    writer.SetOutFormat("inchi")
    writer.AddOption("a", ob.OBConversion.OUTOPTIONS)
    writer.AddOption("X", ob.OBConversion.OUTOPTIONS, "DoNotAddH")
    inchi_text = writer.WriteString(mol)

    aux_pattern = (
        r"InChI=(?P<inchi>.+)\nAuxInfo=.+"
        r"/N:(?P<labels>[0-9,;]+)/(E:(?P<eq_atoms>[0-9,;\(\)]*)/)?"
    )
    found = re.search(aux_pattern, inchi_text)
    inchi = found.group("inchi")
    label_text = found.group("labels")
    eq_atom_text = found.group("eq_atoms")

    # ';' separators are treated the same as ','.
    heavy_atom_labels = tuple(
        map(int, label_text.replace(";", ",").split(","))
    )

    if eq_atom_text is None:
        eq_atoms = []
    else:
        groups = re.findall(
            r"\(((?:[0-9]+,)+[0-9]+)\)", eq_atom_text.replace(";", ",")
        )
        eq_atoms = tuple(tuple(map(int, group.split(","))) for group in groups)

    return heavy_atom_labels, eq_atoms, inchi
def Smiles2InChI(smiles):
    """Convert a SMILES string to an InChI string with Open Babel.

    :param smiles: the SMILES string
    :return: the InChI string, stripped of surrounding whitespace
    :raises OpenBabelError: if Open Babel cannot read ``smiles``
    """
    converter = openbabel.OBConversion()
    converter.AddOption("w", converter.OUTOPTIONS)
    converter.SetInAndOutFormats("smiles", "inchi")

    molecule = openbabel.OBMol()
    parsed = converter.ReadString(molecule, smiles)
    if not parsed:
        raise OpenBabelError("Cannot read the SMILES string: " + smiles)

    inchi = converter.WriteString(molecule)
    return inchi.strip()
def InChI2Smiles(inchi):
    """Convert an InChI string to a SMILES string with Open Babel.

    :param inchi: the InChI string
    :return: the first whitespace-separated token of the written SMILES
    :raises OpenBabelError: if Open Babel cannot read ``inchi``
    """
    converter = openbabel.OBConversion()
    converter.AddOption("w", converter.OUTOPTIONS)
    converter.SetInAndOutFormats("inchi", "smiles")

    molecule = openbabel.OBMol()
    parsed = converter.ReadString(molecule, inchi)
    if not parsed:
        raise OpenBabelError("Cannot read the InChI string: " + inchi)

    # Keep only the first token of the writer's output line.
    tokens = converter.WriteString(molecule).split()
    return tokens[0]
def test_glob():
    """calc_glob on the triphenylphosphine test structure."""
    mol_path = os.path.join(THIS_DIR, "data/triphenylphosphine.mol")

    obmol = openbabel.OBMol()
    reader = openbabel.OBConversion()
    reader.SetInFormat("mol")
    reader.ReadFile(obmol, mol_path)

    points = calc_props.get_atom_coords(pybel.Molecule(obmol))
    glob_value = calc_props.calc_glob(points)
    assert_almost_equal(glob_value, 0.245503, 6, 1)
def parse_protein(protein):
    """Read a protein file into an OBMol.

    The input format is the text after the last "." of ``protein``.

    :param protein: path of the protein file
    :return: the OBMol the file was read into
    """
    extension = protein.rsplit(".", 1)[-1]

    reader = ob.OBConversion()
    reader.SetInFormat(extension)

    molecule = ob.OBMol()
    reader.ReadFile(molecule, protein)
    return molecule
def read_structure(path):
    """Read an input structure file into an OpenBabel molecule.

    The input format is looked up from the file name via
    ``OBConversion.FormatFromExt``; after reading, chains are perceived
    with ``OBChainsParser``.
    """
    structure = ob.OBMol()

    reader = ob.OBConversion()
    input_format = ob.OBConversion.FormatFromExt(path)
    reader.SetInFormat(input_format)
    reader.ReadFile(structure, path)

    chains_parser = ob.OBChainsParser()
    chains_parser.PerceiveChains(structure)
    return structure
def __init__(self, verbose=False):
    """Initialise empty state and cache Open Babel's supported formats.

    Each entry reported by the converter is split on " -- " into a
    (key, value) pair for the input and output format dictionaries.
    """
    self.__verbose = verbose
    self.__ccId = None
    self.__pybelMol = None
    self.__atomIdxD = None
    #
    self.__obConv = openbabel.OBConversion()
    self.__inputFormatDict = dict(
        entry.split(" -- ")
        for entry in self.__obConv.GetSupportedInputFormat()
    )
    self.__outputFormatDict = dict(
        entry.split(" -- ")
        for entry in self.__obConv.GetSupportedOutputFormat()
    )
def runGaussianFromSMILES(self, SMILES):
    """Build 3D coordinates from a SMILES string and run Gaussian on them.

    The SMILES is read with Open Babel, the "Gen3D" operation is applied
    with "--best", and the molecule is written as Gaussian input ("gjf").

    :param SMILES: the SMILES string
    :return: the result of ``self.runGaussianWithOpenBabel``
    """
    converter = openbabel.OBConversion()
    converter.SetInAndOutFormats("smi", "gjf")

    molecule = openbabel.OBMol()
    converter.ReadString(molecule, SMILES)

    build_3d = openbabel.OBOp.FindType("Gen3D")
    build_3d.Do(molecule, "--best")

    gaussian_input = converter.WriteString(molecule)
    return self.runGaussianWithOpenBabel(gaussian_input)
def openbabel_opt(structure, atomnos, constrained_indexes, graphs=None, check=False, method='UFF', **kwargs):
    '''
    Optimize a structure with an Open Babel force field under distance
    constraints.

    The structure is written to 'temp_ob_in.xyz', loaded into Open Babel,
    minimized and written to 'temp_ob_out.xyz' (both in the current
    working directory), from which the optimized coordinates are read back.

    structure : coordinates, written out via write_xyz
    atomnos : atomic numbers, written out via write_xyz
    constrained_indexes : iterable of (a, b) atom index pairs; each pair is
        constrained to its current distance. Indices are shifted by +1
        before being passed to Open Babel.
    graphs : molecular graphs, required when check is True
    check : if True, run scramble_check on the optimized coordinates
    method : force field name passed to OBForceField.FindForceField
    kwargs : accepted and ignored

    return : (opt_coords, energy, success) - the MM-optimized structure
        (UFF/MMFF), the force field energy and the scramble check result
        (always True when check is False)
    '''
    assert not check or graphs is not None, 'Either provide molecular graphs or do not check for scrambling.'

    filename = 'temp_ob_in.xyz'
    with open(filename, 'w') as f:
        write_xyz(structure, atomnos, f)

    outname = 'temp_ob_out.xyz'

    # Standard openbabel molecule load
    conv = ob.OBConversion()
    conv.SetInAndOutFormats('xyz', 'xyz')
    mol = ob.OBMol()
    conv.ReadFile(mol, filename)

    # Define constraints: freeze each pair at its current distance
    constraints = ob.OBFFConstraints()
    for a, b in constrained_indexes:
        first_atom = mol.GetAtom(int(a+1))
        length = first_atom.GetDistance(int(b+1))
        constraints.AddDistanceConstraint(int(a+1), int(b+1), length)  # Angstroms
        # constraints.AddAngleConstraint(1, 2, 3, 120.0)       # Degrees
        # constraints.AddTorsionConstraint(1, 2, 3, 4, 180.0)  # Degrees

    # Setup the force field with the constraints
    forcefield = ob.OBForceField.FindForceField(method)
    forcefield.Setup(mol, constraints)
    forcefield.SetConstraints(constraints)

    # Do a 500 steps conjugate gradient minimization
    # (or less if converges) and save the coordinates to mol.
    forcefield.ConjugateGradients(500)
    forcefield.GetCoordinates(mol)

    energy = forcefield.Energy()

    # Write the mol to a file
    conv.WriteFile(mol, outname)
    conv.CloseOutFile()

    opt_coords = read_xyz(outname).atomcoords[0]

    if check:
        success = scramble_check(opt_coords, atomnos, constrained_indexes, graphs)
    else:
        success = True

    return opt_coords, energy, success
def get_structure_string(self):
    """Return the structure string produced by ``_get_structure_string``.

    The helper receives a writer callable: an Open Babel PDB ``WriteFile``
    with ``self._obj`` already bound as its first argument.
    """
    try:
        # Open Babel >= 3.0.0
        from openbabel import openbabel
    except ImportError:
        import openbabel

    pdb_writer = openbabel.OBConversion()
    pdb_writer.SetOutFormat('pdb')
    return _get_structure_string(partial(pdb_writer.WriteFile, self._obj))
def get_molecule_hash(self, mol):
    """
    Return inchi as molecular hash

    The molecule is written as InChI with the "X DoNotAddH" output option;
    the text between "InChI=" and the following newline is returned.
    """
    writer = ob.OBConversion()
    writer.SetOutFormat("inchi")
    writer.AddOption("X", ob.OBConversion.OUTOPTIONS, "DoNotAddH")

    found = re.search(r"InChI=(?P<inchi>.+)\n", writer.WriteString(mol))
    return found.group("inchi")
def test_makeopenbabel(self):
    """makeopenbabel builds a water molecule that writes the expected InChI."""
    try:
        from openbabel import openbabel
    except ImportError:
        # Only a missing package should trigger the fallback import;
        # any other error is allowed to propagate.
        import openbabel

    atomnos = numpy.array([1, 8, 1], "i")
    atomcoords = numpy.array([[[-1., 1., 0.], [0., 0., 0.], [1., 1., 0.]]])
    obmol = cclib2openbabel.makeopenbabel(atomcoords, atomnos)

    obconversion = openbabel.OBConversion()
    formatok = obconversion.SetOutFormat("inchi")
    # Fail with a clear reason if the InChI writer is unavailable.
    assert formatok
    assert obconversion.WriteString(obmol).strip() == "InChI=1S/H2O/h1H2"
def readfile(fname, format):
    """Read a file with OpenBabel and extract cclib attributes.

    Returns the result of ``makecclib`` for the molecule read from
    ``fname``. If the ``format`` reader cannot be set, a message is
    printed and an empty dict is returned.
    """
    _check_openbabel(_found_openbabel)

    reader = ob.OBConversion()
    if not reader.SetInFormat(format):
        print("Unable to load the %s reader from OpenBabel." % format)
        return {}

    molecule = ob.OBMol()
    reader.ReadFile(molecule, fname)
    return makecclib(molecule)