Beispiel #1
0
    def __init__(self,
                 rec_typer,
                 lig_typer,
                 data_file,
                 data_root,
                 verbose=False):
        """Load the example table and prepare the Open Babel file readers.

        Args:
            rec_typer: stored unchanged as ``self.rec_typer``.
            lig_typer: stored unchanged as ``self.lig_typer``.
            data_file: passed to ``pd.read_csv``; read as a space-separated
                table with no header row.
            data_root: stored as ``self.root_dir``.
            verbose: stored as ``self.verbose``.
        """
        super().__init__()

        def make_reader(input_format):
            # The returned bound method keeps its own OBConversion alive.
            conversion = ob.OBConversion()
            conversion.SetInFormat(input_format)
            return conversion.ReadFile

        # NOTE(review): the original author flagged one column as not
        # understood -- it is positive for low_rmsd and negative for
        # ~low_rmsd, with otherwise the same absolute distribution.
        column_names = [
            'low_rmsd',
            'true_aff',
            'xtal_rmsd',
            'rec_src',
            'lig_src',
            'vina_aff',
        ]
        self.data = pd.read_csv(
            data_file,
            sep=' ',
            names=column_names,
            index_col=False,
        )
        self.root_dir = data_root

        self.read_pdb = make_reader('pdb')
        self.read_sdf = make_reader('sdf')

        self.mol_cache = {}
        self.verbose = verbose

        self.rec_typer = rec_typer
        self.lig_typer = lig_typer
Beispiel #2
0
    def test_perception_and_canonicalization(self):
        """Compare the "smi" and "can" writers on a Kekule phenol SMILES."""
        phenol = parse_smiles("C1=CC=C(O)C=C1")

        # Input does perception. Output is not canonical
        plain_writer = ob.OBConversion()
        plain_writer.SetOutFormat("smi")
        self.assertEqual(plain_writer.WriteString(phenol), "c1ccc(O)cc1\t\n")

        # Perception and canonical generation
        canonical_writer = ob.OBConversion()
        canonical_writer.SetOutFormat("can")
        self.assertEqual(canonical_writer.WriteString(phenol), "Oc1ccccc1\t\n")
Beispiel #3
0
    def to_smiles(self, canonical=False, hydrogens=False):
        """Write the system out as a SMILES string.

        Parameters
        ----------
        canonical : bool = False
            Use the "can" output format instead of the plain "smi" format.
        hydrogens : bool = False
            Whether to keep H's in the SMILES string (adds the "h" option
            to the writer).

        Returns
        -------
        str
            The writer output with leading/trailing whitespace stripped.
        """
        logger.info("to_smiles")

        writer = openbabel.OBConversion()
        writer.SetOutFormat("can" if canonical else "smi")

        obmol = self.to_OBMol()

        if hydrogens:
            writer.AddOption("h")
        text = writer.WriteString(obmol)

        logger.info(f"smiles = '{text}'")

        return text.strip()
 def __init__(self, verbose=False, debug=False):
     """Store the flags, create a SMILES writer and the mesomery table.

         TERMS:

         d_i :   bonds with different chemical groups
         e_i :   unique list of non-H chemical elements involved in bonds
         s_i :   chirality bit
         v_i :   valence electrons (calculated as octet(8) - max number of bonds)
         b_i :   sum of all bond orders

     """
     self.verbose = verbose
     self.debug = debug

     smiles_writer = ob.OBConversion()
     smiles_writer.SetOutFormat('smi')
     self.converter = smiles_writer

     # SMARTS patterns used to assign mesomeric properties to groups.
     # Layout: SMARTS_pattern -> [equivalent atoms idx list, contribution]
     carboxylate_like = '[$([#8;X1])]=*-[$([#8;X1])]'  # carboxylate, nitrate
     azete_ring = '[$([#7;X2](=*))](=*)(-*=*)'  # azete ring
     self._mesomery_patterns = {
         carboxylate_like: [[[0, 2]], 1.5],
         azete_ring: [[[2, 1]], 1.5],
         # NOTE:
         # tautomeric forms of histidine, guanidine, and others are not
         # considered due to uncertainty in the implementation. Candidate
         # patterns that were left disabled:
         #'[NHX3][CH0X3](=[NH2X3+,NHX2+0])[NH2X3]': [ [[0,2],[0,3],[2,3] ], 1.3 ],   # guanidine/guanidinium
         #'[NHX3][CH0X3](=[NH2X3+,NHX2+0])[NH2X3]': [ [[2,3] ], 1.5 ],   # guanidine/guanidinium
         #'[$([NHX3](C)(C))][CH0X3](=[NH2X3+,NHX2+0])[NH2X3]': [ [[0,2],[0,3],[2,3] ], 1.3 ],   # guanidine/guanidinium
         #'[CH2X4]' # histidine
         #'[#6X3]1:' # imidazole
         #'[$([#7X3H+,#7X2H0+0]:[#6X3H]:[#7X3H]),$([#7X3H])]:'
         #'[#6X3H]:'
         #'[$([#7X3H+,#7X2H0+0]:[#6X3H]:[#7X3H]),$([#7X3H])]:'
         #'[#6X3H]1' :  [[[1,3]], 2.5],
     }
Beispiel #5
0
def export_pdbqt(file_name):
    """Convert ``<file_name>.pdb`` into ``<file_name>.pdbqt`` via Open Babel.

    The read/write status reported by Open Babel is not inspected.

    Returns:
        ``Path`` of the PDBQT file that was requested to be written.
    """
    source = Path(f'{file_name}.pdb')
    target = Path(f'{file_name}.pdbqt')

    converter = ob.OBConversion()
    converter.SetInAndOutFormats('pdb', 'pdbqt')

    molecule = ob.OBMol()
    converter.ReadFile(molecule, source.as_posix())
    converter.WriteFile(molecule, target.as_posix())
    return target
Beispiel #6
0
def buildMoleculeFromSMILE(smileStr):
    """
    Create a 3D molecular structure from a SMILES string by using
    Open Babel (``gen3D`` operation) and return it as an ASE object.

    The XYZ text produced by Open Babel is parsed in memory, so no scratch
    file is created in the current working directory.

    :param smileStr: The string of the SMILES
    :type smileStr: Python string

    :return: A molecule structure
    :rtype: ase.Atoms object

    :raises ValueError: if Open Babel cannot read ``smileStr``
    """

    import io

    from openbabel import openbabel
    from ase.io import read

    mol = openbabel.OBMol()
    obConversion = openbabel.OBConversion()
    obConversion.SetInAndOutFormats('smi', 'xyz')
    # Fail early instead of building coordinates for an empty molecule.
    if not obConversion.ReadString(mol, smileStr):
        raise ValueError(f'Cannot read the SMILES string: {smileStr!r}')

    # Generate 3D coordinates in place on the parsed molecule.
    gen3d = openbabel.OBOp.FindType('gen3D')
    gen3d.Do(mol, '--best')

    xyz_text = obConversion.WriteString(mol)

    # A file object has no extension to guess from, so name the format.
    return read(io.StringIO(xyz_text), format='xyz')
Beispiel #7
0
def inchi_to_key(inchi, engine="openbabel"):
    """Return the InChIKey hash for an InChI string.

    Parameters
    ----------
    inchi : str
        InChI representation.
    engine : str (default: "openbabel")
        Molecular conversion engine ("openbabel" or "rdkit").

    Returns
    -------
    str
        InChIKey hash.

    Raises
    ------
    AttributeError
        If ``engine`` is neither "openbabel" nor "rdkit".

    """
    if engine == "rdkit":
        return Chem.MolToInchiKey(Chem.MolFromInchi(inchi))

    if engine != "openbabel":
        raise AttributeError("Engine must be either 'openbabel' or 'rdkit'.")

    converter = openbabel.OBConversion()
    converter.SetInAndOutFormats("inchi", "inchi")
    molecule = openbabel.OBMol()
    converter.ReadString(molecule, inchi)
    # The "K" output option makes the InChI writer emit the key.
    converter.SetOptions("K", converter.OUTOPTIONS)
    return converter.WriteString(molecule).rstrip()
Beispiel #8
0
def test_residue_class(hippos_config):
    """
    Build one Residue per configured residue and check ARG116's name.
    """

    # Arrange

    custom_settings = dict(
        omit_interaction=hippos_config.omit_interaction,
        backbone=hippos_config.use_backbone,
        res_weight=hippos_config.res_weight,
        output_mode=hippos_config.output_mode,
    )

    mol_path = "tests/data/direct_ifp/mol2_vina/"
    protein_name = mol_path + "protein_vina.mol2"
    reader = ob.OBConversion()
    reader.SetInFormat("mol2")
    protein_mol = ob.OBMol()
    reader.ReadFile(protein_mol, protein_name)

    # Act

    residues = {
        name: Residue(protein_mol, name, num, custom_settings)
        for name, num in zip(hippos_config.residue_name,
                             hippos_config.residue_number)
    }

    # Assert

    assert residues["ARG116"].AA_name == "ARG"
    def _obtain_n_benz(compound_smiles):
        """
        Count the aromatic rings with more than five members in a SMILES.

        Rings come from the molecule's SSSR; a ring is counted when it is
        flagged aromatic and its size is greater than 5.

        Parameters
        -----------
        compound_smiles: str
            smiles of the compound

        Return
        ------
        n_aromatic_rings: int
            Number of aromatic rings
        """

        converter = openbabel.OBConversion()
        converter.SetInAndOutFormats("smi", "mdl")
        molecule = openbabel.OBMol()
        converter.ReadString(molecule, compound_smiles)
        return sum(
            1
            for ring in molecule.GetSSSR()
            if ring.IsAromatic() and ring.Size() > 5
        )
Beispiel #10
0
 def test_write_inchi(self):
     """Write phenol as InChI and compare the exact text."""
     inchi_writer = ob.OBConversion()
     inchi_writer.SetOutFormat("inchi")
     phenol = parse_smiles("c1ccccc1O")
     written = inchi_writer.WriteString(phenol)
     # Note the newline!
     expected = "InChI=1S/C6H6O/c7-6-4-2-1-3-5-6/h1-5,7H\n"
     self.assertEqual(written, expected)
Beispiel #11
0
def inchi_to_can(inchi, engine="openbabel"):
    """Return canonical SMILES for an InChI string.

    Parameters
    ----------
    inchi : str
        InChI string.
    engine : str (default: "openbabel")
        Molecular conversion engine ("openbabel" or "rdkit").

    Returns
    -------
    str
        Canonical SMILES.

    Raises
    ------
    AttributeError
        If ``engine`` is neither "openbabel" nor "rdkit".
    """
    if engine == "rdkit":
        return Chem.MolToSmiles(Chem.MolFromInchi(inchi))

    if engine != "openbabel":
        raise AttributeError(
            "Engine must be either 'openbabel' or 'rdkit'."
            )

    converter = openbabel.OBConversion()
    converter.SetInAndOutFormats("inchi", "can")
    molecule = openbabel.OBMol()
    converter.ReadString(molecule, inchi)
    # Trailing whitespace from the writer is dropped.
    return converter.WriteString(molecule).rstrip()
def qcmiles(jsmol, toolkit='rdkit'):
    """Return the cmiles identifiers for a molecule description.

    ``jsmol`` is turned into XYZ text by ``dict_to_xyz_string``, converted
    to SDF with Open Babel, loaded as an openforcefield ``Molecule`` and
    exported as a QCSchema dict, which is then handed to
    ``cmiles.generator.get_molecule_ids`` with the requested ``toolkit``.
    """

    import cmiles
    from openbabel import openbabel
    from openforcefield.topology.molecule import Molecule

    converter = openbabel.OBConversion()
    converter.SetInAndOutFormats("xyz", "sdf")
    parsed = openbabel.OBMol()

    xyz_text = dict_to_xyz_string(jsmol)

    converter.ReadString(parsed, xyz_text)
    sdf_text = converter.WriteString(parsed)

    # The SDF text goes through an in-memory stream, so no scratch file
    # has to be written and removed.
    with io.StringIO(sdf_text) as stream:
        offmol = Molecule.from_file(stream, file_format='SDF')
        schema = offmol.to_qcschema().dict()

    # cmiles wants the schema with a flat xyz
    geometry = schema['geometry']
    if len(geometry.shape) > 1:
        schema['geometry'] = np.reshape(geometry, (-1, ))

    return cmiles.generator.get_molecule_ids(schema, toolkit=toolkit)
Beispiel #13
0
    def test_write_sdf(self):
        """Write two molecules into one SDF file and read both back."""
        writer = ob.OBConversion()
        writer.SetOutFormat("sdf")
        with TempDir() as tempdir:
            ethanol = parse_smiles("CCO")
            ethanol.SetTitle("#1")
            with SuppressLogging():
                # XXX For some reason, this generates the warning
                #   Warning in WriteMolecule No 2D or 3D coordinates exist.
                #   Any stereochemical information will be lost. To generate
                #   2D or 3D coordinates use --gen2D or --gen3d.
                # Since not all users of the API will have a --gen2D/--gen3d
                # option, that's not always going to be useful. Plus, the
                # test cases have no stereochemical information, and
                # WriteMolecule is never called directly here.
                writer.WriteFile(ethanol, tempdir("blah.sdf"))

            # The second record is appended to the file opened above.
            ammonium = parse_smiles("[NH4+]")
            ammonium.SetTitle("mol2")
            writer.Write(ammonium)
            writer.CloseOutFile()

            records = [
                (entry.GetTitle(), entry.NumAtoms())
                for entry in readfile(tempdir("blah.sdf"), "sdf")
            ]
            self.assertEqual([title for title, _ in records], ["#1", "mol2"])
            self.assertEqual([count for _, count in records], [3, 5])
Beispiel #14
0
def can_to_inchi(can, engine="openbabel"):
    """Return the InChI string for a canonical SMILES.

    Parameters
    ----------
    can : str
        Canonical SMILES.
    engine : str (default: "openbabel")
        Molecular conversion engine ("openbabel" or "rdkit").

    Returns
    -------
    str
        InChI string.

    Raises
    ------
    AttributeError
        If ``engine`` is neither "openbabel" nor "rdkit".
    """
    if engine == "rdkit":
        return Chem.MolToInchi(Chem.MolFromSmiles(can))

    if engine != "openbabel":
        raise AttributeError("Engine must be either 'openbabel' or 'rdkit'.")

    converter = openbabel.OBConversion()
    converter.SetInAndOutFormats("can", "inchi")
    molecule = openbabel.OBMol()
    converter.ReadString(molecule, can)
    # Trailing whitespace from the writer is dropped.
    return converter.WriteString(molecule).rstrip()
Beispiel #15
0
 def runGaussianFromType(self, filename, fileformat):
     """Convert a structure file to "gjf" text and run it.

     ``filename`` is read with Open Babel using ``fileformat`` as the input
     format; the resulting "gjf" text is passed to
     ``self.runGaussianWithOpenBabel`` and that call's result is returned.
     """
     molecule = openbabel.OBMol()
     converter = openbabel.OBConversion()
     converter.SetInAndOutFormats(fileformat, "gjf")
     converter.ReadFile(molecule, filename)
     gaussian_input = converter.WriteString(molecule)
     return self.runGaussianWithOpenBabel(gaussian_input)
Beispiel #16
0
 def __init__(self, filename, ext):
     """Read a structure file into ``self.mol`` and reset formal charges.

     Args:
         filename: path of the structure file; its last path component is
             passed through ``clean`` to derive ``self.name``.
         ext: Open Babel input format identifier used to read ``filename``
             (also passed to ``clean``).
     """
     # Every record field starts out as an empty string.
     self.data = {
         'name': '',
         'index': '',
         'metal': '',
         'topology': '',
         'parent': '',
         'atomic_info': '',
         'bond_table': '',
         'connectivity': '',
         'connect_flag': '',
         'connect_sym': ''
     }
     name = os.path.split(filename)[-1]
     self.name = clean(name, ext)
     self.update(name=self.name)
     # may be a source of error.. untested
     obConversion = ob.OBConversion()
     obConversion.SetInAndOutFormats(ext, 'pdb')
     self.mol = ob.OBMol()
     # The former Python 2 / Python 3 branches issued this exact same call,
     # so no interpreter version check is needed.
     obConversion.ReadFile(self.mol, filename)
     self._reset_formal_charges()
Beispiel #17
0
def get_formatted_geometry(geometry_file: str, output_format: str, geometry_format: str = None) -> str:
    """
    Returns the formatted molecular geometry from the given geometry file. The
    format of the geometry file is assumed based on the filename extension but
    can be specified with the ``geometry_format`` keyword argument.

    The extension is the text after the last dot of the file name, so
    ``"run.1.xyz"`` is read as ``xyz``.

    For a list of supported geometry formats, refer to the `Open Babel documentation
    <https://open-babel.readthedocs.io/en/latest/FileFormats/Overview.html>`_.

    :param geometry_file: the path to the input geometry file
    :param output_format: the desired output format
    :param geometry_format: the format of the input geometry file
    :return: formatted geometry
    :raises AttributeError: if Open Babel does not accept the format pair, or
        if the conversion produces no output
    """
    if geometry_format is None:
        # splitext keeps only the final suffix; a name without any suffix
        # yields "" and is rejected below instead of raising IndexError.
        geometry_format = os.path.splitext(geometry_file)[1].lstrip(".")

    obConversion = openbabel.OBConversion()
    if not obConversion.SetInAndOutFormats(geometry_format, output_format):
        message = "Unsupported geometry format conversion: {} -> {}".format(
            geometry_format, output_format)
        raise AttributeError(message)

    mol = openbabel.OBMol()
    obConversion.ReadFile(mol, geometry_file)

    formatted_output = obConversion.WriteString(mol)

    if formatted_output == "":
        message = "Unsupported input geometry format: {}".format(geometry_format)
        raise AttributeError(message)

    return formatted_output
Beispiel #18
0
    def _inchi_labels(mol):
        """
        Get the inchi canonical labels of the heavy atoms in the molecule

        The molecule is written as InChI with auxiliary information and the
        ``/N:`` (labels) and optional ``/E:`` (equivalence) layers of the
        ``AuxInfo`` line are parsed.

        Args:
            mol: The molecule. OpenBabel OBMol object

        Returns:
            A 3-tuple ``(heavy_atom_labels, eq_atoms, inchi)``:
            a tuple of ints taken from the ``/N:`` layer, the groups of
            equivalent atoms (tuple of int tuples, or an empty list when no
            ``/E:`` layer is present), and the text following ``InChI=``.
        """
        writer = ob.OBConversion()
        writer.SetOutFormat("inchi")
        writer.AddOption("a", ob.OBConversion.OUTOPTIONS)
        writer.AddOption("X", ob.OBConversion.OUTOPTIONS, "DoNotAddH")
        output = writer.WriteString(mol)

        pattern = (r"InChI=(?P<inchi>.+)\nAuxInfo=.+"
                   r"/N:(?P<labels>[0-9,;]+)"
                   r"/(E:(?P<eq_atoms>[0-9,;\(\)]*)/)?")
        found = re.search(pattern, output)
        inchi = found.group("inchi")
        label_text = found.group("labels")
        eq_atom_text = found.group("eq_atoms")

        # ";" separates components; treat it like "," when splitting.
        heavy_atom_labels = tuple(
            map(int, label_text.replace(";", ",").split(",")))

        if eq_atom_text is None:
            eq_atoms = []
        else:
            groups = re.findall(r"\(((?:[0-9]+,)+[0-9]+)\)",
                                eq_atom_text.replace(";", ","))
            eq_atoms = tuple(
                tuple(map(int, group.split(","))) for group in groups)
        return heavy_atom_labels, eq_atoms, inchi
Beispiel #19
0
 def Smiles2InChI(smiles):
     """Convert a SMILES string to InChI with Open Babel.

     Raises ``OpenBabelError`` when the SMILES string cannot be read.
     Returns the writer output with surrounding whitespace stripped.
     """
     converter = openbabel.OBConversion()
     converter.AddOption("w", converter.OUTOPTIONS)
     converter.SetInAndOutFormats("smiles", "inchi")
     molecule = openbabel.OBMol()
     parsed = converter.ReadString(molecule, smiles)
     if not parsed:
         raise OpenBabelError("Cannot read the SMILES string: " + smiles)
     inchi_text = converter.WriteString(molecule)
     return inchi_text.strip()
Beispiel #20
0
 def InChI2Smiles(inchi):
     """Convert an InChI string to SMILES with Open Babel.

     Raises ``OpenBabelError`` when the InChI string cannot be read.
     Returns the first whitespace-separated token of the writer output.
     """
     converter = openbabel.OBConversion()
     converter.AddOption("w", converter.OUTOPTIONS)
     converter.SetInAndOutFormats("inchi", "smiles")
     molecule = openbabel.OBMol()
     parsed = converter.ReadString(molecule, inchi)
     if not parsed:
         raise OpenBabelError("Cannot read the InChI string: " + inchi)
     tokens = converter.WriteString(molecule).split()
     return tokens[0]
Beispiel #21
0
def test_glob():
    """Check ``calc_glob`` on the triphenylphosphine test molecule."""
    mol_path = os.path.join(THIS_DIR, "data/triphenylphosphine.mol")

    reader = openbabel.OBConversion()
    reader.SetInFormat("mol")
    raw_mol = openbabel.OBMol()
    reader.ReadFile(raw_mol, mol_path)

    coords = calc_props.get_atom_coords(pybel.Molecule(raw_mol))
    assert_almost_equal(calc_props.calc_glob(coords), 0.245503, 6, 1)
Beispiel #22
0
def parse_protein(protein):
    """Read a protein file into an Open Babel molecule.

    The input format is the text after the last "." in ``protein``. The
    read status reported by Open Babel is not inspected.
    """
    suffix = protein.rsplit(".", 1)[-1]

    reader = ob.OBConversion()
    reader.SetInFormat(suffix)

    molecule = ob.OBMol()
    reader.ReadFile(molecule, protein)
    return molecule
def read_structure(path):
    """Read an input structure file into an OpenBabel molecule.

    The input format is looked up from the path's extension, and chain
    perception is run on the molecule before it is returned.
    """
    reader = ob.OBConversion()
    reader.SetInFormat(ob.OBConversion.FormatFromExt(path))

    molecule = ob.OBMol()
    reader.ReadFile(molecule, path)

    chains_parser = ob.OBChainsParser()
    chains_parser.PerceiveChains(molecule)
    return molecule
Beispiel #24
0
 def __init__(self, verbose=False):
     """Create the converter and index its supported formats.

     Each entry reported by the converter is split on " -- " and stored as
     a key/value pair in the input and output format dictionaries.
     """
     self.__verbose = verbose
     self.__ccId = None
     self.__pybelMol = None
     self.__atomIdxD = None
     #
     self.__obConv = openbabel.OBConversion()
     self.__inputFormatDict = dict(
         entry.split(" -- ")
         for entry in self.__obConv.GetSupportedInputFormat()
     )
     self.__outputFormatDict = dict(
         entry.split(" -- ")
         for entry in self.__obConv.GetSupportedOutputFormat()
     )
Beispiel #25
0
 def runGaussianFromSMILES(self, SMILES):
     """Build "gjf" text from a SMILES string and run it.

     The SMILES is parsed with Open Babel, the "Gen3D" operation is applied
     with the "--best" option, and the "gjf" text is passed to
     ``self.runGaussianWithOpenBabel``, whose result is returned.
     """
     molecule = openbabel.OBMol()
     converter = openbabel.OBConversion()
     converter.SetInAndOutFormats("smi", "gjf")
     converter.ReadString(molecule, SMILES)

     builder = openbabel.OBOp.FindType("Gen3D")
     builder.Do(molecule, "--best")

     gaussian_input = converter.WriteString(molecule)
     return self.runGaussianWithOpenBabel(gaussian_input)
Beispiel #26
0
    def openbabel_opt(structure, atomnos, constrained_indexes, graphs=None, check=False, method='UFF', **kwargs):
        '''
        Optimize a structure with an Open Babel force field, keeping the
        given atom pairs at their starting distance.

        structure : coordinates, written to an .xyz file via write_xyz
        atomnos : atomic numbers, passed to write_xyz and scramble_check
        constrained_indexes : iterable of (a, b) zero-based atom index
            pairs; each pair is constrained to its distance in the input
        graphs : molecular graphs, required when check is True
        check : when True, the outcome is validated with scramble_check
        method : force field name given to OBForceField.FindForceField
        kwargs : accepted and ignored

        return : (opt_coords, energy, success) - MM-optimized structure
            (UFF/MMFF), the force field energy after minimization, and the
            scramble check result (always True when check is False)
        '''
        import os
        import tempfile

        assert not check or graphs is not None, 'Either provide molecular graphs or do not check for scrambling.'

        # Scratch files live in a private directory so that concurrent runs
        # cannot overwrite each other and nothing is left behind.
        with tempfile.TemporaryDirectory() as workdir:
            filename = os.path.join(workdir, 'temp_ob_in.xyz')
            outname = os.path.join(workdir, 'temp_ob_out.xyz')

            with open(filename, 'w') as f:
                write_xyz(structure, atomnos, f)

            # Standard openbabel molecule load
            conv = ob.OBConversion()
            conv.SetInAndOutFormats('xyz','xyz')
            mol = ob.OBMol()
            conv.ReadFile(mol, filename)

            # Define constraints
            constraints = ob.OBFFConstraints()

            for a, b in constrained_indexes:

                # Open Babel atom indices are 1-based.
                first_atom = mol.GetAtom(int(a+1))
                length = first_atom.GetDistance(int(b+1))

                constraints.AddDistanceConstraint(int(a+1), int(b+1), length)       # Angstroms

            # Setup the force field with the constraints
            forcefield = ob.OBForceField.FindForceField(method)
            forcefield.Setup(mol, constraints)
            forcefield.SetConstraints(constraints)

            # Do a 500 steps conjugate gradient minimization
            # (or less if converges) and save the coordinates to mol.
            forcefield.ConjugateGradients(500)
            forcefield.GetCoordinates(mol)
            energy = forcefield.Energy()

            # Write the mol to a file
            conv.WriteFile(mol,outname)
            conv.CloseOutFile()

            # Read the result back before the directory is removed.
            opt_coords = read_xyz(outname).atomcoords[0]

        if check:
            success = scramble_check(opt_coords, atomnos, constrained_indexes, graphs)
        else:
            success = True

        return opt_coords, energy, success
Beispiel #27
0
 def get_structure_string(self):
     """Return the result of ``_get_structure_string`` for a PDB writer.

     The writer is the converter's ``WriteFile`` with ``self._obj`` already
     bound as its first argument.
     """
     try:
         # Open Babel >= 3.0.0
         from openbabel import openbabel
     except ImportError:
         import openbabel
     pdb_writer = openbabel.OBConversion()
     pdb_writer.SetOutFormat('pdb')
     return _get_structure_string(partial(pdb_writer.WriteFile, self._obj))
Beispiel #28
0
 def get_molecule_hash(self, mol):
     """
     Return inchi as molecular hash

     The molecule is written as InChI with the "X" output option set to
     "DoNotAddH"; the text between "InChI=" and the first newline is
     returned.
     """
     writer = ob.OBConversion()
     writer.SetOutFormat("inchi")
     writer.AddOption("X", ob.OBConversion.OUTOPTIONS, "DoNotAddH")
     written = writer.WriteString(mol)
     found = re.search(r"InChI=(?P<inchi>.+)\n", written)
     return found.group("inchi")
Beispiel #29
0
 def test_makeopenbabel(self):
     """Build a water molecule with makeopenbabel and check its InChI."""
     try:
         # Open Babel >= 3.0.0 ships the bindings inside a package.
         from openbabel import openbabel
     except ImportError:
         # Only a missing package should trigger the legacy import; other
         # errors must not be hidden.
         import openbabel
     atomnos = numpy.array([1, 8, 1], "i")
     atomcoords = numpy.array([[[-1., 1., 0.], [0., 0., 0.], [1., 1., 0.]]])
     obmol = cclib2openbabel.makeopenbabel(atomcoords, atomnos)
     obconversion = openbabel.OBConversion()
     formatok = obconversion.SetOutFormat("inchi")
     # Fail with a clear cause when the InChI writer is unavailable.
     assert formatok
     assert obconversion.WriteString(obmol).strip() == "InChI=1S/H2O/h1H2"
Beispiel #30
0
def readfile(fname, format):
    """Read a file with OpenBabel and extract cclib attributes.

    Returns the result of ``makecclib`` for the molecule read from
    ``fname``, or an empty dict (after printing a message) when Open Babel
    does not accept ``format`` as an input format.
    """
    _check_openbabel(_found_openbabel)
    converter = ob.OBConversion()
    if not converter.SetInFormat(format):
        print("Unable to load the %s reader from OpenBabel." % format)
        return {}

    molecule = ob.OBMol()
    converter.ReadFile(molecule, fname)
    return makecclib(molecule)