Ejemplo n.º 1
0
    def testReadingMassDifferenceInMolfiles(self):
        """Previously we were rounding incorrectly when reading the mass diff"""
        template = """
 OpenBabel02181811152D

  1  0  0  0  0  0  0  0  0  0999 V2000
    0.0000    0.0000    0.0000 %2s %2d  0  0  0  0  0  0  0  0  0  0  0
M  END
"""
        # Positive test cases:
        # These are the BIOVIA Draw answers for the first 50 elements for
        # a mass diff of 1
        answers = [2,5,8,10,12,13,15,17,20,21,24,25,28,29,32,33,36,41,40,41,46,49,52,53,56,57,60,60,65,66,71,74,76,80,81,85,86,89,90,92,94,97,99,102,104,107,109,113,116,120,123]
        for idx, answer in enumerate(answers):
            elem = idx + 1
            molfile = template % (ob.GetSymbol(elem), 1)
            mol = pybel.readstring("mol", molfile).OBMol
            iso = mol.GetAtom(1).GetIsotope()
            self.assertEqual(answer, iso)

        # Also test D and T - BIOVIA Draw ignores the mass diff
        for elem, answer in zip("DT", [2, 3]):
            molfile = template % (elem, 1)
            mol = pybel.readstring("mol", molfile).OBMol
            iso = mol.GetAtom(1).GetIsotope()
            self.assertEqual(answer, iso)

        # Negative test cases:
        # Test error message for out-of-range values
        for value in [5, -4]:
            molfile = template % ("C", value)
            mol = pybel.readstring("mol", molfile).OBMol
            iso = mol.GetAtom(1).GetIsotope()
            self.assertEqual(0, iso)
Ejemplo n.º 2
0
 def testSquarePlanar(self):
     """Tighten up the parsing of SP stereochemistry in SMILES"""
     accepted = (
         "C[S@SP1](Cl)(Br)I",
         "C[S@SP2](Cl)(Br)I",
         "C[S@SP3](Cl)(Br)I",
     )
     # Reading any of these raises an error
     rejected = (
         "C[S@SP0](Cl)(Br)I",
         "C[S@SP4](Cl)(Br)I",
         "C[S@@SP1](Cl)(Br)I",
         "C[S@SP11](Cl)(Br)I",
         "C[S@SO1](Cl)(Br)I",
     )
     # These only give a warning; the molecule is still read
     warned = (
         "C[S@SP1](Cl)(Br)(F)I",
         "C[S@SP1](Cl)(Br)(F)1CCCC1",
     )
     for smiles in accepted:
         parsed = pybel.readstring("smi", smiles)
         self.assertTrue(parsed.OBMol.GetData(ob.StereoData))
     for smiles in rejected:
         with self.assertRaises(IOError):
             pybel.readstring("smi", smiles)
     for smiles in warned:
         parsed = pybel.readstring("smi", smiles)
         self.assertTrue(parsed.OBMol.GetData(ob.StereoData))
Ejemplo n.º 3
0
    def testOBMolSeparatePreservesAtomOrder(self):
        """Originally Separate() preserved DFS order rather
        than atom order"""
        # First test
        smi = "C123.F3.Cl2.Br1"
        mol = pybel.readstring("smi", smi)
        atomicnums = [atom.OBAtom.GetAtomicNum() for atom in mol]
        mols = mol.OBMol.Separate()
        new_atomicnums = [atom.OBAtom.GetAtomicNum() for atom in pybel.Molecule(mols[0])]
        for x, y in zip(atomicnums, new_atomicnums):
            self.assertEqual(x, y) # check that the atoms have not been permuted
        # Second test
        xyz = """6
examples/water_dimer.xyz
O          0.12908       -0.26336        0.64798
H          0.89795        0.28805        0.85518
H          0.10833       -0.20468       -0.33302
O          0.31020        0.07569       -2.07524
H          0.64083       -0.57862       -2.71449
H         -0.26065        0.64232       -2.62218
"""
        mol = pybel.readstring("xyz", xyz)
        mols = mol.OBMol.Separate()
        allatoms = pybel.Molecule(mols[0]).atoms + pybel.Molecule(mols[1]).atoms
        for idx, atom in enumerate(allatoms):
            xcoord = atom.OBAtom.GetX()
            orig_xcoord = mol.OBMol.GetAtom(idx+1).GetX()
            self.assertEqual(xcoord, orig_xcoord)
Ejemplo n.º 4
0
 def testMOL(self):
     """Roundtrip thru MOL file"""
     smi = "C[CH3:6]"
     mol = pybel.readstring("smi", smi)
     molfile = mol.write("mol", opt={"a":True})
     molb = pybel.readstring("mol", molfile)
     # Write the molecule that was read back from the MOL file; writing the
     # original molecule would leave the roundtrip untested
     out = molb.write("smi", opt={"a":True, "n":True, "nonewline":True})
     self.assertEqual(smi, out)
Ejemplo n.º 5
0
 def testSettingSpinMult(self):
     """Set spin and read/write it"""
     original = pybel.readstring("smi", "C")
     original.atoms[0].OBAtom.SetSpinMultiplicity(2)
     molfile = original.write("mol")
     # The radical is expected on the sixth line of the MOL file
     lines = molfile.split("\n")
     self.assertEqual("M  RAD  1   1   2", lines[5])
     # Keep the re-read molecule referenced while its atom is inspected
     reread = pybel.readstring("mol", molfile)
     self.assertEqual(2, reread.atoms[0].OBAtom.GetSpinMultiplicity())
     self.assertEqual(4, reread.atoms[0].OBAtom.GetImplicitHCount())
Ejemplo n.º 6
0
 def testRGroup(self):
     """[*:1] is converted to R1 in MOL file handling"""
     smi = "[*:6]C"
     mol = pybel.readstring("smi", smi)
     molfile = mol.write("mol")
     self.assertTrue("M  RGP  1   1   6" in molfile)
     molb = pybel.readstring("mol", molfile)
     # Check the molecule read back from the MOL file, not the original one
     out = molb.write("smi", opt={"a":True, "n":True, "nonewline":True})
     self.assertEqual(smi, out)
Ejemplo n.º 7
0
 def testSmilesParsingAndWritingOfLargeIsotopes(self):
     """Isotopes written with one to four digits roundtrip through SMILES"""
     for ndigits in range(1, 5):
         smi = "[" + "1" * ndigits + "C]"
         parsed = pybel.readstring("smi", smi)
         self.assertEqual(parsed.write("smi").rstrip(), smi)
     # A five-digit isotope is rejected on reading
     with self.assertRaises(IOError):
         pybel.readstring("smi", "[11111C]")
     # After setting the isotope to 65535 the output is expected to be "[C]"
     parsed = pybel.readstring("smi", "[C]")
     parsed.atoms[0].OBAtom.SetIsotope(65535)
     self.assertEqual(parsed.write("smi").rstrip(), "[C]")
Ejemplo n.º 8
0
 def testInChIIsotopes(self):
     """Ensure that we correctly set and read isotopes in InChIs"""
     datafile = os.path.join(here, "inchi", "inchi_isotopes.txt")
     with open(datafile) as inp:
         # Lines starting with "#" are comments; the rest hold a SMILES
         # and an InChI separated by a tab
         records = (ln for ln in inp if not ln.startswith("#"))
         for record in records:
             smi, inchi = record.rstrip().split("\t")
             # SMILES -> InChI
             written_inchi = pybel.readstring("smi", smi).write("inchi").rstrip()
             self.assertEqual(written_inchi, inchi)
             # InChI -> SMILES
             written_smi = pybel.readstring("inchi", written_inchi).write("smi").rstrip()
             self.assertEqual(written_smi, smi)
Ejemplo n.º 9
0
 def testAtomMapsAfterDeletion(self):
     """Removing atoms/hydrogens should not mess up the atom maps"""
     # Deleting the first atom must leave the map on the nitrogen intact
     for smi in ("C[NH2:2]", "[CH3:1][NH2:2]"):
         parsed = pybel.readstring("smi", smi)
         first_atom = parsed.OBMol.GetAtom(1)
         parsed.OBMol.DeleteAtom(first_atom)
         remaining = parsed.write("smi", opt={"a":True}).rstrip()
         self.assertEqual(remaining, "[NH2:2]")
     # Same check after removing the explicit hydrogen
     parsed = pybel.readstring("smi", "[H]C[NH:2]")
     parsed.removeh()
     remaining = parsed.write("smi", opt={"a":True}).rstrip()
     self.assertEqual(remaining, "C[NH:2]")
Ejemplo n.º 10
0
 def testCML(self):
     """OB stores atom classes using _NN at the end of atom ids"""
     smis = ["[CH3:6]C", "[CH3:6][OH:6]",
             "O"+"[CH2:2]"*27+"O"
             ]
     for smi in smis:
         mol = pybel.readstring("smi", smi)
         cml = mol.write("cml")
         # Read the text back with the format it was written in
         molb = pybel.readstring("cml", cml)
         # Check the re-read molecule so the CML roundtrip is what is tested
         out = molb.write("smi", opt={"a":True, "n":True, "nonewline":True})
         self.assertEqual(smi, out)
Ejemplo n.º 11
0
 def testSmilesAtomOrder(self):
     """Ensure that SMILES atom order is written correctly"""
     cases = (
         ("CC", "1 2"),
         ("O=CCl", "3 2 1"),
     )
     for smi, expected_order in cases:
         parsed = pybel.readstring("smi", smi)
         # With the "O" option the atom order is stored in the molecule data
         parsed.write("can", opt={"O": True})
         self.assertEqual(parsed.data["SMILES Atom Order"], expected_order)
     # Without the option no such entry is expected
     parsed = pybel.readstring("smi", "CC")
     parsed.write("can")
     self.assertFalse("SMILES Atom Order" in parsed.data)
Ejemplo n.º 12
0
    def testFuzzingTestCases(self):
        """Ensure that fuzzing testcases do not cause crashes"""

        # rejected as invalid smiles
        invalid = (
            r"\0",
            "&0",
            "=&",
            "[H][S][S][S@S00]0[S][S@S00H](0[S@S00][S])0n",
        )
        for smi in invalid:
            with self.assertRaises(IOError):
                pybel.readstring("smi", smi)

        # warning and stereo ignored; reading must simply not raise
        tolerated = ("c0C[C@H](B)00O0",)
        for smi in tolerated:
            pybel.readstring("smi", smi)
Ejemplo n.º 13
0
 def testSmilesToMol(self):
     """Check the value written for the first atom in a MOL file and the
     MOL -> SMILES roundtrip"""
     cases = (
         ("C", 0),
         ("[CH3]", 3),
         ("[CH2]", 2),
         ("[CH2]C", 3),
         ("[C]", 15),
     )
     for smi, valence in cases:
         molfile = pybel.readstring("smi", smi).write("mol")
         # The fifth line of the file is the first atom line
         atom_line = molfile.split("\n")[4]
         self.assertEqual(valence, int(atom_line[48:53]))
         # test molfile->smiles
         roundtrip = pybel.readstring("mol", molfile).write("smi").rstrip()
         self.assertEqual(smi, roundtrip)
Ejemplo n.º 14
0
 def testImplicitCisDblBond(self):
     """Ensure that dbl bonds in rings of size 8 or less are always
     implicitly cis"""
     stem = "C1/C=C/C"
     for extra in range(5): # ring sizes 4 to 8
         ringsmi = stem + "C" * extra + "1"
         written = pybel.readstring("smi", ringsmi).write("smi")
         self.assertTrue("/" not in written)
     # Ring size 9: the bond stereo marks are expected in the output
     ringsmi = stem + "C" * 5 + "1"
     written = pybel.readstring("smi", ringsmi).write("smi")
     self.assertTrue("/" in written)
Ejemplo n.º 15
0
 def testSmiToSmi(self):
     """SMILES stereo marks are kept by default and dropped with option S"""
     # Each sample is paired with the character that signals its stereo
     samples = (
         ("[C@@H](Br)(Br)Br", "@"),
         (r"C/C=C(\C)/C", "/"),
     )
     # Should preserve stereo
     for smi, mark in samples:
         written = pybel.readstring("smi", smi).write("smi")
         self.assertTrue(mark in written)
     # Should wipe stereo
     for smi, mark in samples:
         written = pybel.readstring("smi", smi, opt={"S": True}).write("smi")
         self.assertFalse(mark in written)
Ejemplo n.º 16
0
def generate_atomic_coordinates(smiles) -> str:
    """Generate 3D coordinates for a molecule and refine them with a rotor search
    Code adapted from: http://forums.openbabel.org/OpenBabel-Conformer-Search-td4177357.html
    Args:
        smiles (string): Smiles string of molecule to be generated
    Returns:
        (string): XYZ coordinates of molecule
    """

    # Parse the SMILES and build a first 3D geometry
    molecule = readstring('smi', smiles)
    molecule.make3D()

    # Pick a forcefield for this molecule
    forcefield = _get_forcefield(molecule)

    # Clean up, run the weighted rotor search, then minimise again
    forcefield.SteepestDescent(500, 1.0e-4)
    forcefield.WeightedRotorSearch(100, 20)
    forcefield.ConjugateGradients(500, 1.0e-6)

    # Copy the forcefield coordinates back onto the molecule before writing
    forcefield.GetCoordinates(molecule.OBMol)
    xyz_text = molecule.write("xyz")
    return xyz_text
Ejemplo n.º 17
0
    def GetMolFromNCBI(self, ID: str = "") -> None:
        """Fetch a molecule by NCBI id and store it as ``rdmol`` and ``mol``.

        :param ID: CID NCBI compound identifier (e.g., 2244).
        """
        self.rdmol = getmol.GetMolFromNCBI(cid=ID)
        # Hand the structure over to pybel as MOL block text
        mol_block = Chem.MolToMolBlock(self.rdmol)
        self.mol = pybel.readstring('sdf', mol_block)
Ejemplo n.º 18
0
 def testAtomMapsAfterCopying(self):
     """Copying a molecule should copy the atom maps"""
     smi = "C[CH2:2]O[Cl:6]"
     source = pybel.readstring("smi", smi).OBMol
     # Copy-construct a second OBMol and write it with atom classes
     duplicate = pybel.ob.OBMol(source)
     written = pybel.Molecule(duplicate).write("smi", opt={"a": True})
     self.assertEqual(written.rstrip(), smi)
Ejemplo n.º 19
0
    def testOBMolSeparatePreservesAromaticity(self):
        """If the original molecule had aromaticity perceived,
        then the fragments should also.
        """
        smi = "C.c1ccccc1"
        # Two passes: first with the perceived flag cleared on the parent,
        #             then with it left as set by the SMILES reader
        for perceived in (False, True):
            obmol = pybel.readstring("smi", smi).OBMol
            # The same check is applied in both passes, with the expected
            # outcome flipped
            expect = self.assertTrue if perceived else self.assertFalse
            if perceived:
                self.assertTrue(obmol.HasAromaticPerceived())
            else:
                # Aromaticity is perceived during the last step of reading
                # SMILES, so unset it here
                obmol.SetAromaticPerceived(False)

            # After separation, is aromaticity the same as the parent?
            mols = obmol.Separate()
            fragment = mols[1]
            expect(fragment.HasAromaticPerceived())

            atom = fragment.GetAtom(1)
            atom.SetImplicitHCount(0) # mess up the structure
            expect(atom.IsAromatic())
Ejemplo n.º 20
0
    def testOBMolSeparatePreservesAromaticity(self):
        """If the original molecule had aromaticity perceived,
        then the fragments should also.
        """
        smi = "C.c1ccccc1"
        # Two passes: first with the perceived flag cleared on the parent,
        #             then with it left as set by the SMILES reader
        for perceived in (False, True):
            obmol = pybel.readstring("smi", smi).OBMol
            # The same check is applied in both passes, with the expected
            # outcome flipped
            expect = self.assertTrue if perceived else self.assertFalse
            if perceived:
                self.assertTrue(obmol.HasAromaticPerceived())
            else:
                # Aromaticity is perceived during the last step of reading
                # SMILES, so unset it here
                obmol.SetAromaticPerceived(False)

            # After separation, is aromaticity the same as the parent?
            mols = obmol.Separate()
            fragment = mols[1]
            expect(fragment.HasAromaticPerceived())

            atom = fragment.GetAtom(1)
            atom.SetImplicitHCount(0) # mess up the structure
            expect(atom.IsAromatic())
def parse_mol_simple(molformat, molstr):
    """Parse ``molstr`` with pybel, accepting "smiles" as an alias of "smi"."""
    # Any other format name is passed to pybel unchanged
    fmt = "smi" if molformat == "smiles" else molformat
    return pybel.readstring(fmt, molstr)
Ejemplo n.º 22
0
def convert_ase2rdkit(atoms, removeHs=False):
    """
    Build an rdkit molecule from an ASE atoms object.
    The ordering of the Atoms is identical.


    Important: Implemented only for clusters, not PBC!
    rdkit does not keep xyz coordinates, therefore
    a backconversion is not possible yet.

    Parameters
    ----------
    atoms : ase.Atoms
        The ASE atoms object
    removeHs : Bool
        If True, remove all H atoms from molecule.

    Returns
    -------
    mol : rdkit.Chem.rdchem.Mol
        The rdkit molecule object.
    """
    # ASE -> XYZ text -> MOL block (via pybel) -> rdkit
    xyz_text = __ase2xyz__(atoms)
    mol_block = pb.readstring("xyz", xyz_text).write("mol")
    return Chem.MolFromMolBlock(mol_block, removeHs=removeHs)
Ejemplo n.º 23
0
 def testDeleteHydrogens(self):
     """Don't suppress a hydrogen with an atom class"""
     parsed = pybel.readstring("smi", "C([H])([H])([H])[H:1]")
     parsed.OBMol.DeleteHydrogens()
     # Only the hydrogen carrying the atom class is expected to remain
     written = parsed.write("smi", opt={"a": True, "h": True}).rstrip()
     self.assertEqual("C[H:1]", written)
Ejemplo n.º 24
0
 def __file_reader(self, filename):
     """Read atoms from ``filename`` into a numpy array.

     Each row holds the atomic number followed by the x, y and z
     coordinates. ``self.reader`` selects the parser: 'auto' reads the
     file as XYZ through pybel, 'manual' parses whitespace-separated
     columns after dropping ``self.skip_lines[0]`` leading and
     ``self.skip_lines[1]`` trailing lines, mapping the first column
     through ``self.Z``. Any other ``self.reader`` value returns None.

     :param filename: path of the file to read
     :return: numpy array of rows; an empty array for an empty file in
         manual mode
     """
     if self.reader == 'auto':
         from openbabel import pybel
         # Close the file as soon as its text has been read
         with open(filename, 'r') as handle:
             xyz_text = handle.read()
         mol = pybel.readstring("xyz", xyz_text)
         molecule = [(a.OBAtom.GetAtomicNum(), a.OBAtom.x(), a.OBAtom.y(),
                      a.OBAtom.z()) for a in mol.atoms]
         return np.array(molecule)
     elif self.reader == 'manual':
         with open(filename, 'r') as handle:
             lines = handle.readlines()
         if len(lines) == 0:
             return np.array([])
         lines = lines[self.skip_lines[0]:len(lines) - self.skip_lines[1]]
         molecule = []
         for line in lines:
             # Columns may be separated by tabs or runs of spaces
             fields = [tok for tok in
                       line.replace('\t', ' ').strip().split(' ') if tok]
             if not fields:
                 # A blank line carries no atom; skip it instead of failing
                 continue
             molecule.append([
                 self.Z[fields[0]],
                 float(fields[1]),
                 float(fields[2]),
                 float(fields[3])
             ])
         return np.array(molecule)
Ejemplo n.º 25
0
def getLigandPrints(flist):
    '''
    Get list of ligand fingerprints

    Files with extension ``smi`` or ``ism`` are read line by line as
    "SMILES name"; every other file is read with pybel.readfile using the
    extension as format. An 'ecfp4' fingerprint is computed per molecule.

    Returns a tuple (fingerprints, names). Names are only collected for the
    SMILES files, so the two lists differ in length when other formats are
    present.
    '''
    fingerprints = []
    names = []
    for fname in flist:
        ext = os.path.splitext(fname)[1].split('.')[-1]
        if ext in ('smi', 'ism'):
            with open(fname, 'r') as f:
                for line in f:
                    contents = line.split()
                    if not contents:
                        # Blank line: nothing to parse
                        continue
                    # The title column is optional; use an empty name when
                    # it is missing so both lists stay in step
                    name = contents[1] if len(contents) > 1 else ''
                    m = pybel.readstring('smi', contents[0])
                    fingerprints.append(m.calcfp('ecfp4'))
                    names.append(name)
        else:
            # Best effort: report the problem and go on with the next file
            try:
                mols = pybel.readfile(ext, fname)
                for m in mols:
                    fingerprints.append(m.calcfp('ecfp4'))
            except Exception as e:
                print(e)
    return (fingerprints,names)
Ejemplo n.º 26
0
    def GetMolFromKegg(self, ID: str = "") -> None:
        """Fetch a molecule by KEGG id and store it as ``rdmol`` and ``mol``.

        :param ID: KEGG compound identifier (e.g., D02176).
        """
        self.rdmol = getmol.GetMolFromKegg(kid=ID)
        # Hand the structure over to pybel as MOL block text
        mol_block = Chem.MolToMolBlock(self.rdmol)
        self.mol = pybel.readstring('sdf', mol_block)
Ejemplo n.º 27
0
 def testAtomMapsAfterCopying(self):
     """Copying a molecule should copy the atom maps"""
     smi = "C[CH2:2]O[Cl:6]"
     source = pybel.readstring("smi", smi).OBMol
     # Copy-construct a second OBMol and write it with atom classes
     duplicate = pybel.ob.OBMol(source)
     written = pybel.Molecule(duplicate).write("smi", opt={"a": True})
     self.assertEqual(written.rstrip(), smi)
Ejemplo n.º 28
0
def create_pdbqt_from_smiles(smiles, pdbqt_path, pH=7.4):
    """
    Write a docking-ready PDBQT file for a SMILES string.

    The molecule is passed to ``optimize_structure_for_docking`` with the
    requested pH and with 3D structure generation switched on, then written
    in PDBQT format, overwriting an existing file.

    Parameters
    ----------
    smiles: str
        SMILES string.
    pdbqt_path: str or pathlib.path
        Path to output PDBQT file; its suffix is replaced by ``.pdbqt``.
    pH: float
        Protonation at given pH.
        Optional; default: 7.4
    """
    ligand = pybel.readstring("smi", smiles)
    optimize_structure_for_docking(
        ligand, protonate_for_pH=pH, generate_3d_structure=True
    )
    # Force the .pdbqt suffix whatever the caller passed in
    output_file = str(Path(pdbqt_path).with_suffix(".pdbqt"))
    ligand.write("pdbqt", output_file, overwrite=True)
Ejemplo n.º 29
0
 def testAromaticityPreservedOnAtomDeletion(self):
     """Ensure that aromaticity is preserved on atom deletion"""
     obmol = pybel.readstring("smi", "c1ccccc1").OBMol
     obmol.DeleteAtom(obmol.GetFirstAtom())
     # The new first atom is still reported as aromatic...
     first = obmol.GetFirstAtom()
     self.assertTrue(first.IsAromatic())
     # ...until the perceived flag is cleared on the molecule
     obmol.SetAromaticPerceived(False)
     first = obmol.GetFirstAtom()
     self.assertFalse(first.IsAromatic())
Ejemplo n.º 30
0
def smile_to_pdb(smile, pdb_out, mol_name, method_3d='rdkit', iter_num=5000):
    """Build a 3D structure from a SMILES string and save it as a PDB file.

    The coordinates are generated with either Open Babel or RDKit and
    written to ``pdb_out``; every residue named ``UNL`` in that file is then
    renamed to ``mol_name``.

    :param smile: SMILES string of the molecule
    :param pdb_out: path of the PDB file to write
    :param mol_name: residue name to use in the PDB file
    :param method_3d: 'openbabel' or 'rdkit'
    :param iter_num: iteration limit handed to the MMFF94 optimisation
    :return: the charge of the molecule as reported by the chosen toolkit
    :raises ValueError: if ``method_3d`` is not a supported method
    """

    # Fail early: an unknown method would otherwise write no file and
    # leave ``charge`` undefined
    if method_3d not in ('openbabel', 'rdkit'):
        raise ValueError(
            "method_3d must be 'openbabel' or 'rdkit', got %r" % (method_3d,))

    if method_3d == 'openbabel':

        from openbabel import pybel

        conf = pybel.readstring("smi", smile)
        # Get charge
        charge = conf.charge
        conf.make3D(forcefield='mmff94', steps=iter_num)
        conf.write(format='pdb', filename=pdb_out, overwrite=True)

    else:

        from rdkit.Chem import AllChem as Chem

        conf = Chem.MolFromSmiles(smile)
        conf = Chem.AddHs(conf)
        # Get charge
        charge = Chem.GetFormalCharge(conf)
        Chem.EmbedMolecule(conf)
        Chem.MMFFOptimizeMolecule(conf,
                                  mmffVariant='MMFF94',
                                  maxIters=iter_num)
        Chem.MolToPDBFile(conf, filename=pdb_out)

    # Change resname of pdb file to `mol_name`
    coor = pdb_manip.Coor(pdb_out)
    index_list = coor.get_index_selection(selec_dict={'res_name': ['UNL']})
    coor.change_index_pdb_field(index_list, change_dict={'res_name': mol_name})
    coor.write_pdb(pdb_out, check_file_out=False)

    return (charge)
Ejemplo n.º 31
0
    def GetMolFromCAS(self, ID: str = "") -> None:
        """Get a molecule by CAS id and store it as ``rdmol`` and ``mol``.

        :param ID: CAS compound identifier (e.g., 50-29-3).
        """
        self.rdmol = getmol.GetMolFromCAS(casid=ID)
        # pybel reads the structure from the MOL block written by rdkit
        self.mol = pybel.readstring('sdf', Chem.MolToMolBlock(self.rdmol))
Ejemplo n.º 32
0
def print_output(args, rows):
    """Write the result rows to ``args.output`` in the format ``args.oformat``.

    * 'table': one tab-separated line per row, starting with the synonym and
      followed by the fields named in ``args.fetch``; a header line is
      written first when ``args.header`` is set.
    * 'sdf' / 'mol2': each row's 'mol' entry is parsed and written through
      pybel; for 'sdf' the synonym and the requested fields are attached as
      data items. Rows that fail are skipped.
    * anything else: one "<value>\\t<synonym>" line per row, where the value
      is the row entry named by ``args.oformat``.

    :param args: namespace with ``oformat``, ``output``, ``fetch`` and
        ``header`` attributes
    :param rows: iterable of mappings, one per result row
    """

    def requested_fields():
        # args.fetch is treated as a string such as "['a', 'b']": drop the
        # list punctuation and split on ", ". filter() on a str returns an
        # iterator on Python 3, so the kept characters are joined instead.
        return ''.join(ch for ch in args.fetch if ch not in "[]'").split(', ')

    if args.oformat == 'table':
        with open(args.output, 'w') as outfile:
            fields = requested_fields()
            if args.header:
                outfile.write('Identifier\t' + '\t'.join([ColumnNames[key] for key in fields]) + '\n')
            for row in rows:
                outfile.write(row['synonym'] + '\t' + '\t'.join([str(row[key]) for key in fields]) + '\n')

    elif args.oformat in ['sdf', 'mol2']:
        outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
        try:
            keys = []
            if args.oformat == 'sdf':
                # The field list is the same for every row
                keys = requested_fields()
                if 'inchi_key' in keys:
                    # The InChI key is stored as two separate columns
                    keys = (', '.join(keys).replace("inchi_key", "inchi_key_first, inchi_key_last")).split(', ')
            for row in rows:
                # Best effort: a row that cannot be converted is skipped
                try:
                    mol = pybel.readstring('sdf', row['mol'])
                    if args.oformat == 'sdf':
                        mol.data.update({ColumnNames['synonym']: row['synonym']})
                        for key in keys:
                            if key:
                                mol.data.update({ColumnNames[key]: row[key]})
                    outfile.write(mol)
                except Exception:
                    continue
        finally:
            outfile.close()
    else:
        with open(args.output, 'w') as outfile:
            outfile.write('\n'.join(['%s\t%s' % (row[args.oformat], row['synonym']) for row in rows]))
Ejemplo n.º 33
0
    def GetMolFromEBI(self, ID: str = "") -> None:
        """Fetch a molecule by EBI id and store it as ``rdmol`` and ``mol``.

        :param ID: ChEBI or ChEMBL compound identifier.
        """
        self.rdmol = getmol.GetMolFromEBI(ID)
        # Hand the structure over to pybel as MOL block text
        mol_block = Chem.MolToMolBlock(self.rdmol)
        self.mol = pybel.readstring('sdf', mol_block)
Ejemplo n.º 34
0
 def testStereo(self):
     """Check the stereo of substructures copied with CopySubstructure"""
     # Each entry: input SMILES and a list of cases. A case is
     # (atom ids for the atom bit vector,
     #  bond indices for the bond bit vector or None,
     #  expected SMILES of the copy).
     cases_by_smiles = [
         ("FC[C@@](Br)(Cl)I", [
             ((2, 3, 4, 5, 6), None, "C[C@@](Br)(Cl)I"),
             ((2, 3, 4, 5), None, "CC(Br)Cl"),
             ((1, 2, 3, 4, 5, 6), (4,), "FCC(Br)Cl.I"),
         ]),
         ("[C@@H](Br)(Cl)I", [
             ((1, 2, 3), None, "C(Br)Cl"),
             ((1, 2, 3, 4), (2,), "C(Br)Cl.I"),
         ]),
         ("C[C@@H]1CO1", [
             ((2, 3, 4), None, "C1CO1"),
         ]),
         ("F/C=C/I", [
             ((1, 2, 3, 4), None, "F/C=C/I"),
             ((1, 2, 3), None, "FC=C"),
             ((1, 2, 3, 4), (0,), "F.C=CI"),
             ((1, 2, 3, 4), (1,), "FC.CI"),
         ]),
     ]
     for smi, cases in cases_by_smiles:
         source = pybel.readstring("smi", smi)
         for atom_ids, bond_ids, expected in cases:
             copied = ob.OBMol()
             atom_bits = self.createBitVec(7, atom_ids)
             if bond_ids is None:
                 bond_bits = None
             else:
                 bond_bits = self.createBitVec(5, bond_ids)
             ok = source.OBMol.CopySubstructure(copied, atom_bits, bond_bits)
             self.assertTrue(ok)
             # No stereo marks in the answer: no stereo data on the copy
             has_stereo_mark = "@" in expected or "/" in expected
             if not has_stereo_mark:
                 self.assertFalse(copied.GetData(ob.StereoData))
             written = pybel.Molecule(copied).write("smi").rstrip()
             self.assertEqual(written, expected)
Ejemplo n.º 35
0
    def GetMolFromDrugbank(self, ID: str = "") -> None:
        """Fetch a molecule by drugbank id and store it as ``rdmol`` and ``mol``.

        :param ID: Drugbank compound identifier (e.g. DB00133)
        """
        self.rdmol = getmol.GetMolFromDrugbank(dbid=ID)
        # Hand the structure over to pybel as MOL block text
        mol_block = Chem.MolToMolBlock(self.rdmol)
        self.mol = pybel.readstring('sdf', mol_block)
    def retrain(self, n, keep_top_n, smiles_and_scores):
        """Write the best-scoring SMILES as a token dataset and train a new LM.

        The ``keep_top_n`` highest-scoring entries of ``smiles_and_scores``
        (pairs of SMILES and score) are canonicalised, encoded with
        ``self.converter``, tokenised and written one per line. The
        language-model trainer is then run on that file and the resulting
        model is stored in ``self.lm``.
        """
        print("writing dataset...")
        name = 'molexit-%d' % n
        dataset = '../models/molexit/%s.txt' % name
        kept_scores = []
        with open(dataset, 'w') as f:
            # Ascending sort by score, reversed so the best entries come first
            ranked = list(reversed(sorted(smiles_and_scores,
                                          key=lambda p: p[1])))
            for smi, score in ranked[:keep_top_n]:
                canonical = pybel.readstring("smi", smi.strip()).write("can").strip()
                encoded = self.converter.encode(canonical)
                tokens = DeepSMILESTokenizer(encoded).get_tokens()
                f.write(' '.join([t.value for t in tokens]) + "\n")
                kept_scores.append(score)

        summary = (len(kept_scores), np.mean(kept_scores), np.max(kept_scores))
        print('dataset: size: %s, mean score: %s, max score: %s' % summary)
        print('training new LM...')
        self.lm_trainer.train(10, dataset, '../models/molexit', name)

        # Load the freshly trained model together with its vocabulary
        vocab = get_arpa_vocab('../models/molexit/%s.arpa' % name)
        model_path = '../models/molexit/%s.klm' % name
        self.lm = KenLMDeepSMILESLanguageModel(model_path, vocab)
Ejemplo n.º 37
0
 def testStereo(self):
     """Check the stereo of substructures copied with CopySubstructure"""
     # Each entry: input SMILES and a list of cases. A case is
     # (atom ids for the atom bit vector,
     #  bond indices for the bond bit vector or None,
     #  expected SMILES of the copy).
     cases_by_smiles = [
         ("FC[C@@](Br)(Cl)I", [
             ((2, 3, 4, 5, 6), None, "C[C@@](Br)(Cl)I"),
             ((2, 3, 4, 5), None, "CC(Br)Cl"),
             ((1, 2, 3, 4, 5, 6), (4,), "FCC(Br)Cl.I"),
         ]),
         ("[C@@H](Br)(Cl)I", [
             ((1, 2, 3), None, "C(Br)Cl"),
             ((1, 2, 3, 4), (2,), "C(Br)Cl.I"),
         ]),
         ("C[C@@H]1CO1", [
             ((2, 3, 4), None, "C1CO1"),
         ]),
         ("F/C=C/I", [
             ((1, 2, 3, 4), None, "F/C=C/I"),
             ((1, 2, 3), None, "FC=C"),
             ((1, 2, 3, 4), (0,), "F.C=CI"),
             ((1, 2, 3, 4), (1,), "FC.CI"),
         ]),
     ]
     for smi, cases in cases_by_smiles:
         source = pybel.readstring("smi", smi)
         for atom_ids, bond_ids, expected in cases:
             copied = ob.OBMol()
             atom_bits = self.createBitVec(7, atom_ids)
             if bond_ids is None:
                 bond_bits = None
             else:
                 bond_bits = self.createBitVec(5, bond_ids)
             ok = source.OBMol.CopySubstructure(copied, atom_bits, bond_bits)
             self.assertTrue(ok)
             # No stereo marks in the answer: no stereo data on the copy
             has_stereo_mark = "@" in expected or "/" in expected
             if not has_stereo_mark:
                 self.assertFalse(copied.GetData(ob.StereoData))
             written = pybel.Molecule(copied).write("smi").rstrip()
             self.assertEqual(written, expected)
Ejemplo n.º 38
0
 def testAromaticityPreservedOnAtomDeletion(self):
     """Ensure that aromaticity is preserved on atom deletion"""
     obmol = pybel.readstring("smi", "c1ccccc1").OBMol
     obmol.DeleteAtom(obmol.GetFirstAtom())
     # The new first atom is still reported as aromatic...
     first = obmol.GetFirstAtom()
     self.assertTrue(first.IsAromatic())
     # ...until the perceived flag is cleared on the molecule
     obmol.SetAromaticPerceived(False)
     first = obmol.GetFirstAtom()
     self.assertFalse(first.IsAromatic())
Ejemplo n.º 39
0
 def testDeleteHydrogens(self):
     """Don't suppress a hydrogen with an atom class"""
     parsed = pybel.readstring("smi", "C([H])([H])([H])[H:1]")
     parsed.OBMol.DeleteHydrogens()
     # Only the hydrogen carrying the atom class is expected to remain
     written = parsed.write("smi", opt={"a": True, "h": True}).rstrip()
     self.assertEqual("C[H:1]", written)
Ejemplo n.º 40
0
def write_mol2(geo, outf):
    """Write the structure held by ``geo`` to the file ``outf`` in mol2 format.

    The structure is first obtained from ``geo.write_struct`` as XYZ text and
    converted with pybel.
    """
    xyz_text = geo.write_struct(None, ftype='xyz')
    mol2_text = pybel.readstring('xyz', xyz_text).write('mol2')
    with open(outf, 'w') as fh:
        fh.write(mol2_text)
Ejemplo n.º 41
0
 def testBasic(self):
     """Reaction SMILES with empty parts roundtrip; malformed ones are rejected"""
     valid = ("C>N>O", "C>N>", ">N>O", ">N>", "C>>", ">>O", ">>")
     for rsmi in valid:
         written = pybel.readstring("smi", rsmi).write("smi").rstrip()
         self.assertEqual(rsmi, written)
     # Wrong number or placement of ">" separators
     malformed = ("C>>N>O", ">>>", "C>N>O>", ">", ">N", "N>")
     for rsmi in malformed:
         with self.assertRaises(IOError):
             pybel.readstring("smi", rsmi)
Ejemplo n.º 42
0
    def ReadMol(self, molstr: str = "", molformat: str = 'smi') -> None:
        """Parse a molecular string and store it as ``mol`` and ``rdmol``.

        :param molstr: input molecular string
        :param molformat: 3-letters code for openbabel supported format
        """
        self.mol = pybel.readstring(molformat, molstr)
        # rdkit gets the structure through an SDF rendering of the pybel molecule
        sdf_text = self.mol.write(format='sdf')
        self.rdmol = Chem.MolFromMolBlock(sdf_text)
def _max_atoms_in_mol_block(mol_block_list):
    """Return the largest atom count among the given mol blocks.

    Each mol block is a sequence of lines that is joined with newlines and
    parsed as 'sdf'. The result is 0 when the list is empty.
    """
    atom_counts = (
        len(pybel.readstring('sdf', '\n'.join(lines)).atoms)
        for lines in mol_block_list
    )
    return max(atom_counts, default=0)
Ejemplo n.º 44
0
    def add_hydrogen(self):
        """Adding hydrogens to butane read from SMILES gives 14 sites.

        NOTE(review): the method name lacks a ``test`` prefix, so a standard
        unittest loader would presumably not collect it - confirm.
        """
        from openbabel import pybel as pb

        mol_0d = pb.readstring("smi", "CCCC").OBMol
        # "CCCC" holds four carbon atoms; hydrogens are still implicit here
        self.assertEqual(len(pb.Molecule(mol_0d).atoms), 4)
        adaptor = BabelMolAdaptor(mol_0d)
        adaptor.add_hydrogen()
        # C4H10: 4 carbons + 10 hydrogens
        self.assertEqual(len(adaptor.pymatgen_mol.sites), 14)
Ejemplo n.º 45
0
 def testBasic(self):
     """Reaction SMILES with empty parts roundtrip; malformed ones are rejected"""
     valid = ("C>N>O", "C>N>", ">N>O", ">N>", "C>>", ">>O", ">>")
     for rsmi in valid:
         written = pybel.readstring("smi", rsmi).write("smi").rstrip()
         self.assertEqual(rsmi, written)
     # Wrong number or placement of ">" separators
     malformed = ("C>>N>O", ">>>", "C>N>O>", ">", ">N", "N>")
     for rsmi in malformed:
         with self.assertRaises(IOError):
             pybel.readstring("smi", rsmi)
Ejemplo n.º 46
0
def ReadMol(molstructure: str, molformat: str = 'smi') -> Chem.Mol:
    """Parse molecular text in the given format via ``pybel.readstring``.

    NOTE(review): the signature is annotated ``Chem.Mol``, yet the value
    returned comes straight from ``pybel.readstring`` -- confirm the
    intended return type.

    :param molstructure: molecular text to parse
    :param molformat: format code handed to ``pybel.readstring``
    :return: the object produced by ``pybel.readstring``
    """
    return pybel.readstring(molformat, molstructure)
Ejemplo n.º 47
0
 def testLPStereo(self):
     """Ensure that nitrogen and sulfur can support LP stereo"""
     cases = (
         "[N@@](Cl)(Br)I", "Cl[N@@](Br)I",
         "[S@@](Cl)(Br)I", "Cl[S@@](Br)I",
     )
     for smiles in cases:
         parsed = pybel.readstring("smi", smiles)
         # Stereo data must be attached to the molecule after parsing...
         stereo = parsed.OBMol.GetData(ob.StereoData)
         self.assertTrue(stereo)
         # ...and the SMILES must be written back unchanged.
         written = parsed.write("smi").rstrip()
         self.assertEqual(smiles, written)
Ejemplo n.º 48
0
    def testBasic(self):
        """Copy atom subsets into a target molecule with CopySubstructure."""
        source = pybel.readstring("smi", "ICBr")
        atom_bits = self.createBitVec(4, (1, 3))
        target = ob.OBMol()
        self.assertTrue(
            source.OBMol.CopySubstructure(target, atom_bits, None, 0))
        self.assertEqual(
            pybel.Molecule(target).write("smi").rstrip(), "[I].[Br]")

        # A second copy into the same target is expected to add to what is
        # already there rather than replace it.
        atom_bits = self.createBitVec(4, (2,))
        self.assertTrue(
            source.OBMol.CopySubstructure(target, atom_bits, None, 0))
        self.assertEqual(
            pybel.Molecule(target).write("smi").rstrip(), "[I].[Br].[CH2]")

        # Same call with a bond bit vector supplied as well; only success
        # of the call is checked here.
        source = pybel.readstring("smi", "CCC")
        atom_bits = self.createBitVec(4, (1,))
        bond_bits = self.createBitVec(2, (1,))
        target = ob.OBMol()
        self.assertTrue(
            source.OBMol.CopySubstructure(target, atom_bits, bond_bits, 0))
Ejemplo n.º 49
0
 def testRoundtripThroughRXN(self):
     """Reaction SMILES should survive a trip through the RXN format."""
     def roundtrip(rsmi, **write_args):
         # SMILES -> RXN text -> SMILES, trailing whitespace removed.
         rxn_text = pybel.readstring("smi", rsmi).write("rxn", **write_args)
         return pybel.readstring("rxn", rxn_text).write("smi").rstrip()

     unchanged = ("C>N>O", "C>>O", "C.N>>O", "C>>O.N",
                  "C>>O", ">>O", "C>>", ">N>", ">>")
     for rsmi in unchanged:
         self.assertEqual(roundtrip(rsmi), rsmi)

     # Test -G option, which changes the treatment of agents
     expected_by_option = {
         "agent": "C>N>O",
         "reactant": "C.N>>O",
         "product": "C>>O.N",
         "both": "C.N>>O.N",
         "ignore": "C>>O",
     }
     for option, expected in expected_by_option.items():
         self.assertEqual(roundtrip("C>N>O", opt={"G": option}), expected)
Ejemplo n.º 50
0
 def testIterators(self):
     """Basic check that at least two iterators are working"""
     parsed = pybel.readstring("smi", "c1ccccc1C(=O)Cl")

     # Atom iterator: nine atoms, with atomic numbers in input order.
     atom_list = list(ob.OBMolAtomIter(parsed.OBMol))
     self.assertEqual(len(atom_list), 9)
     atomic_numbers = [atom.GetAtomicNum() for atom in atom_list]
     self.assertEqual(atomic_numbers, [6] * 7 + [8, 17])

     # Bond iterator: nine bonds.
     bond_list = list(ob.OBMolBondIter(parsed.OBMol))
     self.assertEqual(len(bond_list), 9)
Ejemplo n.º 51
0
 def testTinkerXYZ(self):
     """Atom classes are written out as the atom types (though
     not currently read)"""
     parsed = pybel.readstring("smi", "[CH4:23]")
     txyz_text = parsed.write("txyz", opt={"c": True})
     # The second line of the output is inspected; its last
     # whitespace-separated field must be the atom class, 23.
     second_line = txyz_text.split("\n")[1]
     fields = second_line.split()
     self.assertEqual("23", fields[-1].rstrip())
Ejemplo n.º 52
0
 def testTinkerXYZ(self):
     """Atom classes are written out as the atom types (though
     not currently read)"""
     output = pybel.readstring("smi", "[CH4:23]").write(
         "txyz", opt={"c": True})
     # Only the last token on the second output line is checked: it must
     # equal the atom class given in the SMILES.
     tokens = output.split("\n")[1].split()
     last_token = tokens[-1].rstrip()
     self.assertEqual("23", last_token)
Ejemplo n.º 53
0
 def testIterators(self):
     """Basic check that at least two iterators are working"""
     benzoyl_chloride = pybel.readstring("smi", "c1ccccc1C(=O)Cl")

     iterated_atoms = list(ob.OBMolAtomIter(benzoyl_chloride.OBMol))
     self.assertEqual(len(iterated_atoms), 9)
     # Seven carbons, then the oxygen and the chlorine.
     expected_numbers = [6, 6, 6, 6, 6, 6, 6, 8, 17]
     self.assertEqual(
         [atom.GetAtomicNum() for atom in iterated_atoms], expected_numbers)

     iterated_bonds = list(ob.OBMolBondIter(benzoyl_chloride.OBMol))
     self.assertEqual(len(iterated_bonds), 9)
Ejemplo n.º 54
0
 def testKekulizationOfHypervalents(self):
     # We should support hypervalent aromatic S and N (the latter
     # as we write them)
     # Each pair is (SMILES read in, SMILES expected on output).
     cases = (
         ("Cs1(=O)ccccn1", "CS1(=O)=NC=CC=C1"),
         ("n1c2-c(c3cccc4cccc2c34)n(=N)c2ccccc12",
          "n1c2-c(c3cccc4cccc2c34)n(=N)c2ccccc12"),
     )
     for given, expected in cases:
         written = pybel.readstring("smi", given).write("smi").rstrip()
         self.assertEqual(expected, written)
Ejemplo n.º 55
0
    def testBasic(self):
        """Exercise OBMol.CopySubstructure with atom and bond bit vectors."""
        src = pybel.readstring("smi", "ICBr")
        selection = self.createBitVec(4, (1, 3))
        dest = ob.OBMol()
        copied = src.OBMol.CopySubstructure(dest, selection, None, 0)
        self.assertTrue(copied)
        dest_smiles = pybel.Molecule(dest).write("smi").rstrip()
        self.assertEqual(dest_smiles, "[I].[Br]")

        # Copying again into the same destination: the expected output
        # contains the earlier fragments plus the new one.
        selection = self.createBitVec(4, (2,))
        copied = src.OBMol.CopySubstructure(dest, selection, None, 0)
        self.assertTrue(copied)
        dest_smiles = pybel.Molecule(dest).write("smi").rstrip()
        self.assertEqual(dest_smiles, "[I].[Br].[CH2]")

        # Variant that also passes a bond bit vector; only the return
        # value is asserted.
        src = pybel.readstring("smi", "CCC")
        selection = self.createBitVec(4, (1,))
        bond_selection = self.createBitVec(2, (1,))
        dest = ob.OBMol()
        copied = src.OBMol.CopySubstructure(dest, selection, bond_selection, 0)
        self.assertTrue(copied)
Ejemplo n.º 56
0
 def testRoundtripThroughRXN(self):
     """Write reaction SMILES as RXN, read it back, and compare."""
     plain_cases = ("C>N>O", "C>>O", "C.N>>O", "C>>O.N",
                    "C>>O", ">>O", "C>>", ">N>", ">>")
     for source in plain_cases:
         as_rxn = pybel.readstring("smi", source).write("rxn")
         back = pybel.readstring("rxn", as_rxn).write("smi").rstrip()
         self.assertEqual(back, source)

     # Test -G option, which changes the treatment of agents
     source = "C>N>O"
     option_cases = (
         ("agent", "C>N>O"),
         ("reactant", "C.N>>O"),
         ("product", "C>>O.N"),
         ("both", "C.N>>O.N"),
         ("ignore", "C>>O"),
     )
     for g_value, expected in option_cases:
         as_rxn = pybel.readstring("smi", source).write(
             "rxn", opt={"G": g_value})
         back = pybel.readstring("rxn", as_rxn).write("smi").rstrip()
         self.assertEqual(back, expected)
Ejemplo n.º 57
0
    def testInvalidRxn(self):
        """IsValid() should flag up invalid reaction data

        Starting from the reaction SMILES "CC>>O", the reaction data is
        broken one step at a time (reaction flag, atom role, "rxncomp"
        data) and IsValid() is checked after each change.
        """
        mol = pybel.readstring("smi", "CC>>O").OBMol
        facade = ob.OBReactionFacade(mol)
        self.assertTrue(facade.IsValid())
        # Clearing the reaction flag must make the facade invalid; setting
        # it again must restore validity.
        mol.SetIsReaction(False)
        self.assertFalse(facade.IsValid())
        mol.SetIsReaction()
        self.assertTrue(facade.IsValid())

        atom = mol.GetAtom(1)

        # A role value of 4 is expected to be rejected; a named role
        # constant makes the data valid again.
        facade.SetRole(atom, 4)
        self.assertFalse(facade.IsValid()) # invalid role
        facade.SetRole(atom, ob.REACTANT)
        self.assertTrue(facade.IsValid())

        # Overwrite the integer stored under "rxncomp" on the atom with -1.
        data = atom.GetData("rxncomp")
        ob.toPairInteger(data).SetValue(-1)
        self.assertFalse(facade.IsValid()) # invalid rxn component id

        # Remove the "rxncomp" entry from the atom altogether.
        atom.DeleteData(data)
        self.assertFalse(atom.HasData("rxncomp"))
        self.assertFalse(facade.IsValid()) # data missing

        # Re-attach "rxncomp" as an OBPairData holding the string "1": the
        # attribute is present again, but IsValid() must still reject it.
        newdata = ob.OBPairData()
        newdata.SetAttribute("rxncomp")
        newdata.SetValue("1")
        atom.CloneData(newdata)
        self.assertTrue(atom.HasData("rxncomp"))
        self.assertFalse(facade.IsValid()) # wrong type of data

        # Connected component should not belong to two different
        # rxn components or two different reaction roles
        # (a fresh molecule is used because the one above was left invalid)
        mol = pybel.readstring("smi", "CC>>O").OBMol
        facade = ob.OBReactionFacade(mol)
        self.assertTrue(facade.IsValid())
        atom = mol.GetAtom(1)
        # Component id 99 on atom 1 must be flagged; id 1 is accepted again.
        facade.SetComponentId(atom, 99)
        self.assertFalse(facade.IsValid())
        facade.SetComponentId(atom, 1)
        self.assertTrue(facade.IsValid())
        # Changing only atom 1's role to AGENT must be flagged as well.
        facade.SetRole(atom, ob.AGENT)
        self.assertFalse(facade.IsValid())
Ejemplo n.º 58
0
 def testProper2DofFragments(self):
     """Check for proper handling of fragments in mcdl routines, see issue #1889"""
     parsed = pybel.readstring("smi", "[H+].CC[O-].CC[O-]")
     parsed.draw(show=False, update=True)

     def xy_separation(first, second):
         # Sum of the absolute x and y coordinate differences.
         dx = abs(first.coords[0] - second.coords[0])
         dy = abs(first.coords[1] - second.coords[1])
         return dx + dy

     # No two atoms may end up at (almost) the same 2D position.
     closest = min(
         xy_separation(a, b)
         for a, b in itertools.combinations(parsed.atoms, 2)
     )
     self.assertTrue(closest > 0.00001)
Ejemplo n.º 59
0
 def testKekulizationOfHypervalents(self):
     # We should support hypervalent aromatic S and N (the latter
     # as we write them)
     # Input SMILES on the left, expected output SMILES on the right.
     expectations = [
         ("Cs1(=O)ccccn1",
          "CS1(=O)=NC=CC=C1"),
         ("n1c2-c(c3cccc4cccc2c34)n(=N)c2ccccc12",
          "n1c2-c(c3cccc4cccc2c34)n(=N)c2ccccc12"),
     ]
     for smiles_in, smiles_out in expectations:
         parsed = pybel.readstring("smi", smiles_in)
         result = parsed.write("smi").rstrip()
         self.assertEqual(smiles_out, result)
Ejemplo n.º 60
0
 def testOldRingInformationIsWipedOnReperception(self):
     """Previously, the code that identified ring atoms and bonds
     did not set the flags of non-ring atoms. This meant that no
     matter what you did to the structure, once a ring-atom, always a
     ring atom."""
     mol = pybel.readstring("smi", "c1ccccc1")
     # Keep a handle on the first atom; its ring flag is checked before
     # and after the molecule is edited.
     atom = mol.atoms[0].OBAtom
     self.assertTrue(atom.IsInRing()) # trigger perception
     # Delete the last atom of the six-membered ring; the remaining atoms
     # must no longer be reported as ring atoms.
     mol.OBMol.DeleteAtom(mol.atoms[-1].OBAtom)
     self.assertFalse(atom.IsInRing()) # this used to return True