def testReadingMassDifferenceInMolfiles(self):
    """Previously we were rounding incorrectly when reading the mass diff

    A one-atom V2000 MOL file is generated from a template for a given
    element symbol and mass-difference value, read back, and the isotope
    assigned to the atom is compared with the expected value.
    """
    # V2000 records are fixed-column, so the spacing below is significant:
    # three header lines (the first and third empty), the counts line, a
    # single atom line and the terminator.
    template = """
 OpenBabel02181811152D

  1  0  0  0  0  0  0  0  0  0999 V2000
    0.0000    0.0000    0.0000 %2s %2d  0  0  0  0  0  0  0  0  0  0  0
M  END
"""

    def isotope_for(symbol, massdiff):
        # Keep the OBMol in a local so it outlives the atom lookup
        obmol = pybel.readstring("mol", template % (symbol, massdiff)).OBMol
        return obmol.GetAtom(1).GetIsotope()

    # Positive test cases:
    # These are the BIOVIA Draw answers for a mass diff of 1, one entry per
    # atomic number starting from hydrogen
    answers = [2,5,8,10,12,13,15,17,20,21,24,25,28,29,32,33,36,41,40,41,46,49,52,53,56,57,60,60,65,66,71,74,76,80,81,85,86,89,90,92,94,97,99,102,104,107,109,113,116,120,123]
    for elem, answer in enumerate(answers, start=1):
        self.assertEqual(answer, isotope_for(ob.GetSymbol(elem), 1))

    # Also test D and T - BIOVIA Draw ignores the mass diff
    for elem, answer in zip("DT", [2, 3]):
        self.assertEqual(answer, isotope_for(elem, 1))

    # Negative test cases:
    # Out-of-range values must leave the isotope unset
    for value in [5, -4]:
        self.assertEqual(0, isotope_for("C", value))
def testSquarePlanar(self):
    """Tighten up the parsing of SP stereochemistry in SMILES"""
    accepted = (
        "C[S@SP1](Cl)(Br)I",
        "C[S@SP2](Cl)(Br)I",
        "C[S@SP3](Cl)(Br)I",
    )
    # These are rejected outright (readstring raises)
    rejected = (
        "C[S@SP0](Cl)(Br)I",
        "C[S@SP4](Cl)(Br)I",
        "C[S@@SP1](Cl)(Br)I",
        "C[S@SP11](Cl)(Br)I",
        "C[S@SO1](Cl)(Br)I",
    )
    # These only trigger a warning and are still read
    tolerated = (
        "C[S@SP1](Cl)(Br)(F)I",
        "C[S@SP1](Cl)(Br)(F)1CCCC1",
    )

    def has_stereo_data(smiles):
        parsed = pybel.readstring("smi", smiles)
        return parsed.OBMol.GetData(ob.StereoData)

    for smi in accepted:
        self.assertTrue(has_stereo_data(smi))
    for smi in rejected:
        self.assertRaises(IOError, pybel.readstring, "smi", smi)
    for smi in tolerated:
        self.assertTrue(has_stereo_data(smi))
def testOBMolSeparatePreservesAtomOrder(self):
    """Originally Separate() preserved DFS order rather than atom order"""
    # First test: the atomic numbers of the separated fragment must come out
    # in the same order as in the parent molecule
    parent = pybel.readstring("smi", "C123.F3.Cl2.Br1")
    parent_nums = [atom.OBAtom.GetAtomicNum() for atom in parent]
    fragments = parent.OBMol.Separate()
    fragment_nums = [atom.OBAtom.GetAtomicNum()
                     for atom in pybel.Molecule(fragments[0])]
    for before, after in zip(parent_nums, fragment_nums):
        # check that the atoms have not been permuted
        self.assertEqual(before, after)

    # Second test: a water dimer, compared via the x coordinates.
    # XYZ is line-oriented: atom count, title, then one atom per line.
    xyz = """6
examples/water_dimer.xyz
O 0.12908 -0.26336 0.64798
H 0.89795 0.28805 0.85518
H 0.10833 -0.20468 -0.33302
O 0.31020 0.07569 -2.07524
H 0.64083 -0.57862 -2.71449
H -0.26065 0.64232 -2.62218
"""
    dimer = pybel.readstring("xyz", xyz)
    fragments = dimer.OBMol.Separate()
    allatoms = (pybel.Molecule(fragments[0]).atoms
                + pybel.Molecule(fragments[1]).atoms)
    for position, atom in enumerate(allatoms, start=1):
        xcoord = atom.OBAtom.GetX()
        orig_xcoord = dimer.OBMol.GetAtom(position).GetX()
        self.assertEqual(xcoord, orig_xcoord)
def testMOL(self):
    """Roundtrip thru MOL file"""
    smi = "C[CH3:6]"
    mol = pybel.readstring("smi", smi)
    # Write with the "a" option, then read the MOL text back in
    molb = pybel.readstring("mol", mol.write("mol", opt={"a":True}))
    # NOTE(review): the SMILES below is written from `mol`, not from the
    # re-read `molb`, so the MOL roundtrip itself is not compared - confirm
    # whether `molb` was intended.
    smiles_opts = {"a":True, "n":True, "nonewline":True}
    out = mol.write("smi", opt=smiles_opts)
    self.assertEqual(smi, out)
def testSettingSpinMult(self):
    """Set spin and read/write it"""
    mol = pybel.readstring("smi", "C")
    mol.atoms[0].OBAtom.SetSpinMultiplicity(2)
    molfile = mol.write("mol")
    # V2000 property lines are fixed-width ("M  RADnn8 aaa vvv"), so the
    # expected text must carry the column padding
    self.assertEqual("M  RAD  1   1   2", molfile.split("\n")[5])
    molb = pybel.readstring("mol", molfile)
    first = molb.atoms[0].OBAtom
    self.assertEqual(2, first.GetSpinMultiplicity())
    self.assertEqual(4, first.GetImplicitHCount())
def testRGroup(self):
    """[*:1] is converted to R1 in MOL file handling"""
    smi = "[*:6]C"
    mol = pybel.readstring("smi", smi)
    molfile = mol.write("mol")
    # V2000 property lines are fixed-width ("M  RGPnn8 aaa rrr"), so the
    # expected text must carry the column padding
    self.assertTrue("M  RGP  1   1   6" in molfile)
    molb = pybel.readstring("mol", molfile)
    # NOTE(review): the SMILES below is written from `mol`, not from the
    # re-read `molb` - confirm whether `molb` was intended.
    out = mol.write("smi", opt={"a":True, "n":True, "nonewline":True})
    self.assertEqual(smi, out)
def testSmilesParsingAndWritingOfLargeIsotopes(self):
    """Isotopes of up to four digits roundtrip through SMILES; five digits
    are rejected, and an isotope of 65535 set via the API is not written.
    """
    for smi in ("[1C]", "[11C]", "[111C]", "[1111C]"):
        written = pybel.readstring("smi", smi).write("smi")
        self.assertEqual(written.rstrip(), smi)

    self.assertRaises(IOError, pybel.readstring, "smi", "[11111C]")

    mol = pybel.readstring("smi", "[C]")
    carbon = mol.atoms[0].OBAtom
    carbon.SetIsotope(65535)
    self.assertEqual(mol.write("smi").rstrip(), "[C]")
def testInChIIsotopes(self):
    """Ensure that we correctly set and read isotopes in InChIs"""
    datafile = os.path.join(here, "inchi", "inchi_isotopes.txt")
    with open(datafile) as inp:
        for line in inp:
            # Lines starting with '#' are comments in the data file
            if not line.startswith("#"):
                smi, inchi = line.rstrip().split("\t")
                # SMILES -> InChI
                minchi = pybel.readstring("smi", smi).write("inchi").rstrip()
                self.assertEqual(minchi, inchi)
                # InChI -> SMILES
                msmi = pybel.readstring("inchi", minchi).write("smi").rstrip()
                self.assertEqual(msmi, smi)
def testAtomMapsAfterDeletion(self):
    """Removing atoms/hydrogens should not mess up the atom maps"""
    # Deleting the first atom must leave the class of the second untouched
    for smi in ("C[NH2:2]", "[CH3:1][NH2:2]"):
        mol = pybel.readstring("smi", smi)
        obmol = mol.OBMol
        obmol.DeleteAtom(obmol.GetAtom(1))
        remaining = mol.write("smi", opt={"a":True}).rstrip()
        self.assertEqual(remaining, "[NH2:2]")

    # Removing hydrogens must do the same
    mol = pybel.readstring("smi", "[H]C[NH:2]")
    mol.removeh()
    remaining = mol.write("smi", opt={"a":True}).rstrip()
    self.assertEqual(remaining, "C[NH:2]")
def testCML(self):
    """OB stores atom classes using _NN at the end of atom ids"""
    long_chain = "O" + "[CH2:2]" * 27 + "O"
    for smi in ("[CH3:6]C", "[CH3:6][OH:6]", long_chain):
        mol = pybel.readstring("smi", smi)
        cml = mol.write("cml")
        # NOTE(review): the CML text is re-read with the "mol" format and the
        # result `molb` is never used; the SMILES below comes from `mol` -
        # confirm whether "cml" and `molb` were intended.
        molb = pybel.readstring("mol", cml)
        smiles_opts = {"a":True, "n":True, "nonewline":True}
        out = mol.write("smi", opt=smiles_opts)
        self.assertEqual(smi, out)
def testSmilesAtomOrder(self):
    """Ensure that SMILES atom order is written correctly"""
    key = "SMILES Atom Order"
    expected_orders = {"CC": "1 2", "O=CCl": "3 2 1"}
    for smi, atomorder in expected_orders.items():
        mol = pybel.readstring("smi", smi)
        # With the "O" option the order is stored on the molecule's data
        mol.write("can", opt={"O": True})
        self.assertEqual(mol.data[key], atomorder)

    # Without the option nothing is stored
    mol = pybel.readstring("smi", "CC")
    mol.write("can")
    self.assertFalse(key in mol.data)
def testFuzzingTestCases(self):
    """Ensure that fuzzing testcases do not cause crashes"""
    # rejected as invalid smiles
    invalid = (
        r"\0",
        "&0",
        "=&",
        "[H][S][S][S@S00]0[S][S@S00H](0[S@S00][S])0n",
    )
    for smi in invalid:
        self.assertRaises(IOError, pybel.readstring, "smi", smi)

    # warning and stereo ignored; only has to be read without raising
    for smi in ("c0C[C@H](B)00O0",):
        pybel.readstring("smi", smi)
def testSmilesToMol(self):
    """Check the value written in the atom line of the MOL file for several
    carbon SMILES, and that each MOL file reads back to the same SMILES.
    """
    cases = (
        ("C", 0),
        ("[CH3]", 3),
        ("[CH2]", 2),
        ("[CH2]C", 3),
        ("[C]", 15),
    )
    for smi, valence in cases:
        molfile = pybel.readstring("smi", smi).write("mol")
        # Line index 4 is the first atom line; the value is taken from
        # character columns 48-52 of that line
        atom_line = molfile.split("\n")[4]
        self.assertEqual(valence, int(atom_line[48:53]))
        # test molfile->smiles
        back = pybel.readstring("mol", molfile).write("smi")
        self.assertEqual(smi, back.rstrip())
def testImplicitCisDblBond(self):
    """Ensure that dbl bonds in rings of size 8 or less are always
    implicitly cis"""
    stem = "C1/C=C/C"  # closing this directly gives a 4-membered ring

    def roundtrip(extra_carbons):
        # Grow the ring by the given number of carbons, close it, roundtrip
        ringsmi = stem + "C" * extra_carbons + "1"
        return pybel.readstring("smi", ringsmi).write("smi")

    # Ring sizes 4 to 8: the bond symbols must be dropped
    for extra in range(5):
        self.assertTrue("/" not in roundtrip(extra))

    # Ring size 9: the bond symbols must be kept
    self.assertTrue("/" in roundtrip(5))
def testSmiToSmi(self):
    """Stereo is kept on a plain SMILES roundtrip and removed when reading
    with the "S" option."""
    tet = "[C@@H](Br)(Br)Br"
    cistrans = r"C/C=C(\C)/C"

    # Should preserve stereo
    plain = pybel.readstring("smi", tet).write("smi")
    self.assertTrue("@" in plain)
    plain = pybel.readstring("smi", cistrans).write("smi")
    self.assertTrue("/" in plain)

    # Should wipe stereo
    wiped = pybel.readstring("smi", tet, opt={"S": True}).write("smi")
    self.assertFalse("@" in wiped)
    wiped = pybel.readstring("smi", cistrans, opt={"S": True}).write("smi")
    self.assertFalse("/" in wiped)
def generate_atomic_coordinates(smiles) -> str:
    """Generate 3D coordinates for a molecule and refine them with a rotor search

    Code adapted from:
    http://forums.openbabel.org/OpenBabel-Conformer-Search-td4177357.html

    Args:
        smiles (string): Smiles string of molecule to be generated
    Returns:
        (string): XYZ coordinates of molecule
    """
    # Parse the SMILES and build a first 3D geometry
    molecule = readstring('smi', smiles)
    molecule.make3D()

    # Pick a forcefield that works with this molecule
    forcefield = _get_forcefield(molecule)

    # Minimise, run the weighted rotor search, then minimise again
    forcefield.SteepestDescent(500, 1.0e-4)
    forcefield.WeightedRotorSearch(100, 20)
    forcefield.ConjugateGradients(500, 1.0e-6)

    # Copy the forcefield coordinates back onto the molecule
    forcefield.GetCoordinates(molecule.OBMol)
    xyz_text = molecule.write("xyz")
    return xyz_text
def GetMolFromNCBI(self, ID: str = "") -> None:
    """Fetch a molecule by NCBI id and store it on the instance.

    The RDKit molecule is stored in ``self.rdmol`` and a pybel molecule,
    built from its MOL block, in ``self.mol``.

    :param ID: CID NCBI compound identifier (e.g., 2244).
    """
    self.rdmol = getmol.GetMolFromNCBI(cid=ID)
    molblock = Chem.MolToMolBlock(self.rdmol)
    self.mol = pybel.readstring('sdf', molblock)
def testAtomMapsAfterCopying(self):
    """Copying a molecule should copy the atom maps"""
    smi = "C[CH2:2]O[Cl:6]"
    original = pybel.readstring("smi", smi).OBMol
    # Copy-construct a second OBMol and write it with atom classes
    duplicate = pybel.ob.OBMol(original)
    written = pybel.Molecule(duplicate).write("smi", opt={"a": True})
    self.assertEqual(written.rstrip(), smi)
def testOBMolSeparatePreservesAromaticity(self):
    """If the original molecule had aromaticity perceived,
    then the fragments should also.
    """
    smi = "C.c1ccccc1"
    # Two passes: first with the perceived flag cleared on the parent,
    # then with the flag left as the SMILES reader set it
    for perceived in (False, True):
        expect = self.assertTrue if perceived else self.assertFalse
        obmol = pybel.readstring("smi", smi).OBMol
        if perceived:
            self.assertTrue(obmol.HasAromaticPerceived())
        else:
            # Aromaticity is perceived during the last step of reading
            # SMILES so unset it here
            obmol.SetAromaticPerceived(False)

        # After separation, is aromaticity the same as the parent?
        mols = obmol.Separate()
        expect(mols[1].HasAromaticPerceived())

        atom = mols[1].GetAtom(1)
        atom.SetImplicitHCount(0)  # mess up the structure
        expect(atom.IsAromatic())
def testOBMolSeparatePreservesAromaticity(self):
    """If the original molecule had aromaticity perceived,
    then the fragments should also.
    """
    smi = "C.c1ccccc1"
    # Two passes: first with the perceived flag cleared on the parent,
    # then with the flag left as the SMILES reader set it
    for perceived in (False, True):
        expect = self.assertTrue if perceived else self.assertFalse
        obmol = pybel.readstring("smi", smi).OBMol
        if perceived:
            self.assertTrue(obmol.HasAromaticPerceived())
        else:
            # Aromaticity is perceived during the last step of reading
            # SMILES so unset it here
            obmol.SetAromaticPerceived(False)

        # After separation, is aromaticity the same as the parent?
        mols = obmol.Separate()
        expect(mols[1].HasAromaticPerceived())

        atom = mols[1].GetAtom(1)
        atom.SetImplicitHCount(0)  # mess up the structure
        expect(atom.IsAromatic())
def parse_mol_simple(molformat, molstr):
    """Parse a molecule string with pybel.

    The format name "smiles" is translated to "smi"; any other format name
    is passed to ``pybel.readstring`` unchanged.
    """
    babel_format = "smi" if molformat == "smiles" else molformat
    return pybel.readstring(babel_format, molstr)
def convert_ase2rdkit(atoms, removeHs=False):
    """
    Convert an ASE atoms object to rdkit molecule.
    The ordering of the Atoms is identical.

    The atoms are serialised to XYZ text, read by Open Babel, written as a
    MOL block and finally parsed by rdkit.

    Important: Implemented only for clusters, not PBC!
    rdkit does not keep xyz coordinates, therefore
    a backconversion is not possible yet.

    Parameters
    ----------
    atoms : ase.Atoms
        The ASE atoms object
    removeHs : Bool
        If True, remove all H atoms from molecule.

    Returns
    -------
    mol : rdkit.Chem.rdchem.Mol
        The rdkit molecule object.
    """
    xyz_text = __ase2xyz__(atoms)
    mol_block = pb.readstring("xyz", xyz_text).write("mol")
    return Chem.MolFromMolBlock(mol_block, removeHs=removeHs)
def testDeleteHydrogens(self):
    """Don't suppress a hydrogen with an atom class"""
    mol = pybel.readstring("smi", "C([H])([H])([H])[H:1]")
    mol.OBMol.DeleteHydrogens()
    # Only the hydrogen carrying the atom class should remain
    remaining = mol.write("smi", opt={"a": True, "h": True}).rstrip()
    self.assertEqual("C[H:1]", remaining)
def __file_reader(self, filename):
    """Read atomic numbers and Cartesian coordinates from a structure file.

    The parsing strategy is selected by ``self.reader``:

    * ``'auto'``   -- the file content is parsed as XYZ by Open Babel.
    * ``'manual'`` -- ``self.skip_lines[0]`` leading and
      ``self.skip_lines[1]`` trailing lines are dropped and every remaining
      non-blank line is read as ``symbol x y z`` (any whitespace as
      separator), with the symbol mapped to a number through ``self.Z``.

    Args:
        filename: Path of the file to read.

    Returns:
        A numpy array with one ``[Z, x, y, z]`` row per atom; an empty array
        when the file is empty in manual mode; ``None`` when ``self.reader``
        is neither ``'auto'`` nor ``'manual'``.
    """
    if self.reader == 'auto':
        # Imported lazily so that Open Babel is only required for this mode
        from openbabel import pybel
        with open(filename, 'r') as handle:
            contents = handle.read()
        mol = pybel.readstring("xyz", contents)
        molecule = [(a.OBAtom.GetAtomicNum(), a.OBAtom.x(), a.OBAtom.y(),
                     a.OBAtom.z()) for a in mol.atoms]
        return np.array(molecule)

    if self.reader == 'manual':
        with open(filename, 'r') as handle:
            lines = handle.readlines()
        if not lines:
            return np.array([])
        lines = lines[self.skip_lines[0]:len(lines) - self.skip_lines[1]]
        molecule = []
        for line in lines:
            # split() handles tabs and runs of spaces in one go
            fields = line.split()
            if not fields:
                # A blank (e.g. trailing) line carries no atom
                continue
            molecule.append([
                self.Z[fields[0]],
                float(fields[1]),
                float(fields[2]),
                float(fields[3])
            ])
        return np.array(molecule)

    # Unknown reader mode: nothing is read
    return None
def getLigandPrints(flist):
    '''Get list of ligand fingerprints

    ``.smi`` / ``.ism`` files are read line by line as ``SMILES [name]``;
    every other file is passed to ``pybel.readfile`` with its extension as
    the format. An ECFP4 fingerprint is computed for each molecule.

    Args:
        flist: iterable of file names.

    Returns:
        ``(fingerprints, names)``: two lists of equal length, where
        ``names[i]`` belongs to ``fingerprints[i]``. For SMILES files the
        name is the second column (or the SMILES itself when the line has no
        name); for other formats it is the molecule title.
    '''
    fingerprints = []
    names = []
    for fname in flist:
        ext = os.path.splitext(fname)[1].lstrip('.')
        if ext in ('smi', 'ism'):
            with open(fname, 'r') as f:
                for line in f:
                    contents = line.split()
                    if not contents:
                        # Blank line: nothing to fingerprint
                        continue
                    smi = contents[0]
                    name = contents[1] if len(contents) > 1 else smi
                    m = pybel.readstring('smi', smi)
                    fingerprints.append(m.calcfp('ecfp4'))
                    names.append(name)
        else:
            # Best effort: a file that cannot be read is reported and skipped
            try:
                for m in pybel.readfile(ext, fname):
                    fingerprints.append(m.calcfp('ecfp4'))
                    # Record a name too, so that both lists stay aligned
                    names.append(m.title)
            except Exception as e:
                print(e)
    return (fingerprints, names)
def GetMolFromKegg(self, ID: str = "") -> None:
    """Fetch a molecule by KEGG id and store it on the instance.

    The RDKit molecule is stored in ``self.rdmol`` and a pybel molecule,
    built from its MOL block, in ``self.mol``.

    :param ID: KEGG compound identifier (e.g., D02176).
    """
    self.rdmol = getmol.GetMolFromKegg(kid=ID)
    molblock = Chem.MolToMolBlock(self.rdmol)
    self.mol = pybel.readstring('sdf', molblock)
def testAtomMapsAfterCopying(self):
    """Copying a molecule should copy the atom maps"""
    smi = "C[CH2:2]O[Cl:6]"
    original = pybel.readstring("smi", smi).OBMol
    # Copy-construct a second OBMol and write it with atom classes
    duplicate = pybel.ob.OBMol(original)
    written = pybel.Molecule(duplicate).write("smi", opt={"a": True})
    self.assertEqual(written.rstrip(), smi)
def create_pdbqt_from_smiles(smiles, pdbqt_path, pH=7.4):
    """
    Convert a SMILES string to a PDBQT file, while adding hydrogen atoms,
    correcting the protonation state, assigning partial charges, and
    generating a 3D conformer.

    The output file name always carries the ``.pdbqt`` suffix and an
    existing file is overwritten.

    Parameters
    ----------
    smiles: str
        SMILES string.
    pdbqt_path: str or pathlib.path
        Path to output PDBQT file.
    pH: float
        Protonation at given pH. Optional; default: 7.4
    """
    output_file = str(Path(pdbqt_path).with_suffix(".pdbqt"))
    molecule = pybel.readstring("smi", smiles)
    optimize_structure_for_docking(
        molecule,
        protonate_for_pH=pH,
        generate_3d_structure=True,
    )
    molecule.write("pdbqt", output_file, overwrite=True)
def testAromaticityPreservedOnAtomDeletion(self):
    """Ensure that aromaticity is preserved on atom deletion"""
    obmol = pybel.readstring("smi", "c1ccccc1").OBMol
    obmol.DeleteAtom(obmol.GetFirstAtom())
    # The remaining atoms still report as aromatic ...
    self.assertTrue(obmol.GetFirstAtom().IsAromatic())
    # ... until the perceived flag is cleared
    obmol.SetAromaticPerceived(False)
    self.assertFalse(obmol.GetFirstAtom().IsAromatic())
def smile_to_pdb(smile, pdb_out, mol_name, method_3d='rdkit', iter_num=5000):
    """Build a 3D structure from a SMILES string and write it as a PDB file.

    The structure is generated and optimised with MMFF94 using either Open
    Babel or RDKit, written to ``pdb_out``, and the residues named ``UNL``
    in that file are then renamed to ``mol_name``.

    :param smile: SMILES string of the molecule
    :param pdb_out: path of the PDB file to write
    :param mol_name: residue name to set in the PDB file
    :param method_3d: ``'openbabel'`` or ``'rdkit'`` (default)
    :param iter_num: number of optimisation steps / maximum iterations
    :return: formal charge of the molecule
    :raises ValueError: if ``method_3d`` is unknown, or RDKit cannot parse
        the SMILES string
    """
    # Fail early: otherwise no file is written and `charge` is never set
    if method_3d not in ('openbabel', 'rdkit'):
        raise ValueError(
            "Unknown method_3d %r, expected 'openbabel' or 'rdkit'"
            % (method_3d,))

    if method_3d == 'openbabel':
        from openbabel import pybel

        conf = pybel.readstring("smi", smile)
        # Get charge
        charge = conf.charge
        conf.make3D(forcefield='mmff94', steps=iter_num)
        conf.write(format='pdb', filename=pdb_out, overwrite=True)
    else:
        from rdkit.Chem import AllChem as Chem

        conf = Chem.MolFromSmiles(smile)
        if conf is None:
            # MolFromSmiles signals a parse failure by returning None
            raise ValueError("Could not parse SMILES %r" % (smile,))
        conf = Chem.AddHs(conf)
        # Get charge
        charge = Chem.GetFormalCharge(conf)
        Chem.EmbedMolecule(conf)
        Chem.MMFFOptimizeMolecule(
            conf, mmffVariant='MMFF94', maxIters=iter_num)
        Chem.MolToPDBFile(conf, filename=pdb_out)

    # Change resname of pdb file to `mol_name`
    coor = pdb_manip.Coor(pdb_out)
    index_list = coor.get_index_selection(selec_dict={'res_name': ['UNL']})
    coor.change_index_pdb_field(index_list, change_dict={'res_name': mol_name})
    coor.write_pdb(pdb_out, check_file_out=False)

    return charge
def GetMolFromCAS(self, ID="") -> None:
    """Fetch a molecule by CAS id and store it on the instance.

    The RDKit molecule is stored in ``self.rdmol`` and a pybel molecule,
    built from its MOL block, in ``self.mol``.

    :param ID: CAS compound identifier (e.g., 50-29-3).
    """
    self.rdmol = getmol.GetMolFromCAS(casid=ID)
    molblock = Chem.MolToMolBlock(self.rdmol)
    self.mol = pybel.readstring('sdf', molblock)
def _requested_fields(fetch):
    """Split the ``fetch`` argument into a list of field names.

    ``fetch`` is expected as text such as ``"['a', 'b']"``: brackets and
    single quotes are dropped and the rest is split on ``', '``. Any other
    iterable is taken as the list of names itself.
    """
    if isinstance(fetch, str):
        cleaned = ''.join(ch for ch in fetch if ch not in "[]'")
        return cleaned.split(', ')
    return list(fetch)


def print_output(args, rows):
    """Write the result rows to ``args.output`` in the format ``args.oformat``.

    * ``'table'``: one tab-separated line per row (synonym first, then the
      requested fields), preceded by a header line when ``args.header``.
    * ``'sdf'`` / ``'mol2'``: each row's ``'mol'`` entry is parsed as SDF and
      written through pybel; for ``'sdf'`` the synonym and the requested
      fields are attached as data. Rows that fail are skipped.
    * anything else: one ``<row[oformat]>\\t<synonym>`` line per row.

    :param args: namespace providing ``oformat``, ``output``, ``fetch`` and
        ``header``
    :param rows: iterable of mappings, one per result
    """
    if args.oformat == 'table':
        # filter() returns an iterator on Python 3, so the field list is
        # built by the helper instead of filter(...).split(...)
        requested_fields = _requested_fields(args.fetch)
        with open(args.output, 'w') as outfile:
            if args.header:
                outfile.write(
                    'Identifier\t'
                    + '\t'.join(ColumnNames[key] for key in requested_fields)
                    + '\n')
            for row in rows:
                outfile.write(
                    row['synonym'] + '\t'
                    + '\t'.join(str(row[key]) for key in requested_fields)
                    + '\n')
    elif args.oformat in ['sdf', 'mol2']:
        keys = []
        if args.oformat == 'sdf':
            keys = _requested_fields(args.fetch)
            if 'inchi_key' in keys:
                # The InChI key is stored as two separate columns
                keys = (', '.join(keys).replace(
                    "inchi_key", "inchi_key_first, inchi_key_last"
                )).split(', ')
        outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
        try:
            for row in rows:
                try:
                    mol = pybel.readstring('sdf', row['mol'])
                    if args.oformat == 'sdf':
                        mol.data.update(
                            {ColumnNames['synonym']: row['synonym']})
                        for key in keys:
                            if key:
                                mol.data.update({ColumnNames[key]: row[key]})
                    outfile.write(mol)
                except Exception:
                    # Best effort: a row that cannot be converted is skipped
                    continue
        finally:
            outfile.close()
    else:
        with open(args.output, 'w') as outfile:
            outfile.write('\n'.join(
                '%s\t%s' % (row[args.oformat], row['synonym'])
                for row in rows))
def GetMolFromEBI(self, ID: str = "") -> None:
    """Fetch a molecule by EBI id and store it on the instance.

    The RDKit molecule is stored in ``self.rdmol`` and a pybel molecule,
    built from its MOL block, in ``self.mol``.

    :param ID: ChEBI or ChEMBL compound identifier.
    """
    self.rdmol = getmol.GetMolFromEBI(ID)
    molblock = Chem.MolToMolBlock(self.rdmol)
    self.mol = pybel.readstring('sdf', molblock)
def testStereo(self):
    """CopySubstructure: check the SMILES of the copied part, and that no
    stereo data is attached when the expected SMILES has no stereo marks.

    Each case is (atom indices, bond indices or None, expected SMILES).
    """
    data = [
        ("FC[C@@](Br)(Cl)I",
         [((2, 3, 4, 5, 6), None, "C[C@@](Br)(Cl)I"),
          ((2, 3, 4, 5), None, "CC(Br)Cl"),
          ((1, 2, 3, 4, 5, 6), (4,), "FCC(Br)Cl.I")]),
        ("[C@@H](Br)(Cl)I",
         [((1, 2, 3), None, "C(Br)Cl"),
          ((1, 2, 3, 4), (2,), "C(Br)Cl.I")]),
        ("C[C@@H]1CO1",
         [((2, 3, 4), None, "C1CO1"),]),
        ("F/C=C/I",
         [((1, 2, 3, 4), None, "F/C=C/I"),
          ((1, 2, 3), None, "FC=C"),
          ((1, 2, 3, 4), (0,), "F.C=CI"),
          ((1, 2, 3, 4), (1,), "FC.CI")]),
    ]
    for smi, cases in data:
        mol = pybel.readstring("smi", smi)
        for atom_idxs, bond_idxs, expected in cases:
            nmol = ob.OBMol()
            atom_bv = self.createBitVec(7, atom_idxs)
            if bond_idxs is None:
                bond_bv = None
            else:
                bond_bv = self.createBitVec(5, bond_idxs)
            ok = mol.OBMol.CopySubstructure(nmol, atom_bv, bond_bv)
            self.assertTrue(ok)
            stereo_expected = "@" in expected or "/" in expected
            if not stereo_expected:
                self.assertFalse(nmol.GetData(ob.StereoData))
            written = pybel.Molecule(nmol).write("smi").rstrip()
            self.assertEqual(written, expected)
def GetMolFromDrugbank(self, ID: str = "") -> None:
    """Fetch a molecule by drugbank id and store it on the instance.

    The RDKit molecule is stored in ``self.rdmol`` and a pybel molecule,
    built from its MOL block, in ``self.mol``.

    :param ID: Drugbank compound identifier (e.g. DB00133)
    """
    self.rdmol = getmol.GetMolFromDrugbank(dbid=ID)
    molblock = Chem.MolToMolBlock(self.rdmol)
    self.mol = pybel.readstring('sdf', molblock)
def retrain(self, n, keep_top_n, smiles_and_scores):
    """Write the best-scoring molecules as a dataset and train a new LM on it.

    The ``keep_top_n`` highest-scoring entries are canonicalised with Open
    Babel, encoded with ``self.converter``, tokenised and written one per
    line to ``../models/molexit/molexit-<n>.txt``. A language model is then
    trained on that file and loaded into ``self.lm``.

    :param n: index used in the dataset / model name
    :param keep_top_n: number of highest-scoring entries to keep
    :param smiles_and_scores: iterable of (smiles, score) pairs
    """
    print("writing dataset...")
    name = 'molexit-%d' % n
    model_dir = '../models/molexit'
    dataset = '../models/molexit/%s.txt' % name
    dataset_scores = []
    with open(dataset, 'w') as f:
        # Ascending sort followed by a reversal puts the best score first
        ranked = sorted(smiles_and_scores, key=lambda p: p[1])
        ranked.reverse()
        for smi, score in ranked[:keep_top_n]:
            canonical = pybel.readstring("smi", smi.strip()).write("can").strip()
            dsmi = self.converter.encode(canonical)
            tokens = DeepSMILESTokenizer(dsmi).get_tokens()
            f.write(' '.join([t.value for t in tokens]))
            f.write("\n")
            dataset_scores.append(score)

    summary = (len(dataset_scores), np.mean(dataset_scores), np.max(dataset_scores))
    print('dataset: size: %s, mean score: %s, max score: %s' % summary)
    print('training new LM...')
    self.lm_trainer.train(10, dataset, model_dir, name)

    vocab = get_arpa_vocab('../models/molexit/%s.arpa' % name)
    self.lm = KenLMDeepSMILESLanguageModel('../models/molexit/%s.klm' % name, vocab)
def testStereo(self):
    """CopySubstructure: check the SMILES of the copied part, and that no
    stereo data is attached when the expected SMILES has no stereo marks.

    Each case is (atom indices, bond indices or None, expected SMILES).
    """
    data = [
        ("FC[C@@](Br)(Cl)I",
         [((2, 3, 4, 5, 6), None, "C[C@@](Br)(Cl)I"),
          ((2, 3, 4, 5), None, "CC(Br)Cl"),
          ((1, 2, 3, 4, 5, 6), (4,), "FCC(Br)Cl.I")]),
        ("[C@@H](Br)(Cl)I",
         [((1, 2, 3), None, "C(Br)Cl"),
          ((1, 2, 3, 4), (2,), "C(Br)Cl.I")]),
        ("C[C@@H]1CO1",
         [((2, 3, 4), None, "C1CO1"),]),
        ("F/C=C/I",
         [((1, 2, 3, 4), None, "F/C=C/I"),
          ((1, 2, 3), None, "FC=C"),
          ((1, 2, 3, 4), (0,), "F.C=CI"),
          ((1, 2, 3, 4), (1,), "FC.CI")]),
    ]
    for smi, cases in data:
        mol = pybel.readstring("smi", smi)
        for atom_idxs, bond_idxs, expected in cases:
            nmol = ob.OBMol()
            atom_bv = self.createBitVec(7, atom_idxs)
            if bond_idxs is None:
                bond_bv = None
            else:
                bond_bv = self.createBitVec(5, bond_idxs)
            ok = mol.OBMol.CopySubstructure(nmol, atom_bv, bond_bv)
            self.assertTrue(ok)
            stereo_expected = "@" in expected or "/" in expected
            if not stereo_expected:
                self.assertFalse(nmol.GetData(ob.StereoData))
            written = pybel.Molecule(nmol).write("smi").rstrip()
            self.assertEqual(written, expected)
def testAromaticityPreservedOnAtomDeletion(self):
    """Ensure that aromaticity is preserved on atom deletion"""
    obmol = pybel.readstring("smi", "c1ccccc1").OBMol
    obmol.DeleteAtom(obmol.GetFirstAtom())
    # The remaining atoms still report as aromatic ...
    self.assertTrue(obmol.GetFirstAtom().IsAromatic())
    # ... until the perceived flag is cleared
    obmol.SetAromaticPerceived(False)
    self.assertFalse(obmol.GetFirstAtom().IsAromatic())
def testDeleteHydrogens(self):
    """Don't suppress a hydrogen with an atom class"""
    mol = pybel.readstring("smi", "C([H])([H])([H])[H:1]")
    mol.OBMol.DeleteHydrogens()
    # Only the hydrogen carrying the atom class should remain
    remaining = mol.write("smi", opt={"a": True, "h": True}).rstrip()
    self.assertEqual("C[H:1]", remaining)
def write_mol2(geo, outf):
    """Write the structure held by ``geo`` to ``outf`` in MOL2 format.

    The structure is obtained as XYZ text from ``geo.write_struct``,
    parsed by pybel and re-written as MOL2.
    """
    xyz_text = geo.write_struct(None, ftype='xyz')
    mol2_text = pybel.readstring('xyz', xyz_text).write('mol2')
    with open(outf, 'w') as fh:
        fh.write(mol2_text)
def testBasic(self):
    """Reaction SMILES: valid forms roundtrip unchanged, invalid forms are
    rejected on reading."""
    valid = ("C>N>O", "C>N>", ">N>O", ">N>", "C>>", ">>O", ">>")
    for smi in valid:
        mol = pybel.readstring("smi", smi)
        self.assertEqual(smi, mol.write("smi").rstrip())

    invalid = ("C>>N>O", ">>>", "C>N>O>", ">", ">N", "N>")
    for smi in invalid:
        self.assertRaises(IOError, pybel.readstring, "smi", smi)
def ReadMol(self, molstr: str = "", molformat: str = 'smi') -> None:
    """Parse a molecular input string and store it on the instance.

    The pybel molecule is stored in ``self.mol``; an RDKit molecule built
    from its SDF text is stored in ``self.rdmol``.

    :param molstr: input molecular string
    :param molformat: 3-letters code for openbabel supported format
    """
    self.mol = pybel.readstring(molformat, molstr)
    sdf_text = self.mol.write(format='sdf')
    self.rdmol = Chem.MolFromMolBlock(sdf_text)
def _max_atoms_in_mol_block(mol_block_list):
    """Return the largest atom count among the given mol blocks.

    Each mol block is a sequence of lines; they are joined with newlines
    and parsed as SDF. Returns 0 for an empty list.
    """
    atom_counts = (
        len(pybel.readstring('sdf', '\n'.join(lines)).atoms)
        for lines in mol_block_list
    )
    # Atom counts are never negative, so 0 is also the floor of the result
    return max(atom_counts, default=0)
def add_hydrogen(self):
    """Check that BabelMolAdaptor.add_hydrogen() adds the missing hydrogens.

    Butane ("CCCC") is read from SMILES with its 4 heavy atoms only; after
    add_hydrogen() the pymatgen molecule must have 14 sites (C4H10).
    """
    # NOTE(review): the name lacks the ``test`` prefix, so unittest discovery
    # will presumably not run this method; the name is left unchanged here.
    from openbabel import pybel as pb

    mol_0d = pb.readstring("smi", "CCCC").OBMol
    # Four carbons before hydrogens are added
    self.assertEqual(len(pb.Molecule(mol_0d).atoms), 4)
    adaptor = BabelMolAdaptor(mol_0d)
    adaptor.add_hydrogen()
    self.assertEqual(len(adaptor.pymatgen_mol.sites), 14)
def testBasic(self):
    """Reaction SMILES: valid forms roundtrip unchanged, invalid forms are
    rejected on reading."""
    valid = ("C>N>O", "C>N>", ">N>O", ">N>", "C>>", ">>O", ">>")
    for smi in valid:
        mol = pybel.readstring("smi", smi)
        self.assertEqual(smi, mol.write("smi").rstrip())

    invalid = ("C>>N>O", ">>>", "C>N>O>", ">", ">N", "N>")
    for smi in invalid:
        self.assertRaises(IOError, pybel.readstring, "smi", smi)
def ReadMol(molstructure: str, molformat: str = 'smi') -> "pybel.Molecule":
    """Read molecular text of the specified format.

    :param molstructure: molecular text
    :param molformat: 3-letters code for openbabel supported format
    :return: the molecule object returned by ``pybel.readstring`` (a pybel
        molecule, not an RDKit one)
    """
    return pybel.readstring(molformat, molstructure)
def testLPStereo(self):
    """Ensure that nitrogen and sulfur can support LP stereo"""
    cases = (
        "[N@@](Cl)(Br)I",
        "Cl[N@@](Br)I",
        "[S@@](Cl)(Br)I",
        "Cl[S@@](Br)I",
    )
    for smi in cases:
        mol = pybel.readstring("smi", smi)
        # Stereo data must be present and the SMILES must roundtrip
        self.assertTrue(mol.OBMol.GetData(ob.StereoData))
        self.assertEqual(smi, mol.write("smi").rstrip())
def testBasic(self):
    """CopySubstructure with atom (and bond) bit vectors and option 0."""
    def smiles_of(obmol):
        return pybel.Molecule(obmol).write("smi").rstrip()

    source = pybel.readstring("smi", "ICBr")
    target = ob.OBMol()

    # Copy atoms 1 and 3
    selection = self.createBitVec(4, (1, 3))
    ok = source.OBMol.CopySubstructure(target, selection, None, 0)
    self.assertTrue(ok)
    self.assertEqual(smiles_of(target), "[I].[Br]")

    # Copy atom 2 into the same target molecule
    selection = self.createBitVec(4, (2,))
    ok = source.OBMol.CopySubstructure(target, selection, None, 0)
    self.assertTrue(ok)
    self.assertEqual(smiles_of(target), "[I].[Br].[CH2]")

    # A bond bit vector may be passed as well
    source = pybel.readstring("smi", "CCC")
    selection = self.createBitVec(4, (1,))
    bond_selection = self.createBitVec(2, (1,))
    target = ob.OBMol()
    ok = source.OBMol.CopySubstructure(target, selection, bond_selection, 0)
    self.assertTrue(ok)
def testRoundtripThroughRXN(self):
    """Reaction SMILES -> RXN -> reaction SMILES, with and without the
    "G" write option."""
    def via_rxn(rsmi, **write_kwargs):
        rxn = pybel.readstring("smi", rsmi).write("rxn", **write_kwargs)
        return pybel.readstring("rxn", rxn).write("smi").rstrip()

    data = ["C>N>O", "C>>O", "C.N>>O", "C>>O.N",
            "C>>O", ">>O", "C>>", ">N>", ">>"]
    for rsmi in data:
        self.assertEqual(via_rxn(rsmi), rsmi)

    # Test -G option, which changes the treatment of agents
    rsmi = "C>N>O"
    ans = {"agent": "C>N>O",
           "reactant": "C.N>>O",
           "product": "C>>O.N",
           "both": "C.N>>O.N",
           "ignore": "C>>O"}
    for option, result in ans.items():
        self.assertEqual(via_rxn(rsmi, opt={"G":option}), result)
def testIterators(self):
    """Basic check that at least two iterators are working"""
    mol = pybel.readstring("smi", "c1ccccc1C(=O)Cl")
    obmol = mol.OBMol

    # Atom iterator: 7 carbons, one oxygen, one chlorine
    atoms = list(ob.OBMolAtomIter(obmol))
    self.assertEqual(len(atoms), 9)
    atomic_numbers = [atom.GetAtomicNum() for atom in atoms]
    self.assertEqual(atomic_numbers, [6,6,6,6,6,6,6,8,17])

    # Bond iterator
    bonds = list(ob.OBMolBondIter(obmol))
    self.assertEqual(len(bonds), 9)
def testTinkerXYZ(self):
    """Atom classes are written out as the atom types (though
    not currently read)"""
    mol = pybel.readstring("smi", "[CH4:23]")
    txyz = mol.write("txyz", opt={"c": True})
    # The second line is the first atom record; its last field must be the
    # atom class
    atom_fields = txyz.split("\n")[1].split()
    self.assertEqual("23", atom_fields[-1].rstrip())
def testTinkerXYZ(self):
    """Atom classes are written out as the atom types (though
    not currently read)"""
    mol = pybel.readstring("smi", "[CH4:23]")
    txyz = mol.write("txyz", opt={"c": True})
    # The second line is the first atom record; its last field must be the
    # atom class
    atom_fields = txyz.split("\n")[1].split()
    self.assertEqual("23", atom_fields[-1].rstrip())
def testIterators(self):
    """Basic check that at least two iterators are working"""
    mol = pybel.readstring("smi", "c1ccccc1C(=O)Cl")
    obmol = mol.OBMol

    # Atom iterator: 7 carbons, one oxygen, one chlorine
    atoms = list(ob.OBMolAtomIter(obmol))
    self.assertEqual(len(atoms), 9)
    atomic_numbers = [atom.GetAtomicNum() for atom in atoms]
    self.assertEqual(atomic_numbers, [6, 6, 6, 6, 6, 6, 6, 8, 17])

    # Bond iterator
    bonds = list(ob.OBMolBondIter(obmol))
    self.assertEqual(len(bonds), 9)
def testKekulizationOfHypervalents(self):
    """We should support hypervalent aromatic S and N (the latter as we
    write them)"""
    expected_output = {
        "Cs1(=O)ccccn1": "CS1(=O)=NC=CC=C1",
        "n1c2-c(c3cccc4cccc2c34)n(=N)c2ccccc12":
            "n1c2-c(c3cccc4cccc2c34)n(=N)c2ccccc12",
    }
    for inp, out in expected_output.items():
        written = pybel.readstring("smi", inp).write("smi")
        self.assertEqual(out, written.rstrip())
def testBasic(self):
    """CopySubstructure with atom (and bond) bit vectors and option 0."""
    def smiles_of(obmol):
        return pybel.Molecule(obmol).write("smi").rstrip()

    source = pybel.readstring("smi", "ICBr")
    target = ob.OBMol()

    # Copy atoms 1 and 3
    selection = self.createBitVec(4, (1, 3))
    ok = source.OBMol.CopySubstructure(target, selection, None, 0)
    self.assertTrue(ok)
    self.assertEqual(smiles_of(target), "[I].[Br]")

    # Copy atom 2 into the same target molecule
    selection = self.createBitVec(4, (2,))
    ok = source.OBMol.CopySubstructure(target, selection, None, 0)
    self.assertTrue(ok)
    self.assertEqual(smiles_of(target), "[I].[Br].[CH2]")

    # A bond bit vector may be passed as well
    source = pybel.readstring("smi", "CCC")
    selection = self.createBitVec(4, (1,))
    bond_selection = self.createBitVec(2, (1,))
    target = ob.OBMol()
    ok = source.OBMol.CopySubstructure(target, selection, bond_selection, 0)
    self.assertTrue(ok)
def testRoundtripThroughRXN(self):
    """Reaction SMILES -> RXN -> reaction SMILES, with and without the
    "G" write option."""
    def via_rxn(rsmi, **write_kwargs):
        rxn = pybel.readstring("smi", rsmi).write("rxn", **write_kwargs)
        return pybel.readstring("rxn", rxn).write("smi").rstrip()

    data = ["C>N>O", "C>>O", "C.N>>O", "C>>O.N",
            "C>>O", ">>O", "C>>", ">N>", ">>"]
    for rsmi in data:
        self.assertEqual(via_rxn(rsmi), rsmi)

    # Test -G option, which changes the treatment of agents
    rsmi = "C>N>O"
    ans = {"agent": "C>N>O",
           "reactant": "C.N>>O",
           "product": "C>>O.N",
           "both": "C.N>>O.N",
           "ignore": "C>>O"}
    for option, result in ans.items():
        self.assertEqual(via_rxn(rsmi, opt={"G":option}), result)
def testInvalidRxn(self):
    """IsValid() should flag up invalid reaction data"""
    # The steps below mutate one molecule in sequence; each assertion depends
    # on the state left by the previous step.
    mol = pybel.readstring("smi", "CC>>O").OBMol
    facade = ob.OBReactionFacade(mol)
    self.assertTrue(facade.IsValid())

    # Clearing and restoring the reaction flag
    mol.SetIsReaction(False)
    self.assertFalse(facade.IsValid())
    mol.SetIsReaction()
    self.assertTrue(facade.IsValid())

    # Setting and repairing the role of the first atom
    atom = mol.GetAtom(1)
    facade.SetRole(atom, 4)
    self.assertFalse(facade.IsValid()) # invalid role
    facade.SetRole(atom, ob.REACTANT)
    self.assertTrue(facade.IsValid())

    # Tampering with the "rxncomp" data of the atom
    data = atom.GetData("rxncomp")
    ob.toPairInteger(data).SetValue(-1)
    self.assertFalse(facade.IsValid()) # invalid rxn component id

    atom.DeleteData(data)
    self.assertFalse(atom.HasData("rxncomp"))
    self.assertFalse(facade.IsValid()) # data missing

    # Re-attach "rxncomp" as OBPairData holding a string value
    newdata = ob.OBPairData()
    newdata.SetAttribute("rxncomp")
    newdata.SetValue("1")
    atom.CloneData(newdata)
    self.assertTrue(atom.HasData("rxncomp"))
    self.assertFalse(facade.IsValid()) # wrong type of data

    # Connected component should not belong to two different
    # rxn components or two different reaction roles
    mol = pybel.readstring("smi", "CC>>O").OBMol
    facade = ob.OBReactionFacade(mol)
    self.assertTrue(facade.IsValid())
    atom = mol.GetAtom(1)
    facade.SetComponentId(atom, 99)
    self.assertFalse(facade.IsValid())
    facade.SetComponentId(atom, 1)
    self.assertTrue(facade.IsValid())
    facade.SetRole(atom, ob.AGENT)
    self.assertFalse(facade.IsValid())
def testProper2DofFragments(self):
    """Check for proper handling of fragments in mcdl routines, see issue #1889"""
    mol = pybel.readstring("smi", "[H+].CC[O-].CC[O-]")
    mol.draw(show=False, update=True)

    def xy_distance(first, second):
        # Sum of the absolute x and y differences of two atoms
        dx = abs(first.coords[0] - second.coords[0])
        dy = abs(first.coords[1] - second.coords[1])
        return dx + dy

    # No two atoms may end up at the same 2D position
    mindist = min(xy_distance(a, b)
                  for a, b in itertools.combinations(mol.atoms, 2))
    self.assertTrue(mindist > 0.00001)
def testKekulizationOfHypervalents(self):
    """We should support hypervalent aromatic S and N (the latter as we
    write them)"""
    expected_output = {
        "Cs1(=O)ccccn1": "CS1(=O)=NC=CC=C1",
        "n1c2-c(c3cccc4cccc2c34)n(=N)c2ccccc12":
            "n1c2-c(c3cccc4cccc2c34)n(=N)c2ccccc12",
    }
    for inp, out in expected_output.items():
        written = pybel.readstring("smi", inp).write("smi")
        self.assertEqual(out, written.rstrip())
def testOldRingInformationIsWipedOnReperception(self):
    """Previously, the code that identified ring atoms and bonds
    did not set the flags of non-ring atoms. This meant that no
    matter what you did to the structure, once a ring-atom, always a
    ring atom."""
    mol = pybel.readstring("smi", "c1ccccc1")
    first = mol.atoms[0].OBAtom
    self.assertTrue(first.IsInRing()) # trigger perception
    # Break the ring by deleting its last atom
    last = mol.atoms[-1].OBAtom
    mol.OBMol.DeleteAtom(last)
    self.assertFalse(first.IsInRing()) # this used to return True