def check_database(self, mols=None, filename=None):
    """
    Verify that a database holds exactly the expected molecules.

    Parameters
    ----------
    mols : list, optional
        Molecules that should be in the database. Defaults to self.mols.
    filename : str, optional
        Existing database filename. When given, a fresh MoleculeDatabase is
        loaded from it and checked; otherwise self.database is checked.

    Raises
    ------
    AssertionError
        If the database size differs from the number of expected molecules,
        or if the SMILES of an expected molecule is not in the database.
    """
    expected = self.mols if mols is None else mols

    if filename is None:
        database = self.database
    else:
        database = MoleculeDatabase()
        database.load(filename)

    # the database must be exactly as large as the expected molecule list
    assert len(database) == len(expected)

    # every expected molecule must be present under the engine's SMILES
    for mol in expected:
        smiles = database.engine.get_smiles(mol)
        assert smiles in database
def test_update(self):
    """
    Test updating an existing database.

    A database holding only the first molecule is saved to a temporary
    file, then passed via ``-d`` so that check_output can confirm the
    resulting output database has the expected size.
    """
    # mkstemp hands back an open OS-level descriptor that has to be closed
    # by the caller; NamedTemporaryFile closes its handle when the block
    # exits, and delete=False keeps the file on disk for the steps below.
    with tempfile.NamedTemporaryFile(dir=self.temp_dir,
                                     delete=False) as handle:
        database_filename = handle.name
    database = MoleculeDatabase()
    database.add_mol(self.mols[0])
    database.save(database_filename)
    self.check_output(["-i", self.input_filename,
                       "-o", self.output_filename,
                       "-d", database_filename])
def check_output(self, input_args):
    """
    Run main on the given arguments and inspect the database it writes.

    Parameters
    ----------
    input_args : list
        Command-line arguments, handed to parse_args.

    Returns
    -------
    MoleculeDatabase
        Database loaded from the output file named in the parsed arguments.

    Raises
    ------
    AssertionError
        If the loaded database size differs from len(self.mols).
    """
    parsed = parse_args(input_args)
    main(parsed.input, parsed.output, parsed.database,
         parsed.stereo_from_3d)

    # reload what main wrote so the check covers the file on disk
    result = MoleculeDatabase()
    result.load(parsed.output)
    assert len(result) == len(self.mols)
    return result
def get_smiles(filename, assign_stereo_from_3d=False):
    """
    Collect the SMILES of a database built from one molecule file.

    Parameters
    ----------
    filename : str
        Input molecule filename.
    assign_stereo_from_3d : bool, optional (default False)
        Assign stereochemistry from 3D coordinates (forwarded to
        MoleculeDatabase).

    Returns
    -------
    list
        Contents of the database's ``smiles`` attribute after every molecule
        read from the file has been added.
    """
    db = MoleculeDatabase(assign_stereo_from_3d=assign_stereo_from_3d)
    with serial.MolReader().open(filename) as mol_reader:
        for molecule in mol_reader:
            db.add_mol(molecule)
    smiles = list(db.smiles)
    return smiles
def setUp(self):
    """
    Build the shared fixtures: named molecules, a temporary directory and
    an empty MoleculeDatabase.
    """
    # (name, SMILES) pairs, kept together so they cannot drift apart
    compounds = [
        ('aspirin', 'CC(=O)OC1=CC=CC=C1C(=O)O'),
        ('ibuprofen', 'CC(C)CC1=CC=C(C=C1)C(C)C(=O)O'),
        ('celecoxib',
         'CC1=CC=C(C=C1)C2=CC(=NN2C3=CC=C(C=C3)S(=O)(=O)N)C(F)(F)F'),
    ]
    # presumably compound IDs matching the molecules above, in the same
    # order -- TODO confirm
    self.cids = [2244, 3672, 2662]
    self.mols = []
    for name, smiles_string in compounds:
        molecule = Chem.MolFromSmiles(smiles_string)
        molecule.SetProp('_Name', name)
        self.mols.append(molecule)
    self.temp_dir = tempfile.mkdtemp()
    self.database = MoleculeDatabase()
def test_update(self):
    """
    Test updating an existing database.

    A database holding only the first molecule is saved to a temporary
    file, then passed via ``-d`` so that check_output can confirm the
    resulting output database has the expected size.
    """
    # mkstemp hands back an open OS-level descriptor that has to be closed
    # by the caller; NamedTemporaryFile closes its handle when the block
    # exits, and delete=False keeps the file on disk for the steps below.
    with tempfile.NamedTemporaryFile(dir=self.temp_dir,
                                     delete=False) as handle:
        database_filename = handle.name
    database = MoleculeDatabase()
    database.add_mol(self.mols[0])
    database.save(database_filename)
    self.check_output([
        '-i', self.input_filename,
        '-o', self.output_filename,
        '-d', database_filename
    ])
def setUp(self):
    """
    Build the shared fixtures: named molecules, a temporary directory and
    an empty MoleculeDatabase.
    """
    # (name, SMILES) pairs, kept together so they cannot drift apart
    compounds = [
        ("aspirin", "CC(=O)OC1=CC=CC=C1C(=O)O"),
        ("ibuprofen", "CC(C)CC1=CC=C(C=C1)C(C)C(=O)O"),
        ("celecoxib",
         "CC1=CC=C(C=C1)C2=CC(=NN2C3=CC=C(C=C3)S(=O)(=O)N)C(F)(F)F"),
    ]
    # presumably compound IDs matching the molecules above, in the same
    # order -- TODO confirm
    self.cids = [2244, 3672, 2662]
    self.mols = []
    for name, smiles_string in compounds:
        molecule = Chem.MolFromSmiles(smiles_string)
        molecule.SetProp("_Name", name)
        self.mols.append(molecule)
    self.temp_dir = tempfile.mkdtemp()
    self.database = MoleculeDatabase()
def main(input_filenames, output_filename, database_filename=None,
         assign_stereo_from_3d=False):
    """
    Update or create a molecule database.

    Every molecule read from the input files is added to the database.
    Molecules for which add_mol raises ValueError are reported on stdout
    and skipped. The number of added molecules is printed and the database
    is saved to the output file.

    Parameters
    ----------
    input_filenames : list
        Input molecule filename(s).
    output_filename : str
        Output filename.
    database_filename : str, optional
        Existing database to update.
    assign_stereo_from_3d : bool, optional (default False)
        Whether to assign stereochemistry from 3D coordinates.
    """
    database = MoleculeDatabase(assign_stereo_from_3d=assign_stereo_from_3d)
    if database_filename is not None:
        database.load(database_filename)
    # measured after the optional load so only new molecules are counted
    initial_size = len(database)
    for filename in input_filenames:
        # single-argument print(...) behaves the same as a Python 2 print
        # statement and is also valid Python 3
        print(filename)
        with serial.MolReader().open(filename) as reader:
            for mol in reader:
                try:
                    database.add_mol(mol)
                except ValueError:
                    # identify the skipped molecule by name when it has
                    # one, otherwise by its isomeric SMILES
                    if mol.HasProp('_Name'):
                        label = mol.GetProp('_Name')
                    else:
                        label = Chem.MolToSmiles(mol, isomericSmiles=True)
                    print('Skipping {}'.format(label))
    final_size = len(database)
    print('{} molecules added to the database'.format(
        final_size - initial_size))
    database.save(output_filename)
def main(input_filenames, output_filename, database_filename=None,
         assign_stereo_from_3d=False):
    """
    Update or create a molecule database.

    Every molecule read from the input files is added to the database.
    Molecules for which add_mol raises ValueError are reported on stdout
    and skipped. The number of added molecules is printed and the database
    is saved to the output file.

    Parameters
    ----------
    input_filenames : list
        Input molecule filename(s).
    output_filename : str
        Output filename.
    database_filename : str, optional
        Existing database to update.
    assign_stereo_from_3d : bool, optional (default False)
        Whether to assign stereochemistry from 3D coordinates.
    """
    database = MoleculeDatabase(assign_stereo_from_3d=assign_stereo_from_3d)
    if database_filename is not None:
        database.load(database_filename)
    # measured after the optional load so only new molecules are counted
    initial_size = len(database)
    for filename in input_filenames:
        # single-argument print(...) behaves the same as a Python 2 print
        # statement and is also valid Python 3
        print(filename)
        with serial.MolReader().open(filename) as reader:
            for mol in reader:
                try:
                    database.add_mol(mol)
                except ValueError:
                    # identify the skipped molecule by name when it has
                    # one, otherwise by its isomeric SMILES
                    if mol.HasProp('_Name'):
                        label = mol.GetProp('_Name')
                    else:
                        label = Chem.MolToSmiles(mol, isomericSmiles=True)
                    print('Skipping {}'.format(label))
    final_size = len(database)
    print('{} molecules added to the database'.format(
        final_size - initial_size))
    database.save(output_filename)
class TestMoleculeDatabase(unittest.TestCase):
    """
    Tests for MoleculeDatabase.
    """
    def setUp(self):
        """
        Set up tests.

        Builds three named molecules, a temporary directory for scratch
        files and an empty MoleculeDatabase.
        """
        smiles = [
            "CC(=O)OC1=CC=CC=C1C(=O)O",
            "CC(C)CC1=CC=C(C=C1)C(C)C(=O)O",
            "CC1=CC=C(C=C1)C2=CC(=NN2C3=CC=C(C=C3)S(=O)(=O)N)C(F)(F)F",
        ]
        names = ["aspirin", "ibuprofen", "celecoxib"]
        self.cids = [2244, 3672, 2662]
        self.mols = []
        for s, n in zip(smiles, names):
            mol = Chem.MolFromSmiles(s)
            mol.SetProp("_Name", n)
            self.mols.append(mol)
        self.temp_dir = tempfile.mkdtemp()
        self.database = MoleculeDatabase()

    def tearDown(self):
        """
        Clean up tests.
        """
        shutil.rmtree(self.temp_dir)

    def _make_temp_file(self, suffix=""):
        """
        Create an empty file inside the test directory.

        Replaces direct tempfile.mkstemp calls, whose returned OS-level
        descriptor was discarded without being closed. NamedTemporaryFile
        closes its handle on leaving the with-block and delete=False keeps
        the file on disk (tearDown removes the whole directory).

        Parameters
        ----------
        suffix : str, optional
            Filename suffix, e.g. ".gz".

        Returns
        -------
        str
            Path of the created file.
        """
        with tempfile.NamedTemporaryFile(dir=self.temp_dir, suffix=suffix,
                                         delete=False) as handle:
            return handle.name

    def check_database(self, mols=None, filename=None):
        """
        Check database contents.

        Parameters
        ----------
        mols : list, optional
            Molecules that should be in the database. Defaults to self.mols.
        filename : str, optional
            Existing database filename.
        """
        if mols is None:
            mols = self.mols
        if filename is not None:
            database = MoleculeDatabase()
            database.load(filename)
        else:
            database = self.database

        # check for appropriate length
        assert len(database) == len(mols)

        # check that SMILES are what we expect
        for mol in mols:
            assert database.engine.get_smiles(mol) in database

    def test_load(self):
        """
        Test MoleculeDatabase.load.
        """
        filename = self._make_temp_file()
        # NOTE(review): writing a str to a binary-mode handle only works on
        # Python 2 -- confirm the targeted interpreter version
        with open(filename, "wb") as f:
            f.write("{}\n".format(
                self.database.engine.get_smiles(self.mols[0])))
        self.check_database([self.mols[0]], filename)

    def test_load_gz(self):
        """
        Test MoleculeDatabase.load with gzipped input.
        """
        filename = self._make_temp_file(suffix=".gz")
        # same str-to-binary-handle caveat as in test_load
        with gzip.open(filename, "wb") as f:
            f.write("{}\n".format(
                self.database.engine.get_smiles(self.mols[0])))
        self.check_database([self.mols[0]], filename)

    def test_load_bogus(self):
        """
        Test failure on loading a bogus dataset.
        """
        filename = self._make_temp_file()
        with open(filename, "wb") as f:
            f.write("bogus\n")
        # fails the test unless load raises ValueError
        self.assertRaises(ValueError, self.database.load, filename)

    def test_save(self):
        """
        Test MoleculeDatabase.save.
        """
        filename = self._make_temp_file()
        for mol in self.mols:
            self.database.add_mol(mol)
        self.database.save(filename)
        self.check_database(filename=filename)
        # the saved file is expected to hold one line per molecule
        with open(filename) as f:
            assert len(f.readlines()) == len(self.mols)

    def test_save_gz(self):
        """
        Test MoleculeDatabase.save with gzipped output.
        """
        filename = self._make_temp_file(suffix=".gz")
        for mol in self.mols:
            self.database.add_mol(mol)
        self.database.save(filename)
        self.check_database(filename=filename)
        # the saved file is expected to hold one line per molecule
        with gzip.open(filename) as f:
            assert len(f.readlines()) == len(self.mols)

    def test_add_mol(self):
        """
        Test MoleculeDatabase.add_mol.
        """
        for mol in self.mols:
            self.database.add_mol(mol)
        self.check_database()

    def test_add_mol_duplicate(self):
        """
        Test MoleculeDatabase.add_mol with a duplicate molecule.
        """
        for mol in self.mols:  # add once
            self.database.add_mol(mol)
        for mol in self.mols:  # add twice
            self.database.add_mol(mol)
        # check_database asserts the size still equals len(self.mols)
        self.check_database()
class TestMoleculeDatabase(unittest.TestCase):
    """
    Tests for MoleculeDatabase.
    """
    def setUp(self):
        """
        Set up tests.

        Builds three named molecules, a temporary directory for scratch
        files and an empty MoleculeDatabase.
        """
        smiles = [
            'CC(=O)OC1=CC=CC=C1C(=O)O',
            'CC(C)CC1=CC=C(C=C1)C(C)C(=O)O',
            'CC1=CC=C(C=C1)C2=CC(=NN2C3=CC=C(C=C3)S(=O)(=O)N)C(F)(F)F'
        ]
        names = ['aspirin', 'ibuprofen', 'celecoxib']
        self.cids = [2244, 3672, 2662]
        self.mols = []
        for s, n in zip(smiles, names):
            mol = Chem.MolFromSmiles(s)
            mol.SetProp('_Name', n)
            self.mols.append(mol)
        self.temp_dir = tempfile.mkdtemp()
        self.database = MoleculeDatabase()

    def tearDown(self):
        """
        Clean up tests.
        """
        shutil.rmtree(self.temp_dir)

    def _make_temp_file(self, suffix=''):
        """
        Create an empty file inside the test directory.

        Replaces direct tempfile.mkstemp calls, whose returned OS-level
        descriptor was discarded without being closed. NamedTemporaryFile
        closes its handle on leaving the with-block and delete=False keeps
        the file on disk (tearDown removes the whole directory).

        Parameters
        ----------
        suffix : str, optional
            Filename suffix, e.g. '.gz'.

        Returns
        -------
        str
            Path of the created file.
        """
        with tempfile.NamedTemporaryFile(dir=self.temp_dir, suffix=suffix,
                                         delete=False) as handle:
            return handle.name

    def check_database(self, mols=None, filename=None):
        """
        Check database contents.

        Parameters
        ----------
        mols : list, optional
            Molecules that should be in the database. Defaults to self.mols.
        filename : str, optional
            Existing database filename.
        """
        if mols is None:
            mols = self.mols
        if filename is not None:
            database = MoleculeDatabase()
            database.load(filename)
        else:
            database = self.database

        # check for appropriate length
        assert len(database) == len(mols)

        # check that SMILES are what we expect
        for mol in mols:
            assert database.engine.get_smiles(mol) in database

    def test_load(self):
        """
        Test MoleculeDatabase.load.
        """
        filename = self._make_temp_file()
        # NOTE(review): writing a str to a binary-mode handle only works on
        # Python 2 -- confirm the targeted interpreter version
        with open(filename, 'wb') as f:
            f.write('{}\n'.format(self.database.engine.get_smiles(
                self.mols[0])))
        self.check_database([self.mols[0]], filename)

    def test_load_gz(self):
        """
        Test MoleculeDatabase.load with gzipped input.
        """
        filename = self._make_temp_file(suffix='.gz')
        # same str-to-binary-handle caveat as in test_load
        with gzip.open(filename, 'wb') as f:
            f.write('{}\n'.format(self.database.engine.get_smiles(
                self.mols[0])))
        self.check_database([self.mols[0]], filename)

    def test_load_bogus(self):
        """
        Test failure on loading a bogus dataset.
        """
        filename = self._make_temp_file()
        with open(filename, 'wb') as f:
            f.write('bogus\n')
        # fails the test unless load raises ValueError
        self.assertRaises(ValueError, self.database.load, filename)

    def test_save(self):
        """
        Test MoleculeDatabase.save.
        """
        filename = self._make_temp_file()
        for mol in self.mols:
            self.database.add_mol(mol)
        self.database.save(filename)
        self.check_database(filename=filename)
        # the saved file is expected to hold one line per molecule
        with open(filename) as f:
            assert len(f.readlines()) == len(self.mols)

    def test_save_gz(self):
        """
        Test MoleculeDatabase.save with gzipped output.
        """
        filename = self._make_temp_file(suffix='.gz')
        for mol in self.mols:
            self.database.add_mol(mol)
        self.database.save(filename)
        self.check_database(filename=filename)
        # the saved file is expected to hold one line per molecule
        with gzip.open(filename) as f:
            assert len(f.readlines()) == len(self.mols)

    def test_add_mol(self):
        """
        Test MoleculeDatabase.add_mol.
        """
        for mol in self.mols:
            self.database.add_mol(mol)
        self.check_database()

    def test_add_mol_duplicate(self):
        """
        Test MoleculeDatabase.add_mol with a duplicate molecule.
        """
        for mol in self.mols:  # add once
            self.database.add_mol(mol)
        for mol in self.mols:  # add twice
            self.database.add_mol(mol)
        # check_database asserts the size still equals len(self.mols)
        self.check_database()