def check_database(self, mols=None, filename=None):
        """
        Verify that a database holds the expected molecules.

        The database must have the same length as the expected molecule list
        and must contain the SMILES string that its own engine generates for
        each expected molecule.

        Parameters
        ----------
        mols : list, optional
            Molecules expected in the database. When None, self.mols is used.
        filename : str, optional
            Database file to check. When given, a new MoleculeDatabase is
            loaded from it; otherwise self.database is checked.

        Raises
        ------
        AssertionError
            If the sizes differ or an expected SMILES string is missing.
        """
        expected = self.mols if mols is None else mols
        if filename is None:
            target = self.database
        else:
            target = MoleculeDatabase()
            target.load(filename)

        # sizes must agree before individual entries are examined
        assert len(target) == len(expected)

        # each expected molecule must be present under its SMILES string
        for expected_mol in expected:
            smiles = target.engine.get_smiles(expected_mol)
            assert smiles in target
    def check_database(self, mols=None, filename=None):
        """
        Verify that a database holds the expected molecules.

        The database must have the same length as the expected molecule list
        and must contain the SMILES string that its own engine generates for
        each expected molecule.

        Parameters
        ----------
        mols : list, optional
            Molecules expected in the database. When None, self.mols is used.
        filename : str, optional
            Database file to check. When given, a new MoleculeDatabase is
            loaded from it; otherwise self.database is checked.

        Raises
        ------
        AssertionError
            If the sizes differ or an expected SMILES string is missing.
        """
        expected = self.mols if mols is None else mols
        if filename is None:
            target = self.database
        else:
            target = MoleculeDatabase()
            target.load(filename)

        # sizes must agree before individual entries are examined
        assert len(target) == len(expected)

        # each expected molecule must be present under its SMILES string
        for expected_mol in expected:
            smiles = target.engine.get_smiles(expected_mol)
            assert smiles in target
 def test_update(self):
     """
     Test updating an existing database.

     Saves a database holding only the first molecule to a temporary file
     and passes that file through the ``-d`` argument to check_output.
     """
     # mkstemp returns an already-open OS-level descriptor that was being
     # discarded without being closed. NamedTemporaryFile gives a file
     # object instead, so the with-block closes it; delete=False keeps the
     # path on disk for the database to be saved into.
     with tempfile.NamedTemporaryFile(dir=self.temp_dir, delete=False) as f:
         database_filename = f.name
     database = MoleculeDatabase()
     database.add_mol(self.mols[0])
     database.save(database_filename)
     self.check_output(["-i", self.input_filename, "-o", self.output_filename, "-d", database_filename])
Example #4
0
    def check_output(self, input_args):
        """
        Run main on parsed arguments and check the database it writes.

        Parameters
        ----------
        input_args : list
            Command-line arguments, handed to parse_args.

        Returns
        -------
        MoleculeDatabase
            Database loaded from the parsed output filename.
        """
        parsed = parse_args(input_args)
        main(
            parsed.input,
            parsed.output,
            parsed.database,
            parsed.stereo_from_3d,
        )

        # reload what main wrote to the output path
        result = MoleculeDatabase()
        result.load(parsed.output)

        # the output must hold one entry per molecule in self.mols
        assert len(result) == len(self.mols)
        return result
    def check_output(self, input_args):
        """
        Run main on parsed arguments and check the database it writes.

        Parameters
        ----------
        input_args : list
            Command-line arguments, handed to parse_args.

        Returns
        -------
        MoleculeDatabase
            Database loaded from the parsed output filename.
        """
        parsed = parse_args(input_args)
        main(
            parsed.input,
            parsed.output,
            parsed.database,
            parsed.stereo_from_3d,
        )

        # reload what main wrote to the output path
        result = MoleculeDatabase()
        result.load(parsed.output)

        # the output must hold one entry per molecule in self.mols
        assert len(result) == len(self.mols)
        return result
def get_smiles(filename, assign_stereo_from_3d=False):
    """
    Collect SMILES strings for the molecules in a file.

    Every molecule read from the file is added to a throwaway
    MoleculeDatabase, and that database's ``smiles`` attribute is returned
    as a list.

    Parameters
    ----------
    filename : str
        Input molecule filename.
    assign_stereo_from_3d : bool, optional (default False)
        Assign stereochemistry from 3D coordinates. Passed through to
        MoleculeDatabase.

    Returns
    -------
    list
        Contents of the database's ``smiles`` attribute.
    """
    collector = MoleculeDatabase(assign_stereo_from_3d=assign_stereo_from_3d)
    mol_reader = serial.MolReader()
    with mol_reader.open(filename) as molecules:
        for molecule in molecules:
            collector.add_mol(molecule)
    return list(collector.smiles)
def get_smiles(filename, assign_stereo_from_3d=False):
    """
    Collect SMILES strings for the molecules in a file.

    Every molecule read from the file is added to a throwaway
    MoleculeDatabase, and that database's ``smiles`` attribute is returned
    as a list.

    Parameters
    ----------
    filename : str
        Input molecule filename.
    assign_stereo_from_3d : bool, optional (default False)
        Assign stereochemistry from 3D coordinates. Passed through to
        MoleculeDatabase.

    Returns
    -------
    list
        Contents of the database's ``smiles`` attribute.
    """
    collector = MoleculeDatabase(assign_stereo_from_3d=assign_stereo_from_3d)
    mol_reader = serial.MolReader()
    with mol_reader.open(filename) as molecules:
        for molecule in molecules:
            collector.add_mol(molecule)
    return list(collector.smiles)
 def setUp(self):
     """
     Set up tests.

     Parses three SMILES strings into molecules, labels each one through
     its ``_Name`` property, then creates a scratch directory and a new
     MoleculeDatabase for the test to use.
     """
     named_smiles = [
         ('aspirin', 'CC(=O)OC1=CC=CC=C1C(=O)O'),
         ('ibuprofen', 'CC(C)CC1=CC=C(C=C1)C(C)C(=O)O'),
         ('celecoxib',
          'CC1=CC=C(C=C1)C2=CC(=NN2C3=CC=C(C=C3)S(=O)(=O)N)C(F)(F)F'),
     ]
     # presumably the PubChem CIDs of the molecules above -- TODO confirm
     self.cids = [2244, 3672, 2662]
     self.mols = []
     for name, smi in named_smiles:
         parsed = Chem.MolFromSmiles(smi)
         parsed.SetProp('_Name', name)
         self.mols.append(parsed)
     self.temp_dir = tempfile.mkdtemp()
     self.database = MoleculeDatabase()
Example #9
0
 def test_update(self):
     """
     Test updating an existing database.

     Saves a database holding only the first molecule to a temporary file
     and passes that file through the ``-d`` argument to check_output.
     """
     # mkstemp returns an already-open OS-level descriptor that was being
     # discarded without being closed. NamedTemporaryFile gives a file
     # object instead, so the with-block closes it; delete=False keeps the
     # path on disk for the database to be saved into.
     with tempfile.NamedTemporaryFile(dir=self.temp_dir, delete=False) as f:
         database_filename = f.name
     database = MoleculeDatabase()
     database.add_mol(self.mols[0])
     database.save(database_filename)
     self.check_output([
         '-i', self.input_filename, '-o', self.output_filename, '-d',
         database_filename
     ])
 def setUp(self):
     """
     Set up tests.

     Parses three SMILES strings into molecules, labels each one through
     its ``_Name`` property, then creates a scratch directory and a new
     MoleculeDatabase for the test to use.
     """
     named_smiles = [
         ("aspirin", "CC(=O)OC1=CC=CC=C1C(=O)O"),
         ("ibuprofen", "CC(C)CC1=CC=C(C=C1)C(C)C(=O)O"),
         (
             "celecoxib",
             "CC1=CC=C(C=C1)C2=CC(=NN2C3=CC=C(C=C3)S(=O)(=O)N)C(F)(F)F",
         ),
     ]
     # presumably the PubChem CIDs of the molecules above -- TODO confirm
     self.cids = [2244, 3672, 2662]
     self.mols = []
     for name, smi in named_smiles:
         parsed = Chem.MolFromSmiles(smi)
         parsed.SetProp("_Name", name)
         self.mols.append(parsed)
     self.temp_dir = tempfile.mkdtemp()
     self.database = MoleculeDatabase()
Example #11
0
def main(input_filenames,
         output_filename,
         database_filename=None,
         assign_stereo_from_3d=False):
    """
    Update or create a molecule database.

    Molecules from every input file are added to the database; a molecule
    whose add_mol call raises ValueError is reported and skipped. The
    number of molecules added is printed and the database is saved.

    Parameters
    ----------
    input_filenames : list
        Input molecule filename(s).
    output_filename : str
        Output filename.
    database_filename : str, optional
        Existing database to update.
    assign_stereo_from_3d : bool, optional (default False)
        Whether to assign stereochemistry from 3D coordinates.
    """
    # NOTE: every print below uses the single-argument parenthesised form,
    # which produces the same output on Python 2 and Python 3; the bare
    # print statement is a SyntaxError on Python 3.
    database = MoleculeDatabase(assign_stereo_from_3d=assign_stereo_from_3d)
    if database_filename is not None:
        database.load(database_filename)
    initial_size = len(database)
    for filename in input_filenames:
        print(filename)
        with serial.MolReader().open(filename) as reader:
            for mol in reader:
                try:
                    database.add_mol(mol)
                except ValueError:
                    # identify the skipped molecule by name when it has
                    # one, otherwise by its isomeric SMILES
                    if mol.HasProp('_Name'):
                        label = mol.GetProp('_Name')
                    else:
                        label = Chem.MolToSmiles(mol, isomericSmiles=True)
                    print('Skipping {}'.format(label))
    final_size = len(database)
    print('{} molecules added to the database'.format(final_size -
                                                      initial_size))
    database.save(output_filename)
Example #12
0
def main(input_filenames, output_filename, database_filename=None,
         assign_stereo_from_3d=False):
    """
    Update or create a molecule database.

    Molecules from every input file are added to the database; a molecule
    whose add_mol call raises ValueError is reported and skipped. The
    number of molecules added is printed and the database is saved.

    Parameters
    ----------
    input_filenames : list
        Input molecule filename(s).
    output_filename : str
        Output filename.
    database_filename : str, optional
        Existing database to update.
    assign_stereo_from_3d : bool, optional (default False)
        Whether to assign stereochemistry from 3D coordinates.
    """
    # NOTE: every print below uses the single-argument parenthesised form,
    # which produces the same output on Python 2 and Python 3; the bare
    # print statement is a SyntaxError on Python 3.
    database = MoleculeDatabase(assign_stereo_from_3d=assign_stereo_from_3d)
    if database_filename is not None:
        database.load(database_filename)
    initial_size = len(database)
    for filename in input_filenames:
        print(filename)
        with serial.MolReader().open(filename) as reader:
            for mol in reader:
                try:
                    database.add_mol(mol)
                except ValueError:
                    # identify the skipped molecule by name when it has
                    # one, otherwise by its isomeric SMILES
                    if mol.HasProp('_Name'):
                        label = mol.GetProp('_Name')
                    else:
                        label = Chem.MolToSmiles(mol, isomericSmiles=True)
                    print('Skipping {}'.format(label))
    final_size = len(database)
    print('{} molecules added to the database'.format(
        final_size - initial_size))
    database.save(output_filename)
class TestMoleculeDatabase(unittest.TestCase):
    """
    Tests for MoleculeDatabase.
    """

    def setUp(self):
        """
        Set up tests.

        Parses three SMILES strings into molecules, labels each one through
        its ``_Name`` property, then creates a scratch directory and a new
        MoleculeDatabase for the test to use.
        """
        smiles = [
            "CC(=O)OC1=CC=CC=C1C(=O)O",
            "CC(C)CC1=CC=C(C=C1)C(C)C(=O)O",
            "CC1=CC=C(C=C1)C2=CC(=NN2C3=CC=C(C=C3)S(=O)(=O)N)C(F)(F)F",
        ]
        names = ["aspirin", "ibuprofen", "celecoxib"]
        # presumably the PubChem CIDs of the molecules above -- TODO confirm;
        # no test in this class reads them
        self.cids = [2244, 3672, 2662]
        self.mols = []
        for s, n in zip(smiles, names):
            mol = Chem.MolFromSmiles(s)
            mol.SetProp("_Name", n)
            self.mols.append(mol)
        self.temp_dir = tempfile.mkdtemp()
        self.database = MoleculeDatabase()

    def tearDown(self):
        """
        Clean up tests by removing the scratch directory.
        """
        shutil.rmtree(self.temp_dir)

    def _make_temp_file(self, suffix=""):
        """
        Create an empty file inside self.temp_dir and return its path.

        tempfile.mkstemp returns an already-open OS-level descriptor that
        the tests used to discard without closing. NamedTemporaryFile gives
        a file object instead, so the with-block closes it, while
        delete=False leaves the path on disk for the test to use.

        Parameters
        ----------
        suffix : str, optional
            Filename suffix (e.g. ".gz").
        """
        with tempfile.NamedTemporaryFile(
            dir=self.temp_dir, suffix=suffix, delete=False
        ) as handle:
            return handle.name

    def check_database(self, mols=None, filename=None):
        """
        Check database contents.

        Parameters
        ----------
        mols : list, optional
            Molecules that should be in the database. Defaults to self.mols.
        filename : str, optional
            Existing database filename. When given, a new MoleculeDatabase
            is loaded from it; otherwise self.database is checked.
        """
        if mols is None:
            mols = self.mols
        if filename is not None:
            database = MoleculeDatabase()
            database.load(filename)
        else:
            database = self.database

        # check for appropriate length
        assert len(database) == len(mols)

        # check that SMILES are what we expect
        for mol in mols:
            assert database.engine.get_smiles(mol) in database

    def test_load(self):
        """
        Test MoleculeDatabase.load.
        """
        filename = self._make_temp_file()
        smiles = self.database.engine.get_smiles(self.mols[0])
        with open(filename, "wb") as f:
            # the handle is binary, so encode explicitly; writing a text
            # str to it raises TypeError on Python 3
            f.write("{}\n".format(smiles).encode("utf-8"))
        self.check_database([self.mols[0]], filename)

    def test_load_gz(self):
        """
        Test MoleculeDatabase.load with gzipped input.
        """
        filename = self._make_temp_file(suffix=".gz")
        smiles = self.database.engine.get_smiles(self.mols[0])
        with gzip.open(filename, "wb") as f:
            # binary handle: see test_load
            f.write("{}\n".format(smiles).encode("utf-8"))
        self.check_database([self.mols[0]], filename)

    def test_load_bogus(self):
        """
        Test failure on loading a bogus dataset.
        """
        filename = self._make_temp_file()
        with open(filename, "wb") as f:
            f.write(b"bogus\n")
        # load must raise ValueError; returning normally fails the test
        with self.assertRaises(ValueError):
            self.database.load(filename)

    def test_save(self):
        """
        Test MoleculeDatabase.save.
        """
        filename = self._make_temp_file()
        for mol in self.mols:
            self.database.add_mol(mol)
        self.database.save(filename)
        self.check_database(filename=filename)
        # the saved file must have one line per molecule
        with open(filename) as f:
            assert len(f.readlines()) == len(self.mols)

    def test_save_gz(self):
        """
        Test MoleculeDatabase.save with gzipped output.
        """
        filename = self._make_temp_file(suffix=".gz")
        for mol in self.mols:
            self.database.add_mol(mol)
        self.database.save(filename)
        self.check_database(filename=filename)
        # the decompressed file must have one line per molecule
        with gzip.open(filename) as f:
            assert len(f.readlines()) == len(self.mols)

    def test_add_mol(self):
        """
        Test MoleculeDatabase.add_mol.
        """
        for mol in self.mols:
            self.database.add_mol(mol)
        self.check_database()

    def test_add_mol_duplicate(self):
        """
        Test MoleculeDatabase.add_mol with a duplicate molecule.
        """
        for mol in self.mols:  # add once
            self.database.add_mol(mol)
        for mol in self.mols:  # add twice
            self.database.add_mol(mol)
        # check_database compares sizes, so duplicates must not be stored
        self.check_database()
class TestMoleculeDatabase(unittest.TestCase):
    """
    Tests for MoleculeDatabase.
    """
    def setUp(self):
        """
        Set up tests.

        Parses three SMILES strings into molecules, labels each one through
        its ``_Name`` property, then creates a scratch directory and a new
        MoleculeDatabase for the test to use.
        """
        smiles = [
            'CC(=O)OC1=CC=CC=C1C(=O)O', 'CC(C)CC1=CC=C(C=C1)C(C)C(=O)O',
            'CC1=CC=C(C=C1)C2=CC(=NN2C3=CC=C(C=C3)S(=O)(=O)N)C(F)(F)F'
        ]
        names = ['aspirin', 'ibuprofen', 'celecoxib']
        # presumably the PubChem CIDs of the molecules above -- TODO confirm;
        # no test in this class reads them
        self.cids = [2244, 3672, 2662]
        self.mols = []
        for s, n in zip(smiles, names):
            mol = Chem.MolFromSmiles(s)
            mol.SetProp('_Name', n)
            self.mols.append(mol)
        self.temp_dir = tempfile.mkdtemp()
        self.database = MoleculeDatabase()

    def tearDown(self):
        """
        Clean up tests by removing the scratch directory.
        """
        shutil.rmtree(self.temp_dir)

    def _make_temp_file(self, suffix=''):
        """
        Create an empty file inside self.temp_dir and return its path.

        tempfile.mkstemp returns an already-open OS-level descriptor that
        the tests used to discard without closing. NamedTemporaryFile gives
        a file object instead, so the with-block closes it, while
        delete=False leaves the path on disk for the test to use.

        Parameters
        ----------
        suffix : str, optional
            Filename suffix (e.g. '.gz').
        """
        with tempfile.NamedTemporaryFile(
                dir=self.temp_dir, suffix=suffix, delete=False) as handle:
            return handle.name

    def check_database(self, mols=None, filename=None):
        """
        Check database contents.

        Parameters
        ----------
        mols : list, optional
            Molecules that should be in the database. Defaults to self.mols.
        filename : str, optional
            Existing database filename. When given, a new MoleculeDatabase
            is loaded from it; otherwise self.database is checked.
        """
        if mols is None:
            mols = self.mols
        if filename is not None:
            database = MoleculeDatabase()
            database.load(filename)
        else:
            database = self.database

        # check for appropriate length
        assert len(database) == len(mols)

        # check that SMILES are what we expect
        for mol in mols:
            assert database.engine.get_smiles(mol) in database

    def test_load(self):
        """
        Test MoleculeDatabase.load.
        """
        filename = self._make_temp_file()
        smiles = self.database.engine.get_smiles(self.mols[0])
        with open(filename, 'wb') as f:
            # the handle is binary, so encode explicitly; writing a text
            # str to it raises TypeError on Python 3
            f.write('{}\n'.format(smiles).encode('utf-8'))
        self.check_database([self.mols[0]], filename)

    def test_load_gz(self):
        """
        Test MoleculeDatabase.load with gzipped input.
        """
        filename = self._make_temp_file(suffix='.gz')
        smiles = self.database.engine.get_smiles(self.mols[0])
        with gzip.open(filename, 'wb') as f:
            # binary handle: see test_load
            f.write('{}\n'.format(smiles).encode('utf-8'))
        self.check_database([self.mols[0]], filename)

    def test_load_bogus(self):
        """
        Test failure on loading a bogus dataset.
        """
        filename = self._make_temp_file()
        with open(filename, 'wb') as f:
            f.write(b'bogus\n')
        # load must raise ValueError; returning normally fails the test
        with self.assertRaises(ValueError):
            self.database.load(filename)

    def test_save(self):
        """
        Test MoleculeDatabase.save.
        """
        filename = self._make_temp_file()
        for mol in self.mols:
            self.database.add_mol(mol)
        self.database.save(filename)
        self.check_database(filename=filename)
        # the saved file must have one line per molecule
        with open(filename) as f:
            assert len(f.readlines()) == len(self.mols)

    def test_save_gz(self):
        """
        Test MoleculeDatabase.save with gzipped output.
        """
        filename = self._make_temp_file(suffix='.gz')
        for mol in self.mols:
            self.database.add_mol(mol)
        self.database.save(filename)
        self.check_database(filename=filename)
        # the decompressed file must have one line per molecule
        with gzip.open(filename) as f:
            assert len(f.readlines()) == len(self.mols)

    def test_add_mol(self):
        """
        Test MoleculeDatabase.add_mol.
        """
        for mol in self.mols:
            self.database.add_mol(mol)
        self.check_database()

    def test_add_mol_duplicate(self):
        """
        Test MoleculeDatabase.add_mol with a duplicate molecule.
        """
        for mol in self.mols:  # add once
            self.database.add_mol(mol)
        for mol in self.mols:  # add twice
            self.database.add_mol(mol)
        # check_database compares sizes, so duplicates must not be stored
        self.check_database()