Beispiel #1
0
    def setUp(self):
        """
        Set up tests.

        Builds three named molecules with 2D coordinates, writes them to a
        temporary '.sdf.gz' file inside a fresh temporary directory, and
        creates the sharder and reader used by the tests.
        """
        # generate molecules
        smiles = ['CC(=O)OC1=CC=CC=C1C(=O)O', 'CC(C)CC1=CC=C(C=C1)C(C)C(=O)O',
                  'CC1=CC=C(C=C1)C2=CC(=NN2C3=CC=C(C=C3)S(=O)(=O)N)C(F)(F)F']
        names = ['aspirin', 'ibuprofen', 'celecoxib']
        self.mols = []
        for s, n in zip(smiles, names):
            mol = Chem.MolFromSmiles(s)
            mol.SetProp('_Name', n)
            AllChem.Compute2DCoords(mol)
            self.mols.append(mol)

        # write molecules to file
        self.temp_dir = tempfile.mkdtemp()
        # mkstemp() would hand back an open OS-level descriptor that is easy
        # to leak; NamedTemporaryFile closes its handle when the block exits
        # and delete=False keeps the (empty) file on disk for the writer.
        with tempfile.NamedTemporaryFile(dir=self.temp_dir,
                                         suffix='.sdf.gz',
                                         delete=False) as placeholder:
            self.filename = placeholder.name
        writer = serial.MolWriter()
        with writer.open(self.filename) as w:
            w.write(self.mols)

        self.sharder = DatasetSharder(filename=self.filename,
                                      write_shards=False)
        self.reader = serial.MolReader()
Beispiel #2
0
    def test_not_close_other(self):
        """
        Make sure MolIO doesn't close files it didn't open.

        A reader handed an already-open file object must leave that object
        open, both after an explicit close() and after being used as a
        context manager.
        """
        # Create the empty file without leaking a descriptor: the handle is
        # closed on leaving the block and delete=False keeps the file.
        with tempfile.NamedTemporaryFile(suffix='.sdf', dir=self.temp_dir,
                                         delete=False) as placeholder:
            filename = placeholder.name

        with open(filename) as f:
            reader = serial.MolReader(f, mol_format='sdf')
            reader.close()
            assert not f.closed

        # also test the context manager
        with open(filename) as g:
            with serial.MolReader(g, mol_format='sdf'):
                pass
            assert not g.closed
Beispiel #3
0
 def read_data(self):
     """
     Load every molecule from ``self.filename`` into a list.

     Returns
     -------
     mols : list
         Molecules yielded by iterating over the opened reader.
     """
     mol_reader = serial.MolReader()
     with mol_reader.open(self.filename) as source:
         mols = [mol for mol in source]
     return mols
Beispiel #4
0
 def read_mols_from_file(self):
     """
     Lazily yield the molecules stored in ``self.filename``.

     The reader is opened when the generator is first advanced and stays
     open until the generator is exhausted or closed.
     """
     mol_reader = serial.MolReader()
     with mol_reader.open(self.filename) as source:
         for molecule in source.get_mols():
             yield molecule
Beispiel #5
0
 def test_is_a_salt(self):
     """
     A molecule that itself counts as a salt must survive salt removal.

     With ``remove_salts=True`` the reader should still return exactly one
     molecule, and that molecule should have at least one atom.
     """
     source = StringIO('C(=CC(=O)O)C(=O)O')
     reader = serial.MolReader(source, 'smi', remove_salts=True)
     mols = [mol for mol in reader.get_mols()]
     assert len(mols) == 1 and mols[0].GetNumAtoms()
Beispiel #6
0
 def test_skip_failures(self):
     """
     Molecules that fail to parse are skipped rather than returned.

     The test feeds the reader a single SMILES string and expects no
     molecules to come back.
     """
     source = StringIO('CO(C)C')
     reader = serial.MolReader(source, 'smi')
     mols = [mol for mol in reader.get_mols()]
     assert len(mols) == 0
Beispiel #7
0
 def test_no_remove_salts(self):
     """
     Test salt retention.

     Writes two salt forms to an SDF file, reads them back with
     ``remove_salts=False`` and checks that each molecule is unchanged and
     has more atoms than its desalted counterpart.
     """
     ref_mols = [self.aspirin_sodium, self.levalbuterol_hcl]
     # Write straight into the temporary file: unlike mkstemp() this leaves
     # no stray OS-level descriptor open, and delete=False keeps the file
     # around for the reader.
     with tempfile.NamedTemporaryFile(mode='wb', suffix='.sdf',
                                      dir=self.temp_dir,
                                      delete=False) as f:
         filename = f.name
         for mol in ref_mols:
             f.write(Chem.MolToMolBlock(mol))
             f.write('$$$$\n')  # molecule delimiter
     self.reader = serial.MolReader(remove_salts=False)
     self.reader.open(filename)
     try:
         # materialize the molecules before the file is closed
         mols = list(self.reader.get_mols())
     finally:
         self.reader.close()
     assert len(mols) == 2
     # a second reader, used only to produce desalted references
     self.reader = serial.MolReader(remove_salts=True)
     for mol, ref_mol in zip(mols, ref_mols):
         assert mol.ToBinary() == ref_mol.ToBinary()
         desalted = self.reader.clean_mol(ref_mol)
         assert mol.GetNumAtoms() > desalted.GetNumAtoms()
Beispiel #8
0
 def test_remove_hydrogens(self):
     """
     Test hydrogen removal.

     Writes the explicit-hydrogen aspirin to an SDF file and checks that a
     reader created with ``remove_hydrogens=True`` returns a molecule equal
     to plain aspirin.
     """
     # Write straight into the temporary file so that no mkstemp()
     # descriptor is left open; delete=False keeps the file for the reader.
     with tempfile.NamedTemporaryFile(mode='wb', suffix='.sdf',
                                      dir=self.temp_dir,
                                      delete=False) as f:
         filename = f.name
         f.write(Chem.MolToMolBlock(self.aspirin_h))
     reader = serial.MolReader(remove_hydrogens=True)
     reader.open(filename)
     try:
         mols = reader.get_mols()
         # next() works on both Python 2.6+ and Python 3 iterators
         assert next(mols).ToBinary() == self.aspirin.ToBinary()
     finally:
         reader.close()
Beispiel #9
0
 def test_read_compressed_file_like(self):
     """
     Read from a file-like object using gzip.

     Writes aspirin as a gzip-compressed mol block, then passes the opened
     gzip file object to the reader and checks the first molecule read.
     """
     # Reserve a file name without leaking a descriptor: the handle is
     # closed on leaving the block and delete=False keeps the file.
     with tempfile.NamedTemporaryFile(suffix='.sdf.gz', dir=self.temp_dir,
                                      delete=False) as placeholder:
         filename = placeholder.name
     with gzip.open(filename, 'wb') as f:
         f.write(Chem.MolToMolBlock(self.aspirin))
     with gzip.open(filename) as f:
         reader = serial.MolReader(f, mol_format='sdf')
         mols = reader.get_mols()
         # next() works on both Python 2.6+ and Python 3 iterators
         assert next(mols).ToBinary() == self.aspirin.ToBinary()
Beispiel #10
0
 def test_no_remove_hydrogens(self):
     """
     Test hydrogen retention.

     Writes the explicit-hydrogen aspirin to an SDF file and checks that a
     reader created with ``remove_hydrogens=False`` returns a molecule whose
     mol block matches the original.
     """
     # Write straight into the temporary file so that no mkstemp()
     # descriptor is left open; delete=False keeps the file for the reader.
     with tempfile.NamedTemporaryFile(mode='wb', suffix='.sdf',
                                      dir=self.temp_dir,
                                      delete=False) as f:
         filename = f.name
         f.write(Chem.MolToMolBlock(self.aspirin_h))
     reader = serial.MolReader(remove_hydrogens=False, remove_salts=False)
     reader.open(filename)
     try:
         mols = reader.get_mols()
         # FIXME get ToBinary test to work
         # assert mols.next().ToBinary() == self.aspirin_h.ToBinary()
         # next() works on both Python 2.6+ and Python 3 iterators
         assert Chem.MolToMolBlock(next(mols)) == Chem.MolToMolBlock(
             self.aspirin_h)
     finally:
         reader.close()
def get_smiles(filename, assign_stereo_from_3d=False):
    """
    Collect the SMILES strings of the molecules in a file.

    Every molecule read from the file is added to a fresh MoleculeDatabase,
    and the database's SMILES are returned as a list.

    Parameters
    ----------
    filename : str
        Input molecule filename.
    assign_stereo_from_3d : bool, optional (default False)
        Assign stereochemistry from 3D coordinates.

    Returns
    -------
    smiles : list
        Contents of the database's ``smiles`` attribute.
    """
    db = MoleculeDatabase(assign_stereo_from_3d=assign_stereo_from_3d)
    mol_reader = serial.MolReader()
    with mol_reader.open(filename) as source:
        for molecule in source:
            db.add_mol(molecule)
    return list(db.smiles)
Beispiel #12
0
    def _ionize_3d(self, mol):
        """
        Ionize a molecule while preserving conformers.

        Each conformer is written as its own SDF record, the records are
        piped through the ``obabel`` command-line tool (invoked with
        ``-p <self.pH>``), and the resulting molecules are read back and
        merged into a single multi-conformer molecule.

        Parameters
        ----------
        mol : RDMol
            Molecule. Must have at least one conformer.

        Returns
        -------
        ionized_mol : RDMol
            First molecule read back, with the conformers of any further
            molecules appended.

        Raises
        ------
        IonizerError
            If reading the ionized output raises a RuntimeError, or if no
            molecules are read back.
        """
        assert mol.GetNumConformers() > 0
        # one SDF record (mol block + '$$$$' delimiter) per conformer; join
        # avoids repeated string concatenation
        sdf = ''.join(
            Chem.MolToMolBlock(mol, confId=conf.GetId(),
                               includeStereo=True) + '$$$$\n'
            for conf in mol.GetConformers())
        args = ['obabel', '-i', 'sdf', '-o', 'sdf', '-p', str(self.pH)]
        p = subprocess.Popen(args, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        ionized_sdf, _ = p.communicate(sdf)
        reader = serial.MolReader(StringIO(ionized_sdf), mol_format='sdf',
                                  remove_salts=False)  # no changes
        try:
            mols = list(reader.get_mols())
        except RuntimeError as e:  # catch pre-condition violations
            # str(e) instead of the deprecated e.message, which is empty
            # when the exception carries more than one argument
            raise IonizerError(str(e))

        # catch ionizer failure
        if not mols:
            raise IonizerError(mol)

        # detection of stereochemistry based on 3D coordinates might result
        # in issues when attempting to recombine ionized conformers, but we
        # merge them anyway
        ionized_mol = mols[0]
        for other in mols[1:]:  # no-op when only one molecule came back
            for conf in other.GetConformers():
                ionized_mol.AddConformer(conf, assignId=True)
        return ionized_mol
Beispiel #13
0
    def setUp(self):
        """
        Create a temporary directory and the molecules shared by the tests.

        Sets up aspirin (plain, with explicit hydrogens, and as the sodium
        salt), levalbuterol (plain and as the hydrochloride), the list of
        reference molecules, and a reader with ``compute_2d_coords=False``.
        """
        self.temp_dir = tempfile.mkdtemp()
        make_mol = self._get_mol_from_smiles

        # aspirin
        aspirin_smiles = 'CC(=O)OC1=CC=CC=C1C(=O)O'
        aspirin_sodium_smiles = 'CC(=O)OC1=CC=CC=C1C(=O)[O-].[Na+]'
        self.aspirin = make_mol(aspirin_smiles, 'aspirin')
        self.aspirin_h = Chem.AddHs(self.aspirin)
        self.aspirin_sodium = make_mol(aspirin_sodium_smiles,
                                       'aspirin sodium')

        # levalbuterol (chiral)
        levalbuterol_smiles = 'CC(C)(C)NC[C@@H](C1=CC(=C(C=C1)O)CO)O'
        levalbuterol_hcl_smiles = 'CC(C)(C)NC[C@@H](C1=CC(=C(C=C1)O)CO)O.Cl'
        self.levalbuterol = make_mol(levalbuterol_smiles, 'levalbuterol')
        self.levalbuterol_hcl = make_mol(levalbuterol_hcl_smiles,
                                         'levalbuterol hydrochloride')

        self.ref_mols = [self.aspirin, self.levalbuterol]
        self.reader = serial.MolReader(compute_2d_coords=False)
Beispiel #14
0
def main(input_filenames,
         output_filename,
         database_filename=None,
         assign_stereo_from_3d=False):
    """
    Update or create a molecule database.

    Molecules from every input file are added to the database; molecules
    for which ``add_mol`` raises ValueError are reported and skipped. The
    number of molecules added is printed and the database is saved.

    Parameters
    ----------
    input_filenames : list
        Input molecule filename(s).
    output_filename : str
        Output filename.
    database_filename : str, optional
        Existing database to update.
    assign_stereo_from_3d : bool, optional (default False)
        Whether to assign stereochemistry from 3D coordinates.
    """
    database = MoleculeDatabase(assign_stereo_from_3d=assign_stereo_from_3d)
    if database_filename is not None:
        database.load(database_filename)
    initial_size = len(database)
    for filename in input_filenames:
        # single-argument print(...) behaves identically as a Python 2
        # statement and as the Python 3 function
        print(filename)
        with serial.MolReader().open(filename) as reader:
            for mol in reader:
                try:
                    database.add_mol(mol)
                except ValueError:
                    # identify the skipped molecule by name when it has
                    # one, otherwise by its isomeric SMILES
                    if mol.HasProp('_Name'):
                        label = mol.GetProp('_Name')
                    else:
                        label = Chem.MolToSmiles(mol, isomericSmiles=True)
                    print('Skipping {}'.format(label))
    final_size = len(database)
    print('{} molecules added to the database'.format(final_size -
                                                      initial_size))
    database.save(output_filename)
Beispiel #15
0
def read_mols(input_filename):
    """
    Read molecules from an input file and extract names.

    Parameters
    ----------
    input_filename : str
        Filename containing molecules.

    Returns
    -------
    mols : ndarray
        Molecules read from the file, passed through ``np.asarray``.
    names : ndarray
        The '_Name' property of each molecule, or None where a molecule
        has no such property, in the same order as ``mols``.
    """
    # single-argument print(...) behaves identically as a Python 2
    # statement and as the Python 3 function
    print("Reading molecules...")
    reader = serial.MolReader()
    reader.open(input_filename)
    mols = []
    names = []
    try:
        for mol in reader.get_mols():
            mols.append(mol)
            if mol.HasProp('_Name'):
                names.append(mol.GetProp('_Name'))
            else:
                names.append(None)
    finally:
        # release the file even if reading a molecule raises
        reader.close()
    mols = np.asarray(mols)
    names = np.asarray(names)
    return mols, names