コード例 #1
0
def build_benchmark_check_rdkmols_catalog(mmapdir, molit=read_labelled_only_smiles, checks=False, overwrite=False):
    """Builds a memmapped catalog {molid->rdkbytes} from a (molid, smiles) iterator.

    Optionally benchmarks reading the catalog back and compares each stored
    molecule against a sequential recreation of the molecule from its smiles.

    Parameters:
      - mmapdir: location handed to MemMappedMols, both to build and to reload the catalog.
      - molit: callable taking no arguments and returning an iterable of (molid, smiles) pairs.
               It is called anew for each pass over the molecules (build, benchmark, check).
      - checks: if False, return right after the build step.
      - overwrite: if False and the catalog already exists, the build step is skipped.

    Returns None; results are reported via info() and warning().
    """

    # Build the catalog
    info('Building %s catalog...' % mmapdir)
    start = time()
    mmm = MemMappedMols(mmapdir)
    if not overwrite and mmm.has_catalog():
        info('Already computed, skipping.')
    else:
        mmm.save_from_smiles_iterator(molit())
    info('Time taken to build the memmapped file: %.2f seconds' % (time() - start))

    if not checks:
        return

    # Load the catalog
    mmms = MemMappedMols(mmapdir)

    # Lame benchmark - memmapped contiguous
    info('Benchmarking contiguous memmap reading')
    start = time()
    molcount = 0
    for molid in mmms.molids():
        mmms.mol(molid)
        molcount += 1
    info('Time taken to read the memmapped %d mols (contiguous): %.2f seconds' % (molcount, time() - start))

    # Lame benchmark - memmapped, non-contiguous
    # Iterating a set visits the molids in hash order instead of the order given by molids()
    info('Benchmarking random memmap reading')
    start = time()
    molcount = 0
    for molid in set(mmms.molids()):
        mmms.mol(molid)
        molcount += 1
    info('Time taken to read the memmapped %d mols (random): %.2f seconds' % (molcount, time() - start))

    # Lame benchmark - from smiles
    info('Benchmarking reading from the original file')
    start = time()
    molcount = 0
    for _, smiles in molit():
        Chem.MolFromSmiles(smiles)
        molcount += 1
    info('Time taken to read the smiled %d mols: %.2f seconds' % (molcount, time() - start))

    # Exhaustive linear test that all mols are correctly stored
    info('Making sure that all is OKish')
    for molid, smiles in molit():
        emol = Chem.MolFromSmiles(smiles)
        smol = mmms.mol(molid)  # fetched once and reused for both the comparison and the report
        if emol is None:
            if smol is not None:
                warning('Molecule %s with original smiles %s should not be parsed from the binary store' %
                        (molid, smiles))
            continue
        if smol is None:
            # Report the inconsistency instead of letting MolToSmiles(None) abort the whole check
            warning('Molecule %s with original smiles %s is missing from the binary store' %
                    (molid, smiles))
            continue
        expected_smiles = Chem.MolToSmiles(emol)
        stored_smiles = Chem.MolToSmiles(smol)
        if expected_smiles != stored_smiles:
            warning('Molecule %s with original smiles %s do not reconstruct properly: \n\t(%s != %s)' %
                    (molid, smiles, expected_smiles, stored_smiles))
    info('All is OKish')
コード例 #2
0
def to_rdkit_mol(smiles, molid=None, sanitize=True, to2D=False, to3D=False, toPropertyMol=False):
    """Creates an RDKit molecule out of a smiles string, with optional postprocessing.

    Parameters:
      - smiles: the smiles string to parse.
      - molid: optional identifier, only used to make the failure warning more informative.
      - sanitize: forwarded to Chem.MolFromSmiles.
      - to2D: if True (and to3D is False), compute 2D coordinates for the molecule.
      - to3D: if True, embed the molecule and optimize it with UFF; takes precedence over to2D.
      - toPropertyMol: if True, return the molecule wrapped in a PropertyMol.

    Returns the molecule, or None (after issuing a warning) if the smiles could not be parsed.
    """
    parsed = Chem.MolFromSmiles(smiles, sanitize=sanitize)

    # Nothing to postprocess if parsing failed: warn and give up
    if parsed is None:
        message = ('RDKit cannot create a molecule from smiles %s' % smiles
                   if molid is None else
                   'RDKit cannot create molecule %s from smiles %s' % (molid, smiles))
        warning(message)
        return None

    # Coordinates generation, 3D wins over 2D when both are requested
    if to3D:
        AllChem.EmbedMolecule(parsed)
        AllChem.UFFOptimizeMolecule(parsed)
    elif to2D:
        AllChem.Compute2DCoords(parsed)

    return PropertyMol(parsed) if toPropertyMol else parsed
コード例 #3
0
ファイル: rdkit_utils.py プロジェクト: sdvillal/ccl-malaria
def to_rdkit_mol(mol_repr, molid=None, instantiator=Chem.MolFromSmiles, to2D=False, to3D=False, toPropertyMol=False):
    """
    Turns a molecular representation (e.g. a smiles string) into an RDKit molecule,
    optionally applying common postprocessing steps to the result.

    Parameters:
      - mol_repr: the representation to convert; a Chem.Mol instance is used as is.
      - molid: optional identifier, only used to make the failure warning more informative.
      - instantiator: callable applied to mol_repr to create the molecule when it is not a Chem.Mol.
      - to2D: if True (and to3D is False), compute 2D coordinates for the molecule.
      - to3D: if True, embed the molecule and optimize it with UFF; takes precedence over to2D.
      - toPropertyMol: if True, return the molecule wrapped in a PropertyMol.

    Returns the molecule, or None (after issuing a warning) if the instantiator returned None.
    """
    # Only go through the instantiator when we were not handed a molecule already
    rdkmol = mol_repr if isinstance(mol_repr, Chem.Mol) else instantiator(mol_repr)

    if rdkmol is None:
        if molid is not None:
            warning('RDKit cannot create molecule %s from %r' % (molid, mol_repr))
        else:
            warning('RDKit cannot create a molecule from %r' % mol_repr)
        return None

    # Coordinates generation, 3D wins over 2D when both are requested
    if to3D:
        AllChem.EmbedMolecule(rdkmol)
        AllChem.UFFOptimizeMolecule(rdkmol)
    elif to2D:
        AllChem.Compute2DCoords(rdkmol)

    if not toPropertyMol:
        return rdkmol
    return PropertyMol(rdkmol)