Esempi in Python per ForwardSDMolSupplier

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: rdkit.Chem

Classe/tipologia: ForwardSDMolSupplier

Esempi su hotexamples.com: 8

ForwardSDMolSupplier in Python: 8 esempi trovati. Questi sono i migliori esempi reali in Python di rdkit.Chem.ForwardSDMolSupplier, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

ForwardSDMolSupplier(8)

Metodi utilizzati di frequente

ForwardSDMolSupplier (8)

Esempio n. 1

Mostra file

def classify(sdf, label, lambdas):
    """Tag every molecule of an SD file with a class and write a new SD file.

    For each molecule read from ``sdf`` the property ``label`` is converted
    with ``floatify``; the key of the first predicate in ``lambdas`` that
    accepts the converted value is stored on the molecule as the property
    ``<label>_class``. Tagged molecules are written to
    ``<sdf up to '.sdf'>_class.sdf``. Molecules that cannot be read,
    converted, matched or written are reported on stderr and skipped.

    Parameters
    ----------
    sdf : str
        Path of the input SD file.
    label : str
        Name of the molecule property holding the value to classify.
    lambdas : dict
        Maps a class name to a one-argument predicate. Predicates are tried
        in the dict's iteration order; the first truthy result wins.
    """
    new_filename = "%s_class.sdf" % sdf.split('.sdf')[0]
    new_label = label + "_class"
    sdm = ForwardSDMolSupplier(sdf,
                               strictParsing=False,
                               removeHs=False,
                               sanitize=False)
    sdw = SDWriter(new_filename)
    try:
        for counter, mol in enumerate(sdm):
            # Progress indicator: zero-based index of the current record.
            print(counter)
            sys.stdout.flush()
            if mol is None:
                print("%d rdkit couldn't read molecule" % counter,
                      file=sys.stderr)
                sys.stderr.flush()
                continue

            prop = floatify(mol.GetProp(label))
            if prop is None:
                print("couldn't convert %s to float or int...skip" %
                      mol.GetProp(label),
                      file=sys.stderr)
                sys.stderr.flush()
                continue

            c = None
            for k, predicate in lambdas.items():
                if predicate(prop):
                    c = k
                    print("hit %s" % k)
                    sys.stdout.flush()
                    break
            if c is None:
                print("%d no prop range matched '%s' ..skip" %
                      (counter, mol.GetProp(label)),
                      prop,
                      type(prop),
                      file=sys.stderr)
                sys.stderr.flush()
                sys.stdout.flush()
                continue

            mol.SetProp(new_label, c)
            # The two write-failure messages report a one-based record number.
            record_no = counter + 1
            try:
                sdw.write(mol)
            except Exception:
                print(
                    "couldn't write mol %d to file, try to build mol from smiles"
                    % record_no,
                    file=sys.stderr)
                # Every step of the fallback is guarded: a missing SMILES
                # property or an unparsable SMILES string must only skip this
                # molecule, not abort the whole run.
                try:
                    rebuilt = MolFromSmiles(mol.GetProp("SMILES"))
                    if rebuilt is None:
                        raise ValueError("SMILES could not be parsed")
                    AllChem.Compute2DCoords(rebuilt)
                    rebuilt.SetProp(new_label, c)
                    sdw.write(rebuilt)
                except Exception:
                    print("couldn't write mol %d to file...skip" % record_no,
                          file=sys.stderr)
    finally:
        # Close the writer even when an unexpected error escapes the loop.
        sdw.close()

Esempio n. 2

Mostra file

File: UnitTestInchi.py Progetto: zealseeker/rdkit

 def setUp(self):
   """Prepare the 'problematic' molecule supplier and its reference data.

   Stores a ForwardSDMolSupplier over ``pubchem-hard-set.sdf.gz`` in
   ``self.dataset['problematic']`` and the unpickled content of
   ``pubchem-hard-set.inchi`` in ``self.dataset_inchi['problematic']``,
   then disables the ``rdApp.warning`` log channel.
   """
   self.dataset = dict()
   self.dataset_inchi = dict()
   # The gzip handle is not closed here: it is handed to the supplier, which
   # is stored on the instance for later use.
   inf = gzip.open(os.path.join(RDConfig.RDCodeDir, 'Chem/test_data', 'pubchem-hard-set.sdf.gz'),
                   'r')
   self.dataset['problematic'] = ForwardSDMolSupplier(inf, sanitize=False, removeHs=False)
   # The pickle is read in text mode, line endings are normalised to '\n',
   # and the text is encoded back to bytes with latin1 before unpickling.
   with open(os.path.join(RDConfig.RDCodeDir, 'Chem/test_data', 'pubchem-hard-set.inchi'),
             'r') as intF:
     buf = intF.read().replace('\r\n', '\n').encode('latin1')
   self.dataset_inchi['problematic'] = pickle.loads(buf, encoding='latin1')
   # disable logging
   RDLogger.DisableLog('rdApp.warning')

Esempio n. 3

Mostra file

File: molecule_properties_tools_fillDB.py Progetto: GPCRmd/GPCRmd

def open_molecule_file(uploadedfile, logfile=os.devnull, filetype=None):
    """Load the single molecule contained in an uploaded SDF or MOL file.

    Parameters
    ----------
    uploadedfile : file-like object
        Must provide ``name``, ``seek`` and accept a ``filetype`` attribute.
    logfile : path, optional
        Target that stdout and stderr are redirected to while the molecule
        is parsed. Defaults to ``os.devnull``.
    filetype : str, optional
        ``'sdf'`` or ``'mol'``. When omitted it is looked up in
        ``MOLECULE_EXTENSION_TYPES`` from the extension of
        ``uploadedfile.name`` and stored on ``uploadedfile.filetype``.

    Returns
    -------
    The molecule read from the file, after
    ``AssignAtomChiralTagsFromStructure`` has been applied to it.

    Raises
    ------
    InvalidMoleculeFileExtension
        The file extension is not a key of ``MOLECULE_EXTENSION_TYPES``.
    MultipleMoleculesinSDF
        The file holds more than one record.
    ParsingError
        The file type is unsupported, or no molecule could be read.
    """
    if filetype is None:
        # NOTE(review): `"filetype" not in uploadedfile` is a membership test
        # on the file object itself, not an attribute check; presumably
        # hasattr()/getattr() was intended. Left unchanged - confirm against
        # the callers before altering it.
        if "filetype" not in uploadedfile or uploadedfile.filetype is None:
            _, ext = os.path.splitext(uploadedfile.name)
            ext = ext.lower().strip('.')
            if ext not in MOLECULE_EXTENSION_TYPES:
                raise InvalidMoleculeFileExtension(ext=ext)
            filetype = MOLECULE_EXTENSION_TYPES[ext]
            uploadedfile.filetype = filetype
        else:
            filetype = uploadedfile.filetype

    # Only these two types are parsed below; any other value used to fall
    # through and fail later with an UnboundLocalError on `mol`.
    if filetype not in ('sdf', 'mol'):
        raise ParsingError("Unsupported molecule file type: %s" % filetype)

    with stdout_redirected(to=logfile, stdout=sys.stderr):
        with stdout_redirected(to=logfile, stdout=sys.stdout):
            print('Loading molecule...')
            uploadedfile.seek(0)

            suppl = ForwardSDMolSupplier(uploadedfile, removeHs=False)
            # An empty file yields None here instead of leaking StopIteration
            # to the caller; it is reported as a ParsingError below.
            mol = next(suppl, None)
            try:
                next(suppl)
            except StopIteration:
                pass
            else:
                # A second record exists: exactly one molecule is allowed.
                raise MultipleMoleculesinSDF()
            finally:
                del suppl
            if mol is None:
                if filetype == 'sdf':
                    raise ParsingError("Invalid SDFile file.")
                else:
                    raise ParsingError("Invalid MDL Mol file.")
            print('Assigning chirality from struture...')
            AssignAtomChiralTagsFromStructure(mol, replaceExistingTags=False)
            print('Finished loading molecule.')

    return mol

Esempio n. 4

Mostra file

def read_sdf(sdf_file, requires_length=False):
    """Wrap an open SD file in a molecule supplier.

    Parameters
    ----------
    sdf_file: A file-like object
    requires_length: When True the record count is determined
        with ``sdf_count`` and the file is rewound, so that the
        returned supplier also carries the count (e.g. for
        progress monitoring)

    Returns
    -------
    an EnumeratedMolSupplier when ``requires_length`` is True,
    otherwise a MolSupplier
    """

    mol_stream = ForwardSDMolSupplier(sdf_file)
    if requires_length:
        n_records = sdf_count(sdf_file)
        # Rewind after sdf_count has been given the file.
        sdf_file.seek(0)
        return EnumeratedMolSupplier(mol_stream, n_records)
    return MolSupplier(mol_stream)

Esempio n. 5

Mostra file

File: sdf.py Progetto: tsenapathi/ScaffoldGraph

def read_sdf(sdf_file, requires_length=False):
    """Create a molecule supplier for an open SDF.

    Parameters
    ----------
    sdf_file : file-like object
        An open SDF.
    requires_length : bool, optional
        If True the number of records is obtained from
        ``sdf_count``, the file is rewound, and the returned
        supplier is given that count (e.g. for progress
        monitoring). The default is False.

    Returns
    -------
    EnumeratedMolSupplier if ``requires_length`` is True,
    otherwise MolSupplier

    """
    mol_stream = ForwardSDMolSupplier(sdf_file)
    if requires_length:
        n_records = sdf_count(sdf_file)
        # Rewind after sdf_count has been given the file.
        sdf_file.seek(0)
        return EnumeratedMolSupplier(mol_stream, n_records)
    return MolSupplier(mol_stream)

Esempio n. 6

Mostra file

File: moleculePipeline.py Progetto: Andy-Wilkinson/ChemMLToolkit

 def _read_sdf() -> Iterator[Mol]:
     """Return an iterator over a ForwardSDMolSupplier built on ``fileobj``.

     ``fileobj`` and ``removeHs`` are taken from the enclosing scope.
     """
     return iter(ForwardSDMolSupplier(fileobj, removeHs=removeHs))

Esempio n. 7

Mostra file

File: preprocess_pubchem.py Progetto: Bhuvanesh09/CASCADE

import warnings
from tqdm import tqdm

import gzip
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import ForwardSDMolSupplier

from itertools import islice

from nfp.preprocessing import MolAPreprocessor, GraphSequence

# Collect one (integer id, molecule, atom count) tuple per record that the
# supplier returned as a truthy molecule; the id is parsed from '_Name'.
mols = []
with gzip.open('../../../../data/DFT8K/DFT.sdf.gz', 'r') as sdfile:
    mol_supplier = ForwardSDMolSupplier(sdfile, removeHs=False, sanitize=False)
    for mol in tqdm(mol_supplier):
        if not mol:
            continue
        mols.append((int(mol.GetProp('_Name')), mol, mol.GetNumAtoms()))

# Table of molecules indexed by mol_id.
mols = pd.DataFrame(
    mols, columns=['mol_id', 'Mol', 'n_atoms']).set_index('mol_id', drop=True)

df = pd.read_csv('../../../../data/DFT8K/DFT8K.csv.gz', index_col=0)
# Keep only the rows whose atom_type equals 6.
# NOTE(review): the original comment said "only choose C and H", but this
# filter keeps a single atom_type value - confirm the intent.
df = df.loc[df.atom_type == 6]

# Attach to every remaining row the molecule matching its mol_id.
df['Mol'] = mols.reindex(df.mol_id).Mol.values

grouped_df = df.groupby(['mol_id'])
df_Shift = []

Esempio n. 8

Mostra file

File: preprocess_qm9.py Progetto: anuprulez/nfp

import gzip
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import ForwardSDMolSupplier

from itertools import islice

from nfp.preprocessing import MolPreprocessor, GraphSequence
from sklearn.preprocessing import RobustScaler

df = pd.read_csv('../data/qm9.csv.gz')
# Re-index the table with 'gdb_<index>' strings built from the 'index' column.
df.index = df['index'].apply(lambda x: 'gdb_{}'.format(x))

mols = []
# Used only as the progress-bar total.
total_mols = len(df)

# The gzip handle is opened in a context manager so it is closed once all
# records have been consumed (it was previously left open).
with gzip.open('../data/gdb9.sdf.gz') as f:
    mol_supplier = ForwardSDMolSupplier(f, removeHs=False)
    for mol in tqdm(mol_supplier, total=total_mols):
        if mol:
            mols.append((mol.GetProp('_Name'), mol, mol.GetNumAtoms()))

mols = pd.DataFrame(mols, columns=['mol_id', 'Mol', 'n_atoms'])

# Split: 10000 test molecules, 10000 validation molecules drawn from the
# rest, and every remaining molecule (shuffled via sample(frac=1.)) as train.
test = mols.sample(10000, random_state=0)
valid = mols[~mols.mol_id.isin(test.mol_id)].sample(10000, random_state=0)
train = mols[(~mols.mol_id.isin(test.mol_id)
              & ~mols.mol_id.isin(valid.mol_id))].sample(frac=1.,
                                                         random_state=0)