def add_with_rdkit(self, filename, filetype, strict=False, **kwargs):
        """Read a molecule from ``filename`` with RDKit and store it as a PDB block.

        The molecule is parsed according to ``filetype``, explicit hydrogens
        are added, and the resulting PDB block string is appended to
        ``self.molstr``.

        Parameters
        ----------
        filename : str
            Path of the file holding the molecule.
        filetype : str
            One of ``"inchi"``, ``"mol2"``, ``"mol"``, ``"pdb"``, ``"smi"``,
            ``"smiles"``, ``"tpl"`` or ``"auto"``.  ``"auto"`` derives the
            type from the file extension.
        strict : bool
            When true, unsupported file types and unparsable files raise
            ``IOError``; otherwise a warning is printed and nothing is added.
        **kwargs
            Accepted for interface compatibility; not used here.

        Raises
        ------
        IOError
            Only when ``strict`` is true (see above), or when the file cannot
            be opened.
        """
        if filetype == "auto":
            # splitext() keeps the leading dot (".mol"); strip it so the
            # extension can match the bare names compared against below.
            filetype = os.path.splitext(filename)[1].lstrip(".").lower()

        if filetype == "smarts":
            if strict:
                raise IOError("Smarts is pattern, smiles for molecules.")
            print("WARNING: Use smiles, ignoring smarts.")
            return

        if filetype in ("inchi", "smi", "smiles"):
            # These parsers take the text itself rather than a path.
            with open(filename, "r") as inputfile:
                text = inputfile.read().strip()
            if filetype == "inchi":
                rdmol = Chem.MolFromInchi(text)
            else:
                rdmol = Chem.MolFromSmiles(text)
        elif filetype == "mol2":
            rdmol = Chem.MolFromMol2File(filename)
        elif filetype == "mol":
            rdmol = Chem.MolFromMolFile(filename)
        elif filetype == "pdb":
            rdmol = Chem.MolFromPDBFile(filename)
        elif filetype == "tpl":
            rdmol = Chem.MolFromTPLFile(filename)
        else:
            if strict:
                raise IOError("Filetype (%s) not in rdkit." % filetype)
            print("WARNING: Could not filetype %s." % filetype)
            return

        # RDKit parsers signal failure by returning None instead of raising.
        if rdmol is None:
            message = "Could not parse %s as %s." % (filename, filetype)
            if strict:
                raise IOError(message)
            print("WARNING: " + message)
            return

        rdmol = Chem.AddHs(rdmol)
        self.molstr.append(Chem.MolToPDBBlock(rdmol))
    def _with_canonical_atom_ordering(self: _T) -> _T:
        """
        Put the atoms of this molecule, and its building blocks, into
        canonical order.

        The instance is modified in place: ``_num_building_blocks``,
        ``_atom_infos`` and ``_bond_infos`` are rebuilt so that they
        refer to the canonically ordered building blocks and atoms.

        Returns
        -------
        The same instance (``self``).

        """
        # Make all building blocks canonically ordered too.
        # Maps each original building block to its canonical version.
        building_blocks = {
            building_block:
                building_block.with_canonical_atom_ordering()

            for building_block in self._num_building_blocks
        }

        # Cache these mappings for later, to avoid unnecessary
        # re-computations of canonical ordering.
        # NOTE(review): indexed below by the original building block
        # atom id, so presumably maps old atom id -> canonical atom id;
        # confirm against get_canonical_atom_ids().
        canonical_map = {
            building_block: building_block.get_canonical_atom_ids()
            for building_block in self._num_building_blocks
        }

        # Re-key the counts by the canonical building blocks. Both
        # dicts were built by iterating self._num_building_blocks, so
        # zipping their values pairs each canonical block with the
        # count of its original.
        self._num_building_blocks = {
            building_block: num
            for building_block, num
            in zip(
                building_blocks.values(),
                self._num_building_blocks.values(),
            )
        }

        ordering = rdkit.CanonicalRankAtoms(self.to_rdkit_mol())
        # Maps the ordering entry of each atom to the id the atom has
        # before the superclass call below.
        id_map = {
            new_id: atom.get_id()
            for new_id, atom in zip(ordering, self._atoms)
        }
        # NOTE(review): presumably reorders self._atoms canonically;
        # the lookups into self._atoms below rely on that - confirm.
        super()._with_canonical_atom_ordering()
        # Maps each position in ``ordering`` to the atom found at the
        # corresponding index of self._atoms after the superclass call.
        atom_map = {
            old_id: self._atoms[new_id]
            for old_id, new_id in enumerate(ordering)
        }
        # Keep the pre-update infos; get_atom_info() reads from them
        # while self._atom_infos is being replaced.
        old_atom_infos = self._atom_infos

        def get_atom_info(atom: Atom) -> AtomInfo:
            # Build the AtomInfo for ``atom``, pointing at the
            # canonical building block and building block atom.

            old_atom_info = old_atom_infos[id_map[atom.get_id()]]
            old_building_block = old_atom_info.get_building_block()

            # The atom has no building block, so there is nothing to
            # translate.
            if old_building_block is None:
                return AtomInfo(
                    atom=atom,
                    building_block_atom=None,
                    building_block=None,
                    building_block_id=None,
                )

            old_building_block_atom = (
                old_atom_info.get_building_block_atom()
            )

            canonical_building_block_atom_id = canonical_map[
                old_building_block
            ][old_building_block_atom.get_id()]

            canonical_building_block = building_blocks[
                old_building_block
            ]

            # Exactly one atom is expected for a single id; the
            # trailing-comma unpacking fails otherwise.
            canonical_building_block_atom, = (
                canonical_building_block.get_atoms(
                    atom_ids=canonical_building_block_atom_id,
                )
            )

            return AtomInfo(
                atom=atom,
                building_block_atom=canonical_building_block_atom,
                building_block=canonical_building_block,
                building_block_id=(
                    old_atom_info.get_building_block_id()
                ),
            )

        def get_bond_info(info: BondInfo) -> BondInfo:
            # Build the BondInfo for the bond re-pointed at the atoms
            # in ``atom_map`` and at the canonical building block.
            building_block = info.get_building_block()
            return BondInfo(
                bond=_utilities.sort_bond_atoms_by_id(
                    info.get_bond().with_atoms(atom_map)
                ),
                building_block=(
                    building_block
                    if building_block is None
                    else building_blocks[building_block]
                ),
                building_block_id=info.get_building_block_id(),
            )

        self._atom_infos = tuple(map(get_atom_info, self._atoms))
        # Bond infos are additionally sorted using the key function
        # _utilities.get_bond_info_atom_ids.
        self._bond_infos = tuple(sorted(
            map(get_bond_info, self._bond_infos),
            key=_utilities.get_bond_info_atom_ids,
        ))
        return self
    def testMany(self):
        """Build a store from ``many_smiles`` and check its lookups.

        Verifies name lookup, the stored descriptor rows, dictionary
        access to one row, InChIKey lookup, and that stored descriptors
        equal freshly computed ones.
        """
        # A private temporary directory replaces tempfile.mktemp(), which is
        # deprecated and race-prone.  It also makes cleanup unconditional:
        # the old ``finally`` referenced names that might not be bound yet.
        tmpdir = tempfile.mkdtemp()
        try:
            fname = os.path.join(tmpdir, "many.smi")
            storefname = os.path.join(tmpdir, "many.store")
            with open(fname, 'w') as f:
                f.write(many_smiles)

            opts = make_store.MakeStorageOptions(storage=storefname,
                                                 smilesfile=fname,
                                                 hasHeader=False,
                                                 smilesColumn=0,
                                                 nameColumn=1,
                                                 seperator=" ",
                                                 descriptors="RDKit2DSubset",
                                                 index_inchikey=True)
            make_store.make_store(opts)

            # Second field of the expected descriptor row for entries 0-9.
            expected_values = (
                78.046950192,
                92.062600256,
                106.07825032,
                120.093900384,
                134.109550448,
                148.125200512,
                162.140850576,
                176.15650064,
                190.172150704,
                204.187800768,
            )

            with contextlib.closing(DescriptaStore(storefname)) as store:

                for i in range(10):
                    self.assertEqual(store.lookupName(str(i)), i)

                for i, value in enumerate(expected_values):
                    self.assertEqual(store.descriptors().get(i),
                                     (True, value, 0.0, 1.0, 0.0, 1.0))
                # Entry 10 is expected to be flagged as failed (all zeros).
                self.assertEqual(store.descriptors().get(10),
                                 (False, 0.0, 0.0, 0.0, 0.0, 0.0))

                self.assertEqual(
                    store.descriptors().getDict(7),
                    toDict((True, 176.15650064, 0.0, 1.0, 0.0, 1.0)))

                calc = store.getDescriptorCalculator()

                for i in range(10):
                    m = store.molIndex().getRDMol(i)
                    sm = AllChem.MolToSmiles(m)
                    inchi = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                    self.assertEqual(store.lookupInchiKey(inchi), [i])
                    v = store.descriptors().get(i)
                    sv = tuple(calc.process(sm))
                    self.assertEqual(v, sv)

        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from sklearn.ensemble import RandomForestClassifier
import numpy
import numpy as np

# Four small example molecules (parsed from the SMILES below) and their
# Morgan bit-vector fingerprints computed with radius 2.
m1, m2, m3, m4 = map(
    Chem.MolFromSmiles,
    ('c1ccccc1', 'c1ccccc1CC', 'c1ccncc1', 'c1ccncc1CC'),
)
mols = [m1, m2, m3, m4]
fps = [
    AllChem.GetMorganFingerprintAsBitVect(molecule, 2)
    for molecule in mols
]


from rdkit.Avalon.pyAvalonTools import GetAvalonFP
from rdkit.Chem import AllChem
import pandas as pd  # used below; it was never imported in this script

y_name = 'boiling_point'
fingerprint_type = 0  # 0: MACCS key, 1: RDKit, 2: Morgan (approx. ECFP4), 3: Avalon

sdf = Chem.SDMolSupplier('boiling_point.sdf')  # load the SDF file

# Fingerprint calculation.
# For every molecule, append the property value to y, the computed
# fingerprint to fingerprints, and the SMILES string to smiles.
fingerprints, y, smiles = [], [], []
print('分子の数 :', len(sdf))
for index, molecule in enumerate(sdf):
    print(index + 1, '/', len(sdf))
    if molecule is None:
        # SDMolSupplier yields None for records RDKit cannot parse; skip
        # them instead of crashing on molecule.GetProp().
        print('WARNING: skipping unreadable record', index + 1)
        continue
    if fingerprint_type == 0:
        fingerprint = AllChem.GetMACCSKeysFingerprint(molecule)
    elif fingerprint_type == 1:
        fingerprint = Chem.RDKFingerprint(molecule)
    elif fingerprint_type == 2:
        fingerprint = AllChem.GetMorganFingerprintAsBitVect(molecule, 2, nBits=2048)
    elif fingerprint_type == 3:
        fingerprint = GetAvalonFP(molecule)
    else:
        # Previously an unknown type silently produced no fingerprints and
        # failed later with an index/shape mismatch.
        raise ValueError('fingerprint_type must be 0, 1, 2 or 3')
    y.append(float(molecule.GetProp(y_name)))
    smiles.append(Chem.MolToSmiles(molecule))
    fingerprints.append(fingerprint)
fingerprints = pd.DataFrame(np.array(fingerprints, int), index=smiles)
y = pd.DataFrame(y, index=smiles, columns=[y_name])

# Save.
fingerprints_with_y = pd.concat([y, fingerprints], axis=1)  # join y with the descriptors
# Written as CSV; note that an existing file with the same name is overwritten.
fingerprints_with_y.to_csv('fingerprints_with_y.csv')
Beispiel #6
0
from rdkit.Chem import MACCSkeys
from rdkit.Chem.Fingerprints import FingerprintMols

import filter
from pipelines.utils import utils

### start field name definitions #########################################

# Name of the field used for similarity values.
# NOTE(review): where the field is written is not visible in this snippet.
field_Similarity = "Similarity"

### start main execution #########################################

# Maps a descriptor name to a callable that takes a molecule ``m`` and
# returns the corresponding RDKit fingerprint.  The commented-out entries
# refer to modules (Pairs, Torsions) that are not imported in this snippet.
descriptors = {
    #'atompairs':   lambda m: Pairs.GetAtomPairFingerprint(m),
    'maccs': lambda m: MACCSkeys.GenMACCSKeys(m),
    # Morgan fingerprints with radius 2 and radius 3 respectively.
    'morgan2': lambda m: AllChem.GetMorganFingerprint(m, 2),
    'morgan3': lambda m: AllChem.GetMorganFingerprint(m, 3),
    'rdkit': lambda m: FingerprintMols.FingerprintMol(m),
    #'topo':        lambda m: Torsions.GetTopologicalTorsionFingerprint(m)
}

metrics = {
    'asymmetric': DataStructs.AsymmetricSimilarity,
    'braunblanquet': DataStructs.BraunBlanquetSimilarity,
    'cosine': DataStructs.CosineSimilarity,
    'dice': DataStructs.DiceSimilarity,
    'kulczynski': DataStructs.KulczynskiSimilarity,
    'mcconnaughey': DataStructs.McConnaugheySimilarity,
    #'onbit':DataStructs.OnBitSimilarity,
    'rogotgoldberg': DataStructs.RogotGoldbergSimilarity,
    'russel': DataStructs.RusselSimilarity,
Beispiel #7
0
if options.verbose:
    print("Generating a maximum of", options.maxconfs, "per a mol")

# getattr() lets a missing ETKDG attribute reach the message below;
# plain attribute access raised AttributeError before it could print.
if options.etkdg and not getattr(Chem, 'ETKDG', None):
    print("ETKDG does not appear to be implemented.  Please upgrade RDKit.")
    sys.exit(1)

# Open the output stream, transparently gzip-compressing when the name ends
# in .gz.  The .gz suffix is stripped so the real format extension can be
# inspected next.
split = os.path.splitext(output)
if split[1] == '.gz':
    outf = gzip.open(output, 'wt+')
    output = split[0]  #strip .gz
else:
    outf = open(output, 'w+')

# Choose the writer from the (de-gzipped) extension: PDB, otherwise SDF.
if os.path.splitext(output)[1] == '.pdb':
    sdwriter = Chem.PDBWriter(outf)
else:
    sdwriter = Chem.SDWriter(outf)

if sdwriter is None:
    # Was ``"Could not open ".output`` (Perl-style concatenation), which
    # raises AttributeError in Python.
    print("Could not open " + output)
    sys.exit(-1)

# Each input line is split on whitespace: the first token is the SMILES,
# the remaining tokens (re-joined with single spaces) form the name.
# NOTE(review): a blank line makes toks empty, so toks[0] would raise
# IndexError - confirm whether the input can contain blank lines.
for line in smifile:
    toks = line.split()
    smi = toks[0]
    name = ' '.join(toks[1:])

    # A '.' in SMILES separates disconnected components; when there are
    # several, keep only the one with the longest SMILES string.
    pieces = smi.split('.')
    if len(pieces) > 1:
        smi = max(pieces, key=len)  #take largest component by length
Beispiel #8
0
def mol_from_mol_file(mol_file):
    """
    Build an rdkit molecule by parsing a V3000 ``.mol`` file.

    Atoms (with their coordinates) are read from the ``M  V30`` atom
    block and bonds from the ``M  V30`` bond block.

    Parameters
    ----------
    mol_file : :class:`str`
        The full path of the ``.mol`` file from which an rdkit
        molecule should be instantiated.

    Returns
    -------
    :class:`rdkit.Mol`
        An rdkit instance of the molecule held in `mol_file`, with a
        single conformer holding the atomic positions.

    Raises
    ------
    :class:`ChargedMolError`
        If an atom row has more than 8 columns. This is usually
        because a 9th column holds an atomic charge. Such molecules
        are not currently supported.

    :class:`MolFileError`
        If the file is not a V3000 ``.mol`` file.

    """

    editable = rdkit.EditableMol(rdkit.Mol())
    conformer = rdkit.Conformer()

    with open(mol_file, 'r') as handle:
        in_atom_block = False
        in_bond_block = False
        is_v3000 = False

        for row in handle:
            # The version tag is checked independently of the block
            # markers handled below.
            if 'V3000' in row:
                is_v3000 = True

            if 'M  V30 BEGIN ATOM' in row:
                in_atom_block = True
            elif 'M  V30 END ATOM' in row:
                in_atom_block = False
            elif 'M  V30 BEGIN BOND' in row:
                in_bond_block = True
            elif 'M  V30 END BOND' in row:
                in_bond_block = False
            elif in_atom_block:
                fields = row.split()
                if len(fields) > 8:
                    raise ChargedMolError(
                        mol_file,
                        'Atom row has more than 8 coloumns. '
                        'Likely due to a charged atom.',
                    )
                # Fourth field is the element symbol; the fields between
                # it and the last one are the coordinates.
                _, _, _, symbol, *position, _ = fields
                position = list(map(float, position))
                point = Point3D(*position)
                index = editable.AddAtom(rdkit.Atom(symbol))
                conformer.SetAtomPosition(index, point)
            elif in_bond_block:
                # The last three fields are bond order and the two
                # 1-based atom indices.
                *_, _bond_id, order, begin, end = row.split()
                editable.AddBond(int(begin)-1, int(end)-1,
                                 bond_dict[order])

    if not is_v3000:
        raise MolFileError(mol_file, 'Not a V3000 .mol file.')

    result = editable.GetMol()
    result.AddConformer(conformer)
    return result
Beispiel #9
0
 def IsReactionTemplateMoleculeAgent(self, mol, agentThreshold):
     '''
     Delegate to ``AllChem.IsReactionTemplateMoleculeAgent`` to decide whether
     ``mol`` counts as an agent for the given ``agentThreshold``.
     :return: the result of the RDKit call
     '''
     is_agent = AllChem.IsReactionTemplateMoleculeAgent(
         mol,
         agentThreshold=agentThreshold,
     )
     return is_agent
Beispiel #10
0
    def HasReactionSubstructMatch(self, queryReaction):
        '''
        Delegate to ``AllChem.HasReactionSubstructMatch`` with the wrapped
        reaction ``self.rxn`` and ``queryReaction``.
        :return: the result of the RDKit call
        '''
        reaction = self.rxn
        matched = AllChem.HasReactionSubstructMatch(reaction, queryReaction)
        return matched
Beispiel #11
0
 def CreateStructuralFingerprintForReaction(self):
     '''
     Delegate to ``AllChem.CreateStructuralFingerprintForReaction`` for the
     wrapped reaction ``self.rxn``.
     :return: the fingerprint returned by the RDKit call
     '''
     return AllChem.CreateStructuralFingerprintForReaction(self.rxn)
Beispiel #12
0
 def CreateDifferenceFingerprintForReaction(self):
     '''
     Delegate to ``AllChem.CreateDifferenceFingerprintForReaction`` for the
     wrapped reaction ``self.rxn``.
     :return: the fingerprint returned by the RDKit call
     '''
     return AllChem.CreateDifferenceFingerprintForReaction(self.rxn)
Beispiel #13
0
 def Compute2DCoordsForReaction(self):
     '''
     Call ``AllChem.Compute2DCoordsForReaction`` on the wrapped reaction
     ``self.rxn``.
     :return: None
     '''
     reaction = self.rxn
     AllChem.Compute2DCoordsForReaction(reaction)
Beispiel #14
0
    def RemoveMappingNumbersFromReactions(self):
        '''
        Call ``AllChem.RemoveMappingNumbersFromReactions`` on the wrapped
        reaction ``self.rxn``.
        :return: None
        '''
        reaction = self.rxn
        AllChem.RemoveMappingNumbersFromReactions(reaction)
Beispiel #15
0
    def ReactionToSmiles(self):
        '''
        Delegate to ``AllChem.ReactionToSmiles`` for the wrapped reaction
        ``self.rxn``.
        :return: the result of the RDKit call
        '''
        reaction_smiles = AllChem.ReactionToSmiles(self.rxn)
        return reaction_smiles
Beispiel #16
0
def mol_from_mae_file(mae_path):
    """
    Build an ``rdkit`` molecule by parsing a ``.mae`` file.

    The file is split on curly braces; the block following the
    ``m_atom[`` header supplies atoms and coordinates, the block
    following the ``m_bond[`` header supplies bonds.

    Parameters
    ----------
    mae_path : :class:`str`
        The full path of the ``.mae`` file from which an rdkit molecule
        should be instantiated.

    Returns
    -------
    :class:`rdkit.Mol`
        An ``rdkit`` instance of the molecule held in `mae_path`, with
        a single conformer holding the atomic positions.

    Raises
    ------
    :class:`RuntimeError`
        If a data row does not have one value per label.

    """

    def split_section(section):
        # A section is "<labels>:::<rows>[:::...]"; blank and
        # whitespace-only lines are dropped from both parts.
        header, body, *_ = section.split(':::')
        names = [
            name for name in header.split('\n')
            if name != '' and not name.isspace()
        ]
        rows = [
            row.split() for row in body.split('\n')
            if row != '' and not row.isspace()
        ]
        return names, rows

    mismatch_message = (
        'Number of labels does not match number of columns in .mae file.'
    )

    mol = rdkit.EditableMol(rdkit.Mol())
    conf = rdkit.Conformer()

    with open(mae_path, 'r') as mae:
        content = re.split(r'[{}]', mae.read())

    # A block belongs to the header text that immediately precedes it.
    previous = ''
    for block in content:
        if 'm_atom[' in previous:
            atom_block = block
        if 'm_bond[' in previous:
            bond_block = block
        previous = block

    atom_labels, atom_rows = split_section(atom_block)
    for row in atom_rows:
        # Lone double-quote tokens are not data columns.
        row = [word for word in row if word != '"']
        if len(atom_labels) != len(row):
            raise RuntimeError(mismatch_message)

        for label, value in zip(atom_labels, row):
            if 'x_coord' in label:
                x = float(value)
            if 'y_coord' in label:
                y = float(value)
            if 'z_coord' in label:
                z = float(value)
            if 'atomic_number' in label:
                atom_num = int(value)

        atom_id = mol.AddAtom(rdkit.Atom(periodic_table[atom_num]))
        conf.SetAtomPosition(atom_id, Point3D(x, y, z))

    bond_labels, bond_rows = split_section(bond_block)
    for row in bond_rows:
        if len(bond_labels) != len(row):
            raise RuntimeError(mismatch_message)

        for label, value in zip(bond_labels, row):
            # File indices are 1-based; rdkit atom indices are 0-based.
            if 'from' in label:
                atom1 = int(value) - 1
            if 'to' in label:
                atom2 = int(value) - 1
            if 'order' in label:
                bond_order = str(int(value))
        mol.AddBond(atom1, atom2, bond_dict[bond_order])

    mol = mol.GetMol()
    mol.AddConformer(conf)
    return mol
Beispiel #17
0
    args = parser.parse_args()

    filename = args.file

    data = pd.read_excel(filename)

    molatomtypes = {}
    atomtypesset = set()
    mollogd = {}

    errorscounter = 0
    errorssmiles = []
    dim = len(data["SMILES"])
    for idx, ss in enumerate(data["SMILES"]):
        start = time.time()
        name = data["NO"][idx]

        mol = Chem.MolFromSmiles(str(ss))

        AllChem.Compute2DCoords(mol)
        AllChem.EmbedMolecule(mol, randomSeed=0xf00d)
        Chem.Kekulize(mol)
        mol_3D = Chem.AddHs(mol)
        AllChem.EmbedMolecule(mol_3D, randomSeed=0xf00d)

        fout = Chem.SDWriter(name + ".mol")
        fout.write(mol_3D)
        fout.close()

        end = time.time()
Beispiel #18
0
 def testDrawReaction(self):
     """Smoke test: rendering this reaction must not raise."""
     reaction_smarts = (
         "[c;H1:3]1:[c:4]:[c:5]:[c;H1:6]:[c:7]2:[nH:8]:[c:9]:[c;H1:1]:[c:2]:1:2.O=[C:10]1[#6;H2:11][#6;H2:12][N:13][#6;H2:14][#6;H2:15]1>>[#6;H2:12]3[#6;H1:11]=[C:10]([c:1]1:[c:9]:[n:8]:[c:7]2:[c:6]:[c:5]:[c:4]:[c:3]:[c:2]:1:2)[#6;H2:15][#6;H2:14][N:13]3"
     )
     rxn = AllChem.ReactionFromSmarts(reaction_smarts)
     # Only the absence of an exception matters; the image is discarded.
     Draw.ReactionToImage(rxn)
Beispiel #19
0
#! /usr/local/bin/python

import sys, os
from rdkit import Chem
from rdkit.Chem import AllChem, ChemicalForceFields

# Read every molecule from the SD file given on the command line, add
# hydrogens, minimise with the MMFF force field and write the result, with
# the final energy stored in the "MMFF94" property ("ND" when unavailable).
# NOTE(review): Chem.AddHs() is called without addCoords=True, so the new
# hydrogens get no 3D coordinates - confirm that this is intended.
mols = Chem.SDMolSupplier(sys.argv[1])
mols = [Chem.AddHs(mol) for mol in mols if mol is not None]
print(len(mols))
output = Chem.SDWriter("conf_gen_mols.sdf")

try:
    for mol in mols:
        # Both calls return None when MMFF cannot be set up for the
        # molecule; treat that like a failed minimisation instead of
        # crashing.
        molprop = AllChem.MMFFGetMoleculeProperties(mol)
        field = None
        if molprop is not None:
            field = AllChem.MMFFGetMoleculeForceField(mol, molprop)

        # Minimize() returns 0 when the minimisation converged.
        if field is not None and field.Minimize() == 0:
            mol.SetProp("MMFF94", "%s" % field.CalcEnergy())
        else:
            mol.SetProp("MMFF94", "ND")
        output.write(mol)
finally:
    # Flush and close the writer even if a molecule fails part-way through.
    output.close()
Beispiel #20
0
def getRMS(mol, c1, c2):
    """Return ``Chem.GetBestRMS(mol, mol, c1, c2)``.

    The same molecule is passed as both molecules of the call.
    NOTE(review): c1 and c2 are presumably conformer ids of ``mol`` - confirm.
    """
    return Chem.GetBestRMS(mol, mol, c1, c2)
Beispiel #21
0
def load_toxcast_dataset(data_path, task_names=None, featurizer=None):
    """Load the toxcast dataset and optionally featurize every record.

    The data file contains a csv table, in which the columns below are used:

    :smiles:  SMILES representation of the molecular structure.
    :ACEA_T47D_80hr_Negative ~ "Tanguay_ZF_120hpf_YSE_up": Bioassay results
    :SR-XXX: Stress response bioassay results

    Label values are remapped before use: 0 becomes -1 and missing
    values become 0.  Rows whose SMILES cannot be parsed by RDKit, and
    rows for which the featurizer returns None, are dropped.

    Args:
        data_path(str): directory holding the csv file.  The first entry
            returned by ``os.listdir`` is read, so the directory is
            expected to contain only that file.
        task_names(list): a list of header names to specify the columns to
            fetch from the csv file.  Defaults to
            ``get_default_toxcast_task_names(data_path)``.
        featurizer(pahelix.featurizers.Featurizer): the featurizer to use for
            processing the data. If not None, ``Featurizer.gen_features``
            is applied to every raw record.

    Returns:
        an InMemoryDataset instance.

    Example:
        .. code-block:: python

            dataset = load_toxcast_dataset('./toxcast/raw')
            print(len(dataset))


    References:
    [1]Richard, Ann M., et al. "ToxCast chemical landscape: paving the road to 21st century toxicology." Chemical research in toxicology 29.8 (2016): 1225-1251.
    [2]please refer to the section "high-throughput assay information" at https://www.epa.gov/chemical-research/toxicity-forecaster-toxcasttm-data for details.

    """
    if task_names is None:
        task_names = get_default_toxcast_task_names(data_path)

    csv_name = os.listdir(data_path)[0]
    input_df = pd.read_csv(join(data_path, csv_name), sep=',')
    from rdkit.Chem import AllChem

    # Round-trip every SMILES through RDKit; entries that cannot be parsed
    # become None and are skipped below.
    rdkit_mol_objs_list = [
        AllChem.MolFromSmiles(s) for s in input_df['smiles']
    ]
    smiles_list = [
        AllChem.MolToSmiles(m) if m is not None else None
        for m in rdkit_mol_objs_list
    ]

    labels = input_df[task_names]
    labels = labels.replace(0, -1)  # convert 0 to -1
    labels = labels.fillna(0)  # convert nan to 0
    # Materialise the label matrix once rather than once per row.
    label_rows = labels.values

    data_list = []
    for smiles, label in zip(smiles_list, label_rows):
        if smiles is None:
            continue
        raw_data = {'smiles': smiles, 'label': label}

        if featurizer is not None:
            data = featurizer.gen_features(raw_data)
        else:
            data = raw_data

        if data is not None:
            data_list.append(data)

    return InMemoryDataset(data_list)
Beispiel #22
0
        elif operation == SubshapeCombineOperations.INTERSECT:
            cs.grid &= subshape2.grid
        else:
            raise ValueError('bad combination operation')
        return cs


if __name__ == '__main__':
    # Manual demo: build (or reload) a test molecule, generate its subshape
    # and dump the shape grid to a temporary file for viewing in PyMol.
    # The ``if 1:`` switches are developer toggles and are kept as they are.
    from rdkit.Chem import AllChem, ChemicalFeatures
    from rdkit.Chem.PyMol import MolViewer
    #cmpd = Chem.MolFromSmiles('CCCc1cc(C(=O)O)ccc1')
    #cmpd = Chem.AddHs(cmpd)
    if 1:
        cmpd = Chem.MolFromSmiles('C1=CC=C1C#CC1=CC=C1')
        cmpd = Chem.AddHs(cmpd)
        AllChem.EmbedMolecule(cmpd)
        AllChem.UFFOptimizeMolecule(cmpd)
        AllChem.CanonicalizeMol(cmpd)
        # ``file(...)`` is a Python 2 builtin that no longer exists; open()
        # in a with-block also guarantees the handle is closed.
        with open('testmol.mol', 'w+') as molfile:
            print(Chem.MolToMolBlock(cmpd), file=molfile)
    else:
        cmpd = Chem.MolFromMolFile('testmol.mol')
    builder = SubshapeBuilder()
    if 1:
        shape = builder.GenerateSubshapeShape(cmpd)
    v = MolViewer()
    if 1:
        import tempfile
        # NOTE(review): tempfile.mktemp is deprecated and race-prone; kept
        # because only a file name is needed here.
        tmpFile = tempfile.mktemp('.grd')
        v.server.deleteAll()
        Geometry.WriteGridToFile(shape.grid, tmpFile)
        time.sleep(1)
Beispiel #23
0
    def get_prediction_template(self):
        '''
            This function creates a tabular model template based
            on the QMRF document type.

            It loads the pickled prediction results of the model, then
            writes two files in the current working directory:
            'summary_document.tsv' (compound names and InChIKeys) and
            'prediction_report.tsv' (one row per compound).

            Raises:
                Exception: if the results file does not exist.
                NotImplementedError: for quantitative endpoints.
        '''
        # obtain the path and the default name of the results file
        results_file_path = utils.model_path(self.model, self.version)
        results_file_name = os.path.join(results_file_path,
                                         'prediction-results.pkl')
        conveyor = Conveyor()
        # load the prediction results into the conveyor
        if not os.path.isfile(results_file_name):
            raise Exception('Results file not found')
        with open(results_file_name, "rb") as input_file:
            conveyor.load(input_file)

        # First get Name, Inchi and InChIkey

        names = conveyor.getVal('obj_nam')
        smiles = conveyor.getVal('SMILES')
        inchi = [AllChem.MolToInchi(AllChem.MolFromSmiles(m)) for m in smiles]
        # Derive the keys from the InChI strings computed above instead of
        # parsing every SMILES a second time.
        inchikeys = [AllChem.InchiToInchiKey(value) for value in inchi]

        if self.parameters['quantitative']['value']:
            # ``raise ('...')`` raised a TypeError (a str is not an
            # exception) instead of reporting the real problem.
            raise NotImplementedError(
                'Prediction template for quantitative endpoints'
                ' not implemented yet')

        applicability = []
        if not self.parameters['conformal']['value']:
            predictions = conveyor.getVal('values')
            # No applicability information without conformal prediction; use
            # the same '-' placeholder as the other unknown columns so the
            # column has one entry per compound.
            applicability = ['-'] * len(predictions)
        else:
            c0 = np.asarray(conveyor.getVal('c0'))
            c1 = np.asarray(conveyor.getVal('c1'))

            predictions = []
            for i, j in zip(c0, c1):
                if i == j:
                    # Both classes agree: outside the applicability domain.
                    predictions.append('out of AD')
                    applicability.append('out')
                else:
                    # Kept as an equality test to preserve the original
                    # semantics for non-boolean values.
                    if i == True:  # noqa: E712
                        predictions.append('Inactive')
                    else:
                        predictions.append('Active')
                    applicability.append('in')

        # Now create the spreadsheets for prediction

        # First write summary
        summary = ("Study name\n" + "Endpoint\n" + "QMRF-ID\n" +
                   "(Target)Compounds\n" +
                   "Compounds[compounds]\tName\tInChiKey\n")

        summary += ''.join(
            f'\t{name}\t{inch}\n' for name, inch in zip(names, inchikeys))

        summary += ("\nFile\n" + "Author name\n" + "E-mail\n" + "Role\n" +
                    "Affiliation\n" + "Date\n")

        with open('summary_document.tsv', 'w') as out:
            out.write(summary)

        # Now prediction details
        # Pandas is used to ease the table creation.

        reporting = pd.DataFrame()

        reporting['InChI'] = inchi
        reporting['CAS-RN'] = '-'
        reporting['SMILES'] = smiles
        reporting['prediction'] = predictions
        reporting['Applicability_domain'] = applicability
        reporting['reliability'] = '-'
        # Placeholder columns for three structural analogues.
        for analogue in (1, 2, 3):
            for field in ('CAS', 'smiles', 'source', 'experimental_value'):
                reporting[f'Structural_analogue_{analogue}_{field}'] = '-'

        reporting.to_csv('prediction_report.tsv', sep='\t', index=False)
Beispiel #24
0
def render_pdb_data(target_id,
                    test,
                    num_samps,
                    this_item,
                    this_samp,
                    file_name,
                    react_anal_id,
                    method_id,
                    type_id,
                    opt=None):
    """Store one grid value per sample point and write them as PDB records.

    For every entry of ``this_samp`` a single-atom molecule is placed at
    the probe's coordinates, the value is written into the temperature
    factor (B-factor) of that atom, and the result is saved to a
    ``GPVal`` row (PDB and SDF text).  All PDB blocks are finally written
    to ``file_name``.

    When ``opt`` is truthy each entry of ``this_samp`` is a sequence
    ``(x, y, z, intensity)``; otherwise ``this_samp`` maps PlifProbe
    primary keys to the value to store.
    """
    # Define this histogram
    new_hist = Histogram.objects.get_or_create(test_made=test,
                                               num_samps=num_samps,
                                               hist_title=this_item)[0]
    mol = Chem.MolFromSmiles("C")
    pdb_blocks = []
    for item in this_samp:
        # First get the PlifProbe if we need
        if opt:
            pp = PlifProbe()
            pp.x_com = item[0]
            pp.y_com = item[1]
            pp.z_com = item[2]
            pp.intensity = item[3]
        else:
            pp = PlifProbe.objects.get(pk=item)
        # Make an editable molecule: swap the template carbon for a single
        # atom with atomic number 11.
        em = AllChem.EditableMol(mol)
        em.RemoveAtom(0)
        em.AddAtom(Chem.Atom(11))
        gm = em.GetMol()
        Chem.SanitizeMol(gm)
        # Embedding creates the conformer whose position is overwritten below.
        AllChem.EmbedMolecule(gm)
        cnf = gm.GetConformer()
        sp = AllChem.rdGeometry.Point3D()
        # Now add the coords
        sp.x = pp.x_com
        x_char = str(pp.x_com)[:9]
        sp.y = pp.y_com
        y_char = str(pp.y_com)[:9]
        sp.z = pp.z_com
        z_char = str(pp.z_com)[:9]
        cnf.SetAtomPosition(0, sp)
        out_mol = cnf.GetOwningMol()
        # The round trip through a PDB block gives the atom the PDB residue
        # info used to set the temperature factor.
        out_mol = Chem.MolFromPDBBlock(Chem.MolToPDBBlock(out_mol))
        atm = out_mol.GetAtomWithIdx(0)
        if opt:
            val_out = pp.intensity
        else:
            val_out = this_samp[item]
        atm.GetPDBResidueInfo().SetTempFactor(val_out)
        # So now create this data
        my_gp = GridPoint.objects.get_or_create(x_char=x_char,
                                                y_char=y_char,
                                                z_char=z_char,
                                                target_id=target_id)[0]
        my_grid_val = GPVal.objects.get_or_create(my_anal_id=react_anal_id,
                                                  num_samps=num_samps,
                                                  type_id=type_id,
                                                  method_id=method_id,
                                                  gp_id=my_gp,
                                                  target_id=target_id)[0]
        # Now add this value
        pdb_block = Chem.MolToPDBBlock(out_mol)
        my_grid_val.value = val_out
        my_grid_val.pdb_info = pdb_block
        my_grid_val.sdf_info = Chem.MolToMolBlock(out_mol)
        my_grid_val.save()
        pdb_blocks.append(pdb_block + "\n")
    # The with-block closes the file even if the write fails.
    with open(file_name, "w") as out_f:
        out_f.write("".join(pdb_blocks))
Beispiel #25
0
 def setUp(self):
     """Create one ReactionModel per sample reaction SMILES and SMARTS."""
     # SMILES samples are stored as given.
     for reaction_smiles in REACTION_SMILES_SAMPLE:
         ReactionModel.objects.create(rxn=reaction_smiles)
     # SMARTS samples are first converted to reactions.
     for reaction_smarts in REACTION_SMARTS_SAMPLE:
         reaction = Chem.ReactionFromSmarts(str(reaction_smarts))
         ReactionModel.objects.create(rxn=reaction)
Beispiel #26
0
 def get_fingerprint(self, molecule):
     """Return the Morgan fingerprint of ``molecule`` computed with radius 2."""
     fingerprint = AllChem.GetMorganFingerprint(molecule, radius=2)
     return fingerprint
Beispiel #27
0
 def setUp(self):
     """Create two CtabModel rows: one from a real mol block, one from junk text."""
     furan_block = Chem.MolToMolBlock(Chem.MolFromSmiles('c1cocc1'))
     CtabModel.objects.create(ctab=furan_block)
     CtabModel.objects.create(ctab='rubbish')
    def testAppend(self):
        """Build a store, append a second SMILES file, and verify lookups.

        The store is first created from ``many_smiles`` and checked for
        name lookup, InChIKey lookup and descriptor rows 0-10.  Then
        ``many_smiles2`` is appended and the test checks that rows
        ``i + 11`` carry the same InChIKey as rows ``i`` and that the
        appended descriptor rows 11-21 match the expected values.

        All scratch files live in one private temporary directory that
        is removed in ``finally``, so neither SMILES file nor the store
        is left behind (the original leaked the first SMILES file and
        relied on the race-prone ``tempfile.mktemp``).
        """
        # The only descriptor field that differs between the ten valid
        # rows; the remaining fields are identical for every valid row.
        expected_values = (
            78.046950192,
            92.062600256,
            106.07825032,
            120.093900384,
            134.109550448,
            148.125200512,
            162.140850576,
            176.15650064,
            190.172150704,
            204.187800768,
        )

        def check_descriptors(store, offset):
            """Assert the eleven descriptor rows starting at ``offset``."""
            for row, value in enumerate(expected_values):
                self.assertEqual(store.descriptors().get(offset + row),
                                 (True, value, 0.0, 1.0, 0.0, 1.0))
            # The row after the ten valid ones is flagged as failed.
            self.assertEqual(
                store.descriptors().get(offset + len(expected_values)),
                (False, 0.0, 0.0, 0.0, 0.0, 0.0))

        tmpdir = tempfile.mkdtemp()
        try:
            fname = os.path.join(tmpdir, "first.smi")
            # The store path must not exist yet; make_store creates it.
            storefname = os.path.join(tmpdir, "test.store")
            with open(fname, 'w') as f:
                f.write(many_smiles)

            opts = make_store.MakeStorageOptions(storage=storefname,
                                                 smilesfile=fname,
                                                 hasHeader=False,
                                                 smilesColumn=0,
                                                 nameColumn=1,
                                                 seperator=" ",
                                                 descriptors="RDKit2DSubset",
                                                 index_inchikey=True)
            make_store.make_store(opts)

            with contextlib.closing(DescriptaStore(storefname)) as store:

                for i in range(10):
                    self.assertEqual(store.lookupName(str(i)), i)

                for i in range(10):
                    m = store.molIndex().getRDMol(i)
                    inchi = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                    self.assertEqual(store.lookupInchiKey(inchi), [i])

                check_descriptors(store, 0)

            fname2 = os.path.join(tmpdir, "second.smi")
            with open(fname2, 'w') as f:
                f.write(many_smiles2)

            opts.smilesfile = fname2
            append_store.append_store(opts)
            with contextlib.closing(DescriptaStore(storefname)) as store:
                for i in range(10):
                    self.assertEqual(store.lookupName(str(i)), i)

                for i in range(10):
                    m = store.molIndex().getRDMol(i)
                    inchi = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                    m = store.molIndex().getRDMol(i + 11)
                    self.assertIsNotNone(m)
                    inchi2 = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                    self.assertEqual(inchi, inchi2)
                    self.assertEqual(store.lookupInchiKey(inchi), [i, i + 11])

                # Appended rows start at index 11.
                check_descriptors(store, 11)

        finally:
            # Removes both SMILES files and the store in one go.
            shutil.rmtree(tmpdir)
Beispiel #29
0
def get_conformations(rdkit_mol, nconfs=1, name=None, forcefield=None, rms=-1):
    """
    Generates 3D conformation(s) for an rdkit_mol

    Conformers are embedded with a fixed random seed.  When a forcefield is
    given, every conformer is optimized and the conformers are sorted by
    forcefield energy; if additionally ``rms > 0``, a conformer is dropped
    when its alignment RMS to an already kept conformer is below ``rms``.

    :parameter rdkit_mol: RDKit molecule
    :type rdkit_mol: rdkit.Chem.Mol
    :parameter int nconfs: Number of conformers to be generated
    :parameter str name: A name for the molecule
    :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
        optimization and ranking of conformations. The default value None
        results in skipping of the geometry optimization step
    :parameter float rms: Root Mean Square deviation threshold for removing
        similar/equivalent conformations.
    :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1
    :rtype: plams.Molecule or list of plams Molecules
    :raises KeyError: if ``forcefield`` is truthy but neither 'uff' nor 'mmff'
    """
    def safe_energy(ff, label):
        """Return ff.CalcEnergy(), or a large penalty (with a warning) on failure."""
        try:
            return ff.CalcEnergy()
        except Exception:
            # Also reached when no forcefield could be built (ff is None).
            msg = label + " energy calculation failed for molecule: " + Chem.MolToSmiles(rdkit_mol) + \
                  "\nNo geometry optimization was performed."
            warn(msg)
            # Penalty value so failed conformers sort last.
            return 1e9

    def MMFFenergy(cid):
        ff = AllChem.MMFFGetMoleculeForceField(
            rdkit_mol,
            AllChem.MMFFGetMoleculeProperties(rdkit_mol),
            confId=cid)
        return safe_energy(ff, "MMFF")

    def UFFenergy(cid):
        ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, confId=cid)
        return safe_energy(ff, "UFF")

    if name:
        rdkit_mol.SetProp('name', name)
    cids = list(
        AllChem.EmbedMultipleConfs(rdkit_mol,
                                   nconfs,
                                   pruneRmsThresh=rms,
                                   randomSeed=1))
    if forcefield:
        optimize_molecule, energy = {
            'uff': [AllChem.UFFOptimizeMolecule, UFFenergy],
            'mmff': [AllChem.MMFFOptimizeMolecule, MMFFenergy],
        }[forcefield]
        for cid in cids:
            optimize_molecule(rdkit_mol, confId=cid)
        cids.sort(key=energy)
        # Guard on cids: embedding may have produced no conformers at all.
        if rms > 0 and cids:
            keep = [cids[0]]
            for cid in cids[1:]:
                for idx in keep:
                    try:
                        r = AllChem.AlignMol(rdkit_mol, rdkit_mol, cid, idx)
                    except Exception:
                        # Treat a failed alignment as "different enough".
                        r = rms + 1
                        message = "Alignment failed in multiple conformation generation: "
                        message += Chem.MolToSmiles(rdkit_mol)
                        message += "\nAssuming different conformations."
                        warn(message)
                    if r < rms:
                        # Too close to a kept conformer: discard this one.
                        break
                else:
                    keep.append(cid)
            cids = keep
    if nconfs == 1:
        return from_rdmol(rdkit_mol)
    else:
        return [from_rdmol(rdkit_mol, cid) for cid in cids]
def calcFingerprints(smiles):
    """Return the 2048-bit Morgan fingerprint of ``smiles`` as a list.

    The SMILES string is parsed, fingerprinted with
    ``GetMorganFingerprintAsBitVect(mol, 2, nBits=2048)``, and the
    resulting bit string is split into a list of its single characters.
    """
    molecule = Chem.MolFromSmiles(smiles)
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(molecule, 2, nBits=2048)
    return list(bit_vector.ToBitString())
Beispiel #31
0
def apply_reaction_smarts(mol,
                          reaction_smarts,
                          complete=False,
                          forcefield=None,
                          return_rdmol=False):
    """
    Apply a reaction SMARTS to a molecule and return the product.

    The molecule is split into its disconnected fragments, the reaction is
    run on each fragment separately, and all resulting pieces are combined
    into one product molecule.  When a PLAMS molecule is returned, its
    ``properties.orig_atoms`` holds the 1-based indices of the atoms that
    have not been changed (which can for example be used to partially
    optimize new atoms only with the freeze keyword).

    :parameter mol: molecule to be modified
    :type mol: plams.Molecule or rdkit.Chem.Mol
    :parameter str reaction_smarts: Reaction smarts to be applied to molecule
    :parameter complete: Apply reaction until no further changes occur or given
        fraction of reaction centers have been modified
    :type complete: bool or float (value between 0 and 1)
    :parameter forcefield: Specify 'uff' or 'mmff' to apply forcefield based
        geometry optimization of product structures.
    :type forcefield: str
    :param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule
    :return: the product molecule; the list of unchanged atoms is attached as
        ``properties.orig_atoms`` only when a PLAMS molecule is returned
    :rtype: plams.Molecule or rdkit.Chem.Mol
    """
    def react(reactant, reaction):
        """ Apply reaction to reactant and return products """
        outcomes = reaction.RunReactants([reactant])
        if not outcomes:
            # Reaction does not apply: hand back the reactant, all atoms unchanged.
            return [(reactant, range(reactant.GetNumAtoms()))]

        initial_count = len(outcomes)
        while complete:
            # Keep reacting a randomly chosen outcome until nothing (or, for
            # a fractional `complete`, few enough sites) is left to react.
            pick = random.randint(0, len(outcomes) - 1)
            reactant = outcomes[pick][0]
            outcomes = reaction.RunReactants([reactant])
            remaining = len(outcomes)
            if remaining == 0 or remaining / initial_count < (1 - complete):
                outcomes = [[reactant]]
                break

        # Add hydrogens and generate coordinates for the new atoms.
        finished = []
        for raw_product in outcomes[0]:
            Chem.SanitizeMol(raw_product)
            hydrogenated = Chem.AddHs(raw_product)
            Chem.SanitizeMol(hydrogenated)
            # Atoms that have not changed, as reported by gen_coords_rdmol.
            untouched = gen_coords_rdmol(hydrogenated)
            finished.append((hydrogenated, untouched))
        return finished

    rdmol = to_rdmol(mol)
    reaction = AllChem.ReactionFromSmarts(reaction_smarts)
    # RDKit removes fragments that are disconnected from the reaction center.
    # To keep them, the molecule is split into separate fragments first and
    # the results, including non-reacting parts, are re-combined afterwards.
    fragments = Chem.GetMolFrags(rdmol, asMols=True)
    product = Chem.Mol()
    unchanged = []  # indices (in the combined product) of unchanged atoms
    for fragment in fragments:
        for piece, piece_unchanged in react(fragment, reaction):
            # Shift the piece-local indices by the atoms already combined.
            offset = product.GetNumAtoms()
            unchanged.extend(offset + i for i in piece_unchanged)
            product = Chem.CombineMols(product, piece)

    if forcefield:
        optimize_coordinates(product, forcefield, fixed=unchanged)

    if return_rdmol:
        return product

    # The unchanged-atom list travels with the PLAMS molecule so it can be
    # used in a subsequent partial optimization.
    plams_product = from_rdmol(product)
    plams_product.properties.orig_atoms = [a + 1 for a in unchanged]
    return plams_product