def add_with_rdkit(self, filename, filetype, strict=False, **kwargs):
    """Read a molecule with RDKit and store its PDB block in ``self.molstr``.

    The molecule is parsed according to ``filetype``, explicit hydrogens are
    added, and the resulting PDB block (a string) is appended to
    ``self.molstr``.

    Parameters
    ----------
    filename : str
        Path of the file to read.
    filetype : str
        One of ``inchi``, ``mol2``, ``mol``, ``pdb``, ``smi``, ``smiles``,
        ``tpl``, or ``auto`` to use the file extension.
    strict : bool
        When True, unusable input raises ``IOError``; otherwise a warning is
        printed and the method returns ``None`` without storing anything.
    **kwargs
        Accepted for interface compatibility; not used here.

    Raises
    ------
    IOError
        If the file cannot be opened, or (with ``strict``) the file type is
        unsupported or RDKit cannot parse the content.
    """
    with open(filename, "r") as inputfile:
        text = inputfile.read()

    if filetype == "auto":
        # splitext keeps the leading dot (".mol"), which would never match
        # any of the names tested below.
        filetype = os.path.splitext(filename)[1].lstrip(".").lower()

    if filetype == "inchi":
        rdmol = Chem.MolFromInchi(text.strip())
    elif filetype == "mol2":
        rdmol = Chem.MolFromMol2File(filename)
    elif filetype == "mol":
        rdmol = Chem.MolFromMolFile(filename)
    elif filetype == "pdb":
        rdmol = Chem.MolFromPDBFile(filename)
    elif filetype in ["smi", "smiles"]:
        rdmol = Chem.MolFromSmiles(text.strip())
    elif filetype == "tpl":
        rdmol = Chem.MolFromTPLFile(filename)
    elif filetype == "smarts":
        if strict:
            raise IOError("Smarts is pattern, smiles for molecules.")
        print("WARNING: Use smiles, ignoring smarts.")
        return
    else:
        if strict:
            raise IOError("Filetype (%s) not in rdkit." % filetype)
        print("WARNING: Could not filetype %s." % filetype)
        return

    # RDKit parsers signal failure by returning None rather than raising.
    if rdmol is None:
        message = "RDKit could not parse %s as %s." % (filename, filetype)
        if strict:
            raise IOError(message)
        print("WARNING: " + message)
        return

    rdmol = Chem.AddHs(rdmol)
    # Store the PDB text itself, not the converter function.
    self.molstr.append(Chem.MolToPDBBlock(rdmol))
def _with_canonical_atom_ordering(self: _T) -> _T:
    """
    Give this molecule and its building blocks canonical atom ordering.

    The instance is modified in place: ``_num_building_blocks``,
    ``_atom_infos`` and ``_bond_infos`` are rebuilt so that they refer to
    the canonically ordered building blocks and atoms.

    Returns
    -------
    The same instance (``self``).

    """

    # Make all building blocks canonically ordered too.
    building_blocks = {
        building_block: building_block.with_canonical_atom_ordering()
        for building_block in self._num_building_blocks
    }
    # Cache these mappings for later, to avoid unnecessary
    # re-computations of canonical ordering.
    canonical_map = {
        building_block: building_block.get_canonical_atom_ids()
        for building_block in self._num_building_blocks
    }
    # Re-key the counts by the canonically ordered building blocks; this
    # relies on both dicts iterating in the same order.
    self._num_building_blocks = {
        building_block: num
        for building_block, num in zip(
            building_blocks.values(),
            self._num_building_blocks.values(),
        )
    }
    ordering = rdkit.CanonicalRankAtoms(self.to_rdkit_mol())
    # Built before the super() call below, i.e. from the atoms as they
    # are ordered now: canonical rank -> current atom id.
    id_map = {
        new_id: atom.get_id()
        for new_id, atom in zip(ordering, self._atoms)
    }
    # NOTE(review): presumably the base class reorders self._atoms and
    # the bonds in place - confirm against the parent implementation.
    super()._with_canonical_atom_ordering()
    # Built after the super() call: old position -> atom now stored at
    # the corresponding canonical rank.
    atom_map = {
        old_id: self._atoms[new_id]
        for old_id, new_id in enumerate(ordering)
    }
    old_atom_infos = self._atom_infos

    def get_atom_info(atom: Atom) -> AtomInfo:
        # Look up the info recorded for this atom before reordering.
        old_atom_info = old_atom_infos[id_map[atom.get_id()]]
        old_building_block = old_atom_info.get_building_block()

        # Atoms that do not come from a building block carry no
        # building block data.
        if old_building_block is None:
            return AtomInfo(
                atom=atom,
                building_block_atom=None,
                building_block=None,
                building_block_id=None,
            )

        old_building_block_atom = (
            old_atom_info.get_building_block_atom()
        )
        canonical_building_block_atom_id = canonical_map[
            old_building_block
        ][old_building_block_atom.get_id()]
        canonical_building_block = building_blocks[
            old_building_block
        ]
        # The trailing comma unpacks the single atom that is yielded.
        canonical_building_block_atom, = (
            canonical_building_block.get_atoms(
                atom_ids=canonical_building_block_atom_id,
            )
        )
        return AtomInfo(
            atom=atom,
            building_block_atom=canonical_building_block_atom,
            building_block=canonical_building_block,
            building_block_id=(
                old_atom_info.get_building_block_id()
            ),
        )

    def get_bond_info(info: BondInfo) -> BondInfo:
        building_block = info.get_building_block()
        return BondInfo(
            bond=_utilities.sort_bond_atoms_by_id(
                info.get_bond().with_atoms(atom_map)
            ),
            building_block=(
                building_block
                if building_block is None
                else building_blocks[building_block]
            ),
            building_block_id=info.get_building_block_id(),
        )

    self._atom_infos = tuple(map(get_atom_info, self._atoms))
    self._bond_infos = tuple(sorted(
        map(get_bond_info, self._bond_infos),
        key=_utilities.get_bond_info_atom_ids,
    ))
    return self
def testMany(self):
    """Build a store from ``many_smiles`` and check names, descriptors
    and InChIKey look-ups for every row."""
    # Expected descriptor row per molecule index; index 10 is the row
    # the store reports as failed.
    expected_rows = (
        (True, 78.046950192, 0.0, 1.0, 0.0, 1.0),
        (True, 92.062600256, 0.0, 1.0, 0.0, 1.0),
        (True, 106.07825032, 0.0, 1.0, 0.0, 1.0),
        (True, 120.093900384, 0.0, 1.0, 0.0, 1.0),
        (True, 134.109550448, 0.0, 1.0, 0.0, 1.0),
        (True, 148.125200512, 0.0, 1.0, 0.0, 1.0),
        (True, 162.140850576, 0.0, 1.0, 0.0, 1.0),
        (True, 176.15650064, 0.0, 1.0, 0.0, 1.0),
        (True, 190.172150704, 0.0, 1.0, 0.0, 1.0),
        (True, 204.187800768, 0.0, 1.0, 0.0, 1.0),
        (False, 0.0, 0.0, 0.0, 0.0, 0.0),
    )
    try:
        fname = tempfile.mktemp() + ".smi"
        storefname = tempfile.mktemp() + ".store"
        with open(fname, 'w') as smiles_file:
            smiles_file.write(many_smiles)

        opts = make_store.MakeStorageOptions(
            storage=storefname,
            smilesfile=fname,
            hasHeader=False,
            smilesColumn=0,
            nameColumn=1,
            seperator=" ",
            descriptors="RDKit2DSubset",
            index_inchikey=True,
        )
        make_store.make_store(opts)

        with contextlib.closing(DescriptaStore(storefname)) as store:
            for idx in range(10):
                self.assertEqual(store.lookupName(str(idx)), idx)

            for row, expected in enumerate(expected_rows):
                self.assertEqual(store.descriptors().get(row), expected)

            self.assertEqual(
                store.descriptors().getDict(7),
                toDict(expected_rows[7]))

            # Recomputing the descriptors from the stored molecule must
            # reproduce the stored row.
            calc = store.getDescriptorCalculator()
            for idx in range(10):
                mol = store.molIndex().getRDMol(idx)
                smiles = AllChem.MolToSmiles(mol)
                inchi_key = AllChem.InchiToInchiKey(AllChem.MolToInchi(mol))
                self.assertEqual(store.lookupInchiKey(inchi_key), [idx])
                stored = store.descriptors().get(idx)
                recomputed = tuple(calc.process(smiles))
                self.assertEqual(stored, recomputed)
    finally:
        if os.path.exists(fname):
            os.unlink(fname)
        if os.path.exists(storefname):
            shutil.rmtree(storefname)
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from sklearn.ensemble import RandomForestClassifier
import numpy
import numpy as np

# Four small sample molecules, parsed from SMILES in this order.
mols = [
    Chem.MolFromSmiles(smiles)
    for smiles in ('c1ccccc1', 'c1ccccc1CC', 'c1ccncc1', 'c1ccncc1CC')
]
m1, m2, m3, m4 = mols

# One Morgan bit-vector fingerprint (radius 2) per molecule.
fps = [AllChem.GetMorganFingerprintAsBitVect(mol, 2) for mol in mols]
from rdkit.Avalon.pyAvalonTools import GetAvalonFP
from rdkit.Chem import AllChem

y_name = 'boiling_point'
fingerprint_type = 0  # 0: MACCS key, 1: RDKit, 2: Morgan (≒ECFP4), 3: Avalon

# Fail early on an unsupported setting; otherwise no fingerprint would be
# collected and the DataFrame construction below would fail obscurely.
if fingerprint_type not in (0, 1, 2, 3):
    raise ValueError('fingerprint_type must be 0, 1, 2 or 3')

sdf = Chem.SDMolSupplier('boiling_point.sdf')  # load the SDF file

# Compute the fingerprints.
# For every molecule, append the property value to y, the computed
# fingerprint to fingerprints, and the SMILES string to smiles.
fingerprints, y, smiles = [], [], []
number_of_molecules = len(sdf)
print('分子の数 :', number_of_molecules)
for index, molecule in enumerate(sdf):
    print(index + 1, '/', number_of_molecules)
    if molecule is None:
        # SDMolSupplier yields None for records RDKit cannot parse.
        print('skipping unreadable record', index + 1)
        continue
    y.append(float(molecule.GetProp(y_name)))
    smiles.append(Chem.MolToSmiles(molecule))
    if fingerprint_type == 0:
        fingerprints.append(AllChem.GetMACCSKeysFingerprint(molecule))
    elif fingerprint_type == 1:
        fingerprints.append(Chem.RDKFingerprint(molecule))
    elif fingerprint_type == 2:
        fingerprints.append(
            AllChem.GetMorganFingerprintAsBitVect(molecule, 2, nBits=2048))
    elif fingerprint_type == 3:
        fingerprints.append(GetAvalonFP(molecule))
fingerprints = pd.DataFrame(np.array(fingerprints, int), index=smiles)
y = pd.DataFrame(y, index=smiles, columns=[y_name])

# Save
fingerprints_with_y = pd.concat([y, fingerprints], axis=1)  # join y and the descriptors
# Write to CSV. Note that an existing file with the same name is overwritten.
fingerprints_with_y.to_csv('fingerprints_with_y.csv')
from rdkit.Chem import MACCSkeys
from rdkit.Chem.Fingerprints import FingerprintMols
import filter
from pipelines.utils import utils

### start field name definitions #########################################

field_Similarity = "Similarity"

### start main execution #########################################

# Fingerprint generators keyed by name; each takes a molecule and returns
# its fingerprint. Commented-out entries are left disabled, as in the source.
descriptors = {
    #'atompairs':   lambda m: Pairs.GetAtomPairFingerprint(m),
    'maccs': lambda m: MACCSkeys.GenMACCSKeys(m),
    'morgan2': lambda m: AllChem.GetMorganFingerprint(m, 2),
    'morgan3': lambda m: AllChem.GetMorganFingerprint(m, 3),
    'rdkit': lambda m: FingerprintMols.FingerprintMol(m),
    #'topo':        lambda m: Torsions.GetTopologicalTorsionFingerprint(m)
}

# Similarity functions from DataStructs keyed by name.
# NOTE(review): this dict continues past the end of the visible source.
metrics = {
    'asymmetric': DataStructs.AsymmetricSimilarity,
    'braunblanquet': DataStructs.BraunBlanquetSimilarity,
    'cosine': DataStructs.CosineSimilarity,
    'dice': DataStructs.DiceSimilarity,
    'kulczynski': DataStructs.KulczynskiSimilarity,
    'mcconnaughey': DataStructs.McConnaugheySimilarity,
    #'onbit':DataStructs.OnBitSimilarity,
    'rogotgoldberg': DataStructs.RogotGoldbergSimilarity,
    'russel': DataStructs.RusselSimilarity,
if options.verbose:
    print("Generating a maximum of", options.maxconfs, "per a mol")

# getattr reports a missing ETKDG cleanly instead of raising AttributeError.
if options.etkdg and not getattr(Chem, 'ETKDG', None):
    print("ETKDG does not appear to be implemented.  Please upgrade RDKit.")
    sys.exit(1)

# Open the output stream; a trailing .gz selects gzip and is stripped so
# that the real format extension can be inspected below.
split = os.path.splitext(output)
if split[1] == '.gz':
    outf = gzip.open(output, 'wt+')
    output = split[0]  # strip .gz
else:
    outf = open(output, 'w+')

if os.path.splitext(output)[1] == '.pdb':
    sdwriter = Chem.PDBWriter(outf)
else:
    sdwriter = Chem.SDWriter(outf)

if sdwriter is None:
    print("Could not open " + output)
    sys.exit(-1)

for line in smifile:
    toks = line.split()
    if not toks:
        continue  # blank line: nothing to process
    smi = toks[0]
    name = ' '.join(toks[1:])

    pieces = smi.split('.')
    if len(pieces) > 1:
        smi = max(pieces, key=len)  # take largest component by length
def mol_from_mol_file(mol_file):
    """
    Build an rdkit molecule from a V3000 ``.mol`` file.

    Atoms, their positions and bonds are read from the atom and bond
    blocks of the file and assembled into a molecule with one conformer.

    Parameters
    ----------
    mol_file : :class:`str`
        The path of the ``.mol`` file to read.

    Returns
    -------
    :class:`rdkit.Mol`
        The molecule described by `mol_file`.

    Raises
    ------
    :class:`ChargedMolError`
        If an atom row has more than 8 columns, which usually means a
        9th column holding an atomic charge is present. Such molecules
        are not supported.

    :class:`MolFileError`
        If the file never mentions ``V3000``.

    """

    editable = rdkit.EditableMol(rdkit.Mol())
    conformer = rdkit.Conformer()

    in_atom_block = False
    in_bond_block = False
    is_v3000 = False

    with open(mol_file, 'r') as handle:
        # NOTE(review): the block markers are reproduced exactly as found
        # in the source - confirm their spacing against real V3000 files.
        for line in handle:
            is_v3000 = is_v3000 or 'V3000' in line

            if 'M V30 BEGIN ATOM' in line:
                in_atom_block = True
            elif 'M V30 END ATOM' in line:
                in_atom_block = False
            elif 'M V30 BEGIN BOND' in line:
                in_bond_block = True
            elif 'M V30 END BOND' in line:
                in_bond_block = False
            elif in_atom_block:
                fields = line.split()
                if len(fields) > 8:
                    raise ChargedMolError(
                        mol_file,
                        'Atom row has more than 8 coloumns. Likely '
                        'due to a charged atom.')
                # Element symbol is the 4th field; the coordinates are
                # everything between it and the last field.
                _, _, _, element, *xyz, _ = fields
                position = Point3D(*[float(value) for value in xyz])
                index = editable.AddAtom(rdkit.Atom(element))
                conformer.SetAtomPosition(index, position)
            elif in_bond_block:
                *_, _bond_id, order, begin, end = line.split()
                # File indices are 1-based, rdkit's are 0-based.
                editable.AddBond(int(begin) - 1, int(end) - 1,
                                 bond_dict[order])

    if not is_v3000:
        raise MolFileError(mol_file, 'Not a V3000 .mol file.')

    result = editable.GetMol()
    result.AddConformer(conformer)
    return result
def IsReactionTemplateMoleculeAgent(self, mol, agentThreshold):
    '''
    Ask RDKit whether ``mol`` can be classified as an agent, based on the
    ratio of mapped atoms and the given threshold.

    :param mol: molecule to test
    :param agentThreshold: threshold forwarded to RDKit
    :return: whatever ``AllChem.IsReactionTemplateMoleculeAgent`` returns
    '''
    is_agent = AllChem.IsReactionTemplateMoleculeAgent(
        mol, agentThreshold=agentThreshold)
    return is_agent
def HasReactionSubstructMatch(self, queryReaction):
    '''
    Check ``self.rxn`` against ``queryReaction`` with RDKit.

    :param queryReaction: query reaction forwarded to RDKit
    :return: whatever ``AllChem.HasReactionSubstructMatch`` returns
    '''
    has_match = AllChem.HasReactionSubstructMatch(self.rxn, queryReaction)
    return has_match
def CreateStructuralFingerprintForReaction(self):
    '''
    :return: the structural fingerprint RDKit creates for ``self.rxn``
    '''
    return AllChem.CreateStructuralFingerprintForReaction(self.rxn)
def CreateDifferenceFingerprintForReaction(self):
    '''
    :return: the difference fingerprint RDKit creates for ``self.rxn``
    '''
    return AllChem.CreateDifferenceFingerprintForReaction(self.rxn)
def Compute2DCoordsForReaction(self):
    '''
    Call ``AllChem.Compute2DCoordsForReaction`` on ``self.rxn``.

    :return: None (the result of the RDKit call is discarded)
    '''
    AllChem.Compute2DCoordsForReaction(self.rxn)
def RemoveMappingNumbersFromReactions(self):
    '''
    Call ``AllChem.RemoveMappingNumbersFromReactions`` on ``self.rxn``.

    :return: None
    '''
    AllChem.RemoveMappingNumbersFromReactions(self.rxn)
def ReactionToSmiles(self):
    '''
    :return: ``self.rxn`` converted by ``AllChem.ReactionToSmiles``
    '''
    reaction_smiles = AllChem.ReactionToSmiles(self.rxn)
    return reaction_smiles
def mol_from_mae_file(mae_path):
    """
    Build an ``rdkit`` molecule from a ``.mae`` file.

    The atom and bond tables of the file are parsed and turned into a
    molecule with a single conformer.

    Parameters
    ----------
    mae_path : :class:`str`
        The path of the ``.mae`` file to read.

    Returns
    -------
    :class:`rdkit.Mol`
        The molecule described by the file.

    Raises
    ------
    :class:`RuntimeError`
        If a data row does not have one value per column label.

    """

    def parse_table(block):
        # A table is "<labels>:::<rows>:::..."; blank lines are dropped.
        header, body, *_ = block.split(':::')
        names = [
            name for name in header.split('\n')
            if not name.isspace() and name != ''
        ]
        rows = [
            row.split() for row in body.split('\n')
            if not row.isspace() and row != ''
        ]
        return names, rows

    editable = rdkit.EditableMol(rdkit.Mol())
    conformer = rdkit.Conformer()

    with open(mae_path, 'r') as mae:
        content = re.split(r'[{}]', mae.read())

    # The text following an "m_atom[" / "m_bond[" header is the table
    # that belongs to it.
    for previous, current in zip(content, content[1:]):
        if 'm_atom[' in previous:
            atom_block = current
        if 'm_bond[' in previous:
            bond_block = current

    labels, rows = parse_table(atom_block)
    for row in rows:
        row = [word for word in row if word != '"']
        if len(labels) != len(row):
            raise RuntimeError('Number of labels does not match number '
                               'of columns in .mae file.')

        for label, value in zip(labels, row):
            if 'x_coord' in label:
                x = float(value)
            if 'y_coord' in label:
                y = float(value)
            if 'z_coord' in label:
                z = float(value)
            if 'atomic_number' in label:
                atom_num = int(value)

        element = periodic_table[atom_num]
        position = Point3D(x, y, z)
        index = editable.AddAtom(rdkit.Atom(element))
        conformer.SetAtomPosition(index, position)

    labels, rows = parse_table(bond_block)
    for row in rows:
        if len(labels) != len(row):
            raise RuntimeError('Number of labels does not match number '
                               'of columns in .mae file.')

        for label, value in zip(labels, row):
            # File indices are 1-based, rdkit's are 0-based.
            if 'from' in label:
                begin = int(value) - 1
            if 'to' in label:
                end = int(value) - 1
            if 'order' in label:
                order = str(int(value))

        editable.AddBond(begin, end, bond_dict[order])

    result = editable.GetMol()
    result.AddConformer(conformer)
    return result
# Read the spreadsheet named on the command line; the columns "SMILES" and
# "NO" are used below.
args = parser.parse_args()
filename = args.file
data = pd.read_excel(filename)

# Accumulators; none of them is filled in the part of the loop visible here.
molatomtypes = {}
atomtypesset = set()
mollogd = {}
errorscounter = 0
errorssmiles = []
dim = len(data["SMILES"])

for idx, ss in enumerate(data["SMILES"]):
    start = time.time()
    name = data["NO"][idx]
    # NOTE(review): MolFromSmiles returns None for unparsable SMILES, in
    # which case the calls below would fail - confirm the input is clean.
    mol = Chem.MolFromSmiles(str(ss))
    AllChem.Compute2DCoords(mol)
    AllChem.EmbedMolecule(mol, randomSeed=0xf00d)
    Chem.Kekulize(mol)
    # Embed again after adding explicit hydrogens; the same seed is reused.
    mol_3D = Chem.AddHs(mol)
    AllChem.EmbedMolecule(mol_3D, randomSeed=0xf00d)
    # NOTE(review): assumes name is a str - TODO confirm for the "NO" column.
    fout = Chem.SDWriter(name + ".mol")
    fout.write(mol_3D)
    fout.close()
    end = time.time()
def testDrawReaction(self):
    """Drawing this mapped reaction must complete without raising."""
    reaction_smarts = (
        "[c;H1:3]1:[c:4]:[c:5]:[c;H1:6]:[c:7]2:[nH:8]:[c:9]:[c;H1:1]:[c:2]:1:2"
        ".O=[C:10]1[#6;H2:11][#6;H2:12][N:13][#6;H2:14][#6;H2:15]1"
        ">>[#6;H2:12]3[#6;H1:11]=[C:10]([c:1]1:[c:9]:[n:8]:[c:7]2:[c:6]:[c:5]:[c:4]:[c:3]:[c:2]:1:2)[#6;H2:15][#6;H2:14][N:13]3"
    )
    rxn = AllChem.ReactionFromSmarts(reaction_smarts)
    # Only the absence of an exception is checked; the image is not inspected.
    _image = Draw.ReactionToImage(rxn)
#! /usr/local/bin/python
import sys, os
from rdkit import Chem
from rdkit.Chem import AllChem, ChemicalForceFields

# Read every parsable molecule from the SD file given on the command line.
# addCoords=True gives the new hydrogens 3D positions derived from the heavy
# atoms; without it they carry no usable coordinates for the minimisation.
mols = Chem.SDMolSupplier(sys.argv[1])
mols = [Chem.AddHs(mol, addCoords=True) for mol in mols if mol is not None]
print(len(mols))

output = Chem.SDWriter("conf_gen_mols.sdf")
try:
    for mol in mols:
        if mol.HasProp('_Name'):
            mol_Name = mol.GetProp('_Name')
        # Either call can yield None when MMFF cannot be set up for the
        # molecule; such molecules are tagged "ND" like failed minimisations.
        molprop = AllChem.MMFFGetMoleculeProperties(mol)
        field = None
        if molprop is not None:
            field = AllChem.MMFFGetMoleculeForceField(mol, molprop)
        if field is not None and field.Minimize() == 0:
            e = field.CalcEnergy()
            mol.SetProp("MMFF94", "%s" % e)
        else:
            mol.SetProp("MMFF94", "ND")
        output.write(mol)
finally:
    # Flush and close the writer even if a molecule fails part-way through.
    output.close()
def getRMS(mol, c1, c2):
    """Return ``Chem.GetBestRMS`` for conformers ``c1`` and ``c2`` of ``mol``.

    The same molecule is passed as both molecule arguments, so the two ids
    select conformers within it.
    """
    return Chem.GetBestRMS(mol, mol, c1, c2)
def load_toxcast_dataset(data_path, task_names=None, featurizer=None):
    """Load the ToxCast dataset and optionally featurize every record.

    The first file listed in ``data_path`` is read as a csv table. Its
    ``smiles`` column holds the molecular structures and the columns named
    by ``task_names`` hold the bioassay results. Molecules whose SMILES
    cannot be parsed are dropped. In the labels, 0 becomes -1 and missing
    values become 0.

    Args:
        data_path(str): directory containing the csv file.
        task_names(list): header names of the label columns to fetch. When
            None, ``get_default_toxcast_task_names(data_path)`` is used.
        featurizer(pahelix.featurizers.Featurizer): if not None,
            ``Featurizer.gen_features`` is applied to every raw record;
            records for which it returns None are dropped.

    Returns:
        an InMemoryDataset instance.

    Example:
        .. code-block:: python

            dataset = load_toxcast_dataset('./toxcast/raw')
            print(len(dataset))

    References:
    [1] Richard, Ann M., et al. "ToxCast chemical landscape: paving the road
    to 21st century toxicology." Chemical research in toxicology 29.8
    (2016): 1225-1251.
    [2] See the section "high-throughput assay information" at
    https://www.epa.gov/chemical-research/toxicity-forecaster-toxcasttm-data
    for details.

    """
    if task_names is None:
        task_names = get_default_toxcast_task_names(data_path)

    csv_name = os.listdir(data_path)[0]
    input_df = pd.read_csv(join(data_path, csv_name), sep=',')

    from rdkit.Chem import AllChem
    # Unparsable SMILES give None and stay None in the re-generated list.
    mols = [AllChem.MolFromSmiles(s) for s in input_df['smiles']]
    smiles_list = [
        None if mol is None else AllChem.MolToSmiles(mol) for mol in mols
    ]

    # convert 0 to -1, then nan to 0
    labels = input_df[task_names].replace(0, -1).fillna(0)
    label_rows = labels.values

    data_list = []
    for smiles, label in zip(smiles_list, label_rows):
        if smiles is None:
            continue
        raw_data = {'smiles': smiles, 'label': label}
        if featurizer is None:
            data = raw_data
        else:
            data = featurizer.gen_features(raw_data)
        if data is not None:
            data_list.append(data)

    return InMemoryDataset(data_list)
elif operation == SubshapeCombineOperations.INTERSECT: cs.grid &= subshape2.grid else: raise ValueError('bad combination operation') return cs if __name__ == '__main__': from rdkit.Chem import AllChem, ChemicalFeatures from rdkit.Chem.PyMol import MolViewer #cmpd = Chem.MolFromSmiles('CCCc1cc(C(=O)O)ccc1') #cmpd = Chem.AddHs(cmpd) if 1: cmpd = Chem.MolFromSmiles('C1=CC=C1C#CC1=CC=C1') cmpd = Chem.AddHs(cmpd) AllChem.EmbedMolecule(cmpd) AllChem.UFFOptimizeMolecule(cmpd) AllChem.CanonicalizeMol(cmpd) print(Chem.MolToMolBlock(cmpd), file=file('testmol.mol', 'w+')) else: cmpd = Chem.MolFromMolFile('testmol.mol') builder = SubshapeBuilder() if 1: shape = builder.GenerateSubshapeShape(cmpd) v = MolViewer() if 1: import tempfile tmpFile = tempfile.mktemp('.grd') v.server.deleteAll() Geometry.WriteGridToFile(shape.grid, tmpFile) time.sleep(1)
def get_prediction_template(self):
    '''
    Create a tabular prediction template based on the QMRF document type.

    Loads ``prediction-results.pkl`` for this model and version and writes
    two files into the current working directory:

    - ``summary_document.tsv``: study header plus the compound names and
      InChIKeys
    - ``prediction_report.tsv``: one row per compound with InChI, SMILES,
      prediction and applicability domain; the remaining columns are
      filled with ``-`` placeholders

    Raises:
        Exception: the results file does not exist.
        NotImplementedError: the model is quantitative.
    '''
    # obtain the path and the default name of the results file
    results_file_path = utils.model_path(self.model, self.version)
    results_file_name = os.path.join(results_file_path,
                                     'prediction-results.pkl')
    conveyor = Conveyor()

    if not os.path.isfile(results_file_name):
        raise Exception('Results file not found')

    # Errors raised while loading propagate unchanged to the caller.
    with open(results_file_name, "rb") as input_file:
        conveyor.load(input_file)

    # First get Name, InChI and InChIKey. The InChI strings are computed
    # once and reused for the keys.
    names = conveyor.getVal('obj_nam')
    smiles = conveyor.getVal('SMILES')
    inchi = [AllChem.MolToInchi(AllChem.MolFromSmiles(m)) for m in smiles]
    inchikeys = [AllChem.InchiToInchiKey(i) for i in inchi]

    if self.parameters['quantitative']['value']:
        # A bare string cannot be raised; use a real exception type.
        raise NotImplementedError(
            'Prediction template for quantitative endpoints'
            ' not implemented yet')

    if not self.parameters['conformal']['value']:
        predictions = conveyor.getVal('values')
        # No applicability information exists without conformal
        # prediction; fill the column with the same placeholder the other
        # unknown columns use so it matches the number of compounds.
        applicability = ['-'] * len(smiles)
    else:
        c0 = np.asarray(conveyor.getVal('c0'))
        c1 = np.asarray(conveyor.getVal('c1'))
        predictions = []
        applicability = []
        for i, j in zip(c0, c1):
            if i == j:
                # Both classes agree: outside the applicability domain.
                predictions.append('out of AD')
                applicability.append('out')
            else:
                predictions.append('Inactive' if i == True else 'Active')
                applicability.append('in')

    # Now create the spreadsheets for prediction
    # First write summary
    summary = ("Study name\n" +
               "Endpoint\n" +
               "QMRF-ID\n" +
               "(Target)Compounds\n" +
               "Compounds[compounds]\tName\tInChiKey\n")
    summary += ''.join(
        f'\t{name}\t{inch}\n' for name, inch in zip(names, inchikeys))
    summary += ("\nFile\n" +
                "Author name\n" +
                "E-mail\n" +
                "Role\n" +
                "Affiliation\n" +
                "Date\n")

    with open('summary_document.tsv', 'w') as out:
        out.write(summary)

    # Now prediction details
    # Pandas is used to ease the table creation.
    reporting = pd.DataFrame()
    reporting['InChI'] = inchi
    reporting['CAS-RN'] = '-'
    reporting['SMILES'] = smiles
    reporting['prediction'] = predictions
    reporting['Applicability_domain'] = applicability
    reporting['reliability'] = '-'
    # Placeholder columns for three structural analogues.
    for number in (1, 2, 3):
        for field in ('CAS', 'smiles', 'source', 'experimental_value'):
            reporting[f'Structural_analogue_{number}_{field}'] = '-'

    reporting.to_csv('prediction_report.tsv', sep='\t', index=False)
def render_pdb_data(target_id, test, num_samps, this_item, this_samp,
                    file_name, react_anal_id, method_id, type_id, opt=None):
    """Write one single-atom PDB record per sample point and store each in
    the database.

    Every entry of ``this_samp`` is turned into a one-atom molecule placed
    at the probe's coordinates, with the sample value written to the
    temperature-factor (B-factor) column. A ``GridPoint`` / ``GPVal`` pair
    is fetched or created for each point and all PDB blocks are written to
    ``file_name``.

    With ``opt`` set, each item of ``this_samp`` is a sequence
    ``(x, y, z, intensity)``. Otherwise ``this_samp`` maps ``PlifProbe``
    primary keys to values.
    """
    # Kept for its side effect: makes sure the histogram row exists.
    Histogram.objects.get_or_create(test_made=test,
                                    num_samps=num_samps,
                                    hist_title=this_item)
    mol = Chem.MolFromSmiles("C")
    pdb_blocks = []
    for item in this_samp:
        # First get the PlifProbe if we need
        if opt:
            pp = PlifProbe()
            pp.x_com = item[0]
            pp.y_com = item[1]
            pp.z_com = item[2]
            pp.intensity = item[3]
            val_out = pp.intensity
        else:
            pp = PlifProbe.objects.get(pk=item)
            val_out = this_samp[item]

        # Make an editable molecule: swap the carbon for an atom with
        # atomic number 11 and embed it to obtain a conformer.
        em = AllChem.EditableMol(mol)
        em.RemoveAtom(0)
        em.AddAtom(Chem.Atom(11))
        gm = em.GetMol()
        Chem.SanitizeMol(gm)
        AllChem.EmbedMolecule(gm)
        cnf = gm.GetConformer()

        # Now add the coords
        sp = AllChem.rdGeometry.Point3D()
        sp.x = pp.x_com
        sp.y = pp.y_com
        sp.z = pp.z_com
        x_char = str(pp.x_com)[:9]
        y_char = str(pp.y_com)[:9]
        z_char = str(pp.z_com)[:9]
        cnf.SetAtomPosition(0, sp)

        # Round-trip through a PDB block so the atom has PDB residue info.
        out_mol = cnf.GetOwningMol()
        out_mol = Chem.MolFromPDBBlock(Chem.MolToPDBBlock(out_mol))
        atm = out_mol.GetAtomWithIdx(0)
        atm.GetPDBResidueInfo().SetTempFactor(val_out)

        # So now create this data
        my_gp = GridPoint.objects.get_or_create(x_char=x_char,
                                                y_char=y_char,
                                                z_char=z_char,
                                                target_id=target_id)[0]
        my_grid_val = GPVal.objects.get_or_create(my_anal_id=react_anal_id,
                                                  num_samps=num_samps,
                                                  type_id=type_id,
                                                  method_id=method_id,
                                                  gp_id=my_gp,
                                                  target_id=target_id)[0]
        # Now add this value; the PDB block is serialised once and reused.
        pdb_block = Chem.MolToPDBBlock(out_mol)
        my_grid_val.value = val_out
        my_grid_val.pdb_info = pdb_block
        my_grid_val.sdf_info = Chem.MolToMolBlock(out_mol)
        my_grid_val.save()
        pdb_blocks.append(pdb_block)

    # The with block closes the file even if the write fails.
    with open(file_name, "w") as out_f:
        out_f.write("".join(block + "\n" for block in pdb_blocks))
def setUp(self):
    """Store the sample reactions: SMILES as given, SMARTS parsed first."""
    create = ReactionModel.objects.create
    for reaction_smiles in REACTION_SMILES_SAMPLE:
        create(rxn=reaction_smiles)
    for reaction_smarts in REACTION_SMARTS_SAMPLE:
        reaction = Chem.ReactionFromSmarts(str(reaction_smarts))
        create(rxn=reaction)
def get_fingerprint(self, molecule):
    """Return the Morgan fingerprint (radius 2) of ``molecule``."""
    fingerprint = AllChem.GetMorganFingerprint(molecule, radius=2)
    return fingerprint
def setUp(self):
    """Store one mol block generated by RDKit and one string that is not
    a mol block."""
    parsed = Chem.MolFromSmiles('c1cocc1')
    for ctab in (Chem.MolToMolBlock(parsed), 'rubbish'):
        CtabModel.objects.create(ctab=ctab)
def testAppend(self):
    """Build a store, append a second SMILES file and check both halves.

    After the append, rows ``11 .. 21`` must mirror rows ``0 .. 10`` and
    every InChIKey must resolve to both of its rows.
    """
    # Expected descriptor row per molecule index; index 10 is the row
    # the store reports as failed.
    expected_rows = (
        (True, 78.046950192, 0.0, 1.0, 0.0, 1.0),
        (True, 92.062600256, 0.0, 1.0, 0.0, 1.0),
        (True, 106.07825032, 0.0, 1.0, 0.0, 1.0),
        (True, 120.093900384, 0.0, 1.0, 0.0, 1.0),
        (True, 134.109550448, 0.0, 1.0, 0.0, 1.0),
        (True, 148.125200512, 0.0, 1.0, 0.0, 1.0),
        (True, 162.140850576, 0.0, 1.0, 0.0, 1.0),
        (True, 176.15650064, 0.0, 1.0, 0.0, 1.0),
        (True, 190.172150704, 0.0, 1.0, 0.0, 1.0),
        (True, 204.187800768, 0.0, 1.0, 0.0, 1.0),
        (False, 0.0, 0.0, 0.0, 0.0, 0.0),
    )
    # Every temporary SMILES file is recorded here so that all of them
    # are removed, not only the last one bound to fname.
    smiles_files = []
    storefname = tempfile.mktemp() + ".store"
    try:
        fname = tempfile.mktemp() + ".smi"
        smiles_files.append(fname)
        with open(fname, 'w') as f:
            f.write(many_smiles)

        opts = make_store.MakeStorageOptions(
            storage=storefname,
            smilesfile=fname,
            hasHeader=False,
            smilesColumn=0,
            nameColumn=1,
            seperator=" ",
            descriptors="RDKit2DSubset",
            index_inchikey=True,
        )
        make_store.make_store(opts)

        with contextlib.closing(DescriptaStore(storefname)) as store:
            for i in range(10):
                self.assertEqual(store.lookupName(str(i)), i)

            for i in range(10):
                m = store.molIndex().getRDMol(i)
                inchi = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                self.assertEqual(store.lookupInchiKey(inchi), [i])

            for row, expected in enumerate(expected_rows):
                self.assertEqual(store.descriptors().get(row), expected)

        fname = tempfile.mktemp() + ".smi"
        smiles_files.append(fname)
        with open(fname, 'w') as f:
            f.write(many_smiles2)
        opts.smilesfile = fname
        append_store.append_store(opts)

        with contextlib.closing(DescriptaStore(storefname)) as store:
            for i in range(10):
                self.assertEqual(store.lookupName(str(i)), i)

            for i in range(10):
                m = store.molIndex().getRDMol(i)
                inchi = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                m = store.molIndex().getRDMol(i + 11)
                self.assertIsNotNone(m)
                inchi2 = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                self.assertEqual(inchi, inchi2)
                self.assertEqual(store.lookupInchiKey(inchi), [i, i + 11])

            # The appended rows are checked twice, as in the original
            # test. NOTE(review): presumably to exercise repeated reads -
            # confirm.
            for _ in range(2):
                for row, expected in enumerate(expected_rows):
                    self.assertEqual(store.descriptors().get(11 + row),
                                     expected)
    finally:
        for path in smiles_files:
            if os.path.exists(path):
                os.unlink(path)
        if os.path.exists(storefname):
            shutil.rmtree(storefname)
def get_conformations(rdkit_mol, nconfs=1, name=None, forcefield=None, rms=-1):
    """
    Generates 3D conformation(s) for an rdkit_mol

    :parameter rdkit_mol: RDKit molecule
    :type rdkit_mol: rdkit.Chem.Mol
    :parameter int nconfs: Number of conformers to be generated
    :parameter str name: A name for the molecule
    :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
        optimization and ranking of conformations. The default value None
        results in skipping of the geometry optimization step
    :parameter float rms: Root Mean Square deviation threshold for removing
        similar/equivalent conformations. Values <= 0 disable the filtering.
    :return: A molecule with hydrogens and 3D coordinates or a list of
        molecules if nconfs > 1 (empty if no conformer could be embedded)
    :rtype: plams.Molecule or list of plams Molecules
    """

    def MMFFenergy(cid):
        ff = AllChem.MMFFGetMoleculeForceField(
            rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol), confId=cid)
        try:
            energy = ff.CalcEnergy()
        except Exception:
            # Best effort: rank conformers that cannot be evaluated last.
            msg = "MMFF energy calculation failed for molecule: " + Chem.MolToSmiles(rdkit_mol) + \
                  "\nNo geometry optimization was performed."
            warn(msg)
            energy = 1e9
        return energy

    def UFFenergy(cid):
        ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, confId=cid)
        try:
            energy = ff.CalcEnergy()
        except Exception:
            # Best effort: rank conformers that cannot be evaluated last.
            msg = "UFF energy calculation failed for molecule: " + Chem.MolToSmiles(rdkit_mol) + \
                  "\nNo geometry optimization was performed."
            warn(msg)
            energy = 1e9
        return energy

    if name:
        rdkit_mol.SetProp('name', name)

    cids = list(
        AllChem.EmbedMultipleConfs(rdkit_mol, nconfs, pruneRmsThresh=rms, randomSeed=1))

    if forcefield:
        optimize_molecule, energy = {
            'uff': [AllChem.UFFOptimizeMolecule, UFFenergy],
            'mmff': [AllChem.MMFFOptimizeMolecule, MMFFenergy],
        }[forcefield]
        for cid in cids:
            optimize_molecule(rdkit_mol, confId=cid)
        cids.sort(key=energy)
        # Drop conformers that align within rms of one already kept.
        # Skipped when no conformer was generated, so cids[0] is safe.
        if rms > 0 and cids:
            keep = [cids[0]]
            for cid in cids[1:]:
                for idx in keep:
                    try:
                        r = AllChem.AlignMol(rdkit_mol, rdkit_mol, cid, idx)
                    except Exception:
                        r = rms + 1
                        message = "Alignment failed in multiple conformation generation: "
                        message += Chem.MolToSmiles(rdkit_mol)
                        message += "\nAssuming different conformations."
                        warn(message)
                    if r < rms:
                        break
                else:
                    # No kept conformer was close enough: this one is new.
                    keep.append(cid)
            cids = keep

    if nconfs == 1:
        return from_rdmol(rdkit_mol)
    else:
        return [from_rdmol(rdkit_mol, cid) for cid in cids]
def calcFingerprints(smiles):
    """Return the 2048-bit Morgan fingerprint (radius 2) of ``smiles`` as a
    list of ``'0'`` / ``'1'`` characters."""
    molecule = Chem.MolFromSmiles(smiles)
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(molecule, 2, nBits=2048)
    return list(bit_vector.ToBitString())
def apply_reaction_smarts(mol, reaction_smarts, complete=False, forcefield=None,
                          return_rdmol=False):
    """
    Applies reaction smirks and returns product.
    If returned as a plams molecule, plams.Molecule.properties.orig_atoms
    is a list of indices of atoms that have not been changed
    (which can for example be used partially optimize new atoms only with the freeze keyword)

    :parameter mol: molecule to be modified
    :type mol: plams.Molecule or rdkit.Chem.Mol
    :parameter str reaction_smarts: Reactions smarts to be applied to molecule
    :parameter complete: Apply reaction until no further changes occur or given
        fraction of reaction centers have been modified
    :type complete: bool or float (value between 0 and 1)
    :parameter forcefield: Specify 'uff' or 'mmff' to apply forcefield based
        geometry optimization of product structures.
    :type forcefield: str
    :param bool return_rdmol: return a RDKit molecule if true, otherwise a PLAMS molecule
    :return: the product molecule; when a PLAMS molecule is returned, the
        1-based indices of the unchanged atoms are stored in
        ``properties.orig_atoms``
    :rtype: plams.Molecule or rdkit.Chem.Mol
    """

    def react(reactant, reaction):
        """ Apply reaction to reactant and return products """
        ps = reaction.RunReactants([reactant])
        # if reaction doesn't apply, return the reactant
        if len(ps) == 0:
            return [(reactant, range(reactant.GetNumAtoms()))]
        full = len(ps)
        while complete:  # when complete is True
            # apply reaction until no further changes
            # A randomly chosen outcome is fed back in as the next reactant.
            r = random.randint(0, len(ps) - 1)
            reactant = ps[r][0]
            ps = reaction.RunReactants([reactant])
            if len(ps) == 0 or len(ps) / full < (1 - complete):
                ps = [[reactant]]
                break
        # add hydrogens and generate coordinates for new atoms
        products = []
        for p in ps[0]:
            Chem.SanitizeMol(p)
            q = Chem.AddHs(p)
            Chem.SanitizeMol(q)
            u = gen_coords_rdmol(q)  # These are the atoms that have not changed
            products.append((q, u))
        return products

    mol = to_rdmol(mol)
    reaction = AllChem.ReactionFromSmarts(reaction_smarts)
    # RDKit removes fragments that are disconnected from the reaction center
    # In order to keep these, the molecule is first split in separate fragments
    # and the results, including non-reacting parts, are re-combined afterwards
    frags = (Chem.GetMolFrags(mol, asMols=True))
    product = Chem.Mol()
    unchanged = []  # List of atoms that have not changed
    for frag in frags:
        for p, u in react(frag, reaction):
            # Offset the fragment-local indices by the atoms combined so far.
            unchanged += [product.GetNumAtoms() + i for i in u]
            product = Chem.CombineMols(product, p)
    if forcefield:
        optimize_coordinates(product, forcefield, fixed=unchanged)
    # The list of atoms that are identical to those in the reactants is only
    # attached to a PLAMS product (as 1-based indices). It can be used in
    # subsequent partial optimization of the molecule.
    if not return_rdmol:
        product = from_rdmol(product)
        product.properties.orig_atoms = [a + 1 for a in unchanged]
    return product