def set_reaction(self, product_smiles):
    """Run the stored reaction on a product and keep ring-consistent results.

    The product SMILES is parsed with ``SM`` and passed to
    ``self.rxn.RunReactants``. Every returned reactant set is sanitized and
    kept only when the summed ring count of its members minus the ring count
    of the product equals ``self.ring_change_count``.

    Parameters
    ----------
    product_smiles : str
        SMILES string of the product molecule.

    Returns
    -------
    list
        The reactant sets (as yielded by ``RunReactants``) that satisfy the
        ring-count condition. An empty list is returned when the product
        cannot be parsed or its ring count cannot be computed.

    Notes
    -----
    If ``RunReactants`` itself raises, diagnostic information is printed and
    the process is terminated through ``exit()``.
    """
    try:
        product_mol = SM(product_smiles)
        prod_num_rings = CalcNumRings(product_mol)
    except Exception:
        # `except Exception` (not a bare except) so that Ctrl-C and
        # SystemExit are never mistaken for a bad product.
        print("error in the product", product_smiles)
        return []

    try:
        reactant_list = self.rxn.RunReactants([product_mol])
    except Exception:
        print("Reaction failed")
        print(self.reaction_name, self.smarts, product_smiles)
        exit()

    approved_reactants = []
    for reactant_mol in reactant_list:
        # condition 1 - conserved ring count
        try:
            for r in reactant_mol:
                Chem.SanitizeMol(r)
            ring_delta = sum(CalcNumRings(r) for r in reactant_mol) - prod_num_rings
            if ring_delta == self.ring_change_count:
                approved_reactants.append(reactant_mol)
        except Exception:
            # A reactant set that cannot be sanitized is reported and dropped.
            print("could not sanitize ", product_smiles, ".".join([MS(r) for r in reactant_mol]))
    return approved_reactants
def preprocess(dataset, dir_input):
    """Featurise a SMILES/adduct/CCS table and write the arrays to ``dir_input``.

    Rows whose SMILES contains ``'.'`` are skipped. For every remaining row
    the SMILES is round-tripped through RDKit, hydrogens are added, and
    fingerprints, an adjacency matrix and ten molecular descriptors are
    computed. The CCS values are standardised with the mean and standard
    deviation of the kept rows.

    Parameters
    ----------
    dataset : mapping
        Must provide the columns ``'SMILES'``, ``'Adducts'`` and ``'CCS'``.
    dir_input : str
        Output prefix. File names are appended by plain string
        concatenation, so it should end with a path separator. The
        directory is created if it does not exist.

    Notes
    -----
    Writes ``Smiles.txt``, the arrays ``molecules``, ``adducts``,
    ``adjacencies``, ``properties``, ``descriptors``, ``mean`` and ``std``
    (via ``np.save``), and ``fingerprint_dict.pickle``. ``radius`` and
    ``fingerprint_dict`` are read from module scope.
    """
    train_smiles = list(dataset['SMILES'])
    train_adducts = dataset['Adducts']
    train_ccs = list(dataset['CCS'])

    adducts_encoder = AdductToOneHotEncoder()
    adducts_encoder.fit(train_adducts)
    adducts = adducts_encoder.transform(train_adducts)

    kept_rows = []  # indices of the input rows that were not skipped
    smiles_lines, molecules, adjacencies, properties, descriptors = [], [], [], [], []

    for i, smi in enumerate(train_smiles):
        if '.' in smi:
            continue
        smi = Chem.MolToSmiles(Chem.MolFromSmiles(smi))
        mol = Chem.MolFromSmiles(smi)
        mol = Chem.AddHs(mol)
        atoms = create_atoms(mol)
        i_jbond_dict = create_ijbonddict(mol)
        fingerprints = extract_fingerprints(atoms, i_jbond_dict, radius)
        adjacency = create_adjacency(mol)

        kept_rows.append(i)
        smiles_lines.append(smi)
        molecules.append(fingerprints)
        adjacencies.append(adjacency)
        properties.append([[train_ccs[i]]])
        descriptors.append([
            ExactMolWt(mol),
            MolLogP(mol),
            GetFormalCharge(mol),
            CalcNumRings(mol),
            CalcNumRotatableBonds(mol),
            CalcLogS(mol),
            AcidCount(mol),
            BaseCount(mol),
            APolar(mol),
            BPolar(mol),
        ])

    # The adducts were encoded for every input row; drop the rows skipped
    # above so that `adducts` stays row-aligned with the other outputs.
    # NOTE(review): assumes `transform` returns one row per input row, in
    # input order, in an array-like that np.asarray can index - confirm.
    if len(kept_rows) != len(train_smiles):
        adducts = np.asarray(adducts)[np.asarray(kept_rows, dtype=int)]

    properties = np.array(properties)
    mean, std = np.mean(properties), np.std(properties)
    properties = np.array((properties - mean) / std)

    os.makedirs(dir_input, exist_ok=True)

    with open(dir_input + 'Smiles.txt', 'w') as f:
        f.write(''.join(line + '\n' for line in smiles_lines))
    np.save(dir_input + 'molecules', molecules)
    np.save(dir_input + 'adducts', adducts)
    np.save(dir_input + 'adjacencies', adjacencies)
    np.save(dir_input + 'properties', properties)
    np.save(dir_input + 'descriptors', descriptors)
    np.save(dir_input + 'mean', mean)
    np.save(dir_input + 'std', std)

    dump_dictionary(fingerprint_dict, dir_input + 'fingerprint_dict.pickle')
def _construct(self, molecules, ring_cutoff=10, progress=False, annotate=True):
    """Private method for graph construction, called by constructors.

    For each molecule the stereochemistry is removed, its murcko scaffold is
    added to the graph together with the molecule and a connecting edge, and
    scaffolds with more than one ring are passed on to
    ``_recursive_constructor``.

    Parameters
    ----------
    molecules : iterable
        An iterable of rdkit molecules for processing. ``None`` entries
        are skipped.
    ring_cutoff : int, optional
        Ignore molecules with more than the specified number of rings to
        avoid extended processing times. The default is 10.
    progress : bool, optional
        If True show a progress bar monitoring progress.
        The default is False.
    annotate : bool, optional
        If True write an annotated murcko scaffold SMILES string to each
        molecule edge (molecule --> scaffold). The default is True.
    """
    rdlogger.setLevel(4)  # Suppress the RDKit logs
    try:
        # tqdm takes a `disable` flag, i.e. the inverse of `progress`.
        hide_bar = progress is False
        desc = self.__class__.__name__
        for molecule in tqdm(molecules, disable=hide_bar, desc=desc,
                             miniters=1, dynamic_ncols=True):
            if molecule is None:  # logged in suppliers
                continue
            init_molecule_name(molecule)
            if CalcNumRings(molecule) > ring_cutoff:
                name = molecule.GetProp('_Name')
                logger.warning(
                    f'Molecule {name} filtered (> {ring_cutoff} rings)')
                continue
            rdmolops.RemoveStereochemistry(molecule)
            scaffold = Scaffold(get_murcko_scaffold(molecule))
            if scaffold:  # Checks that a scaffold has at least 1 atom
                annotation = None
                if annotate:
                    annotation = get_annotated_murcko_scaffold(
                        molecule, scaffold.mol, False)
                self.add_scaffold_node(scaffold)
                self.add_molecule_node(molecule)
                self.add_molecule_edge(molecule, scaffold,
                                       annotation=annotation)
                if scaffold.rings.count > 1:
                    self._recursive_constructor(scaffold)
            else:
                name = molecule.GetProp('_Name')
                logger.warning(f'No top level scaffold for molecule {name}')
    finally:
        # Restore the log level even when construction raises, so a failure
        # here does not leave RDKit logging suppressed for the caller.
        rdlogger.setLevel(3)  # Enable the RDKit logs
def _construct(self, molecules, init_args, ring_cutoff=10, progress=False):
    """Build the graph from an iterable of molecules.

    This generic driver delegates the actual hierarchy building to
    ``_hierarchy_constructor`` and scaffold creation to
    ``_initialize_scaffold``, so subclasses can customise either step.

    Parameters
    ----------
    molecules : iterable
        An iterable of rdkit molecules for processing. ``None`` entries
        are skipped.
    init_args : dict
        A dictionary containing arguments for scaffold initialization and
        preprocessing; forwarded to ``_initialize_scaffold``.
    ring_cutoff : int, optional
        Molecules with more rings than this are skipped, logged, and counted
        in ``self.graph['num_filtered']``. The default is 10.
    progress : bool, optional
        If True show a progress bar monitoring progress.
        The default is False.

    See Also
    --------
    _initialize_scaffold
    _preprocess_scaffold
    _process_no_top_level
    """
    bar_label = self.__class__.__name__
    hide_bar = progress is False  # tqdm expects the inverse flag
    stream = tqdm(
        molecules,
        disable=hide_bar,
        desc=bar_label,
        miniters=1,
        dynamic_ncols=True,
    )
    for molecule in stream:
        if molecule is None:
            continue
        init_molecule_name(molecule)
        if CalcNumRings(molecule) > ring_cutoff:
            name = molecule.GetProp('_Name')
            logger.warning(
                f'Molecule {name} filtered (> {ring_cutoff} rings)')
            filtered_so_far = self.graph.get('num_filtered', 0)
            self.graph['num_filtered'] = filtered_so_far + 1
        else:
            scaffold = self._initialize_scaffold(molecule, init_args)
            if scaffold is not None:
                self._hierarchy_constructor(scaffold)
def get_numRings(mol):
    """Return the number of rings in ``mol`` as reported by ``CalcNumRings``."""
    ring_count = CalcNumRings(mol)
    return ring_count
def predict():
    """Generate molecules for the requested targets and return the best ones.

    Reads a JSON request body with the keys ``conditions``, ``num_rounds``,
    ``loyality`` and ``num_of_mols``. In every round, 100 molecules are
    sampled from ``model`` conditioned on the scaled ``conditions``,
    standardised, and sent to a remote prediction service. Each returned
    molecule is then scored against twelve rule checks; molecules whose
    fraction of satisfied rules is at least ``loyality`` are collected.

    Returns
    -------
    The ``jsonify``-ed list of the first ``num_of_mols`` collected entries,
    sorted by ascending mean squared error between the scaled predicted
    (melting point, boiling point, water solubility) and the scaled
    ``conditions``. Each entry is
    ``(smiles, score, (melting, boiling, solubility), mse)``.
    """
    req_data = request.get_json()
    print("Data requested")
    print(req_data)
    conditions = req_data["conditions"]
    num_rounds = req_data["num_rounds"]
    loyality = req_data["loyality"]
    num_of_mols = req_data["num_of_mols"]

    number_generate = 100  # molecules sampled per round
    rule_count = 12  # number of rule checks applied below

    result_arr = []
    for round_idx in range(num_rounds):
        print(f"round {round_idx}")

        # Scaled target, reused both as model condition and as MSE reference.
        target_scaled = scaler.transform(np.array([conditions]))
        endp = torch.tensor(target_scaled)
        print(endp.shape)
        # One condition row per molecule to generate, then three stacked
        # copies. NOTE(review): the factor 3 presumably matches what
        # `model.sample` expects - confirm.
        endp = endp.repeat(number_generate, 1)
        endp = endp.unsqueeze(0)
        endp = endp.repeat(3, 1, 1)
        endp = endp.float()
        endp = endp.cuda()

        res = model.sample(endp, number_generate, dataset.model)
        valid = len(res) * 100 / number_generate
        print("valid : {} %".format(valid))

        # Drop molecules the standardizer rejects (it returns None for them).
        mols = [std for std in map(robust_standardizer, res) if std is not None]
        print("Mols obtained")
        print(mols)

        # NOTE(review): no timeout is set on this HTTP call.
        vals_another = requests.post(
            "https://backend.syntelly.com/tempSmilesArrToPredict",
            json={'smiles': mols}).json()

        # Replace numeric endpoint ids by their names, in place.
        for entry in vals_another:
            for e in entry['data']:
                e["endpoint_id"] = endpoints_id2name[e["endpoint_id"]]

        # One {endpoint name: value} mapping per returned molecule.
        e2v = [{e['endpoint_id']: e['value'] for e in entry['data']}
               for entry in vals_another]

        smiles = [val['smiles'] for val in vals_another]
        mols = [robust_standardizer(smi) for smi in smiles]
        mols = [Chem.MolFromSmiles(smi) for smi in mols]

        molecular_weights = [CalcExactMolWt(mol) for mol in mols]
        logp = [MolLogP(mol) for mol in mols]
        atom_count = [mol.GetNumAtoms() for mol in mols]
        molar_reflactivity = [MolMR(mol) for mol in mols]
        numRings = [CalcNumRings(mol) for mol in mols]
        numRotBonds = [CalcNumRotatableBonds(mol) for mol in mols]
        numHAcceptors = [NumHAcceptors(mol) for mol in mols]
        numHDonors = [NumHDonors(mol) for mol in mols]

        bcf = [e['Bioconcentration factor'] for e in e2v]
        dev_tox = [e['Developmental toxicity'] for e in e2v]
        flash_point = [e['Flash point'] for e in e2v]
        boiling_point = [e['Boiling point'] for e in e2v]
        melting_points = [e['Melting point'] for e in e2v]
        water_solubility = [e['Water Solubility'] for e in e2v]

        result = [0] * len(smiles)
        for idx in range(len(smiles)):
            val = sum((
                160 <= molecular_weights[idx] <= 480,
                -0.4 <= logp[idx] <= 5.6,
                20 <= atom_count[idx] <= 70,
                40 <= molar_reflactivity[idx] <= 130,
                bcf[idx] < 3,
                dev_tox[idx] == 'Negative',
                # presumably Kelvin thresholds converted to Celsius - confirm
                flash_point[idx] > (350 - 273.15),
                boiling_point[idx] > (300 - 273.15),
                numRings[idx] > 0,
                numRotBonds[idx] < 5,
                numHAcceptors[idx] <= 10,
                numHDonors[idx] <= 5,
            ))
            # Keep the score only if enough rules are satisfied.
            if val / rule_count >= loyality:
                result[idx] = val
        print(result)

        for idx, score in enumerate(result):
            if score > 0:
                predicted = (melting_points[idx], boiling_point[idx],
                             water_solubility[idx])
                distance = mean_squared_error(
                    scaler.transform(np.array([list(predicted)])),
                    target_scaled,
                )
                result_arr.append((smiles[idx], score, predicted, distance))

    result_arr.sort(key=lambda x: x[3])
    print(result_arr[:num_of_mols])
    return jsonify(result_arr[:num_of_mols])
def _calculate_phys_chem_property(self, mol):
    """Return the ring count of ``mol`` as computed by ``CalcNumRings``."""
    ring_count = CalcNumRings(mol)
    return ring_count