def save(self, force_insert=False, force_update=False, using=None, update_fields=None, *args, **kwargs):
    """Derive the RDKit-based fields from ``self.smiles`` and persist the row.

    When ``self.smiles`` is non-empty the molecule is parsed and the mol
    block, exact molecular weight, logP, H-bond acceptor/donor counts, TPSA
    and rotatable-bond count are stored on the instance.  The row is written
    once, then ``formula`` and ``bfp`` are set, and the row is written again
    by the unconditional save at the end.

    Parameters
    ----------
    using : str or None
        Database alias; forwarded to every write so that all of them hit the
        database the caller asked for.
    force_insert, force_update, update_fields
        Accepted for signature compatibility only.
        NOTE(review): these are deliberately still not forwarded.  The method
        writes the row more than once and changes fields the caller did not
        name, so passing them through unchanged would make the second write
        fail (``force_insert``) or skip the derived fields
        (``update_fields``).
    *args, **kwargs
        Passed to the parent ``save`` unchanged.
    """
    smiles = self.smiles
    if smiles:
        try:
            self.mol = Chem.MolFromSmiles(smiles)
            self.mol_block = Chem.MolToMolBlock(self.mol)
            self.mol_weight = Descriptors.ExactMolWt(self.mol)
            self.alogp = MolLogP(self.mol)
            self.hba = NumHAcceptors(self.mol)
            self.hbd = NumHDonors(self.mol)
            self.psa = Chem.MolSurf.TPSA(self.mol)
            self.rtb = NumRotatableBonds(self.mol)
            # Intermediate write before the remaining derived fields are set.
            super(Compound, self).save(*args, using=using, **kwargs)
            self.formula = Chem.rdMolDescriptors.CalcMolFormula(self.mol)
            self.bfp = MORGANBV_FP(Value(smiles))
        except (ValueError, TypeError):
            # Best effort: presumably an unparsable SMILES ends up here (the
            # RDKit calls reject a ``None`` molecule).  Fields assigned before
            # the failure stay on the instance and the row is still saved.
            # Parenthesised form works on both Python 2 and Python 3.
            print("Error when storing mol object")
    super(Compound, self).save(*args, using=using, **kwargs)
def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
    """Fill in the ChEMBL URL and the RDKit-derived fields, then persist.

    The inspect URL is built from ``self.molecule_chembl_id`` and the row is
    written immediately.  When ``self.molecule_smile`` is non-empty the
    molecule is parsed and the mol block, exact molecular weight, logP,
    H-bond acceptor/donor counts, TPSA and rotatable-bond count are stored,
    the row is written again, and ``formula`` and ``bfp`` are set.  A final
    write always happens at the end.

    Parameters
    ----------
    using : str or None
        Database alias; forwarded to every write so that all of them hit the
        database the caller asked for.
    force_insert, force_update, update_fields
        Accepted for signature compatibility only.
        NOTE(review): these are deliberately still not forwarded.  The method
        writes the row several times and changes fields the caller did not
        name, so passing them through unchanged would make the later writes
        fail (``force_insert``) or skip the derived fields
        (``update_fields``).
    """
    self.molecule_chembl_id_url = 'https://www.ebi.ac.uk/chembl/compound/inspect/{}'.format(
        self.molecule_chembl_id)
    super(ChEMBL_small_molecule, self).save(using=using)

    smiles = self.molecule_smile
    if smiles:
        try:
            self.mol = Chem.MolFromSmiles(smiles)
            self.mol_block = Chem.MolToMolBlock(self.mol)
            self.mol_weight = Descriptors.ExactMolWt(self.mol)
            self.alogp = MolLogP(self.mol)
            self.hba = NumHAcceptors(self.mol)
            self.hbd = NumHDonors(self.mol)
            self.psa = Chem.MolSurf.TPSA(self.mol)
            self.rtb = NumRotatableBonds(self.mol)
            # Intermediate write before the remaining derived fields are set.
            super(ChEMBL_small_molecule, self).save(using=using)
            self.formula = Chem.rdMolDescriptors.CalcMolFormula(self.mol)
            self.bfp = MORGANBV_FP(Value(smiles))
        except (ValueError, TypeError):
            # Best effort: presumably an unparsable SMILES ends up here (the
            # RDKit calls reject a ``None`` molecule).  Fields assigned before
            # the failure stay on the instance and the row is still saved.
            print('Error when storing mol object')
    super(ChEMBL_small_molecule, self).save(using=using)
def analyze(self, smiles: List[str], only_drugs=True) -> pd.DataFrame:
    """Build a table of RDKit properties and model scores for ``smiles``.

    Parameters
    ----------
    smiles : List[str]
        SMILES strings to analyse.
    only_drugs : bool
        When true, keep only rows that pass the property limits and the
        score thresholds applied below, and renumber the index from zero.

    Returns
    -------
    pd.DataFrame
        Columns ``key`` (InChIKey), ``smiles``, ``weight``, ``logp``,
        ``hdonors``, ``hacceptors``, ``safety``, ``feasibility`` and ``bbbp``
        (element 1 of each ``predict_proba`` row).

    Raises
    ------
    ValueError
        If any SMILES string cannot be parsed into a molecule.
    """
    features = self.preprocessor.transform(smiles)

    # Parse everything first so a malformed entry aborts the whole call.
    parsed = []
    for example in smiles:
        mol = MolFromSmiles(example)
        if not mol:
            raise ValueError("Malformed molecule passed in to analyze")
        parsed.append(mol)

    # RDKit molecular properties
    columns = {
        "key": [MolToInchiKey(mol) for mol in parsed],
        "smiles": smiles,
        "weight": [ExactMolWt(mol) for mol in parsed],
        "logp": [MolLogP(mol) for mol in parsed],
        "hdonors": [NumHDonors(mol) for mol in parsed],
        "hacceptors": [NumHAcceptors(mol) for mol in parsed],
    }

    # Scores
    columns["safety"] = self.safety.predict(features)
    columns["feasibility"] = self.feasibility.predict(features)
    columns["bbbp"] = [proba[1] for proba in self.bbbp.predict_proba(features)]

    dataframe = pd.DataFrame(columns)

    if only_drugs:
        # Lipinski-style property limits
        drug_like = (
            (dataframe.weight < 500)
            & (dataframe.hdonors <= 5)
            & (dataframe.hacceptors <= 10)
            & (dataframe.logp <= 5)
        )
        # Drop compounds scored as too toxic or infeasible
        acceptable = (dataframe.safety > 0.75) & (dataframe.feasibility > 0.75)
        dataframe = dataframe[drug_like & acceptable].reset_index(drop=True)

    return dataframe
def get_global_features(mol):
    """Compute molecule-level descriptors for one molecule.

    Parameters
    ----------
    mol : rdkit mol

    Returns
    -------
    np.ndarray
        ``float32`` vector ``[MolWt, TPSA, logP, number of H-bond donors]``.
    """
    descriptors = (
        MolWt(mol),
        CalcTPSA(mol),
        MolLogP(mol),
        NumHDonors(mol),
    )
    return np.array(descriptors, dtype=np.float32)
def lipinski_filter(smiles):
    """Return True when ``smiles`` passes the property limits checked below.

    The molecule must have logP <= 5, at most 10 H-bond acceptors, at most
    5 H-bond donors and an exact molecular weight between 100 and 500
    (inclusive).

    A SMILES string that RDKit cannot parse is rejected (returns ``False``)
    instead of failing inside the descriptor calls.
    """
    mol = MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None.
        return False
    return (
        MolLogP(mol) <= 5
        and NumHAcceptors(mol) <= 10
        and NumHDonors(mol) <= 5
        and 100 <= ExactMolWt(mol) <= 500
    )
def _calculate_phys_chem_property(self, mol):
    """Return the hydrogen-bond donor count of ``mol`` as given by ``NumHDonors``."""
    donor_count = NumHDonors(mol)
    return donor_count
# Collect per-molecule descriptors from every pickled dataset into one table.
# NOTE(review): ``mws`` and ``logps`` are appended to below but are not
# initialised in this fragment; they are presumably created just above.
nhdonors = []
values = []
dataset = []

# Every dataset named in LABEL_GUIDE, plus the extra "cyp" one.
for data in [*LABEL_GUIDE.keys(), "cyp"]:
    data_file = os.path.join(DATA_PATH, data, f"data_{data}.pt")
    # NOTE(review): unpickling runs code from the file; keep these files trusted.
    with open(data_file, "rb") as fh:
        inchis, chunk_values = pickle.load(fh)
    values.extend(chunk_values)

    for inchi in tqdm(inchis):
        mol = MolFromInchi(inchi)
        mws.append(MolWt(mol))
        logps.append(MolLogP(mol))
        nhdonors.append(NumHDonors(mol))
        # One dataset tag per molecule keeps all columns the same length.
        dataset.append(DATASET_GUIDE[data])

df = pd.DataFrame(
    {
        "Molecular weight (gr./mol)": mws,
        r"aLog$P$": logps,
        "No. hydrogen donors": nhdonors,
        "values": values,
        "dataset": dataset,
    }
)

# One row of three panels, each with a faint grid.
f, axs = plt.subplots(1, 3, figsize=(18, 6))
for ax in axs:
    ax.grid(alpha=0.5)
def predict():
    """Sample molecules for the requested targets, score them and rank them.

    Reads a JSON request body with the keys:

    * ``conditions`` -- target values fed through ``scaler``; the ranking
      compares them with the predicted melting point, boiling point and water
      solubility, so presumably they are those three targets in that order.
    * ``num_rounds`` -- number of sampling rounds.
    * ``loyality`` -- minimum fraction (0..1) of the twelve drug-design rules
      a molecule must satisfy to be kept.
    * ``num_of_mols`` -- maximum number of molecules to return.

    In every round molecules are sampled from ``model``, standardised, sent to
    the prediction backend for endpoint values, and scored against the rules
    in ``_count_satisfied_rules``.

    Returns
    -------
    The ``jsonify`` response for at most ``num_of_mols`` entries of the form
    ``(smiles, satisfied_rules, (melting, boiling, solubility), mse)``, sorted
    by ascending ``mse`` between the scaled predicted endpoints and the scaled
    targets.
    """
    req_data = request.get_json()
    print("Data requested")
    print(req_data)
    conditions = req_data["conditions"]
    num_rounds = req_data["num_rounds"]
    loyality = req_data["loyality"]
    num_of_mols = req_data["num_of_mols"]

    number_generate = 100  # molecules requested from the model per round
    total_rules = 12  # number of checks in _count_satisfied_rules
    result_arr = []

    for round_idx in range(num_rounds):
        print(f"round {round_idx}")

        # Scaled targets: used both as the sampling condition and, further
        # down, as the reference the predicted endpoints are compared with.
        scaled_target = scaler.transform(np.array([conditions]))
        endp = torch.tensor(scaled_target)
        print(endp.shape)
        # One condition row per requested molecule, then three stacked copies.
        # NOTE(review): the factor 3 presumably matches what model.sample
        # expects (e.g. one copy per layer) -- confirm.
        endp = endp.repeat(number_generate, 1)
        endp = endp.unsqueeze(0)
        endp = endp.repeat(3, 1, 1)
        endp = endp.float()
        endp = endp.cuda()

        res = model.sample(endp, number_generate, dataset.model)
        valid = len(res) * 100 / number_generate
        print("valid : {} %".format(valid))

        # Keep only the molecules the standardiser accepts.
        mols = [robust_standardizer(mol) for mol in res]
        mols = [mol for mol in mols if mol is not None]
        print("Mols obtained")
        print(mols)

        vals_another = requests.post(
            "https://backend.syntelly.com/tempSmilesArrToPredict",
            json={'smiles': mols},
        ).json()

        # Per molecule: endpoint name -> predicted value.
        e2v = [
            {endpoints_id2name[e["endpoint_id"]]: e["value"] for e in entry["data"]}
            for entry in vals_another
        ]
        smiles = [entry['smiles'] for entry in vals_another]
        rd_mols = [Chem.MolFromSmiles(robust_standardizer(smi)) for smi in smiles]

        result = []
        predicted_targets = []
        for mol, endpoints in zip(rd_mols, e2v):
            satisfied = _count_satisfied_rules(mol, endpoints)
            # A molecule below the requested rule fraction is scored 0.
            result.append(satisfied if satisfied / total_rules >= loyality else 0)
            predicted_targets.append((
                endpoints['Melting point'],
                endpoints['Boiling point'],
                endpoints['Water Solubility'],
            ))
        print(result)

        for smi, score, predicted in zip(smiles, result, predicted_targets):
            if score > 0:
                error = mean_squared_error(
                    scaler.transform(np.array([predicted])),
                    scaled_target,
                )
                result_arr.append((smi, score, predicted, error))

    # Closest to the requested targets first.
    result_arr.sort(key=lambda item: item[3])
    print(result_arr[:num_of_mols])
    return jsonify(result_arr[:num_of_mols])


# Defined after predict() so that any decorator sitting directly above the
# original function keeps applying to predict().
def _count_satisfied_rules(mol, endpoints):
    """Count how many of the twelve drug-design rules one molecule satisfies.

    ``mol`` is an RDKit molecule; ``endpoints`` maps endpoint names to the
    values predicted by the backend.
    """
    weight = CalcExactMolWt(mol)
    logp = MolLogP(mol)
    atom_count = mol.GetNumAtoms()
    refractivity = MolMR(mol)
    checks = (
        160 <= weight <= 480,
        -0.4 <= logp <= 5.6,
        20 <= atom_count <= 70,
        40 <= refractivity <= 130,
        endpoints['Bioconcentration factor'] < 3,
        endpoints['Developmental toxicity'] == 'Negative',
        # NOTE(review): thresholds look like Kelvin values converted to
        # Celsius -- confirm the units returned by the backend.
        endpoints['Flash point'] > (350 - 273.15),
        endpoints['Boiling point'] > (300 - 273.15),
        CalcNumRings(mol) > 0,
        CalcNumRotatableBonds(mol) < 5,
        NumHAcceptors(mol) <= 10,
        NumHDonors(mol) <= 5,
    )
    return sum(1 for passed in checks if passed)