def lipinski_rule(mol):
    """Evaluate four Lipinski-style threshold checks for ``mol``.

    Args:
        mol: Molecule object passed straight to the RDKit descriptor
            functions (presumably an ``rdkit.Chem.Mol`` -- confirm at
            the call sites).

    Returns:
        A list with the four comparison results, in this order:

        1. ``Lipinski.NHOHCount(mol) <= 5``
        2. ``Lipinski.NOCount(mol) <= 10``
        3. ``Descriptors.ExactMolWt(mol) <= 500``
        4. ``LogP('logP').run(fingerprint) <= 5``
    """
    # The LogP model is fed the hashed atom-pair bit-vector fingerprint,
    # not the molecule itself.
    atom_pair_bits = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)

    # Checks are evaluated in the same order as the returned list.
    nhoh_within_limit = Lipinski.NHOHCount(mol) <= 5
    no_within_limit = Lipinski.NOCount(mol) <= 10
    weight_within_limit = Descriptors.ExactMolWt(mol) <= 500
    # NOTE(review): a fresh LogP('logP') object is built on every call;
    # presumably this yields a predicted logP value -- confirm against
    # chemical_models.LogP.
    logp_within_limit = LogP('logP').run(atom_pair_bits) <= 5

    return [
        nhoh_within_limit,
        no_within_limit,
        weight_within_limit,
        logp_within_limit,
    ]
# This model predicts solubility of a molecule by combining the logP # regression model and Fingerprint regression model from sklearn.neural_network import MLPRegressor from sklearn.model_selection import train_test_split from sklearn import preprocessing import numpy as np import pickle from rdkit import Chem from rdkit.Chem import rdMolDescriptors, Descriptors from chemical_models import AtomPairSolubility, LogP, LogPSolubility data = open('data/water_solubility/aqsol.txt', 'r') logP_model = LogP('logP') logP_solubility_model = LogPSolubility('logS_logP') atom_pair_sol_model = AtomPairSolubility('water_solubility') X = [] Y = [] for line in data.readlines(): split = line.split(' ') mol = Chem.MolFromSmiles(split[0]) fingerprint = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol) logP = logP_model.run(fingerprint) logP_sol = logP_solubility_model.run(logP) atom_pair_sol = atom_pair_sol_model.run(fingerprint) # Additional ESOL empirical model to increase accuracy