Beispiel #1
0
def lipinski_rule(mol):
    """Evaluate four Lipinski-style threshold checks for ``mol``.

    Args:
        mol: Molecule object passed to the descriptor functions below
            (presumably an RDKit ``Mol`` -- confirm against callers).

    Returns:
        A list with four comparison results, in this order:
        ``Lipinski.NHOHCount(mol) <= 5``, ``Lipinski.NOCount(mol) <= 10``,
        ``Descriptors.ExactMolWt(mol) <= 500`` and
        ``LogP('logP').run(fingerprint) <= 5``.
    """
    # The logP model receives the hashed atom-pair bit vector, not the
    # molecule itself, so the fingerprint is computed up front.
    atom_pair_bits = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)

    nhoh_ok = Lipinski.NHOHCount(mol) <= 5
    no_ok = Lipinski.NOCount(mol) <= 10
    weight_ok = Descriptors.ExactMolWt(mol) <= 500
    # NOTE(review): a new LogP('logP') instance is created on every call.
    logp_ok = LogP('logP').run(atom_pair_bits) <= 5

    return [nhoh_ok, no_ok, weight_ok, logp_ok]
Beispiel #2
0
# This model predicts the solubility of a molecule by combining the logP
#   regression model and the fingerprint regression model
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import numpy as np
import pickle
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors, Descriptors
from chemical_models import AtomPairSolubility, LogP, LogPSolubility

# Input data file, read line by line by the loop below.
# NOTE(review): the handle is never closed in the visible code; consider
# wrapping the read in a `with` block.
data = open('data/water_solubility/aqsol.txt', 'r')

# Models from chemical_models, each constructed with a name string.
logP_model = LogP('logP')
logP_solubility_model = LogPSolubility('logS_logP')
atom_pair_sol_model = AtomPairSolubility('water_solubility')

# Presumably the feature rows (X) and targets (Y) collected by the loop
# below -- confirm against the rest of the script.
X, Y = [], []

for line in data.readlines():
    split = line.split(' ')

    mol = Chem.MolFromSmiles(split[0])
    fingerprint = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)

    logP = logP_model.run(fingerprint)
    logP_sol = logP_solubility_model.run(logP)
    atom_pair_sol = atom_pair_sol_model.run(fingerprint)

    # Additional ESOL empirical model to increase accuracy