Example #1
0
 def save(self,
          force_insert=False,
          force_update=False,
          using=None,
          update_fields=None,
          *args,
          **kwargs):
     """Recompute the SMILES-derived attributes and persist the instance.

     When ``self.smiles`` is non-empty, ``mol``, ``mol_block``,
     ``mol_weight``, ``alogp``, ``hba``, ``hbd``, ``psa`` and ``rtb`` are
     recomputed from it, the row is written once, and only then ``formula``
     and ``bfp`` are assigned before the final write.  A ``ValueError`` or
     ``TypeError`` raised anywhere in that sequence is reported on stdout
     and the instance is still saved with whatever had been set so far.

     Parameters are the standard ``Model.save`` options and are forwarded
     to the parent implementation (they used to be silently dropped):

     * ``force_insert`` only applies to the first write performed here; a
       later write in the same call targets the row that already exists.
     * ``update_fields``: when it is non-empty and contains ``"smiles"``,
       the derived columns are added so they are written together with
       the value they were computed from.  ``None`` and an empty
       collection are passed through unchanged.
     """
     derived = ("mol", "mol_block", "mol_weight", "alogp", "hba", "hbd",
                "psa", "rtb", "formula", "bfp")
     fields = update_fields
     if update_fields and "smiles" in update_fields:
         # Only names that are real model fields may appear in
         # ``update_fields``; anything else would make Django reject it.
         known = set(f.name for f in self._meta.fields)
         fields = set(update_fields) | known.intersection(derived)

     def _write(insert):
         super(Compound, self).save(*args,
                                    force_insert=insert,
                                    force_update=force_update,
                                    using=using,
                                    update_fields=fields,
                                    **kwargs)

     row_written = False
     smiles = self.smiles
     if smiles:
         try:
             self.mol = Chem.MolFromSmiles(smiles)
             self.mol_block = Chem.MolToMolBlock(self.mol)
             self.mol_weight = Descriptors.ExactMolWt(self.mol)
             self.alogp = MolLogP(self.mol)
             self.hba = NumHAcceptors(self.mol)
             self.hbd = NumHDonors(self.mol)
             self.psa = Chem.MolSurf.TPSA(self.mol)
             self.rtb = NumRotatableBonds(self.mol)
             # NOTE(review): the row is written before ``bfp`` is assigned,
             # presumably because ``MORGANBV_FP(Value(...))`` is a database
             # expression that cannot be part of an INSERT -- confirm.
             _write(force_insert)
             row_written = True
             self.formula = Chem.rdMolDescriptors.CalcMolFormula(self.mol)
             self.bfp = MORGANBV_FP(Value(smiles))
         except (ValueError, TypeError):
             # Best effort: keep going and save what we have.
             print("Error when storing mol object")
     # Never force a second INSERT for a row this call already created.
     _write(force_insert and not row_written)
Example #2
0
    def save(self,
             force_insert=False,
             force_update=False,
             using=None,
             update_fields=None):
        """Fill in the derived attributes and persist the instance.

        ``molecule_chembl_id_url`` is rebuilt from ``molecule_chembl_id`` and
        the row is written.  If ``molecule_smile`` is non-empty, ``mol``,
        ``mol_block``, ``mol_weight``, ``alogp``, ``hba``, ``hbd``, ``psa``
        and ``rtb`` are recomputed and written, then ``formula`` and ``bfp``
        are assigned before the final write.  A ``ValueError`` or
        ``TypeError`` raised while deriving values is reported on stdout and
        the final write still happens.

        Parameters are the standard ``Model.save`` options and are forwarded
        to the parent implementation (they used to be silently dropped):

        * ``force_insert`` only applies to the first write; the following
          writes in the same call target the row that already exists.
        * ``update_fields``: when non-empty, the columns derived from any
          listed source field (``molecule_chembl_id`` / ``molecule_smile``)
          are added so they are written together with it.  ``None`` and an
          empty collection are passed through unchanged.
        """
        fields = update_fields
        if update_fields:
            derived = set()
            if "molecule_chembl_id" in update_fields:
                derived.add("molecule_chembl_id_url")
            if "molecule_smile" in update_fields:
                derived.update(("mol", "mol_block", "mol_weight", "alogp",
                                "hba", "hbd", "psa", "rtb", "formula", "bfp"))
            # Only names that are real model fields may appear in
            # ``update_fields``; anything else would make Django reject it.
            known = set(f.name for f in self._meta.fields)
            fields = set(update_fields) | (derived & known)

        def _write(insert):
            super(ChEMBL_small_molecule, self).save(force_insert=insert,
                                                    force_update=force_update,
                                                    using=using,
                                                    update_fields=fields)

        self.molecule_chembl_id_url = 'https://www.ebi.ac.uk/chembl/compound/inspect/{}'.format(
            self.molecule_chembl_id)
        _write(force_insert)

        smiles = self.molecule_smile

        if smiles:
            try:
                self.mol = Chem.MolFromSmiles(smiles)
                self.mol_block = Chem.MolToMolBlock(self.mol)
                self.mol_weight = Descriptors.ExactMolWt(self.mol)
                self.alogp = MolLogP(self.mol)
                self.hba = NumHAcceptors(self.mol)
                self.hbd = NumHDonors(self.mol)
                self.psa = Chem.MolSurf.TPSA(self.mol)
                self.rtb = NumRotatableBonds(self.mol)
                # Intermediate write kept from the original flow: the
                # descriptors are stored even if the final write fails.
                _write(False)
                self.formula = Chem.rdMolDescriptors.CalcMolFormula(self.mol)
                self.bfp = MORGANBV_FP(Value(smiles))
            except (ValueError, TypeError):
                # Best effort: keep going and save what we have.
                print('Error when storing mol object')
        # The row exists after the first write, so never force an INSERT here.
        _write(False)
Example #3
0
    def analyze(self, smiles: List[str], only_drugs=True) -> pd.DataFrame:
        """Describe every molecule in ``smiles`` and optionally keep only drug-like ones.

        The returned frame has one row per input SMILES with the columns
        ``key`` (InChIKey), ``smiles``, ``weight``, ``logp``, ``hdonors``,
        ``hacceptors``, ``safety``, ``feasibility`` and ``bbbp`` (the second
        entry of each ``predict_proba`` row).

        With ``only_drugs`` set, rows are kept only if they satisfy the
        Lipinski-style cut-offs (weight < 500, donors <= 5, acceptors <= 10,
        logp <= 5) and score above 0.75 for both safety and feasibility; the
        index is then renumbered from zero.

        Raises:
            ValueError: if any entry of ``smiles`` cannot be parsed.
        """
        features = self.preprocessor.transform(smiles)

        # RDKit descriptors, collected column by column.
        columns = {
            name: []
            for name in ("key", "weight", "logp", "hdonors", "hacceptors")
        }
        for smi in smiles:
            mol = MolFromSmiles(smi)
            if not mol:
                raise ValueError("Malformed molecule passed in to analyze")

            columns["key"].append(MolToInchiKey(mol))
            columns["weight"].append(ExactMolWt(mol))
            columns["logp"].append(MolLogP(mol))
            columns["hdonors"].append(NumHDonors(mol))
            columns["hacceptors"].append(NumHAcceptors(mol))

        # Model scores for the same molecules.
        safety = self.safety.predict(features)
        feasibility = self.feasibility.predict(features)
        bbbp = self.bbbp.predict_proba(features)

        dataframe = pd.DataFrame(
            {
                "key": columns["key"],
                "smiles": smiles,
                "weight": columns["weight"],
                "logp": columns["logp"],
                "hdonors": columns["hdonors"],
                "hacceptors": columns["hacceptors"],
                "safety": safety,
                "feasibility": feasibility,
                "bbbp": [row[1] for row in bbbp],
            }
        )

        if not only_drugs:
            return dataframe

        drug_like = (
            (dataframe.weight < 500)
            & (dataframe.hdonors <= 5)
            & (dataframe.hacceptors <= 10)
            & (dataframe.logp <= 5)
        )
        # Drop compounds that are too toxic or infeasible.
        acceptable = (dataframe.safety > 0.75) & (dataframe.feasibility > 0.75)

        return dataframe[drug_like & acceptable].reset_index(drop=True)
Example #4
0
def get_global_features(mol):
    """Build the molecule-level descriptor vector for ``mol``.

    Parameters
    ----------
    mol : rdkit mol

    Returns
    -------
    np.ndarray
        ``float32`` array holding, in order, the results of ``MolWt``,
        ``CalcTPSA``, ``MolLogP`` and ``NumHDonors`` applied to ``mol``.
    """
    descriptor_fns = (MolWt, CalcTPSA, MolLogP, NumHDonors)
    return np.array([fn(mol) for fn in descriptor_fns], dtype=np.float32)
Example #5
0
def lipinski_filter(smiles):
    """Return whether ``smiles`` passes the drug-likeness cut-offs used here.

    A molecule passes when ``MolLogP <= 5``, ``NumHAcceptors <= 10``,
    ``NumHDonors <= 5`` and ``100 <= ExactMolWt <= 500``.

    A SMILES string that cannot be parsed is rejected (``False``) instead of
    raising from the descriptor calls.
    """
    mol = MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None.
        return False
    return (
        MolLogP(mol) <= 5
        and NumHAcceptors(mol) <= 10
        and NumHDonors(mol) <= 5
        and 100 <= ExactMolWt(mol) <= 500
    )
Example #6
0
 def _calculate_phys_chem_property(self, mol):
     """Return the value of ``NumHDonors`` for ``mol``."""
     donor_count = NumHDonors(mol)
     return donor_count
Example #7
0
    # Per-molecule accumulators filled by the loop below.
    # NOTE(review): ``mws`` and ``logps`` are appended to further down but are
    # not initialised in this part of the file -- presumably defined just above.
    nhdonors = []
    values = []
    dataset = []

    # One pickled file per label-guide key, plus the extra "cyp" data set.
    for data in list(LABEL_GUIDE.keys()) + ["cyp"]:
        # NOTE(security): pickle.load can execute arbitrary code contained in
        # the file; only use it on trusted data files.
        with open(os.path.join(DATA_PATH, data, f"data_{data}.pt"),
                  "rb") as handle:
            inchis, v = pickle.load(handle)

        # NOTE(review): assumes ``v`` has one entry per InChI so ``values``
        # stays aligned with the per-molecule lists -- confirm.
        values.extend(v)

        for inchi in tqdm(inchis):
            # NOTE(review): the parse result is used without a check; confirm
            # MolFromInchi cannot return None for these inputs.
            mol = MolFromInchi(inchi)
            mws.append(MolWt(mol))
            logps.append(MolLogP(mol))
            nhdonors.append(NumHDonors(mol))
            # Tag each molecule with the display name of its source data set.
            dataset.append(DATASET_GUIDE[data])

    # Long-format table: one row per molecule, columns named for plotting.
    df = pd.DataFrame({
        "Molecular weight (gr./mol)": mws,
        r"aLog$P$": logps,
        "No. hydrogen donors": nhdonors,
        "values": values,
        "dataset": dataset,
    })

    # Three side-by-side axes, each with a faint background grid.
    f, axs = plt.subplots(1, 3, figsize=(18, 6))

    axs[0].grid(alpha=0.5)
    axs[1].grid(alpha=0.5)
    axs[2].grid(alpha=0.5)
def predict():
    """Sample molecules for the requested property targets and rank them.

    Reads a JSON request body with the keys

    * ``conditions`` -- target values fed to ``scaler.transform``; they are
      compared against (melting point, boiling point, water solubility), so
      three values are expected,
    * ``num_rounds`` -- number of sampling rounds,
    * ``loyality`` -- minimum fraction of the drug-design rules a molecule
      must satisfy to be kept,
    * ``num_of_mols`` -- maximum number of molecules to return.

    In every round ``model.sample`` generates candidates, the remote
    prediction service supplies their endpoint values, and each candidate is
    scored against the rule set in ``_count_passed_rules``.  Candidates whose
    SMILES cannot be standardized or parsed are skipped.

    Returns the ``jsonify`` response for a list of
    ``(smiles, score, (melting point, boiling point, water solubility), mse)``
    tuples, sorted by ascending ``mse`` between the scaled predicted values
    and the scaled targets, truncated to ``num_of_mols`` entries.
    """
    req_data = request.get_json()
    print("Data requested")
    print(req_data)
    conditions = req_data["conditions"]
    num_rounds = req_data["num_rounds"]
    loyality = req_data["loyality"]
    num_of_mols = req_data["num_of_mols"]

    number_generate = 100
    # The scaled target does not change between rounds, so compute it once.
    target_scaled = scaler.transform(np.array([conditions]))

    result_arr = []
    for round_idx in range(num_rounds):
        print(f"round {round_idx}")
        endp = torch.tensor(target_scaled)
        print(endp.shape)

        # NOTE(review): the per-sample repeat count is assumed to have to
        # match ``number_generate`` (both were 100) -- confirm.
        endp = endp.repeat(number_generate, 1)
        endp = endp.unsqueeze(0)
        endp = endp.repeat(3, 1, 1)
        endp = endp.float().cuda()

        res = model.sample(endp, number_generate, dataset.model)
        valid = len(res) * 100 / number_generate
        print("valid : {} %".format(valid))

        standardized = (robust_standardizer(mol) for mol in res)
        mols = [mol for mol in standardized if mol is not None]
        print("Mols obtained")
        print(mols)

        # NOTE(review): no timeout or HTTP status check on this call; a slow
        # or failing backend blocks or breaks the whole request.
        vals_another = requests.post("https://backend.syntelly.com/tempSmilesArrToPredict",
                                     json={'smiles': mols}).json()

        result = [0] * len(vals_another)
        accepted = []
        for idx, entry in enumerate(vals_another):
            smiles = entry['smiles']
            # Endpoint name -> predicted value for this molecule.
            props = {endpoints_id2name[e["endpoint_id"]]: e['value']
                     for e in entry['data']}

            # robust_standardizer and MolFromSmiles both report failure by
            # returning None; such entries cannot be scored.
            clean_smiles = robust_standardizer(smiles)
            mol = None if clean_smiles is None else Chem.MolFromSmiles(clean_smiles)
            if mol is None:
                continue

            measured = (props['Melting point'],
                        props['Boiling point'],
                        props['Water Solubility'])
            passed, total = _count_passed_rules(mol, props)
            if passed / total >= loyality:
                result[idx] = passed
            if result[idx] > 0:
                accepted.append((smiles, passed, measured))

        print(result)
        for smiles, score, measured in accepted:
            candidate_scaled = scaler.transform(np.array([measured]))
            result_arr.append((smiles, score, measured,
                               mean_squared_error(candidate_scaled, target_scaled)))

    result_arr.sort(key=lambda x: x[3])

    top = result_arr[:num_of_mols]
    print(top)
    return jsonify(top)


def _count_passed_rules(mol, props):
    """Return ``(passed, total)`` for the drug-design rules applied to ``mol``.

    ``props`` maps endpoint names to the predicted values for the molecule.
    """
    checks = (
        160 <= CalcExactMolWt(mol) <= 480,
        -0.4 <= MolLogP(mol) <= 5.6,
        20 <= mol.GetNumAtoms() <= 70,
        40 <= MolMR(mol) <= 130,
        props['Bioconcentration factor'] < 3,
        props['Developmental toxicity'] == 'Negative',
        # Thresholds are written as "value - 273.15", presumably a
        # Kelvin-to-Celsius conversion -- TODO confirm the endpoint units.
        props['Flash point'] > (350 - 273.15),
        props['Boiling point'] > (300 - 273.15),
        CalcNumRings(mol) > 0,
        CalcNumRotatableBonds(mol) < 5,
        NumHAcceptors(mol) <= 10,
        NumHDonors(mol) <= 5,
    )
    # Count explicitly so the score is always a plain Python int.
    return sum(1 for ok in checks if ok), len(checks)