Esempio n. 1
0
def check_ligand(file_path):
    """Return True if any molecule in an SD file passes the drug-likeness screen.

    A molecule passes when it satisfies all four rule components
    (H-bond donors <= 5, H-bond acceptors <= 10, exact mass <= 500,
    Crippen logP <= 5) and at least four of the five variant criteria
    (-0.4 <= logP <= 5.6, 40 <= molar refractivity <= 130,
    180 <= exact mass <= 500, 20 <= heavy atoms <= 70, TPSA <= 140).

    :param file_path: path to the SD file to screen
    :return: True if at least one molecule passes; False otherwise,
        including when ``file_path`` is not an existing file
    """
    if not os.path.isfile(file_path):
        return False

    for mol in Chem.SDMolSupplier(file_path):
        if mol is None:
            # Skip entries the supplier handed back as None.
            continue

        # Both values feed the rule components and the variants.
        log_p = Crippen.MolLogP(mol)
        exact_mass = Descriptors.ExactMolWt(mol)

        # components of rule: all four must hold
        components_ok = (Lipinski.NumHDonors(mol) <= 5
                         and Lipinski.NumHAcceptors(mol) <= 10
                         and exact_mass <= 500
                         and log_p <= 5)
        if not components_ok:
            continue

        # variants: at least four of the five must hold
        variants_rank = sum((
            -0.4 <= log_p <= 5.6,
            40 <= Crippen.MolMR(mol) <= 130,
            180 <= exact_mass <= 500,
            20 <= Lipinski.HeavyAtomCount(mol) <= 70,
            MolSurf.TPSA(mol) <= 140,
        ))
        if variants_rank >= 4:
            # One passing molecule is enough; no need to scan the rest.
            return True
    return False
Esempio n. 2
0
 def testIssue80(self):
     """Check that MolLogP gives the same value before and after a
     Lipinski.NHOHCount call on the same molecule."""
     from rdkit.Chem import Lipinski
     m = Chem.MolFromSmiles('CCOC')
     ref = Crippen.MolLogP(m)
     Lipinski.NHOHCount(m)
     probe = Crippen.MolLogP(m)
     # assertEqual replaces the deprecated failUnless alias and reports
     # both values on failure.
     self.assertEqual(probe, ref)
def canonicalize(smi_list, showprogress=False):
    """Canonicalise, de-duplicate and filter a list of SMILES strings.

    Every parsable SMILES is converted to its canonical form and duplicates
    are removed through a set (so output order is not guaranteed). Only
    entries with more than 17 heavy atoms and Crippen logP <= 5 are kept.

    :param smi_list: iterable of SMILES strings
    :param showprogress: when true, print status lines and wrap both passes
        in tqdm progress bars
    :return: list of canonical SMILES that pass the filter
    """
    if showprogress:
        print('Canonicalising mols')
    canonical = []
    for raw in (tqdm(smi_list) if showprogress else smi_list):
        parsed = MolFromSmiles(raw)
        if parsed is not None:
            canonical.append(MolToSmiles(parsed))
    unique = list(set(canonical))

    if showprogress:
        print('Size of unfiltered final library: {}'.format(len(unique)))
        print('Filtering by n_heavy and logP:')
    kept = []
    for candidate in (tqdm(unique) if showprogress else unique):
        parsed = MolFromSmiles(candidate)
        # logP is only evaluated for molecules that clear the size cut.
        if parsed.GetNumHeavyAtoms() > 17 and Crippen.MolLogP(parsed) <= 5:
            kept.append(candidate)
    return kept
Esempio n. 4
0
def properties_mw_logp(filepaths):
    """Collect (MW, logP, label) rows from CSV files into a DataFrame.

    For files whose name does not contain "generated", columns 2 and 3 of
    each row are read as floats. For "generated" files, column 0 is parsed
    as SMILES and MW / logP are computed from the molecule. The label is the
    index of the file within ``filepaths``.

    :param filepaths: sequence of CSV file paths
    :return: DataFrame with columns ``MW``, ``logP`` and ``Label`` holding
        only collected rows 2000..2354 (the slice is hard-coded)
    """
    properties = []

    for label, fname in enumerate(filepaths):
        is_generated = "generated" in fname
        with open(fname, 'r') as f:
            for row in csv.reader(f):
                if not is_generated:
                    try:
                        properties.append(
                            [float(row[2]), float(row[3]), label])
                    except (ValueError, IndexError):
                        # Header, short or non-numeric row: skip it.
                        print("")
                    continue

                if not row:
                    # Blank line: there is no SMILES column to read, and
                    # indexing row[0] in the handler would itself raise.
                    continue
                smiles = row[0]
                try:
                    mol = Chem.MolFromSmiles(smiles)
                    x, y = desc.MolWt(mol), Crippen.MolLogP(mol)
                    properties.append([x, y, label])
                except Exception:
                    # Best-effort: report the offending SMILES and move on.
                    print("Non-Canonical SMILES: " + smiles)

    # NOTE(review): the fixed 2000:2355 window is kept as in the original;
    # confirm with callers whether it should become a parameter.
    df = pd.DataFrame(properties[2000:2355], columns=['MW', 'logP', 'Label'])
    return df
Esempio n. 5
0
def get_filter_values(mol):
    """
    Compute the per-molecule values used for filtering.

    :param mol: molecule to describe; must be a Chem.Mol instance
    :return: dictionary of descriptor values plus bookkeeping entries
        ("is_good" defaulting to True and an empty "rejections" list)
    """

    assert isinstance(mol, Chem.Mol)

    values = {
        "MW": desc.CalcExactMolWt(mol),
        "logP": crip.MolLogP(mol),
        "HBA": lip.NumHAcceptors(mol),
        "HBD": lip.NumHDonors(mol),
        "tPSA": desc.CalcTPSA(mol),
        "rot_bonds": lip.NumRotatableBonds(mol),
    }
    # Every bond that is not rotatable is counted as rigid (assumes the two
    # categories are mutually exclusive).
    values["rigid_bonds"] = mol.GetNumBonds() - values["rot_bonds"]
    values["num_rings"] = lip.RingCount(mol)
    values["num_hetero_atoms"] = lip.NumHeteroatoms(mol)
    # Formal charge as reported by rdmolops; taken at face value.
    values["charge"] = rdmolops.GetFormalCharge(mol)

    carbon_count, charge_count, largest_ring = get_atom_props(mol)
    values["num_carbons"] = carbon_count
    values["num_charges"] = charge_count
    values["max_ring_size"] = largest_ring

    hetero_total = float(values["num_hetero_atoms"])
    carbon_total = float(values["num_carbons"])
    if carbon_total == 0:
        # No carbons at all: use a large sentinel ratio.
        values["hc_ratio"] = 100000000
    else:
        values["hc_ratio"] = hetero_total / carbon_total

    # Number of BRICS bonds, used as a complexity indicator.
    values["fc"] = len(list(Brics.FindBRICSBonds(mol)))
    values["is_good"] = True  # default to true, but not yet observed
    # Distinct element symbols present in the molecule.
    values["atoms"] = list(set(atom.GetSymbol() for atom in mol.GetAtoms()))
    values["num_chiral_centers"] = len(
        Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    values["rejections"] = []  # reasons for rejection are appended later

    return values
Esempio n. 6
0
def LogP(smile):
    """Return the Crippen logP for a SMILES string.

    :param smile: SMILES code; converted with ``str`` before parsing
    :return: the logP value, or the string 'NaN' when it cannot be computed
    """
    smile = str(smile)
    try:
        m = Chem.MolFromSmiles(smile)
        return Crippen.MolLogP(m)
    except Exception:
        # Deliberate best-effort, but no longer swallows KeyboardInterrupt
        # or SystemExit the way a bare except did.
        return 'NaN'
Esempio n. 7
0
def ProcessMol(mol,typeConversions,globalProps,nDone,nameProp='_Name',nameCol='compound_id',
               redraw=False,keepHs=False,
               skipProps=False,addComputedProps=False,
               skipSmiles=False,
               uniqNames=None,namesSeen=None):
  """ Build the database row for a single molecule.

  Arguments:
    - mol: the molecule to process
    - typeConversions: indexable by column-type code; element [1] of each
      entry is called to convert a property string
    - globalProps: dict mapping property name -> column-type code; updated
      in place with the type that succeeded for each property
    - nDone: index used to build a fallback name ('Mol_<nDone>')
    - nameProp: molecule property holding the compound name
    - nameCol: property name (case-insensitive) left out of the property dict
    - redraw: if true, compute 2D coordinates for the molecule
    - keepHs: if true, sanitize the molecule first
    - skipProps: if true, no properties are collected
    - addComputedProps: if true, donor/acceptor/rotatable-bond counts, MW and
      logP are stored on the molecule as properties before collection
    - skipSmiles: if true, the SMILES is left out of the row
    - uniqNames: if true, a name already in namesSeen causes the molecule to
      be skipped
    - namesSeen: optional set of names already used; updated in place

  Returns:
    [name, (smiles), binary molecule holder, property dict], or None when the
    molecule is skipped as a duplicate

  Raises:
    ValueError if mol is empty/false
  """
  if not mol:
    raise ValueError('no molecule')
  if keepHs:
    Chem.SanitizeMol(mol)
  try:
    nm = mol.GetProp(nameProp)
  except KeyError:
    nm = None
  if not nm:
    nm = 'Mol_%d'%nDone
  # namesSeen defaults to None; only consult/update it when the caller
  # supplied a container (previously this raised on the default).
  if namesSeen is not None:
    if uniqNames and nm in namesSeen:
      logger.error('duplicate compound id (%s) encountered. second instance skipped.'%nm)
      return None
    namesSeen.add(nm)
  row = [nm]
  pD={}
  if not skipProps:
    if addComputedProps:
      nHD=Lipinski.NumHDonors(mol)
      mol.SetProp('DonorCount',str(nHD))
      nHA=Lipinski.NumHAcceptors(mol)
      mol.SetProp('AcceptorCount',str(nHA))
      nRot=Lipinski.NumRotatableBonds(mol)
      mol.SetProp('RotatableBondCount',str(nRot))
      MW=Descriptors.MolWt(mol)
      mol.SetProp('AMW',str(MW))
      logp=Crippen.MolLogP(mol)
      mol.SetProp('MolLogP',str(logp))

    for pn in list(mol.GetPropNames()):
      if pn.lower()==nameCol.lower(): continue
      pv = mol.GetProp(pn).strip()
      if pv.find('>')<0 and pv.find('<')<0:
        # Start from the type recorded so far (default 2) and step down
        # until a conversion succeeds or type 0 is reached.
        colTyp = globalProps.get(pn,2)
        while colTyp>0:
          try:
            typeConversions[colTyp][1](pv)
          except Exception:
            colTyp-=1
          else:
            break
        globalProps[pn]=colTyp
        pD[pn]=typeConversions[colTyp][1](pv)
      else:
        # Values containing '<' or '>' are stored as raw strings.
        pD[pn]=pv
  if redraw:
    # Fixed: the original passed the undefined name `m`.
    AllChem.Compute2DCoords(mol)
  if not skipSmiles:
    row.append(Chem.MolToSmiles(mol,True))
  row.append(DbModule.binaryHolder(mol.ToBinary()))
  row.append(pD)
  return row
Esempio n. 8
0
def logP(smile, train_smiles=None):
    """Return the Crippen logP of a SMILES string rescaled into [0, 1].

    The raw value is passed through ``remap`` with fixed lower and upper
    bounds and then clipped. ``train_smiles`` is accepted but not used.
    """
    lower_bound, upper_bound = -2.12178879609, 6.0429063424
    raw = Crippen.MolLogP(Chem.MolFromSmiles(smile))
    return np.clip(remap(raw, lower_bound, upper_bound), 0.0, 1.0)
  def _testLogPLong2(self):
      """ Regression check of MolLogP (hydrogens included) against
      column 33 of PP_descrs_regress.2.csv. """

      def logp_with_hs(x):
          return Crippen.MolLogP(x, includeHs=1)

      self.__testDesc('PP_descrs_regress.2.csv', 33, logp_with_hs)
Esempio n. 10
0
    def calc_lipinski(self, mol):
        """
        Evaluate Lipinski's rules for a molecule.

        Returns:     a tuple consisting of:
            - a boolean indicating whether the molecule passed Lipinski test
            - a dictionary giving the values of the Lipinski check.

        NOTE:   The rules applied are:
            - Hydrogen bond donors <= 5
            - Hydrogen bond acceptors <= 10
            - Molecular weight < 500 daltons
            - logP < 5 (logP is rounded to 4 decimals before the test)
        """

        report = {
            'hydrogen_bond_donors': Lipi.NumHDonors(mol),
            'hydrogen_bond_acceptors': Lipi.NumHAcceptors(mol),
            'molecular_weight': Descriptors.MolWt(mol),
            'logp': round(Crippen.MolLogP(mol), 4),
        }
        passed = (report['hydrogen_bond_donors'] <= 5
                  and report['hydrogen_bond_acceptors'] <= 10
                  and report['molecular_weight'] < 500
                  and report['logp'] < 5)
        return (passed, report)
Esempio n. 11
0
def generate(smiles):
    """Compute a descriptor table for a collection of SMILES strings.

    :param smiles: iterable of SMILES strings
    :return: DataFrame with one row per input and the float columns
        ``MolLogP``, ``MolWt``, ``NumRotatableBonds`` and
        ``AromaticProportion``; empty input gives an empty frame with the
        same columns
    """
    columnNames = ["MolLogP", "MolWt", "NumRotatableBonds", "AromaticProportion"]

    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        rows.append([Crippen.MolLogP(mol),
                     Descriptors.MolWt(mol),
                     Lipinski.NumRotatableBonds(mol),
                     getAromaticProportion(mol)])

    # Build the array once and force a 2-D shape so that zero or one
    # molecule still lines up with the four columns (repeated vstack left a
    # single row 1-D, which the DataFrame constructor rejects).
    baseData = np.array(rows, dtype=float).reshape(-1, len(columnNames))
    descriptors = pd.DataFrame(data=baseData, columns=columnNames)

    return descriptors
Esempio n. 12
0
def properties(mol):
    """
  Compute the property set from which the QED descriptor is derived.

  Raises ValueError when mol is None. Explicit hydrogens are removed before
  any property is evaluated.
  """
    if mol is None:
        raise ValueError('You need to provide a mol argument.')
    mol = Chem.RemoveHs(mol)

    mol_weight = rdmd._CalcMolWt(mol)
    alogp = Crippen.MolLogP(mol)

    # Total number of matches over all acceptor patterns that hit.
    acceptor_hits = 0
    for pattern in Acceptors:
        if mol.HasSubstructMatch(pattern):
            acceptor_hits += len(mol.GetSubstructMatches(pattern))

    donor_count = rdmd.CalcNumHBD(mol)
    polar_area = MolSurf.TPSA(mol)
    rotatable = rdmd.CalcNumRotatableBonds(
        mol, rdmd.NumRotatableBondsOptions.Strict)

    # Ring perception on a copy with the aliphatic rings deleted.
    # The replacement
    # AROM=Lipinski.NumAromaticRings(mol),
    # is not identical. The expression used here tends to count more rings
    # N1C2=CC=CC=C2SC3=C1C=CC4=C3C=CC=C4
    # OC1=C(O)C=C2C(=C1)OC3=CC(=O)C(=CC3=C2C4=CC=CC=C4)O
    # CC(C)C1=CC2=C(C)C=CC2=C(C)C=C1  uses 2, should be 0 ?
    without_aliphatic = Chem.DeleteSubstructs(Chem.Mol(mol), AliphaticRings)
    aromatic = Chem.GetSSSR(without_aliphatic)

    # Number of structural-alert patterns present (each counted once).
    alert_hits = len(
        [alert for alert in StructuralAlerts if mol.HasSubstructMatch(alert)])

    return QEDproperties(
        MW=mol_weight,
        ALOGP=alogp,
        HBA=acceptor_hits,
        HBD=donor_count,
        PSA=polar_area,
        ROTB=rotatable,
        AROM=aromatic,
        ALERTS=alert_hits,
    )
Esempio n. 13
0
    def water_octanol_partition_coefficient_scores(mols, norm=False):
        """Return an array of Crippen logP scores, one per molecule.

        Entries that are None, or whose guarded evaluation yields None, are
        scored -3. With ``norm`` set, the scores are remapped with fixed
        bounds and clipped to [0, 1].
        """
        raw = []
        for mol in mols:
            if mol is None:
                raw.append(None)
            else:
                raw.append(MolecularMetrics._avoid_sanitization_error(
                    lambda: Crippen.MolLogP(mol)))

        scores = np.array([-3 if value is None else value for value in raw])
        if norm:
            scores = np.clip(
                MolecularMetrics.remap(scores, -2.12178879609, 6.0429063424),
                0.0, 1.0)

        return scores
Esempio n. 14
0
def get_properties(mols):
    """Return a list of (molecular weight, logP) tuples, one per molecule.

    Iteration is wrapped in a tqdm progress bar.
    """
    return [(Descriptors.MolWt(mol), Crippen.MolLogP(mol))
            for mol in tqdm(mols)]
Esempio n. 15
0
  def testRepeat(self):
    """ Check that logP and MR still match the reference data when each
    is computed twice on the same molecule. """
    self._readData()
    for i, smi in enumerate(self.smis):
      mol = Chem.MolFromSmiles(smi)

      clog = self.clogs[i]
      # The double call is deliberate: the second result is the one checked.
      tmp = Crippen.MolLogP(mol)
      tmp = Crippen.MolLogP(mol)
      self.assertTrue(feq(clog,tmp),'bad logp for %s: %4.4f != %4.4f'%(smi,clog,tmp))

      mr = self.mrs[i]
      tmp = Crippen.MolMR(mol)
      tmp = Crippen.MolMR(mol)
      self.assertTrue(feq(mr,tmp),'bad MR for %s: %4.4f != %4.4f'%(smi,mr,tmp))
Esempio n. 16
0
def logP(smile, train_smiles):
    """Return the Crippen logP of a SMILES string rescaled into [0, 1].

    :param smile: SMILES string of the molecule
    :param train_smiles: accepted but not used
    :return: the remapped logP, clipped to [0, 1]
    """
    low_logp = -2.10799552492
    high_logp = 2.71567964162
    logp = Crippen.MolLogP(Chem.MolFromSmiles(smile))
    val = remap(logp, low_logp, high_logp)
    # Clip the remapped value; the original clipped the raw logP, which
    # discarded the remapping entirely.
    val = np.clip(val, 0.0, 1.0)
    return val
Esempio n. 17
0
def logP(mol, train_smiles=None):
    """Return the Crippen logP of a molecule.

    When the module-level NORMALIZE flag is set, the value is remapped with
    fixed bounds and clipped to [0, 1]; otherwise the raw value is returned.
    ``train_smiles`` is accepted but not used.
    """
    raw = Crippen.MolLogP(mol)
    if not NORMALIZE:
        return raw
    return np.clip(remap(raw, -2.12178879609, 6.0429063424), 0.0, 1.0)
Esempio n. 18
0
def _rdkit_eval(entry: dict) -> dict:
    """Add computed chemical properties to an entry in place.

    The molecule is built from ``entry['smiles']``; the keys ``logP``,
    ``QED`` and ``SA_score`` are then written into the same dictionary,
    which is also returned.
    """
    molecule = Chem.MolFromSmiles(entry['smiles'])
    scorers = (
        ('logP', Crippen.MolLogP),
        ('QED', QED.qed),
        ('SA_score', calculateScore),
    )
    for key, scorer in scorers:
        entry[key] = scorer(molecule)
    return entry
Esempio n. 19
0
    def testLipinskiLong(self):
        """ Long-running descriptor regression checks (skipped unless doLong is set) """
        if not doLong:
            raise unittest.SkipTest('long test')

        def logp_with_hs(x):
            return Crippen.MolLogP(x, includeHs=1)

        # (reference file, column index, descriptor function), in run order
        checks = (
            ('PP_descrs_regress.csv', 30, Lipinski.NumHDonors),
            ('PP_descrs_regress.csv', 31, Lipinski.NumHeteroatoms),
            ('PP_descrs_regress.csv', 32, Lipinski.NumRotatableBonds),
            ('PP_descrs_regress.csv', 33, logp_with_hs),
            ('Block_regress.Lip.csv', 1, Lipinski.NumHAcceptors),
            ('Block_regress.Lip.csv', 2, Lipinski.NumHDonors),
            ('Block_regress.Lip.csv', 3, Lipinski.NumHeteroatoms),
            ('Block_regress.Lip.csv', 4, Lipinski.NumRotatableBonds),
            ('PP_descrs_regress.2.csv', 33, logp_with_hs),
        )
        for fName, col, func in checks:
            self.__testDesc(fName, col, func)
def mole_proper(mol):
    """Return a tuple of descriptors for a molecule.

    The tuple is (H-bond donors, H-bond acceptors, rotatable bonds,
    molecular weight, Crippen logP, TPSA).
    """
    return (
        Lipinski.NumHDonors(mol),
        Lipinski.NumHAcceptors(mol),
        Lipinski.NumRotatableBonds(mol),
        Descriptors.MolWt(mol),
        Crippen.MolLogP(mol),
        Descriptors.TPSA(mol),
    )
Esempio n. 21
0
def log_partition_coefficient(smiles):
    '''
    Returns the octanol-water partition coefficient given a molecule SMILES
    string

    Raises SmilesError when the SMILES cannot be turned into a molecule.
    '''
    try:
        mol = Chem.MolFromSmiles(smiles)
    except Exception:
        raise SmilesError('%s returns a None molecule' % smiles)
    if mol is None:
        # An unparsable SMILES comes back as None rather than an exception,
        # so it has to be checked explicitly.
        raise SmilesError('%s returns a None molecule' % smiles)

    return Crippen.MolLogP(mol)
Esempio n. 22
0
def get_crippen(x):
    """
    Get the logP value for molecule X
    :param x: Molecule
    :return: float: logP, or NaN when the calculation fails
    """
    try:
        return Crippen.MolLogP(x)
    except Exception:
        # Best-effort fallback; unlike a bare except this lets
        # KeyboardInterrupt and SystemExit propagate.
        return np.nan
Esempio n. 23
0
def evaluate_chem_mol(mol):
    """Evaluate a molecule against fixed property windows.

    :param mol: molecule to evaluate
    :return: list of four booleans: [evaluation succeeded,
        320 < exact MW < 420, 2 < logP < 3, 40 < TPSA < 60]; all False when
        any calculation raises
    """
    try:
        Chem.GetSSSR(mol)
        clogp = Crippen.MolLogP(mol)
        mw = MolDescriptors.CalcExactMolWt(mol)
        tpsa = Descriptors.TPSA(mol)
        ret_val = [True, 320 < mw < 420, 2 < clogp < 3, 40 < tpsa < 60]
    except Exception:
        # Any failure marks the molecule as failing every criterion; a bare
        # except would also have swallowed KeyboardInterrupt/SystemExit.
        ret_val = [False] * 4

    return ret_val
Esempio n. 24
0
 def calc_esol_descriptors(self, mol):
     """
     Calculate mw, logp, rotors and aromatic proportion (ap)
     :param mol: input molecule
     :return: self.Descriptor instance holding the four descriptor values
     """
     return self.Descriptor(
         mw=Descriptors.MolWt(mol),
         logp=Crippen.MolLogP(mol),
         rotors=Lipinski.NumRotatableBonds(mol),
         ap=self.calc_ap(mol),
     )
Esempio n. 25
0
  def testLogP(self):
    """ Check computed logP and MR against the reference values loaded
    by _readData. """
    self._readData()
    for i, smi in enumerate(self.smis):
      mol = Chem.MolFromSmiles(smi)

      clog = self.clogs[i]
      tmp = Crippen.MolLogP(mol)
      self.assertTrue(feq(clog,tmp),'bad logp for %s: %4.4f != %4.4f'%(smi,clog,tmp))

      mr = self.mrs[i]
      tmp = Crippen.MolMR(mol)
      self.assertTrue(feq(mr,tmp),'bad MR for %s: %4.4f != %4.4f'%(smi,mr,tmp))
Esempio n. 26
0
def canonicalize_and_filter(smi_list, showprogress=False):
    """
    Return the unique canonical SMILES from a list of input SMILES strings
    that pass a rudimentary size / logP filter.

    Each parsable input is converted to canonical SMILES and the strings are
    de-duplicated through a set (so output order is not guaranteed). An
    entry is kept only when it has more than 17 heavy atoms and a Crippen
    logP <= 5.

    :param smi_list: iterable of SMILES strings
    :param showprogress: when true, print status lines and show tqdm
        progress bars for both passes
    :return: list of canonical SMILES strings that pass the filter
    """
    if showprogress:
        print('Canonicalising mols')
    mol_list = []
    for smi in (tqdm(smi_list) if showprogress else smi_list):
        mol = MolFromSmiles(smi)
        if mol is not None:
            # Always store the canonical SMILES. The no-progress branch used
            # to store the Mol object itself, which broke de-duplication and
            # the re-parse in the filter pass below.
            mol_list.append(MolToSmiles(mol))
    mol_list = list(set(mol_list))

    if showprogress:
        print('Size of unfiltered final library: {}'.format(len(mol_list)))
        print('Filtering by n_heavy and logP:')
    final_list = []
    for smi in (tqdm(mol_list) if showprogress else mol_list):
        mol = MolFromSmiles(smi)
        if mol.GetNumHeavyAtoms() > 17 and Crippen.MolLogP(mol) <= 5:
            final_list.append(smi)
    return final_list
 def descriptors(self, mol):
     """Return the descriptor list for a molecule.

     Order: aromatic fraction, exact MW, valence electrons, H-bond
     acceptors, H-bond donors, NO count, NHOH count, rotatable bonds,
     sp3 carbon fraction, Crippen logP, number of atoms matching '[^1]'.
     """
     return [
         self.arofrac(mol),
         Descriptors.ExactMolWt(mol, False),
         Descriptors.NumValenceElectrons(mol),
         Lipinski.NumHAcceptors(mol),
         Lipinski.NumHDonors(mol),
         Lipinski.NOCount(mol),
         Lipinski.NHOHCount(mol),
         Lipinski.NumRotatableBonds(mol),
         Lipinski.FractionCSP3(mol),
         Crippen.MolLogP(mol),
         len(mol.GetSubstructMatches(Chem.MolFromSmarts('[^1]'))),
     ]
Esempio n. 28
0
def LogP(smile):
    '''
    Given the SMILES, compute the partition coefficient of the chemical
    Inputs:
        - smile (string): original SMILES code
    Outputs:
        - LogP (float): partition coefficient of the chemical
          (the string 'NaN' if it cannot be computed)
    '''
    smile = str(smile)
    try:
        m = Chem.MolFromSmiles(smile)
        return Crippen.MolLogP(m)
    except Exception:
        # Deliberate best-effort, but no longer swallows KeyboardInterrupt
        # or SystemExit the way a bare except did.
        return 'NaN'
def apply_lead_like_filters(data_dict):
    """Apply lead like filtering, exclude structures with
    AlogP > 4.5
    mol wt > 450 g/mol
    Entries whose SMILES cannot be parsed are dropped as well.
    :param data_dict: {'CHEMBL12345' : 'c1ccccc1OC'}
    :return: filtered smiles dict
    """
    new_dict = {}
    for k, v in data_dict.items():
        rdkit_mol = Chem.MolFromSmiles(v)
        if not rdkit_mol:
            continue
        # Both limits must hold: with `or`, a molecule violating only one
        # of the two criteria slipped through the filter.
        if (Crippen.MolLogP(rdkit_mol) < 4.5
                and Descriptors.ExactMolWt(rdkit_mol) < 450):
            new_dict[k] = v
    return new_dict
Esempio n. 30
0
    def getDiscriptor(self):
        """Compute descriptors for every structure in the input CSV and
        write them to 'Descriptors.csv'.

        Changes the process working directory to a hard-coded folder, reads
        'extChronicStrcture.csv' from it, drops rows missing a CAS number or
        SMILES, and writes one descriptor row per remaining structure.
        """
        from rdkit.Chem import Crippen
        from rdkit import Chem
        import pandas as pd
        from rdkit.Chem import Descriptors, Lipinski
        import os

        # NOTE(review): hard-coded, machine-specific directory; chdir affects
        # the whole process. Kept as-is because callers may rely on it.
        os.chdir(r"G:\マイドライブ\Data\Meram Chronic Data")
        df = pd.read_csv('extChronicStrcture.csv', engine='python')
        df = df[['CAS', 'canonical_smiles']]
        df = df.dropna(how='any')

        columns = [
            'CAS', 'weight', 'logP', 'RotatableBonds', 'HeavyAtomCounts',
            'AromProp', 'TPSA', 'HDonor', 'HAcceptors', 'FractionCSP3',
            'AromaticCarbocycles', 'AromaticHeterocycles'
        ]

        # Compile the aromatic-atom pattern once instead of per molecule.
        aromaticAtom = Chem.MolFromSmarts('[a]')

        # Collect plain rows and build the frame once; concatenating a
        # one-row frame per molecule copies the whole result every time.
        rows = []
        for cas, smiles in zip(df['CAS'], df['canonical_smiles']):
            mol = Chem.MolFromSmiles(smiles)
            wt = Descriptors.MolWt(mol)
            rot = Lipinski.NumRotatableBonds(mol)
            heavy = Lipinski.HeavyAtomCount(mol)
            logp = Crippen.MolLogP(mol)
            aromaticHeavyatoms = len(mol.GetSubstructMatches(aromaticAtom))
            numAtoms = mol.GetNumAtoms()
            aromprop = float(aromaticHeavyatoms / numAtoms)
            TPSA = Descriptors.TPSA(mol)
            HDonors = Descriptors.NumHDonors(mol)
            HAcceptors = Descriptors.NumHAcceptors(mol)

            FractionCSP3 = Descriptors.FractionCSP3(mol)
            AromaticCarbocycles = Descriptors.NumAromaticCarbocycles(mol)
            AromaticHeterocycles = Descriptors.NumAromaticHeterocycles(mol)

            print(HDonors, HAcceptors)
            rows.append([
                cas, wt, logp, rot, heavy, aromprop, TPSA, HDonors, HAcceptors,
                FractionCSP3, AromaticCarbocycles, AromaticHeterocycles
            ])
        resultDf = pd.DataFrame(rows, columns=columns)
        resultDf.to_csv('Descriptors.csv', index=False)