Example #1
0
  def testGithub1973(self):
    """Check CalcTPSA against reference values, with and without S/P terms.

    Each case is (SMILES, expected default TPSA, expected TPSA when
    ``includeSandP=True``). Values are compared to 2 decimal places.
    """
    cases = (
      ("c1ccccc1S", 0, 38.8),
      ("c1cscc1", 0, 28.24),
      ("CC(=S)C", 0, 32.09),
      ("CSC", 0, 25.30),
      ("CS(=O)C", 17.07, 36.28),
      ("CP(C)C", 0.0, 13.59),
      ("CP=O", 17.07, 51.21),
      ("CP(C)(C)=O", 17.07, 26.88),
      ("C[PH](C)=O", 17.07, 40.54),
    )
    for smi, expected_default, expected_with_s_and_p in cases:
      mol = Chem.MolFromSmiles(smi)
      self.assertAlmostEqual(rdMD.CalcTPSA(mol), expected_default, 2)
      # NOTE(review): force=True presumably makes the second call recompute
      # instead of reusing the value from the call above - confirm.
      with_s_and_p = rdMD.CalcTPSA(mol, force=True, includeSandP=True)
      self.assertAlmostEqual(with_s_and_p, expected_with_s_and_p, 2)
Example #2
0
def get_filter_values(mol):
    """
    Compute the per-molecule values that the filters operate on.

    Returns a dictionary keyed by value name. Besides the descriptors it
    carries two bookkeeping entries: "is_good" (starts as True) and
    "rejections" (starts as an empty list of rejection reasons).
    """

    assert isinstance(mol, Chem.Mol)

    values = {
        "MW": desc.CalcExactMolWt(mol),
        "logP": crip.MolLogP(mol),
        "HBA": lip.NumHAcceptors(mol),
        "HBD": lip.NumHDonors(mol),
        "tPSA": desc.CalcTPSA(mol),
        "rot_bonds": lip.NumRotatableBonds(mol),
    }
    # rotatable and rigid bonds are assumed to be mutually exclusive
    values["rigid_bonds"] = mol.GetNumBonds() - values["rot_bonds"]
    values["num_rings"] = lip.RingCount(mol)
    values["num_hetero_atoms"] = lip.NumHeteroatoms(mol)
    # the formal charge reported by rdmolops is taken at face value
    values["charge"] = rdmolops.GetFormalCharge(mol)

    n_carbons, n_charges, largest_ring = get_atom_props(mol)
    values["num_carbons"] = n_carbons
    values["num_charges"] = n_charges
    values["max_ring_size"] = largest_ring

    try:
        hetero_to_carbon = float(values["num_hetero_atoms"]) / float(n_carbons)
    except ZeroDivisionError:
        # no carbons at all: use a very large sentinel ratio
        hetero_to_carbon = 100000000
    values["hc_ratio"] = hetero_to_carbon

    # number of BRICS bonds, used as a measure related to complexity
    values["fc"] = len(list(Brics.FindBRICSBonds(mol)))
    values["is_good"] = True  # default verdict, nothing has been checked yet
    # unique element symbols present in the molecule
    values["atoms"] = list(set(atom.GetSymbol() for atom in mol.GetAtoms()))
    values["num_chiral_centers"] = len(Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    values["rejections"] = []  # filled later with the reasons for rejection

    return values
Example #3
0
def calc(smi, name):
    """Compute a row of physicochemical descriptors for one molecule.

    :param smi: SMILES string of the molecule
    :param name: molecule identifier, returned as the first element of the row
    :return: tuple (name, HBA, HBD, HBA + HBD, ring count, rotatable bonds,
             TPSA, logP, MR, MW, Fsp3, fmf, QED, heavy atom count,
             fused ring count) with floats rounded, or None if the SMILES
             cannot be parsed or any descriptor calculation fails. In both
             failure cases a message is written to stderr.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # trailing newline keeps consecutive messages on separate lines
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # fmf: share of the heavy atoms that belong to the scaffold;
        # defined as 0 for a molecule without heavy atoms
        fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac if hac else 0
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit still stop the program
        sys.stderr.write(
            f'molecule {name} was omitted due to an error in calculation of some descriptors\n'
        )
        return None

    return name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2), \
           round(csp3, 3), round(fmf, 3), round(qed, 3), hac, nrings_fused
Example #4
0
def calc(smi, name):
    """Compute a row of physicochemical descriptors for one molecule.

    :param smi: SMILES string of the molecule
    :param name: molecule identifier, returned as the first element of the row
    :return: tuple (name, HBA, HBD, HBA + HBD, ring count, rotatable bonds,
             TPSA, logP, MR, MW, Fsp3, fmf, QED, heavy atom count,
             fused ring count, count from count_hbd_hba_atoms, largest ring
             size, chiral centers incl. unassigned, Fsp3 of the scaffold)
             with floats rounded, or None if the SMILES cannot be parsed or
             any descriptor calculation fails. In both failure cases a
             message is written to stderr.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # trailing newline keeps consecutive messages on separate lines
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # the scaffold feeds two descriptors, so it is built only once
        scaffold = GetScaffoldForMol(m)
        # fmf: share of the heavy atoms that belong to the scaffold;
        # defined as 0 for a molecule without heavy atoms
        fmf = scaffold.GetNumHeavyAtoms() / hac if hac else 0
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        n_unique_hba_hbd_atoms = count_hbd_hba_atoms(m)
        # default=() makes the largest ring size 0 for acyclic molecules
        max_ring_size = len(max(m.GetRingInfo().AtomRings(), key=len, default=()))
        n_chiral_centers = len(FindMolChiralCenters(m, includeUnassigned=True))
        fcsp3_bm = rdMolDescriptors.CalcFractionCSP3(scaffold)
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit still stop the program
        sys.stderr.write(f'molecule {name} was omitted due to an error in calculation of some descriptors\n')
        return None

    return name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2), \
           round(csp3, 3), round(fmf, 3), round(qed, 3), hac, nrings_fused, n_unique_hba_hbd_atoms, \
           max_ring_size, n_chiral_centers, round(fcsp3_bm, 3)
def main():
    """Add Lipinski descriptors and TPSA to the submissions table.

    Reads "submissions_final_result.csv", computes the descriptors from each
    row's 'smiles_string', merges them back onto the input rows using
    ('submission_id', 'smiles_string') as the key and writes the result to
    "lipinski_psa_result.csv".
    """
    submissions = pd.read_csv("submissions_final_result.csv")

    def describe(row):
        # One output record per input row, keyed for the merge below.
        mol = Chem.MolFromSmiles(row['smiles_string'])
        return {
            'submission_id': row['submission_id'],
            'smiles_string': row['smiles_string'],
            # Lipinski's rule
            'h_bond_donor': rd.CalcNumLipinskiHBD(mol),
            'h_bond_acceptor': rd.CalcNumLipinskiHBA(mol),
            'moluclar_mass': rd._CalcMolWt(mol),  # molecular weight
            'log_p': rd.CalcCrippenDescriptors(mol)[0],  # partition coefficient
            # Topological polar surface area
            'topological_polar_surface_area': rd.CalcTPSA(mol),
        }

    records = [describe(row) for _, row in submissions.iterrows()]

    merged = pd.merge(submissions,
                      pd.DataFrame(records),
                      on=['submission_id', 'smiles_string'])
    merged.to_csv("lipinski_psa_result.csv", index=False, encoding='utf-8')
def get_mol_props(mol: AllChem.Mol):
    """
    Return the property vector of a molecule as the list
    [logP, TPSA, Hall-Kier alpha, molar refractivity, Labute ASA].
    """
    return [
        Descriptors.MolLogP(mol),
        rdMolDescriptors.CalcTPSA(mol),
        rdMolDescriptors.CalcHallKierAlpha(mol),
        Descriptors.MolMR(mol),
        rdMolDescriptors.CalcLabuteASA(mol),
    ]
 def calculate_properties(self, mol):
     """this method computes a fixed set of basic properties for the mol
     returns : list of int or float, in the order
     [atom count, Crippen logP, TPSA, rotatable bonds, fraction of sp3 carbons]"""
     return [
         mol.GetNumAtoms(),
         desc.CalcCrippenDescriptors(mol)[0],  # first Crippen value only
         desc.CalcTPSA(mol),
         desc.CalcNumRotatableBonds(mol),
         desc.CalcFractionCSP3(mol),
     ]
Example #8
0
    def calculate(self):
        """Return the TPSA of ``self.mol``.

        When ``self._no_only`` is set, the value from
        ``rdMolDescriptors.CalcTPSA`` is returned unchanged. Otherwise the
        contribution of every phosphorus (atomic number 15) and sulfur
        (atomic number 16) atom is added on top of it.
        """
        total = rdMolDescriptors.CalcTPSA(self.mol)

        if not self._no_only:
            for atom in self.mol.GetAtoms():
                element = atom.GetAtomicNum()
                if element == 15:
                    total += self._get_phosphorus_contrib(atom)
                elif element == 16:
                    total += self._get_sulfur_contrib(atom)

        return total
def main(in_file, output):
  """Tabulate basic descriptors for every molecule read from ``in_file``.

  The table is written twice: as CSV to ``output + '.csv'`` and as an Excel
  sheet to ``output + '.xlsx'``. Rows are indexed by molecule name.

  in_file : path handed to ``rdkit_open`` (as a one-element list)
  output  : output path prefix, without extension
  """

  Cmpds  = {}
  InMols = rdkit_open([in_file])
  print('\n # Number of input molecule: {0}'.format(len(InMols)))
  for mol in InMols:
    # the first whitespace-separated token of the title is the row key
    name = mol.GetProp('_Name').split()[0]

    # Crippen descriptors come as a (logP, molar refractivity) pair;
    # compute them once and use both values
    crippen = rd.CalcCrippenDescriptors(mol)

    # NOTE: molecules sharing a name overwrite each other, last one wins
    Cmpds[name] = {
      'Name':    name,
      'Formula': rd.CalcMolFormula(mol),
      'SMILES':  Chem.MolToSmiles(mol, isomericSmiles=True, allHsExplicit=False),

      'MW':      rd._CalcMolWt(mol),              # Molecular Weight
      'logP':    crippen[0],                      # Partition coefficient
      'HDon':    rd.CalcNumLipinskiHBD(mol),      # Lipinski Hbond donor
      'HAcc':    rd.CalcNumLipinskiHBA(mol),      # Lipinski Hbond acceptor
      'TPSA':    rd.CalcTPSA(mol),                # Topological polar surface area

      'Rotat':   rd.CalcNumRotatableBonds(mol, strict=True),  # Rotatable bond
      'MolRef':  crippen[1],                      # Molar refractivity
      'AliRing': rd.CalcNumAliphaticRings(mol),   # Aliphatic ring number
      'AroRing': rd.CalcNumAromaticRings(mol),    # Aromatic ring number
      # Disabled columns:
      #   'Stereo':     rd.CalcNumAtomStereoCenters(mol)
      #   'UnspStereo': rd.CalcNumUnspecifiedAtomStereoCenters(mol)
    }

  ####################################

  df = pd.DataFrame.from_dict(Cmpds, orient='index')
  df.index.name = 'Name'

  # Columns of data to print out, in output order
  Columns = [ 'Formula',
              'MW',    'logP',   'HDon',    'HAcc',    'TPSA',
              'Rotat', 'MolRef', 'AliRing', 'AroRing',
              #'Stereo', 'UnspStereo',
              'SMILES', ]
  reorder = df[Columns]

  # Output to CSV
  reorder.to_csv( output+'.csv', sep=',', na_rep='NA', encoding='utf-8',
                  float_format='%.5f', header=True )

  # Output to Excel
  reorder.to_excel( output+'.xlsx', header=True, na_rep='NA' )
def calc(smi, name):
    """Compute a row of physicochemical descriptors for one molecule.

    :param smi: SMILES string of the molecule
    :param name: molecule identifier, returned as the first element of the row
    :return: tuple (name, HBA, HBD, HBA + HBD, ring count, rotatable bonds,
             TPSA, logP, MR, MW, Fsp3, fmf) with floats rounded, or None
             (after a message on stderr) if the SMILES cannot be parsed
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # trailing newline keeps consecutive messages on separate lines
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    hba = rdMolDescriptors.CalcNumHBA(m)
    hbd = rdMolDescriptors.CalcNumHBD(m)
    nrings = rdMolDescriptors.CalcNumRings(m)
    rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
    psa = rdMolDescriptors.CalcTPSA(m)
    logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
    mw = rdMolDescriptors._CalcMolWt(m)
    csp3 = rdMolDescriptors.CalcFractionCSP3(m)
    # fmf: share of the heavy atoms that belong to the scaffold. A molecule
    # without heavy atoms gets 0 instead of raising ZeroDivisionError.
    heavy_atoms = m.GetNumAtoms(onlyHeavy=True)
    if heavy_atoms == 0:
        fmf = 0
    else:
        fmf = GetScaffoldForMol(m).GetNumAtoms(onlyHeavy=True) / heavy_atoms
    return name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2), \
           round(csp3, 3), round(fmf, 3)
Example #11
0
 def calculate_properties(self, smiles=None, mol=None, props=None):
     """this method calculates the requested properties for the mol and
     stores each of them in self.data under its property name

     smiles : SMILES string, parsed only when no mol is given
     mol    : molecule to use directly
     props  : names of the properties to calculate ('py_formula', 'py_em',
              'py_n_Cl_Br', 'py_na', 'py_mw', 'py_fsp3', 'py_rb', 'py_tpsa',
              'py_clogp', 'py_nar', 'py_nhba', 'py_nhbd'); unknown names
              are ignored
     returns : error (bool) - True when nothing was calculated (no props
               requested or the smiles could not be parsed), else False"""
     # None as default avoids a mutable default argument; None and an empty
     # container both mean "nothing requested"
     if props is None or len(props) == 0:
         return True
     if mol is None:
         mol = Chem.MolFromSmiles(smiles)
     if mol is None:
         return True

     def count_cl_br():
         # number of chlorine plus bromine atoms
         return sum(1 for atom in mol.GetAtoms() if atom.GetSymbol() in ('Cl', 'Br'))

     # (property name, calculator) pairs, evaluated lazily and in this order
     calculators = (
         ('py_formula', lambda: desc.CalcMolFormula(mol)),
         ('py_em', lambda: round(desc.CalcExactMolWt(mol), 5)),
         ('py_n_Cl_Br', count_cl_br),
         ('py_na', lambda: mol.GetNumAtoms()),
         ('py_mw', lambda: desc._CalcMolWt(mol)),
         ('py_fsp3', lambda: desc.CalcFractionCSP3(mol)),
         ('py_rb', lambda: desc.CalcNumRotatableBonds(mol)),
         ('py_tpsa', lambda: desc.CalcTPSA(mol)),
         ('py_clogp', lambda: desc.CalcCrippenDescriptors(mol)[0]),
         ('py_nar', lambda: desc.CalcNumAromaticRings(mol)),
         ('py_nhba', lambda: desc.CalcNumHBA(mol)),
         ('py_nhbd', lambda: desc.CalcNumHBD(mol)),
     )
     for key, compute in calculators:
         if key in props:
             self.data[key] = compute()
     return False
Example #12
0
def feature_fp(smiles):
    """Return the feature vector of a SMILES string as a NumPy array.

    The vector is the result of ``rdMolDescriptors.MQNs_`` followed by the
    descriptor values listed below, in that order.
    """
    mol = Chem.MolFromSmiles(smiles)
    fp = rdMolDescriptors.MQNs_(mol)

    extra_descriptors = (
        rdMolDescriptors.CalcNumRotatableBonds,
        rdMolDescriptors.CalcExactMolWt,
        # NOTE(review): the rotatable-bond count appears a second time here.
        # It is kept so the vector length and layout stay unchanged.
        rdMolDescriptors.CalcNumRotatableBonds,
        rdMolDescriptors.CalcFractionCSP3,
        rdMolDescriptors.CalcNumAliphaticCarbocycles,
        rdMolDescriptors.CalcNumAliphaticHeterocycles,
        rdMolDescriptors.CalcNumAliphaticRings,
        rdMolDescriptors.CalcNumAromaticCarbocycles,
        rdMolDescriptors.CalcNumAromaticHeterocycles,
        rdMolDescriptors.CalcNumAromaticRings,
        rdMolDescriptors.CalcNumBridgeheadAtoms,
        rdMolDescriptors.CalcNumRings,
        rdMolDescriptors.CalcNumAmideBonds,
        rdMolDescriptors.CalcNumHeterocycles,
        rdMolDescriptors.CalcNumSpiroAtoms,
        rdMolDescriptors.CalcTPSA,
    )
    for descriptor in extra_descriptors:
        fp.append(descriptor(mol))

    return np.array(fp)
Example #13
0
def generateCompoundPropertiesTask(structure, debug=False):
    """Calculate the properties of a structure's molecule and save them.

    The existing ``compoundProperty`` record of the molecule is updated if
    there is one, otherwise a new ``CompoundProperties`` record is created.
    All values are calculated on the molecule parsed from
    ``structure.molfile``. Only ``mw_freebase`` uses the salt-stripped
    molecule, and only when stripping leaves any atoms.

    With ``debug`` set, a pydevd remote-debugger session is attached first
    and an IntegrityError raised on save is printed instead of re-raised.
    """
    if debug:
        pydevd.settrace('localhost',
                        port=6901,
                        stdoutToServer=True,
                        stderrToServer=True)

    molecule = structure.molecule
    prop = (molecule.compoundProperty if molecule.compoundProperty
            else CompoundProperties(molecule=molecule))

    saltRemover = SaltRemover()
    mol = Chem.MolFromMolBlock(str(structure.molfile))
    freebase = saltRemover.StripMol(mol)

    prop.hbd = Descriptors.CalcNumHBD(mol)
    prop.hba = Descriptors.CalcNumHBA(mol)
    prop.rtb = Descriptors.CalcNumRotatableBonds(mol)
    prop.alogp = Crippen.MolLogP(mol)
    prop.psa = Descriptors.CalcTPSA(mol)
    prop.full_mwt = NewDescriptors.MolWt(mol)
    # prop.exact_mass = Descriptors.CalcExactMolWt(mol)

    # mw_freebase is left untouched when salt stripping removes every atom
    if freebase.GetNumAtoms():
        prop.mw_freebase = NewDescriptors.MolWt(freebase)

    prop.full_molformula = Descriptors.CalcMolFormula(mol)

    try:
        prop.save()

    except IntegrityError as e:
        if not debug:
            raise e
        print(e.message)
Example #14
0
def computeFeatures(mol):
    """Return the feature list of a molecule.

    Layout: ring count, then the N, O, C, B, P, S, F and I atom counts, the
    double-bond count, Labute ASA, exact molecular weight and TPSA, followed
    by every value of the mapping returned by ``recurseMolHCount`` in its
    iteration order.
    """
    ring_count = rdMolDescriptors.CalcNumRings(mol)
    # NOTE(review): the rotatable-bond count is computed but never added to
    # the output. It is kept as-is so the feature layout does not change.
    rotatable_bonds = rdMolDescriptors.CalcNumRotatableBonds(mol)

    element_counters = (countNitrogens, countOxygens, countCarbons, countBorons,
                        countPhos, countSulfurs, countFluorine, countIodine)
    element_counts = [counter(mol) for counter in element_counters]

    double_bonds = countDoubleBonds(mol)
    labute_asa = rdMolDescriptors.CalcLabuteASA(mol)
    exact_weight = rdMolDescriptors.CalcExactMolWt(mol)
    polar_area = rdMolDescriptors.CalcTPSA(mol)
    h_distribution = recurseMolHCount(mol)

    output = [ring_count] + element_counts + [double_bonds, labute_asa, exact_weight, polar_area]
    output.extend(h_distribution[key] for key in h_distribution)
    return output
def get_fingerprint(SMILES=None, E_BIND=None):
    """
    PRE: Takes in a MOLECULE as a SMILES, or nothing at all. E_BIND is accepted but not used.
    POST: Without a SMILES, returns the alphabetically sorted list of the feature names.
          With a SMILES, returns the list of the feature values, ordered like the sorted names.
    RAISES: ValueError if no 3D structure can be built for the volume computation.
    """

    def get_atoms_coords(RDKIT_BLOCK):
        """Takes as input an RDKIT BLOCK and returns a list of atoms with a numpy array containing the coordinates"""
        # Only needed by the disabled cylinder-radius features further down.
        RDKIT_BLOCK = RDKIT_BLOCK.split('\n')
        atm_number = int(RDKIT_BLOCK[3][:3])
        RDKIT_BLOCK = [x.split() for x in RDKIT_BLOCK]
        atm_list = []
        coords_array = np.zeros([atm_number, 3], dtype=float)
        for i, line in enumerate(RDKIT_BLOCK[4:4 + atm_number]):
            atm_list.append(line[3])
            coords_array[i, :] = line[:3]
        return atm_list, coords_array

    def get_atom_types(mol):
        """
        PRE: Takes in the mol
        POST: Returns a dictionary with the atom types and numbers
        """
        atom_types = {}
        for atom in mol.GetAtoms():
            symbol = atom.GetSymbol()
            atom_types[symbol] = atom_types.get(symbol, 0) + 1
        return atom_types

    def AreRingFused(mol):
        """
        PRE  : Takes in a mol rdkit
        POST : Returns the max number of fused rings. That is the maximum number of rings any atom belongs to
               (0 if the molecule has no ring)
        """
        ring_dic = {}
        for ring in Chem.GetSymmSSSR(mol):
            for atom in ring:
                ring_dic[atom] = ring_dic.get(atom, 0) + 1
        # Test the dict itself: on Python 3 `.values() == []` is never true,
        # so max() used to raise on ring-free molecules.
        if not ring_dic:
            return 0
        return max(ring_dic.values())

    def isRingAromatic(mol, ring):
        """
        PRE: Takes in a mol and a ring given as a tuple of atom id
        POST: Returns TRUE is all the atoms inside the ring are aromatic and FALSE otherwise
        """
        return all(mol.GetAtomWithIdx(ids).GetIsAromatic() for ids in ring)

    def getVolume(mol, atom_types):
        """
        PRE: Takes in a mol with HYDROGENS ADDED (atom_types is not used, it is kept for the call signature)
        POST: Returns (volume, number of aromatic rings, number of non aromatic rings,
              size of the largest aromatic ring, size of the largest non aromatic ring).
              The volume is computed on an embedded and MMFF optimized conformer, which is added to mol.
              An older estimate from additive vdW volume increments is no longer used.
        """
        ra = largest_ra = 0
        rna = largest_rna = 0
        for ring in Chem.GetSymmSSSR(mol):
            ring_size = len(ring)
            if isRingAromatic(mol, tuple(ring)):
                ra += 1
                largest_ra = max(largest_ra, ring_size)
            else:
                rna += 1
                largest_rna = max(largest_rna, ring_size)
        try:
            AllChem.EmbedMolecule(mol)
            AllChem.MMFFOptimizeMolecule(mol)
            volume = AllChem.ComputeMolVolume(mol)
        except Exception:
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit are not turned into a ValueError
            raise ValueError("Can't build the molecule")
        return volume, ra, rna, largest_ra, largest_rna

    features = [
        'atomNbr',
        'Volume',
        'NAtom',
        'OAtom',
        'SAtom',
        'PAtom',
        'ClAtom',
        'BrAtom',
        'FAtom',
        'IAtom',
        'AromaticRingNumber',
        'LargestAromaticRingAtomNbr',
        'NonAromaticRingNumber',
        'LargestNonAromaticRingAtomNbr',
        'MaxNbrFusedRings',
        'SurfaceArea',
        'Charge',
        # 'MinRadiusOfCylinder',
        # 'RadiusOfCylinderBestConf',
        'NitroNbr',
        'AlcoholNbr',
        'KetoneNbr',
        'NitrileNbr',
        'ThiolNbr',
        'Phenol_likeNbr',
        'EsterNbr',
        'SulfideNbr',
        'CarboxilicAcidNbr',
        'EtherNbr',
        'AmideNbr',
        'AnilineNbr',
        'PrimaryAmineNbr',
        'SecondaryAmineNbr',
        'RotableBondNum',
        'HBondDonor',
        'HBondAcceptor',
        'MolLogP',
        'MolMR'
    ]
    # Chi1..Chi6 (v and n variants) and Kappa1..Kappa3
    for i in range(6):
        features.append('Chi{}v'.format(i + 1))
        features.append('Chi{}n'.format(i + 1))
        if i < 3:
            features.append('Kappa{}'.format(i + 1))

    feature_dic = dict.fromkeys(features)
    if SMILES is None:
        # no molecule given: only report the feature names
        return sorted(feature_dic.keys())

    mol = Chem.MolFromSmiles(SMILES)
    mol = Chem.AddHs(mol)

    feature_dic['RotableBondNum'] = descriptors.CalcNumRotatableBonds(mol)

    for i in range(6):
        feature_dic['Chi{}v'.format(i + 1)] = descriptors.CalcChiNv(mol, i + 1)
        feature_dic['Chi{}n'.format(i + 1)] = descriptors.CalcChiNn(mol, i + 1)

    feature_dic['Kappa1'] = descriptors.CalcKappa1(mol)
    feature_dic['Kappa2'] = descriptors.CalcKappa2(mol)
    feature_dic['Kappa3'] = descriptors.CalcKappa3(mol)

    feature_dic['HBondAcceptor'] = descriptors.CalcNumHBA(mol)
    feature_dic['HBondDonor'] = descriptors.CalcNumHBD(mol)

    CrippenDescriptors = descriptors.CalcCrippenDescriptors(mol)
    feature_dic['MolLogP'] = CrippenDescriptors[0]
    feature_dic['MolMR'] = CrippenDescriptors[1]

    atom_types = get_atom_types(mol)
    for feat, symbol in zip(['NAtom', 'OAtom', 'SAtom', 'PAtom', 'ClAtom', 'BrAtom', 'FAtom', 'IAtom'],
                            ['N', 'O', 'S', 'P', 'Cl', 'Br', 'F', 'I']):
        feature_dic[feat] = atom_types.get(symbol, 0)

    feature_dic['atomNbr'] = mol.GetNumHeavyAtoms()
    (feature_dic['Volume'],
     feature_dic['AromaticRingNumber'],
     feature_dic['NonAromaticRingNumber'],
     feature_dic['LargestAromaticRingAtomNbr'],
     feature_dic['LargestNonAromaticRingAtomNbr']) = getVolume(mol, atom_types)
    feature_dic['MaxNbrFusedRings'] = AreRingFused(mol)
    # NOTE: 'SurfaceArea' holds the topological polar surface area (TPSA)
    feature_dic['SurfaceArea'] = descriptors.CalcTPSA(mol)
    feature_dic['Charge'] = Chem.GetFormalCharge(mol)

    # SMARTS pattern -> name of the feature counting its matches
    funct_dic = {
        '[$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]': 'NitroNbr',
        '[#6][OX2H]': 'AlcoholNbr',
        '[NX1]#[CX2]': 'NitrileNbr',
        '[#6][CX3](=O)[#6]': 'KetoneNbr',
        '[#16X2H]': 'ThiolNbr',
        "[OX2H][cX3][c]": 'Phenol_likeNbr',
        '[#6][CX3](=O)[OX2H0][#6]': 'EsterNbr',
        '[#16X2H0]': 'SulfideNbr',
        '[CX3](=O)[OX2H1]': 'CarboxilicAcidNbr',
        '[OD2]([#6])[#6]': 'EtherNbr',
        # '[NX3][CX3](=[OX1])[#6]':'AmideNbr',
        '[#7X3][#6X3](=[OX1])[#6]': 'AmideNbr',
        '[NX3][cc]': 'AnilineNbr',
        '[NX3H2;!$(NC=O)]': 'PrimaryAmineNbr',
        '[NX3H1;!$(NC=O)]': 'SecondaryAmineNbr'}

    for funct, feat in funct_dic.items():
        patt = Chem.MolFromSmarts(funct)
        feature_dic[feat] = len(mol.GetSubstructMatches(patt))

    # Disabled cylinder-radius features:
    # names, coords = get_atoms_coords(Chem.MolToMolBlock(mol))
    # feature_dic['MinRadiusOfCylinder'] = returnCircleAsTuple(coords[:,1:])[2]
    # feature_dic['MinRadiusOfCylinder'] = RADIUS[0]
    # feature_dic['RadiusOfCylinderBestConf'] = RADIUS[1]

    # values are reported in the order of the sorted feature names
    return [feature_dic[key] for key in sorted(feature_dic)]
Example #16
0
   Algorithm in:
    P. Ertl, B. Rohde, P. Selzer
     Fast Calculation of Molecular Polar Surface Area as a Sum of Fragment-based
     Contributions and Its Application to the Prediction of Drug Transport 
     Properties, J.Med.Chem. 43, 3714-3717, 2000

   Implementation based on the Daylight contrib program tpsa.c
  """  
  contribs = _pyTPSAContribs(mol,verbose=verbose)
  res = 0.0
  for contrib in contribs:
    res += contrib
  return res
# Version tag attached to the pure-Python implementation (_pyTPSA).
_pyTPSA.version="1.0.1"

# Public TPSA entry point: forwards every positional and keyword argument to
# rdMolDescriptors.CalcTPSA.
TPSA=lambda *x,**y:rdMolDescriptors.CalcTPSA(*x,**y)
# Its version tag is taken from rdMolDescriptors, not from _pyTPSA.
TPSA.version=rdMolDescriptors._CalcTPSA_version


if __name__ == '__main__':
  # Manual smoke run: print the PEOE_VSA_ values of a few molecules.
  # Alternative input set: ['C','CC','CCC','CCCC','CO','CCO','COC']
  smis = ['C(=O)O','c1ccccc1']
  for smi in smis:
    m = Chem.MolFromSmiles(smi)
    # Other values that can be inspected here:
    #   LabuteASA(m), SMR_VSA_(m) ('M:'), SlogP_VSA_(m) ('L:')
    print('-----------\n',smi)
    # The PEOE_VSA_ line is printed twice, exactly as in the original run.
    for _ in range(2):
      print('P:',['% 4.2f'%x for x in PEOE_VSA_(m)])
    print()
Example #17
0
#    "anhydride": Chem.MolFromSmarts('[#6]-[#6](=O)-[#8]-[#6](-[#6])=O'),  # CC(=O)OC(=O)C
#    "peroxide": Chem.MolFromSmarts('[#8]-[#8]'),  # R-O-O-R'
#    "ab_unsaturated_ketone": Chem.MolFromSmarts('[#6]=[#6]-[#6]=O'),  # R=CC=O
#}

DESCRIPTORS = {
    # classical molecular descriptors
    "num_heavy_atoms": lambda x: x.GetNumAtoms(),
    "molecular_weight": lambda x: round(Desc.ExactMolWt(x), 4),
    "num_rings": lambda x: rdMolDesc.CalcNumRings(x),
    "num_rings_arom": lambda x: rdMolDesc.CalcNumAromaticRings(x),
    "num_rings_ali": lambda x: rdMolDesc.CalcNumAliphaticRings(x),
    "num_hbd": lambda x: rdMolDesc.CalcNumLipinskiHBD(x),
    "num_hba": lambda x: rdMolDesc.CalcNumLipinskiHBA(x),
    "slogp": lambda x: round(Crippen.MolLogP(x), 4),
    "tpsa": lambda x: round(rdMolDesc.CalcTPSA(x), 4),
    "num_rotatable_bond": lambda x: rdMolDesc.CalcNumRotatableBonds(x),
    "num_atoms_oxygen": lambda x: len(
        [a for a in x.GetAtoms() if a.GetAtomicNum() == 8]
    ),
    "num_atoms_nitrogen": lambda x: len(
        [a for a in x.GetAtoms() if a.GetAtomicNum() == 7]
    ),
    "num_atoms_halogen": Fragments.fr_halogen,
    "num_atoms_bridgehead": rdMolDesc.CalcNumBridgeheadAtoms,
    # custom molecular descriptors
    #"ring_size_min": get_min_ring_size,
    #"ring_size_max": get_max_ring_size,
    "frac_sp3": lambda x: rdMolDesc.CalcFractionCSP3(x),
    # HTS filters 1/2 - present in the RDKit Fragments
    #"num_aldehyde": Fragments.fr_aldehyde,
Example #18
0
    'HBD', 'jIndex'
]
# One empty column per property name.
for name in prop_names:
    d[f'{name}'] = []

# Row labels of DUD whose SMILES is rejected. They are dropped in one call
# after the loop instead of re-copying the frame for every rejected row.
rejected = []

for i, s in enumerate(smiles):
    if i % 10000 == 0:
        print(i)  # progress indicator
    m = Chem.MolFromSmiles(s)
    # Reject unparsable SMILES, SMILES containing an 'i' and multi-fragment
    # SMILES (containing '.').
    if m is None or 'i' in s or '.' in s:
        rejected.append(i)
        print(s, i)
        continue

    d['QED'].append(QED.default(m))
    d['logP'].append(Crippen.MolLogP(m))
    d['molWt'].append(Descriptors.MolWt(m))
    d['maxCharge'].append(Descriptors.MaxPartialCharge(m))
    d['minCharge'].append(Descriptors.MinPartialCharge(m))
    d['valence'].append(Descriptors.NumValenceElectrons(m))
    d['TPSA'].append(rdMolDescriptors.CalcTPSA(m))
    d['HBA'].append(rdMolDescriptors.CalcNumHBA(m))
    d['HBD'].append(rdMolDescriptors.CalcNumHBD(m))
    d['jIndex'].append(GraphDescriptors.BalabanJ(m))

if rejected:
    DUD = DUD.drop(rejected)

df = pd.DataFrame.from_dict(d)

# NOTE(review): merging on df.index appears to rely on df and DUD having the
# same number of rows, in the same order, after the drop - confirm.
df_merge = pd.merge(df, DUD, on=df.index)

#df_merge.to_csv('/home/mcb/jboitr/data/DUD_full.csv')
df_merge.to_csv('C:/Users/jacqu/Documents/data/DUD_full.csv')
Example #19
0
    def extract(x, from_smiles):
        """Return the descriptor vector of one molecule as a list.

        ``x`` is a SMILES string when ``from_smiles`` is true, otherwise it
        is used as the molecule itself. The vector has 24 entries, or 29
        when the enclosing ``include_3D`` flag is set (five shape
        descriptors computed on an embedded, MMFF-optimised copy with
        explicit hydrogens are appended). A missing or atom-less molecule
        yields a vector of zeros of the same length.
        """
        mol = Chem.MolFromSmiles(x) if from_smiles else x

        if (mol is None) or (len(mol.GetAtoms()) == 0):
            return [0] * (29 if include_3D else 24)

        features = [
            Crippen.MolLogP(mol),                    # logP
            Crippen.MolMR(mol),                      # refractivity
            Descriptors.MolWt(mol),                  # weight
            Descriptors.ExactMolWt(mol),             # exact weight
            Descriptors.HeavyAtomMolWt(mol),         # heavy-atom weight
            Lipinski.HeavyAtomCount(mol),
            Lipinski.NHOHCount(mol),
            Lipinski.NOCount(mol),
            Lipinski.NumHAcceptors(mol),
            Lipinski.NumHDonors(mol),
            Lipinski.NumHeteroatoms(mol),
            Lipinski.NumRotatableBonds(mol),
            Descriptors.NumValenceElectrons(mol),
            rdMolDescriptors.CalcNumAmideBonds(mol),
            Lipinski.NumAliphaticRings(mol),
            Lipinski.NumAromaticRings(mol),
            Lipinski.NumSaturatedRings(mol),
            Lipinski.NumAliphaticCarbocycles(mol),
            Lipinski.NumAliphaticHeterocycles(mol),
            Lipinski.NumAromaticCarbocycles(mol),
            Lipinski.NumAromaticHeterocycles(mol),
            Lipinski.NumSaturatedCarbocycles(mol),
            Lipinski.NumSaturatedHeterocycles(mol),
            rdMolDescriptors.CalcTPSA(mol),          # tpsa
        ]
        if not include_3D:
            return features

        # 3D shape descriptors need a conformer with explicit hydrogens
        mol_3D = Chem.AddHs(mol)
        AllChem.EmbedMolecule(mol_3D)
        AllChem.MMFFOptimizeMolecule(mol_3D)
        features += [
            rdMolDescriptors.CalcEccentricity(mol_3D),
            rdMolDescriptors.CalcAsphericity(mol_3D),
            rdMolDescriptors.CalcSpherocityIndex(mol_3D),
            rdMolDescriptors.CalcInertialShapeFactor(mol_3D),
            rdMolDescriptors.CalcRadiusOfGyration(mol_3D),
        ]
        return features
    def get_global_features(self, mol):
        """Build the molecule-level feature vector of ``mol``.

        The vector holds, in order: atom count, bond count, exact molecular
        weight, heavy-atom molecular weight, valence-electron count, the
        number of chemical features in each of the families Acceptor, Donor,
        Hydrophobe, LumpedHydrophobe, PosIonizable and NegIonizable, and a
        series of rdMolDescriptors values ending with TPSA. It is returned
        as a NumPy array.
        """
        # Chemical features from RDKit's BaseFeatures definition file
        fdef_path = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
        feature_factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
        mol_feats = feature_factory.GetFeaturesForMol(mol)

        # Basic size and weight features
        basic = [
            mol.GetNumAtoms(),
            mol.GetNumBonds(),
            Descriptors.ExactMolWt(mol),
            Descriptors.HeavyAtomMolWt(mol),
            Descriptors.NumValenceElectrons(mol),
        ]
        # MaxAbsPartialCharge, MaxPartialCharge, MinAbsPartialCharge and
        # MinPartialCharge are left out: they produced infinity for
        # refcode_csd = YOLJUF
        # (CCOP(=O)(Cc1ccc(cc1)NC(=S)NP(OC(C)C)(OC(C)C)[S])OCC).
        # FpDensityMorgan1/2/3 are disabled as well.

        # Count the features per family; other families are ignored
        family_counts = {
            'Acceptor': 0,
            'Donor': 0,
            'Hydrophobe': 0,
            'LumpedHydrophobe': 0,
            'PosIonizable': 0,
            'NegIonizable': 0,
        }
        for feat in mol_feats:
            family = feat.GetFamily()
            if family in family_counts:
                family_counts[family] += 1
        pharmacophore = [
            family_counts['Acceptor'], family_counts['Donor'],
            family_counts['Hydrophobe'], family_counts['LumpedHydrophobe'],
            family_counts['PosIonizable'], family_counts['NegIonizable'],
        ]

        # Descriptors from rdMolDescriptors, evaluated in this order.
        # NOTE(review): Chi3v is absent from the Chi series - confirm intent.
        descriptor_fns = (
            rdm.CalcNumRotatableBonds, rdm.CalcChi0n, rdm.CalcChi0v,
            rdm.CalcChi1n, rdm.CalcChi1v, rdm.CalcChi2n, rdm.CalcChi2v,
            rdm.CalcChi3n, rdm.CalcChi4n, rdm.CalcChi4v,
            rdm.CalcFractionCSP3, rdm.CalcHallKierAlpha, rdm.CalcKappa1,
            rdm.CalcKappa2, rdm.CalcLabuteASA,
            rdm.CalcNumAliphaticCarbocycles, rdm.CalcNumAliphaticHeterocycles,
            rdm.CalcNumAliphaticRings, rdm.CalcNumAmideBonds,
            rdm.CalcNumAromaticCarbocycles, rdm.CalcNumAromaticHeterocycles,
            rdm.CalcNumAromaticRings, rdm.CalcNumBridgeheadAtoms, rdm.CalcNumHBA,
            rdm.CalcNumHBD, rdm.CalcNumHeteroatoms, rdm.CalcNumHeterocycles,
            rdm.CalcNumLipinskiHBA, rdm.CalcNumLipinskiHBD, rdm.CalcNumRings,
            rdm.CalcNumSaturatedCarbocycles, rdm.CalcNumSaturatedHeterocycles,
            rdm.CalcNumSaturatedRings, rdm.CalcNumSpiroAtoms, rdm.CalcTPSA,
        )
        more_global = [fn(mol) for fn in descriptor_fns]

        vector = np.array(basic + pharmacophore + more_global).T
        # Some descriptors can produce NaN. Converting them to 0 is disabled
        # (u[np.isnan(u)] = 0), as is the conversion to a torch tensor.
        # Outliers in training or validation may be caused by such features;
        # removing them from the feature set might remove the outliers.
        return (vector)
Example #21
0
def _sdf_prop(mol, tag, default=None, compute=None):
    """Return SD property ``tag`` of ``mol``, falling back when it is absent.

    Args:
        mol: RDKit molecule read from the SD file.
        tag: Name of the SD data field to read.
        default: Value returned when the field is missing and ``compute`` is
            not given.
        compute: Optional one-argument callable; when the field is missing it
            is called with ``mol`` and its result is returned. It is only
            invoked on a miss, so expensive fallbacks cost nothing otherwise.

    Returns:
        The stored property (always a string) or the fallback value.
    """
    try:
        return mol.GetProp(tag)
    except KeyError:
        # RDKit raises KeyError for a property the molecule does not carry.
        return compute(mol) if compute is not None else default


def loadSDF(sdfPath):
    """Import the molecules of an SD file: render images and feed the database.

    For every readable record the function collects a ``data`` dict. Each
    entry is taken from the record's SD data field when present and otherwise
    replaced by a computed or constant fallback. Values read from the file
    are strings, whereas computed fallbacks are numbers. Records without a
    ``DATABASE_NAME`` field are reported and skipped.

    A 300x300 image and a 150x150 thumbnail are written below
    ``settings.FILES_DIR`` (``molImages/`` and ``molThumbs/``), named after
    the InChIKey. The record is then passed to ``feedDatabase`` unless the
    duplicate check finds it already stored.

    Args:
        sdfPath: Path of the SD file to import.
    """
    # Image sub-folder -> edge length in pixels.
    image_targets = (('molImages', 300), ('molThumbs', 150))

    for mol in Chem.SDMolSupplier(sdfPath):
        # The supplier yields None for records it could not parse.
        if mol is None:
            continue

        # The provider is mandatory, so check it before doing any other work.
        try:
            provider = mol.GetProp('DATABASE_NAME')
        except KeyError:
            print("Nenhum fornecedor encontrado, o campo é obrigatório!")
            continue

        data = {
            'fCharge': _sdf_prop(mol, 'Charge', compute=Chem.GetFormalCharge),
            # NOTE(review): 'unkown' is misspelled but kept byte-identical in
            # case stored rows or other code compare against it.
            'name': _sdf_prop(mol, 'DATABASE_ID', 'unkown'),
            'molMass': _sdf_prop(mol, 'Total Molweight',
                                 compute=Descriptors.ExactMolWt),
            # The original author was not sure Crippen logP is the right
            # substitute for the file's cLogP value.
            'cLogP': _sdf_prop(mol, 'cLogP', compute=Crippen.MolLogP),
            'cLogS': _sdf_prop(mol, 'cLogS', 0.0),
            'tpsa': _sdf_prop(mol, 'Polar Surface Area',
                              compute=rdMolDescriptors.CalcTPSA),
            # Labute's approximate surface area stands in for the total
            # surface; TPSA (used previously) only covers the polar part.
            'totalSurfaceArea': _sdf_prop(mol, 'Total Surface Area',
                                          compute=rdMolDescriptors.CalcLabuteASA),
            'hbondAcceptors': _sdf_prop(mol, 'H-Acceptors',
                                        compute=rdMolDescriptors.CalcNumHBA),
            'hbondDonnors': _sdf_prop(mol, 'H-Donors',
                                      compute=rdMolDescriptors.CalcNumHBD),
            'rotable': _sdf_prop(mol, 'Rotatable Bonds',
                                 compute=rdMolDescriptors.CalcNumRotatableBonds),
            'mutagenic': _sdf_prop(mol, 'Mutagenic', 'Unknown'),
            'tumorigenic': _sdf_prop(mol, 'Tumorigenic', 'Unknown'),
            # NOTE(review): 'Unkown' differs from the 'Unknown' used above;
            # kept byte-identical for the same reason as the name default.
            'irritant': _sdf_prop(mol, 'Irritant', 'Unkown'),
            'smiles': _sdf_prop(mol, 'SMILES', compute=Chem.MolToSmiles),
            'InChI': _sdf_prop(mol, 'INCHI_IDENTIFIER',
                               compute=inchi.MolToInchi),
            'inchiKey': _sdf_prop(mol, 'INCHI_KEY',
                                  compute=inchi.MolToInchiKey),
            # Heavy (non-hydrogen) atom count instead of the former -1 sentinel.
            'nonHAtoms': _sdf_prop(mol, 'Non-H Atoms',
                                   compute=lambda m: m.GetNumHeavyAtoms()),
            'numAtoms': _sdf_prop(mol, 'numAtoms',
                                  compute=lambda m: m.GetNumAtoms()),
            # Count atom stereocentres (assigned or not); the former fallback
            # returned the atom count by mistake.
            'stereoCenters': _sdf_prop(
                mol, 'Stereo Centers',
                compute=lambda m: len(
                    Chem.FindMolChiralCenters(m, includeUnassigned=True))),
            'provider': provider,
        }

        # Compute 2D coordinates before drawing.
        AllChem.Compute2DCoords(mol)

        inchi_key = data['inchiKey']
        for folder, edge in image_targets:
            Draw.MolToFile(
                mol,
                os.path.join(settings.FILES_DIR, f'{folder}/{inchi_key}.png'),
                size=(edge, edge),
                kekulize=True,
                wedgeBonds=True,
                fitImage=True)

        # Duplicate check. The record used to be fed unconditionally before
        # this point, which made the check pointless and inserted it twice.
        # NOTE(review): the two lookups are independent (any row with this
        # InChIKey, any row with this provider); a single combined filter on
        # both fields may be what is really intended - confirm.
        key_known = Compounds.objects.filter(inChIKey=inchi_key).exists()
        if key_known and Compounds.objects.filter(provider=provider).exists():
            print("continue123")
            continue

        feedDatabase(data)
        print("feed1" if key_known else "feed2")
        '''except:
Example #22
0
def get_tpsa_(mol: Mol) -> float:
    """Return the TPSA of ``mol`` rounded to ``round_digs`` decimal places.

    The raw value comes from ``rdMolDescriptors.CalcTPSA``; ``round_digs`` is
    a name defined outside this function.
    """
    tpsa = rdMolDescriptors.CalcTPSA(mol)
    return round(tpsa, round_digs)
Example #23
0
def get_molecular_features(dataframe, mol_list):
    """Write RDKit descriptor columns into ``dataframe``, one row per molecule.

    The molecule at position ``i`` of ``mol_list`` is written to the row
    labelled ``i`` via ``DataFrame.at``, so the frame is presumably expected
    to carry a 0..n-1 index (confirm against the caller). The frame is
    modified in place and the same object is returned.

    Columns written, in this order: basic counts and weights, Morgan
    fingerprint densities, the number of pharmacophore features per family
    from RDKit's ``BaseFeatures.fdef``, and a set of ``rdMolDescriptors``
    values.

    Args:
        dataframe: pandas DataFrame receiving the descriptor values.
        mol_list: Iterable of RDKit molecules.

    Returns:
        ``dataframe`` itself, after the in-place updates.
    """
    # Feature families that are counted; each becomes a column of that name.
    feature_families = ("Acceptor", "Donor", "Hydrophobe", "LumpedHydrophobe",
                        "PosIonizable", "NegIonizable")

    # rdMolDescriptors functions stored under a column named after the
    # function, in the order the columns are written. CalcKappa3 is not
    # included (it was disabled in this function).
    calc_columns = (
        "CalcChi0n", "CalcChi0v", "CalcChi1n", "CalcChi1v",
        "CalcChi2n", "CalcChi2v", "CalcChi3n", "CalcChi3v",
        "CalcChi4n", "CalcChi4v",
        "CalcFractionCSP3", "CalcHallKierAlpha",
        "CalcKappa1", "CalcKappa2",
        "CalcLabuteASA",
        "CalcNumAliphaticCarbocycles", "CalcNumAliphaticHeterocycles",
        "CalcNumAliphaticRings", "CalcNumAmideBonds",
        "CalcNumAromaticCarbocycles", "CalcNumAromaticHeterocycles",
        "CalcNumAromaticRings", "CalcNumBridgeheadAtoms",
        "CalcNumHBA", "CalcNumHBD",
        "CalcNumHeteroatoms", "CalcNumHeterocycles",
        "CalcNumLipinskiHBA", "CalcNumLipinskiHBD",
        "CalcNumRings",
        "CalcNumSaturatedCarbocycles", "CalcNumSaturatedHeterocycles",
        "CalcNumSaturatedRings", "CalcNumSpiroAtoms",
        "CalcTPSA",
    )

    df = dataframe  # alias only: rows are written into the caller's frame

    # The factory depends only on the definition file, so build it once
    # instead of re-parsing the file for every molecule.
    fdefName = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdefName)

    for i, mol in enumerate(mol_list):
        print("Getting molecular features for molecule: ", i)

        df.at[i, "NbrAtoms"] = mol.GetNumAtoms()
        df.at[i, "NbrBonds"] = mol.GetNumBonds()
        df.at[i, "mw"] = Descriptors.ExactMolWt(mol)
        df.at[i, 'HeavyAtomMolWt'] = Chem.Descriptors.HeavyAtomMolWt(mol)
        df.at[i, 'NumValenceElectrons'] = Chem.Descriptors.NumValenceElectrons(mol)
        # MaxAbsPartialCharge, MaxPartialCharge, MinAbsPartialCharge and
        # MinPartialCharge are left out on purpose: they produced infinity for
        # refcode_csd = YOLJUF
        # (CCOP(=O)(Cc1ccc(cc1)NC(=S)NP(OC(C)C)(OC(C)C)[S])OCC).
        df.at[i, 'FpDensityMorgan1'] = Chem.Descriptors.FpDensityMorgan1(mol)
        df.at[i, 'FpDensityMorgan2'] = Chem.Descriptors.FpDensityMorgan2(mol)
        df.at[i, 'FpDensityMorgan3'] = Chem.Descriptors.FpDensityMorgan3(mol)

        # Count the features of each tracked family; other families are ignored.
        family_counts = dict.fromkeys(feature_families, 0)
        for feat in factory.GetFeaturesForMol(mol):
            family = feat.GetFamily()
            if family in family_counts:
                family_counts[family] += 1
        for family in feature_families:
            df.at[i, family] = family_counts[family]

        # More molecular descriptors from rdMolDescriptors. The rotatable-bond
        # column is the only one not named after its function.
        df.at[i, "NumRotatableBonds"] = rdMolDescriptors.CalcNumRotatableBonds(mol)
        for column in calc_columns:
            df.at[i, column] = getattr(rdMolDescriptors, column)(mol)

    return df