Esempio n. 1
0
def _calculateDescriptors(mol):
    """Compute a one-row table of RDKit descriptors for ``mol``.

    Args:
        mol: the RDKit molecule passed to every descriptor function.

    Returns:
        pandas.DataFrame with a single row (index ``0``) and one column per
        descriptor: the scalar descriptors first, followed by the binned
        ``slogp_VSA1..12``, ``smr_VSA1..10``, ``peoe_VSA1..14`` and
        ``MQN1..42`` columns.
    """
    # One call yields both Crippen values (logP at index 0, MR at index 1),
    # so there is no need to run the calculation twice.
    slogp, smr = rdMolDescriptors.CalcCrippenDescriptors(mol)

    # Insertion order of this dict is the column order of the result.
    values = {
        "SlogP": slogp,
        "SMR": smr,
        "LabuteASA": rdMolDescriptors.CalcLabuteASA(mol),
        "TPSA": Descriptors.TPSA(mol),
        "AMW": Descriptors.MolWt(mol),
        "ExactMW": rdMolDescriptors.CalcExactMolWt(mol),
        "NumLipinskiHBA": rdMolDescriptors.CalcNumLipinskiHBA(mol),
        "NumLipinskiHBD": rdMolDescriptors.CalcNumLipinskiHBD(mol),
        "NumRotatableBonds": rdMolDescriptors.CalcNumRotatableBonds(mol),
        "NumHBD": rdMolDescriptors.CalcNumHBD(mol),
        "NumHBA": rdMolDescriptors.CalcNumHBA(mol),
        "NumAmideBonds": rdMolDescriptors.CalcNumAmideBonds(mol),
        "NumHeteroAtoms": rdMolDescriptors.CalcNumHeteroatoms(mol),
        "NumHeavyAtoms": mol.GetNumHeavyAtoms(),
        "NumAtoms": mol.GetNumAtoms(),
        "NumRings": rdMolDescriptors.CalcNumRings(mol),
        "NumAromaticRings": rdMolDescriptors.CalcNumAromaticRings(mol),
        "NumSaturatedRings": rdMolDescriptors.CalcNumSaturatedRings(mol),
        "NumAliphaticRings": rdMolDescriptors.CalcNumAliphaticRings(mol),
        "NumAromaticHeterocycles":
            rdMolDescriptors.CalcNumAromaticHeterocycles(mol),
        "NumSaturatedHeterocycles":
            rdMolDescriptors.CalcNumSaturatedHeterocycles(mol),
        "NumAliphaticHeterocycles":
            rdMolDescriptors.CalcNumAliphaticHeterocycles(mol),
        "NumAromaticCarbocycles":
            rdMolDescriptors.CalcNumAromaticCarbocycles(mol),
        "NumSaturatedCarbocycles":
            rdMolDescriptors.CalcNumSaturatedCarbocycles(mol),
        "NumAliphaticCarbocycles":
            rdMolDescriptors.CalcNumAliphaticCarbocycles(mol),
        "FractionCSP3": rdMolDescriptors.CalcFractionCSP3(mol),
        "Chi0v": rdMolDescriptors.CalcChi0v(mol),
        "Chi1v": rdMolDescriptors.CalcChi1v(mol),
        "Chi2v": rdMolDescriptors.CalcChi2v(mol),
        "Chi3v": rdMolDescriptors.CalcChi3v(mol),
        "Chi4v": rdMolDescriptors.CalcChi4v(mol),
        "Chi1n": rdMolDescriptors.CalcChi1n(mol),
        "Chi2n": rdMolDescriptors.CalcChi2n(mol),
        "Chi3n": rdMolDescriptors.CalcChi3n(mol),
        "Chi4n": rdMolDescriptors.CalcChi4n(mol),
        "HallKierAlpha": rdMolDescriptors.CalcHallKierAlpha(mol),
        "kappa1": rdMolDescriptors.CalcKappa1(mol),
        "kappa2": rdMolDescriptors.CalcKappa2(mol),
        "kappa3": rdMolDescriptors.CalcKappa3(mol),
    }

    # Vector-valued descriptors: (column prefix, calculator, number of
    # columns). Columns are numbered from 1; zip() stops at the shorter of
    # the name list and the returned vector.
    vector_descriptors = (
        ("slogp_VSA", rdMolDescriptors.SlogP_VSA_, 12),
        ("smr_VSA", rdMolDescriptors.SMR_VSA_, 10),
        ("peoe_VSA", rdMolDescriptors.PEOE_VSA_, 14),
        ("MQN", rdMolDescriptors.MQNs_, 42),
    )
    for prefix, calculator, count in vector_descriptors:
        names = [prefix + str(i) for i in range(1, count + 1)]
        values.update(zip(names, calculator(mol)))

    # Build the frame once instead of inserting ~125 columns one at a time.
    return pd.DataFrame(values, index=[0])
def main(in_file, output):
    """Tabulate basic physicochemical properties of the molecules in a file.

    Molecules are loaded with ``rdkit_open([in_file])``; one row per molecule
    is written to ``<output>.csv`` and ``<output>.xlsx``. Rows are keyed by
    the first whitespace-separated token of each molecule's ``_Name``
    property, so a later molecule with the same name replaces an earlier one.

    Args:
        in_file: path handed to ``rdkit_open`` (as a one-element list).
        output: output path prefix; ``.csv`` and ``.xlsx`` are appended.
    """
    # Columns of data to print out, in output order.
    columns = ['Formula',
               'MW', 'logP', 'HDon', 'HAcc', 'TPSA',
               'Rotat', 'MolRef', 'AliRing', 'AroRing',
               'SMILES']

    in_mols = rdkit_open([in_file])
    print('\n # Number of input molecule: {0}'.format(len(in_mols)))

    compounds = {}
    for mol in in_mols:
        name = mol.GetProp('_Name').split()[0]

        # A single call returns both Crippen values: (logP, molar refractivity).
        log_p, mol_ref = rd.CalcCrippenDescriptors(mol)

        compounds[name] = {
            'Formula': rd.CalcMolFormula(mol),
            'MW': rd._CalcMolWt(mol),                    # Molecular weight
            'logP': log_p,                               # Partition coefficient
            'HDon': rd.CalcNumLipinskiHBD(mol),          # Lipinski H-bond donors
            'HAcc': rd.CalcNumLipinskiHBA(mol),          # Lipinski H-bond acceptors
            'TPSA': rd.CalcTPSA(mol),                    # Topological polar surface area
            'Rotat': rd.CalcNumRotatableBonds(mol, strict=True),  # Rotatable bonds
            'MolRef': mol_ref,                           # Molar refractivity
            'AliRing': rd.CalcNumAliphaticRings(mol),    # Aliphatic ring count
            'AroRing': rd.CalcNumAromaticRings(mol),     # Aromatic ring count
            # Computed once; the earlier default-argument SMILES was always
            # overwritten by this one.
            'SMILES': Chem.MolToSmiles(mol, isomericSmiles=True,
                                       allHsExplicit=False),
        }

    df = pd.DataFrame.from_dict(compounds, orient='index')
    df.index.name = 'Name'

    # reindex (rather than df[columns]) so an input with no molecules yields
    # an empty table with the right header instead of a KeyError.
    reorder = df.reindex(columns=columns)

    # Output to CSV
    reorder.to_csv(output + '.csv', sep=',', na_rep='NA', encoding='utf-8',
                   float_format='%.5f', header=True)

    # Output to Excel
    reorder.to_excel(output + '.xlsx', header=True, na_rep='NA')
def main():
    """Add Lipinski and TPSA descriptors to the submissions table.

    Reads ``submissions_final_result.csv``, computes descriptors from each
    row's ``smiles_string`` and writes the input columns plus the descriptor
    columns to ``lipinski_psa_result.csv``.

    A SMILES that RDKit cannot parse no longer aborts the run: the row is
    kept with empty descriptor values and a message is written to stderr.
    """
    import sys  # local import: only needed for the parse-failure message

    key_columns = ['submission_id', 'smiles_string']
    # NOTE: 'moluclar_mass' is misspelled, but it is the emitted column name,
    # so it is kept as-is for downstream consumers.
    descriptor_columns = ['h_bond_donor', 'h_bond_acceptor', 'moluclar_mass',
                          'log_p', 'topological_polar_surface_area']

    sub_df = pd.read_csv("submissions_final_result.csv")

    cmp_ds = []
    for _, row in sub_df.iterrows():
        smiles = row['smiles_string']
        cmp_dict = {'submission_id': row['submission_id'],
                    'smiles_string': smiles}

        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None.
            sys.stderr.write('smiles %s cannot be parsed (%s)\n'
                             % (smiles, row['submission_id']))
            cmp_dict.update(dict.fromkeys(descriptor_columns))
        else:
            # Lipinski's rule
            cmp_dict['h_bond_donor'] = rd.CalcNumLipinskiHBD(mol)
            cmp_dict['h_bond_acceptor'] = rd.CalcNumLipinskiHBA(mol)
            cmp_dict['moluclar_mass'] = rd._CalcMolWt(mol)  # Molecular weight
            # Index 0 of the Crippen result is the partition coefficient.
            cmp_dict['log_p'] = rd.CalcCrippenDescriptors(mol)[0]

            # Topological polar surface area
            cmp_dict['topological_polar_surface_area'] = rd.CalcTPSA(mol)

        cmp_ds.append(cmp_dict)

    # Explicit columns keep the merge keys present even when there are no rows.
    descriptors = pd.DataFrame(cmp_ds,
                               columns=key_columns + descriptor_columns)
    result = pd.merge(sub_df, descriptors, on=key_columns)
    result.to_csv("lipinski_psa_result.csv", index=False, encoding='utf-8')
Esempio n. 4
0
def calc(smi, name):
    """Compute a tuple of descriptors for one SMILES string.

    Args:
        smi: SMILES string to parse.
        name: identifier echoed back as the first element of the result and
            used in diagnostics.

    Returns:
        ``None`` if the SMILES cannot be parsed or any descriptor calculation
        fails (a message is written to stderr). Otherwise a tuple:
        ``(name, HBA, HBD, HBA + HBD, ring count, rotatable bonds, TPSA, logP,
        MR, MW, Fsp3, scaffold heavy-atom fraction, QED, heavy atom count,
        fused_ring_count(m), count_hbd_hba_atoms(m), largest ring size,
        chiral centre count, Fsp3 of the scaffold)`` with the floating-point
        values rounded to 2 or 3 decimals.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()

        # The scaffold feeds two descriptors below; build it once.
        scaffold = GetScaffoldForMol(m)
        # Fraction of heavy atoms that sit in the scaffold; guard against a
        # molecule with no heavy atoms.
        fmf = scaffold.GetNumHeavyAtoms() / hac if hac else 0

        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        n_unique_hba_hbd_atoms = count_hbd_hba_atoms(m)
        # default=() makes an acyclic molecule report a ring size of 0.
        max_ring_size = len(max(m.GetRingInfo().AtomRings(), key=len, default=()))
        n_chiral_centers = len(FindMolChiralCenters(m, includeUnassigned=True))
        fcsp3_bm = rdMolDescriptors.CalcFractionCSP3(scaffold)
        return name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2), \
               round(csp3, 3), round(fmf, 3), round(qed, 3), hac, nrings_fused, n_unique_hba_hbd_atoms, \
               max_ring_size, n_chiral_centers, round(fcsp3_bm, 3)
    except Exception:
        # Deliberate best-effort: one bad molecule must not stop a batch.
        # `Exception` (not a bare except) lets Ctrl-C / SystemExit through.
        sys.stderr.write(f'molecule {name} was omitted due to an error in calculation of some descriptors\n')
        return None
Esempio n. 5
0
def calc(smi, name):
    """Compute a tuple of descriptors for one SMILES string.

    Args:
        smi: SMILES string to parse.
        name: identifier echoed back as the first element of the result and
            used in diagnostics.

    Returns:
        ``None`` if the SMILES cannot be parsed or any descriptor calculation
        fails (a message is written to stderr). Otherwise a tuple:
        ``(name, HBA, HBD, HBA + HBD, ring count, rotatable bonds, TPSA, logP,
        MR, MW, Fsp3, scaffold heavy-atom fraction, QED, heavy atom count,
        fused_ring_count(m))`` with the floating-point values rounded to 2 or
        3 decimals.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # Fraction of heavy atoms that sit in the scaffold; guard against a
        # molecule with no heavy atoms.
        if hac == 0:
            fmf = 0
        else:
            fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        return name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2), \
               round(csp3, 3), round(fmf, 3), round(qed, 3), hac, nrings_fused
    except Exception:
        # Deliberate best-effort: one bad molecule must not stop a batch.
        # `Exception` (not a bare except) lets Ctrl-C / SystemExit through.
        sys.stderr.write(
            f'molecule {name} was omitted due to an error in calculation of some descriptors\n'
        )
        return None
Esempio n. 6
0
    def calculate_crippen_logp(self):
        '''
        Calculate the Crippen logP of ``self.mol``.

        :return: element 0 of the CalcCrippenDescriptors result (the logP)
        '''
        return rdMolDescriptors.CalcCrippenDescriptors(self.mol)[0]
 def calculate_properties(self, mol):
     """Calculate basic properties for ``mol``.

     returns : list of int or float, in this order: atom count, Crippen
     logP, TPSA, rotatable-bond count, fraction of sp3 carbons."""
     return [
         mol.GetNumAtoms(),
         desc.CalcCrippenDescriptors(mol)[0],  # index 0 is logP
         desc.CalcTPSA(mol),
         desc.CalcNumRotatableBonds(mol),
         desc.CalcFractionCSP3(mol),
     ]
Esempio n. 8
0
    def __init__(self):
        """Create scratch files, the built-in property table and API properties.

        Each entry of ``self._properties`` is a 4-tuple
        ``(key, description, printf-style format, callable)``.

        If the ``API_SETTINGS`` file exists it is loaded as JSON into
        ``self.api``, validated, and one property per endpoint property is
        appended (or, when the config sets ``overwrite``, these replace the
        built-ins). If the file does not exist, ``self.api`` is never set.

        Raises:
            Exception: when the config is missing an endpoint name or a
                required key, or has a value of the wrong kind.
        """
        scratch_dir = tempfile.TemporaryDirectory()
        self.temp_dir = scratch_dir
        # delete=False: the file is not removed when the handle is closed.
        self.temp_sdf = tempfile.NamedTemporaryFile(dir=scratch_dir.name,
                                                    suffix='.sdf',
                                                    delete=False)

        self.api_cache = {}
        self.esol = ESOLCalculator()

        float_fmt = '%.3f'
        count_fmt = '%d'
        self._properties = [
            ('MW', 'Molecular Weight', float_fmt, Desc.MolWt),
            ('logP', 'Lipophilicity (logP)', float_fmt,
             lambda mol: mDesc.CalcCrippenDescriptors(mol)[0]),
            ('TPSA', 'Total Polar Surface Area', float_fmt, mDesc.CalcTPSA),
            ('ESOL', 'Estimated Solubility', float_fmt, self.esol.calc_esol),
            ('HBA', '# H-Bond Acceptors', count_fmt, mDesc.CalcNumHBA),
            ('HBD', '# H-Bond Donors', count_fmt, mDesc.CalcNumHBD),
            ('RB', '# Rotatable Bonds', count_fmt, mDesc.CalcNumRotatableBonds),
            ('AR', '# Aromatic Rings', count_fmt, mDesc.CalcNumAromaticRings),
        ]

        # No settings file: built-in properties only.
        if not os.path.exists(API_SETTINGS):
            return

        with open(API_SETTINGS, 'r') as settings_file:
            self.api = json.load(settings_file)

        if self.api.get('overwrite'):
            self._properties = []

        # validate config: every lookup below exists only to raise KeyError /
        # TypeError when the config is malformed.
        try:
            for endpoint in self.api.get('endpoints'):
                if not endpoint.get('name'):
                    raise Exception('Invalid config: missing endpoint name')

                for required in ('url', 'method', 'data'):
                    _ = endpoint[required]
                for info in endpoint.get('properties').values():
                    for required in ('description', 'format', 'path'):
                        _ = info[required]

        except KeyError as key:
            raise Exception(
                f'Invalid config: missing {key} on {endpoint["name"]}')

        except TypeError:
            raise Exception('Invalid config: array where object should be')

        # register properties: each one is fetched through
        # self.fetch_property with the endpoint and property name pre-bound.
        for endpoint in self.api.get('endpoints'):
            for prop, info in endpoint['properties'].items():
                fetcher = partial(self.fetch_property, endpoint, prop)
                self._properties.append(
                    (prop, info['description'], info['format'], fetcher))
Esempio n. 9
0
def get_lipinksi_test(mol, rule_test):
    """Return True when ``mol`` passes the mass window and threshold test.

    The molecule passes when its exact molecular weight is strictly between
    300 and 500 and its Crippen logP, Lipinski H-bond donor count and
    Lipinski H-bond acceptor count are each ``<= rule_test``.
    """
    mol.UpdatePropertyCache(strict=False)
    exact_mw = rdMolDescriptors.CalcExactMolWt(mol)

    # CalcCrippenDescriptors returns a (logP, MR) tuple; only logP is used.
    log_p = rdMolDescriptors.CalcCrippenDescriptors(mol)[0]
    h_donors = rdMolDescriptors.CalcNumLipinskiHBD(mol)
    h_acceptors = rdMolDescriptors.CalcNumLipinskiHBA(mol)

    within_threshold = all(
        value <= rule_test for value in (log_p, h_donors, h_acceptors))
    return 300 < exact_mw < 500 and within_threshold
Esempio n. 10
0
def calc(smi, name):
    """Compute a tuple of descriptors for one SMILES string.

    Args:
        smi: SMILES string to parse.
        name: identifier echoed back as the first element of the result and
            used in diagnostics.

    Returns:
        ``None`` if the SMILES cannot be parsed (a message is written to
        stderr). Otherwise a tuple: ``(name, HBA, HBD, HBA + HBD, ring count,
        rotatable bonds, TPSA, logP, MR, MW, Fsp3, scaffold heavy-atom
        fraction)`` with the floating-point values rounded to 2 or 3 decimals.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    hba = rdMolDescriptors.CalcNumHBA(m)
    hbd = rdMolDescriptors.CalcNumHBD(m)
    nrings = rdMolDescriptors.CalcNumRings(m)
    rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
    psa = rdMolDescriptors.CalcTPSA(m)
    logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
    mw = rdMolDescriptors._CalcMolWt(m)
    csp3 = rdMolDescriptors.CalcFractionCSP3(m)

    # Fraction of heavy atoms that sit in the scaffold. A molecule with no
    # heavy atoms would otherwise divide by zero.
    hac = m.GetNumHeavyAtoms()
    if hac == 0:
        fmf = 0
    else:
        fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac

    return name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2), \
           round(csp3, 3), round(fmf, 3)
Esempio n. 11
0
    def __init__(self):
        """Create scratch files and the built-in property table.

        Each entry of ``self._properties`` is a 4-tuple
        ``(key, description, printf-style format, callable)``.
        """
        scratch_dir = tempfile.TemporaryDirectory()
        self.temp_dir = scratch_dir
        # delete=False: the file is not removed when the handle is closed.
        self.temp_sdf = tempfile.NamedTemporaryFile(dir=scratch_dir.name,
                                                    suffix='.sdf',
                                                    delete=False)

        self.esol = ESOLCalculator()

        float_fmt = '%.3f'
        count_fmt = '%d'
        self._properties = [
            ('MW', 'Molecular Weight', float_fmt, Desc.MolWt),
            ('logP', 'Lipophilicity (logP)', float_fmt,
             lambda mol: mDesc.CalcCrippenDescriptors(mol)[0]),
            ('TPSA', 'Total Polar Surface Area', float_fmt, mDesc.CalcTPSA),
            ('ESOL', 'Estimated Solubility', float_fmt, self.esol.calc_esol),
            ('HBA', '# H-Bond Acceptors', count_fmt, mDesc.CalcNumHBA),
            ('HBD', '# H-Bond Donors', count_fmt, mDesc.CalcNumHBD),
            ('RB', '# Rotatable Bonds', count_fmt, mDesc.CalcNumRotatableBonds),
            ('AR', '# Aromatic Rings', count_fmt, mDesc.CalcNumAromaticRings),
        ]
Esempio n. 12
0
 def calculate_properties(self, smiles=None, mol=None, props=None):
     """Calculate the requested basic properties and store them in self.data.

     smiles : SMILES string, parsed only when ``mol`` is not given
     mol    : molecule to use directly (takes precedence over ``smiles``)
     props  : collection of property keys to compute; recognised keys are
              py_formula, py_em, py_n_Cl_Br, py_na, py_mw, py_fsp3, py_rb,
              py_tpsa, py_clogp, py_nar, py_nhba, py_nhbd

     returns : error (bool) -- True when nothing was requested or no molecule
     could be obtained, False once the requested values have been stored."""
     if props is None or len(props) == 0:
         return True
     if mol is None:
         if smiles is None:
             # Nothing to parse: report the error instead of crashing in RDKit.
             return True
         mol = Chem.MolFromSmiles(smiles)
     if mol is None:
         return True
     if 'py_formula' in props:
         self.data['py_formula'] = desc.CalcMolFormula(mol)
     if 'py_em' in props:
         self.data['py_em'] = round(desc.CalcExactMolWt(mol), 5)
     if 'py_n_Cl_Br' in props:
         # Combined number of chlorine and bromine atoms.
         self.data['py_n_Cl_Br'] = sum(
             1 for atom in mol.GetAtoms()
             if atom.GetSymbol() in ('Cl', 'Br'))
     if 'py_na' in props:
         self.data['py_na'] = mol.GetNumAtoms()
     if 'py_mw' in props:
         self.data['py_mw'] = desc._CalcMolWt(mol)
     if 'py_fsp3' in props:
         self.data['py_fsp3'] = desc.CalcFractionCSP3(mol)
     if 'py_rb' in props:
         self.data['py_rb'] = desc.CalcNumRotatableBonds(mol)
     if 'py_tpsa' in props:
         self.data['py_tpsa'] = desc.CalcTPSA(mol)
     if 'py_clogp' in props:
         # Index 0 of the Crippen result is the logP.
         self.data['py_clogp'] = desc.CalcCrippenDescriptors(mol)[0]
     if 'py_nar' in props:
         self.data['py_nar'] = desc.CalcNumAromaticRings(mol)
     if 'py_nhba' in props:
         self.data['py_nhba'] = desc.CalcNumHBA(mol)
     if 'py_nhbd' in props:
         self.data['py_nhbd'] = desc.CalcNumHBD(mol)
     return False
def calculate_scalar_descriptors(molecule, symbols):
    """Build a flat feature vector of scalar descriptors for ``molecule``.

    Args:
        molecule: molecule passed to every ``rdMD`` descriptor function.
        symbols: sequence of element symbols; it is converted to a numpy
            array and the H, C, N, O and F entries are counted.

    Returns:
        numpy array holding, in order: asphericity, the Crippen descriptors,
        exact molecular weight, eccentricity, Fsp3, Labute ASA, NPR1, NPR2,
        Hall-Kier alpha, the five element counts, eight ring features,
        amide bond count, H-bond acceptor count and H-bond donor count.
    """
    features = [rdMD.CalcAsphericity(molecule)]
    features.extend(rdMD.CalcCrippenDescriptors(molecule))
    features.extend([
        rdMD.CalcExactMolWt(molecule),
        rdMD.CalcEccentricity(molecule),
        rdMD.CalcFractionCSP3(molecule),
        rdMD.CalcLabuteASA(molecule),
        rdMD.CalcNPR1(molecule),
        rdMD.CalcNPR2(molecule),
        rdMD.CalcHallKierAlpha(molecule),
    ])

    # elemental distribution
    symbols = np.array(symbols)
    for element in ('H', 'C', 'N', 'O', 'F'):
        features.append(np.sum(symbols == element))

    # ring features
    ring_calculators = (
        rdMD.CalcNumAliphaticCarbocycles,
        rdMD.CalcNumAliphaticHeterocycles,
        rdMD.CalcNumAromaticCarbocycles,
        rdMD.CalcNumAromaticHeterocycles,
        rdMD.CalcNumSaturatedCarbocycles,
        rdMD.CalcNumSaturatedHeterocycles,
        rdMD.CalcNumSpiroAtoms,       # atom shared between rings with one bond
        rdMD.CalcNumBridgeheadAtoms,  # atom shared between rings with at least two bonds
    )
    for calculator in ring_calculators:
        features.append(calculator(molecule))

    # other counts
    features.append(rdMD.CalcNumAmideBonds(molecule))
    features.append(rdMD.CalcNumHBA(molecule))  # number of hydrogen acceptors
    features.append(rdMD.CalcNumHBD(molecule))  # number of hydrogen donors

    return np.array(features)
def get_fingerprint(SMILES=None, E_BIND=None):
    """
    PRE: Takes in a MOLECULE as a SMILES string, or None
    POST: With SMILES=None, returns the alphabetically sorted list of feature names.
          Otherwise returns the list of feature values for the molecule, ordered by sorted feature
          name so that the two lists line up position by position.
          E_BIND is accepted but not used by this function.
    """

    def get_atom_types(mol):
        """
        PRE: Takes in the mol
        POST: Returns a dictionary with the atom types and numbers
        """
        atom_types = {}
        for atom in mol.GetAtoms():
            symbol = atom.GetSymbol()
            atom_types[symbol] = atom_types.get(symbol, 0) + 1
        return atom_types

    def AreRingFused(mol):
        """
        PRE  : Takes in a mol rdkit
        POST : Returns the max number of fused rings. That is the maximum number of rings any atom belongs to
               (0 when the molecule has no rings)
        """
        ring_dic = {}
        for ring in Chem.GetSymmSSSR(mol):
            for atom in ring:
                ring_dic[atom] = ring_dic.get(atom, 0) + 1
        # On Python 3 `ring_dic.values() == []` is never true, so the old empty check let max()
        # raise ValueError for acyclic molecules; default=0 covers that case.
        return max(ring_dic.values(), default=0)

    def isRingAromatic(mol, ring):
        """
        PRE: Takes in a mol and a ring given as a tuple of atom id
        POST: Returns TRUE is all the atoms inside the ring are aromatic and FALSE otherwise
        """
        return all(mol.GetAtomWithIdx(ids).GetIsAromatic() for ids in ring)

    def getVolume(mol, atom_types):
        """
        PRE: Takes in a mol with HYDROGENS ADDED (atom_types is accepted but not used)
        POST: Returns (volume, aromatic ring count, non-aromatic ring count, size of the largest aromatic
              ring, size of the largest non-aromatic ring). The volume comes from AllChem.ComputeMolVolume
              after embedding a conformer and MMFF-optimising it; this adds a conformer to mol.
              Raises ValueError when the molecule cannot be embedded/optimised.
        """
        ra = 0
        largest_ra = 0
        rna = 0
        largest_rna = 0
        for ring in Chem.GetSymmSSSR(mol):
            ring_size = len(ring)
            if isRingAromatic(mol, tuple(ring)):
                ra += 1
                largest_ra = max(largest_ra, ring_size)
            else:
                rna += 1
                largest_rna = max(largest_rna, ring_size)
        try:
            AllChem.EmbedMolecule(mol)
            AllChem.MMFFOptimizeMolecule(mol)
            volume = AllChem.ComputeMolVolume(mol)
        except Exception as err:
            # `Exception` rather than a bare except so Ctrl-C is not turned into a ValueError
            raise ValueError("Can't build the molecule") from err
        return volume, ra, rna, largest_ra, largest_rna

    features = [
        'atomNbr',
        'Volume',
        'NAtom',
        'OAtom',
        'SAtom',
        'PAtom',
        'ClAtom',
        'BrAtom',
        'FAtom',
        'IAtom',
        'AromaticRingNumber',
        'LargestAromaticRingAtomNbr',
        'NonAromaticRingNumber',
        'LargestNonAromaticRingAtomNbr',
        'MaxNbrFusedRings',
        'SurfaceArea',
        'Charge',
        'NitroNbr',
        'AlcoholNbr',
        'KetoneNbr',
        'NitrileNbr',
        'ThiolNbr',
        'Phenol_likeNbr',
        'EsterNbr',
        'SulfideNbr',
        'CarboxilicAcidNbr',
        'EtherNbr',
        'AmideNbr',
        'AnilineNbr',
        'PrimaryAmineNbr',
        'SecondaryAmineNbr',
        'RotableBondNum',
        'HBondDonor',
        'HBondAcceptor',
        'MolLogP',
        'MolMR'
    ]
    # Chi1v..Chi6v, Chi1n..Chi6n and Kappa1..Kappa3
    for i in range(6):
        features.append('Chi{}v'.format(i + 1))
        features.append('Chi{}n'.format(i + 1))
        if i < 3:
            features.append('Kappa{}'.format(i + 1))

    feature_dic = dict.fromkeys(features)
    if SMILES is None:
        return sorted(feature_dic.keys())

    mol = Chem.MolFromSmiles(SMILES)
    mol = Chem.AddHs(mol)

    feature_dic['RotableBondNum'] = descriptors.CalcNumRotatableBonds(mol)

    for i in range(6):
        feature_dic['Chi{}v'.format(i + 1)] = descriptors.CalcChiNv(mol, i + 1)
        feature_dic['Chi{}n'.format(i + 1)] = descriptors.CalcChiNn(mol, i + 1)

    feature_dic['Kappa1'] = descriptors.CalcKappa1(mol)
    feature_dic['Kappa2'] = descriptors.CalcKappa2(mol)
    feature_dic['Kappa3'] = descriptors.CalcKappa3(mol)

    feature_dic['HBondAcceptor'] = descriptors.CalcNumHBA(mol)
    feature_dic['HBondDonor'] = descriptors.CalcNumHBD(mol)

    # One call returns both Crippen values: logP at index 0, MR at index 1
    CrippenDescriptors = descriptors.CalcCrippenDescriptors(mol)
    feature_dic['MolLogP'] = CrippenDescriptors[0]
    feature_dic['MolMR'] = CrippenDescriptors[1]

    atom_types = get_atom_types(mol)
    for feat, symbol in zip(['NAtom', 'OAtom', 'SAtom', 'PAtom', 'ClAtom', 'BrAtom', 'FAtom', 'IAtom'],
                            ['N', 'O', 'S', 'P', 'Cl', 'Br', 'F', 'I']):
        feature_dic[feat] = atom_types.get(symbol, 0)

    feature_dic['atomNbr'] = mol.GetNumHeavyAtoms()
    (feature_dic['Volume'],
     feature_dic['AromaticRingNumber'],
     feature_dic['NonAromaticRingNumber'],
     feature_dic['LargestAromaticRingAtomNbr'],
     feature_dic['LargestNonAromaticRingAtomNbr']) = getVolume(mol, atom_types)
    feature_dic['MaxNbrFusedRings'] = AreRingFused(mol)
    # NOTE: 'SurfaceArea' holds the topological polar surface area (CalcTPSA)
    feature_dic['SurfaceArea'] = descriptors.CalcTPSA(mol)
    feature_dic['Charge'] = Chem.GetFormalCharge(mol)

    # SMARTS pattern -> feature name; each feature is the number of substructure matches
    funct_dic = {
        '[$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]': 'NitroNbr',
        '[#6][OX2H]': 'AlcoholNbr',
        '[NX1]#[CX2]': 'NitrileNbr',
        '[#6][CX3](=O)[#6]': 'KetoneNbr',
        '[#16X2H]': 'ThiolNbr',
        "[OX2H][cX3][c]": 'Phenol_likeNbr',
        '[#6][CX3](=O)[OX2H0][#6]': 'EsterNbr',
        '[#16X2H0]': 'SulfideNbr',
        '[CX3](=O)[OX2H1]': 'CarboxilicAcidNbr',
        '[OD2]([#6])[#6]': 'EtherNbr',
        '[#7X3][#6X3](=[OX1])[#6]': 'AmideNbr',
        '[NX3][cc]': 'AnilineNbr',
        '[NX3H2;!$(NC=O)]': 'PrimaryAmineNbr',
        '[NX3H1;!$(NC=O)]': 'SecondaryAmineNbr'}

    for funct, feature_name in funct_dic.items():
        patt = Chem.MolFromSmarts(funct)
        feature_dic[feature_name] = len(mol.GetSubstructMatches(patt))

    # Same ordering as the name list returned for SMILES=None
    return [feature_dic[key] for key in sorted(feature_dic.keys())]
Esempio n. 15
0
        mol = inMol

    if patts is None:
        global _smartsPatterns, _patternOrder
        if _smartsPatterns == {}:
            _patternOrder, _smartsPatterns = _ReadPatts(defaultPatternFileName)
        patts = _smartsPatterns
        order = _patternOrder

    atomContribs = _pyGetAtomContribs(mol, patts, order, verbose=verbose)
    return numpy.sum(atomContribs, 0)[1]


# Version tag attached to the pure-Python MR implementation as a function attribute.
_pyMolMR.version = "1.1.0"

# MolLogP forwards all positional and keyword arguments to
# rdMolDescriptors.CalcCrippenDescriptors and returns element 0 of its result
# (the logP value). The version attribute and docstring are attached
# afterwards because a lambda cannot carry them in its definition.
MolLogP = lambda *x, **y: rdMolDescriptors.CalcCrippenDescriptors(*x, **y)[0]
MolLogP.version = rdMolDescriptors._CalcCrippenDescriptors_version
MolLogP.__doc__ = """ Wildman-Crippen LogP value

  Uses an atom-based scheme based on the values in the paper:
     S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999)

  **Arguments**

    - inMol: a molecule

    - addHs: (optional) toggles adding of Hs to the molecule for the calculation.
      If true, hydrogens will be added to the molecule and used in the calculation.

"""
Esempio n. 16
0
def logp(molecule):
    """
    Return the Crippen logP for a molecule given as a selfies string.

    The string is decoded with ``sf.decoder``, the decoded output is parsed
    with ``MolFromSmiles``, and element 0 of the Crippen descriptors is returned.
    """
    smiles = sf.decoder(molecule)
    mol = MolFromSmiles(smiles)
    crippen = rdMolDescriptors.CalcCrippenDescriptors(mol)
    return crippen[0]
 def compute_logP(self, mol_input):
     """Return the logP component of the Crippen descriptors of ``mol_input``."""
     crippen = rdMolDescriptors.CalcCrippenDescriptors(mol_input)
     # The result unpacks into (logP, molar refractivity); only logP is returned.
     log_p, _molar_refractivity = crippen
     return log_p