Esempio n. 1
0
    def testPythonDescriptorFunctor(self):
        """Register a Python-defined property functor and compute it.

        A ``PropertyFunctor`` subclass named "NumAtoms" is registered with
        ``rdMD.Properties`` and evaluated on methane, both before and after
        the local reference to the functor instance is deleted. The test then
        prints every default property, and a two-property selection, for
        benzene.
        """

        class NumAtoms(Descriptors.PropertyFunctor):
            """Property functor returning ``mol.GetNumAtoms()``."""

            def __init__(self):
                Descriptors.PropertyFunctor.__init__(self, "NumAtoms", "1.0.0")

            def __call__(self, mol):
                return mol.GetNumAtoms()

        numAtoms = NumAtoms()
        rdMD.Properties.RegisterProperty(numAtoms)
        props = rdMD.Properties(["NumAtoms"])
        # assertEqual/assertIn replace the deprecated assertEquals alias
        # (removed in Python 3.12) and give clearer failure messages.
        self.assertEqual(1,
                         props.ComputeProperties(Chem.MolFromSmiles("C"))[0])
        self.assertIn("NumAtoms", rdMD.Properties.GetAvailableProperties())

        # check memory: drop the only Python-side reference to the functor and
        # confirm the registered property can still be computed and listed.
        del numAtoms
        self.assertEqual(1,
                         props.ComputeProperties(Chem.MolFromSmiles("C"))[0])
        self.assertIn("NumAtoms", rdMD.Properties.GetAvailableProperties())

        m = Chem.MolFromSmiles("c1ccccc1")

        # Default-constructed Properties: print every (name, value) pair.
        properties = rdMD.Properties()
        for name, value in zip(properties.GetPropertyNames(),
                               properties.ComputeProperties(m)):
            print(name, value)

        # Explicit selection of two properties by name.
        properties = rdMD.Properties(['exactmw', 'lipinskiHBA'])
        for name, value in zip(properties.GetPropertyNames(),
                               properties.ComputeProperties(m)):
            print(name, value)
Esempio n. 2
0
    def testProperties(self):
        """Check that per-name property calculators agree with the full set.

        Computes all default properties for one molecule, then rebuilds a
        single-property calculator for each name and verifies it yields the
        same value and reports exactly that one name. Finally checks that
        invalid constructor arguments are rejected.
        """
        all_props = rdMD.Properties()
        names = list(all_props.GetAvailableProperties())
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(names, list(all_props.GetPropertyNames()))
        m = Chem.MolFromSmiles("C1CC1CC")
        results = all_props.ComputeProperties(m)

        for i, name in enumerate(names):
            single = rdMD.Properties([name])
            res = single.ComputeProperties(m)
            self.assertEqual(len(res), 1)
            self.assertEqual(res[0], results[i])
            self.assertEqual(single.GetPropertyNames()[0], name)
            self.assertEqual(len(single.GetPropertyNames()), 1)

        # Non-string property names must be rejected with a TypeError.
        with self.assertRaises(TypeError):
            rdMD.Properties([1, 2, 3])

        # Unknown property names must be rejected with a RuntimeError.
        with self.assertRaises(RuntimeError):
            rdMD.Properties(["property that doesn't exist"])
Esempio n. 3
0
def _RDKit_properties_FUTURE(ifile) -> (dict):
    '''Compute the RDKit property set for the molecules in an SDF file.

    The default ``rdMolDescriptors.Properties`` calculator is built once and
    its ``ComputeProperties`` method is handed, together with the input file
    and the property names, to ``_calc_descriptors``, which does the actual
    per-molecule work.

    Parameters
    ----------

    ifile: str
        SDF input file

    Returns
    -------

    results_dict: dict
        Whatever ``_calc_descriptors`` returns. According to the original
        documentation of this function it carries the keys:

        'matrix': ndarray, properties matrix. Full form with non processed
                and failed molecules.
        'names': list, matrix column names. Properties names.
        'success_arr': ndarray, array with bool values indicating if mol
                    had any issues during supplier (None) or in the
                    descriptor array (presence of NaNs).

    '''
    calculator = rdMolDescriptors.Properties()
    column_names = list(calculator.GetPropertyNames())

    LOG.info(
        f'computing {len(column_names)} RDKit properties per molecule...')

    return _calc_descriptors(calculator.ComputeProperties, ifile,
                             column_names)
Esempio n. 4
0
    def compute_properties(self, feature_name=None):
        """Compute the basic RDKit properties of ``self.Molecule``.

        Args:
            feature_name: a list of property names to compute. If not
                          specified (``None``), all available properties are
                          calculated.

        Returns:
            props_dict: dictionary mapping each property name to the value
                        computed for ``self.Molecule``.

        Raises:
            AssertionError: if ``self.Molecule`` is not an RDKit ``Mol``.
        """

        # isinstance (rather than an exact type comparison) also accepts
        # subclasses of Mol instead of rejecting them.
        assert isinstance(self.Molecule, Chem.rdchem.Mol), \
            "self.Molecule must be an rdkit.Chem.rdchem.Mol"

        if feature_name is None:
            properties = rdDesc.Properties()
        else:
            properties = rdDesc.Properties(feature_name)

        props_dict = dict(
            zip(properties.GetPropertyNames(),
                properties.ComputeProperties(self.Molecule)))

        return props_dict
def getChemicalProperties(SMILE):
    """Generate RDKit chemical properties from a SMILES representation.

    Args:
        SMILE: SMILES string describing the molecule.

    Returns:
        A ``(property_names, properties)`` tuple of two equally long lists.
        On any failure (unparseable SMILES, RDKit error) both lists are
        filled with the placeholder string ``"N/A"`` instead; this function
        is deliberately best-effort and does not raise for bad input.
    """
    # Placeholder width used when RDKit cannot even list its properties.
    # 25 is the count the original code assumed; once the real property
    # names are known the placeholders are sized to match them, so success
    # and failure results always have the same length.
    placeholder_count = 25
    try:
        calculator = rdMolDescriptors.Properties()
        property_names = list(calculator.GetPropertyNames())
        placeholder_count = len(property_names)

        molecule = Chem.MolFromSmiles(SMILE)
        if molecule is None:
            # Route an unparseable SMILES to the placeholder result
            # explicitly instead of relying on RDKit rejecting None.
            raise ValueError(f"could not parse SMILES: {SMILE!r}")

        properties = list(calculator.ComputeProperties(molecule))
        return property_names, properties

    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return ["N/A"] * placeholder_count, ["N/A"] * placeholder_count
Esempio n. 6
0
def get_rdkit_properties(df):
    """Append one column per RDKit property to ``df``.

    Molecules are parsed from ``df.CompoundSMILES``; every property exposed
    by the default ``rdMolDescriptors.Properties`` calculator is computed for
    each molecule and stored in a column named after the property.

    Returns the (mutated) ``df`` together with the calculator that was used.
    """
    properties = rdMolDescriptors.Properties()
    molecules = [MolFromSmiles(smiles) for smiles in df.CompoundSMILES]

    column_names = list(properties.GetPropertyNames())

    # One row of property values per molecule, in column_names order.
    rows = [
        [value for _name, value in zip(column_names,
                                       properties.ComputeProperties(mol))]
        for mol in molecules
    ]

    # Transpose by index so that every property becomes a DataFrame column.
    for column, name in enumerate(column_names):
        df[name] = [row[column] for row in rows]

    return df, properties
Esempio n. 7
0
    def _do_norm_X(self, smiles_list, find_norm=True):
        """Build and standardise the feature rows for a list of SMILES.

        Each row is the three logS predictions (ESOL, random forest, NFP)
        followed by the RDKit properties named in ``self._feats``.

        Args:
            smiles_list: SMILES strings to featurise.
            find_norm: when True, recompute ``self._size``,
                ``self._mean_props`` and ``self._std_props`` from this batch;
                when False, reuse the values stored by an earlier call.

        Returns:
            A list of rows (lists) with every column shifted by the stored
            mean and divided by the stored standard deviation.
        """
        X = []

        logS_list_esol = self.esol_calculator.predict(smiles_list)
        logS_list_rf = self.rf_regression.predict(smiles_list)
        logS_list_nfp = self.nfp_regression.predict(smiles_list)

        # Build a single calculator for all requested properties instead of
        # constructing one Properties object per feature per molecule.
        prop_calculator = rdMolDescriptors.Properties(list(self._feats))

        for i, smiles in enumerate(smiles_list):
            mol = Chem.MolFromSmiles(smiles)
            props = list(prop_calculator.ComputeProperties(mol))
            vals = [logS_list_esol[i], logS_list_rf[i], logS_list_nfp[i]]

            X.append(vals + props)

        if find_norm:
            self._size = len(X[0])
            self._mean_props = np.mean(X, axis=0)
            self._std_props = np.std(X, axis=0)

        logging.debug("Size of props")
        logging.debug(self._size)
        logging.debug("Means of X")
        logging.debug(self._mean_props)
        logging.debug("Std of X")
        logging.debug(self._std_props)

        # NOTE(review): a column whose standard deviation is zero is not
        # guarded against here (unchanged from the original behaviour).
        for row in X:
            for j, X_ij in enumerate(row):
                row[j] = (X_ij - self._mean_props[j]) / self._std_props[j]

        return X
Esempio n. 8
0
    def __init__(self,
                 fp='ecfp',
                 radius=2,
                 fp_length=1024,
                 prop=False,
                 n_ests=200):
        """Set up the ensemble regressor and its three base predictors.

        Args:
            fp: fingerprint type; stored and forwarded to ``RFPredictor``.
            radius: fingerprint radius; stored as ``self._fp_r``.
            fp_length: fingerprint length; stored as ``self._fp_length``.
            prop: accepted but not used by this constructor.
            n_ests: forwarded to ``RFPredictor`` as ``n_ests``.
        """
        super().__init__()
        self._name = "EnsembleRegressor"
        self._fp = fp
        self._fp_r = radius
        self._fp_length = fp_length
        # Names of every property of the default RDKit Properties calculator.
        self._feats = list(rdMolDescriptors.Properties().GetPropertyNames())

        self.model = None
        self.esol_calculator = ESOLCalculator()
        self.rf_regression = RFPredictor(n_ests=n_ests, fp_type=self._fp)
        self.nfp_regression = NfpPredictor()

        # Normalisation statistics; None / -1 until they are computed.
        self._means_logS = None
        self._std_logS = None
        # `_mean_props` is the attribute the normalisation code reads and
        # writes; `_means_props` is kept so existing references still work.
        self._mean_props = None
        self._means_props = None
        self._std_props = None
        self._size = -1
        self._epochs = 10
Esempio n. 9
0
def _RDKit_properties(ifile, **kwargs) -> (bool, (np.ndarray, list, list)):
    '''
    Computes RDKit properties for the SDF file provided as argument.

    Parameters
    ----------
    ifile: str
        SDF input file.
    **kwargs:
        Accepted for call compatibility; not used here.

    Returns
    -------
    (True, results) on success, where results is a dict with the keys
        'matrix': 2-D ndarray with one row per successfully processed
                  molecule (also when only one molecule succeeded),
        'names': list with the property (column) names,
        'success_arr': list of bool, one entry per molecule read from the
                       file, False for unreadable molecules or NaN rows.
    (False, message) when the supplier cannot be created, an exception is
    raised while computing, or no molecule could be processed.
    '''
    try:
        suppl = Chem.SDMolSupplier(ifile)
    except Exception as e:
        LOG.error(f'Unable to create supplier with exception {e}')
        return False, 'unable to create supplier'

    LOG.info('computing RDKit properties...')

    properties = rdMolDescriptors.Properties()

    # the property names double as the matrix column names
    md_name = list(properties.GetPropertyNames())

    success_list = []
    rows = []

    try:
        for mol in suppl:
            # success_list gets exactly one entry per molecule read, so its
            # length + 1 is the 1-based position of the current molecule.
            if mol is None:
                LOG.error(
                    f'Unable to process molecule #{len(success_list)+1} '
                    f'in {ifile}')
                success_list.append(False)
                continue

            descriptors = properties.ComputeProperties(mol)

            # Reject rows containing NaN. The check is applied to the
            # descriptors of every molecule, including the first one.
            if np.isnan(descriptors).any():
                success_list.append(False)
                continue

            rows.append(list(descriptors))
            success_list.append(True)

    except Exception as e:
        mol_pos = len(success_list) + 1
        LOG.error(
            f'Failed computing RDKit properties for molecule #{mol_pos} in {ifile}'
            f' with exception: {e}')
        return False, (f'Failed computing RDKit properties for molecule '
                       f'{mol_pos} in file {ifile}')

    # Stack once at the end instead of growing the matrix row by row.
    xmatrix = np.array(rows)

    LOG.debug(
        f'computed RDKit properties matrix with shape {np.shape(xmatrix)}')
    if not rows:
        return False, 'Unable to compute RDKit properties for molecule ' + ifile

    results = {
        'matrix': xmatrix,
        'names': md_name,
        'success_arr': success_list
    }
    return True, results
Esempio n. 10
0
def _RDKit_properties(ifile, **kwargs) -> (bool, (np.ndarray, list, list)):
    '''
    Computes RDKit properties for the SDF file provided as argument.

    Parameters
    ----------
    ifile: str
        SDF input file.
    **kwargs:
        Accepted for call compatibility; not used here.

    Returns
    -------
    (True, results) on success, where results is a dict with the keys
        'matrix': 2-D ndarray with one row per successfully processed
                  molecule,
        'names': list with the property (column) names,
        'success_arr': list of bool, one entry per molecule read from the
                       file, False for unreadable or empty molecules and
                       for rows containing NaN or infinite values.
    (False, message) when the supplier cannot be created, an exception is
    raised while computing, or no molecule could be processed.
    '''
    try:
        suppl = Chem.SDMolSupplier(ifile)
    except Exception as e:
        LOG.error(f'Unable to create supplier with exception {e}')
        return False, 'unable to create supplier'

    LOG.info('computing RDKit properties...')

    properties = rdMolDescriptors.Properties()

    # the property names double as the matrix column names
    md_name = list(properties.GetPropertyNames())

    success_list = []
    # Preallocate for the number of entries in the file; rows belonging to
    # failed molecules are trimmed off after the loop.
    est_obj = len(suppl)
    xmatrix = np.zeros((est_obj, len(md_name)))

    # num_obj counts successfully stored rows (the next free matrix row).
    num_obj = 0
    try:
        for mol in suppl:
            # success_list gets exactly one entry per molecule read, so its
            # length + 1 is the 1-based position of the current molecule.
            mol_pos = len(success_list) + 1

            if mol is None:
                LOG.error(
                    f'Unable to process molecule #{mol_pos} in {ifile}')
                success_list.append(False)
                continue

            if mol.GetNumHeavyAtoms() == 0:
                LOG.error(f'Empty molecule #{mol_pos} in {ifile}')
                success_list.append(False)
                continue

            descriptors = properties.ComputeProperties(mol)

            if np.isnan(descriptors).any() or np.isinf(descriptors).any():
                success_list.append(False)
                continue

            xmatrix[num_obj] = descriptors

            success_list.append(True)
            num_obj += 1

    except Exception as e:
        mol_pos = len(success_list) + 1
        LOG.error(
            f'Failed computing RDKit properties for molecule #{mol_pos} in {ifile}'
            f' with exception: {e}')
        return False, (f'Failed computing RDKit properties for molecule '
                       f'{mol_pos} in file {ifile}')

    if num_obj < est_obj:
        # Some molecules failed: keep only the rows that were filled. A
        # single slice replaces deleting the surplus rows one at a time.
        xmatrix = xmatrix[:num_obj]

    LOG.debug(
        f'computed RDKit properties matrix with shape {np.shape(xmatrix)}')
    if num_obj == 0:
        return False, 'Unable to compute RDKit properties for molecule ' + ifile

    results = {
        'matrix': xmatrix,
        'names': md_name,
        'success_arr': success_list
    }
    return True, results