import numpy as np

from skfeature.function.information_theoretical_based import LCSI


def cife(X, y, **kwargs):
    """
    This function implements the CIFE feature selection

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data, guaranteed to be discrete
    y: {numpy array}, shape (n_samples,)
        input class labels
    kwargs: {dictionary}
        n_selected_features: {int}
            number of features to select
        dict_features: {dictionary}
            mapping of feature indices to feature names, passed through to LCSI.lcsi

    Output
    ------
    F: {numpy array}, shape (n_features,)
        index of selected features, F[0] is the most important feature
    J_CMI: {numpy array}, shape (n_features,)
        corresponding objective function value of selected features
    MIfy: {numpy array}, shape (n_features,)
        corresponding mutual information between selected features and response
    dictFeatJcmi: {dictionary}
        mapping of selected features to their J_CMI values

    Reference
    ---------
    Brown, Gavin et al. "Conditional Likelihood Maximisation: A Unifying
    Framework for Information Theoretic Feature Selection." JMLR 2012.
    """
    if 'n_selected_features' in kwargs:
        n_selected_features = kwargs['n_selected_features']
        dictOfHeader = kwargs['dict_features']
        F, J_CMI, MIfy, dictFeatJcmi = LCSI.lcsi(X, y, beta=1, gamma=1,
                                                 n_selected_features=n_selected_features,
                                                 dict_features=dictOfHeader)
    else:
        F, J_CMI, MIfy, dictFeatJcmi = LCSI.lcsi(X, y, beta=1, gamma=1)
    return F, J_CMI, MIfy, dictFeatJcmi
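# Hedged usage sketch (not from the original source): cife above relies on a
# modified LCSI.lcsi that accepts a dict_features mapping and returns a fourth
# value, so the names below (feature_names, X_disc, y_disc) are illustrative
# placeholders only.
#
#     dict_header = {i: name for i, name in enumerate(feature_names)}
#     F, J_CMI, MIfy, dict_feat_jcmi = cife(X_disc, y_disc,
#                                           n_selected_features=10,
#                                           dict_features=dict_header)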
def mrmr(X, y, **kwargs):
    """
    This function implements the MRMR feature selection

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data, guaranteed to be discrete
    y: {numpy array}, shape (n_samples,)
        input class labels
    kwargs: {dictionary}
        n_selected_features: {int}
            number of features to select

    Output
    ------
    F: {numpy array}, shape (n_features,)
        index of selected features, F[0] is the most important feature
    J_CMI: {numpy array}, shape (n_features,)
        corresponding objective function value of selected features
    MIfy: {numpy array}, shape (n_features,)
        corresponding mutual information between selected features and response

    Reference
    ---------
    Brown, Gavin et al. "Conditional Likelihood Maximisation: A Unifying
    Framework for Information Theoretic Feature Selection." JMLR 2012.
    """
    if 'n_selected_features' in kwargs:
        n_selected_features = kwargs['n_selected_features']
        F, J_CMI, MIfy = LCSI.lcsi(X, y, gamma=0, function_name='MRMR',
                                   n_selected_features=n_selected_features)
    else:
        F, J_CMI, MIfy = LCSI.lcsi(X, y, gamma=0, function_name='MRMR')
    return F, J_CMI, MIfy
def mim(X, y, **kwargs):
    """
    This function implements the MIM feature selection

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data, guaranteed to be discrete
    y: {numpy array}, shape (n_samples,)
        input class labels
    kwargs: {dictionary}
        n_selected_features: {int}
            number of features to select

    Output
    ------
    F: {numpy array}, shape (n_features,)
        index of selected features, F[0] is the most important feature
    J_CMI: {numpy array}, shape (n_features,)
        corresponding objective function value of selected features
    MIfy: {numpy array}, shape (n_features,)
        corresponding mutual information between selected features and response

    Reference
    ---------
    Brown, Gavin et al. "Conditional Likelihood Maximisation: A Unifying
    Framework for Information Theoretic Feature Selection." JMLR 2012.
    """
    # MIM corresponds to LCSI with beta=0 and gamma=0. LCSI.lcsi returns three
    # values, so unpack them instead of returning the raw tuple.
    if 'n_selected_features' in kwargs:
        n_selected_features = kwargs['n_selected_features']
        F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=0, gamma=0,
                                   n_selected_features=n_selected_features)
    else:
        F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=0, gamma=0)
    return F, J_CMI, MIfy
def mifs(X, y, mode="rank", **kwargs):
    """
    This function implements the MIFS feature selection

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data, guaranteed to be discrete
    y: {numpy array}, shape (n_samples,)
        input class labels
    mode: {string}
        "index" returns the selected feature indices in order of importance;
        any other value returns a ranking over all features
    kwargs: {dictionary}
        beta: {float}
            penalty weight on redundancy between features (default 0.5)
        n_selected_features: {int}
            number of features to select

    Output
    ------
    F: {numpy array}
        if mode == "index", the indices of the selected features, F[0] being the
        most important feature; otherwise a ranking over all n_features features

    Reference
    ---------
    Brown, Gavin et al. "Conditional Likelihood Maximisation: A Unifying
    Framework for Information Theoretic Feature Selection." JMLR 2012.
    """
    beta = kwargs.get('beta', 0.5)
    if 'n_selected_features' in kwargs:
        n_selected_features = kwargs['n_selected_features']
        F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=beta, gamma=0,
                                   n_selected_features=n_selected_features)
    else:
        F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=beta, gamma=0)
    if mode == "index":
        return np.array(F, dtype=int)
    else:
        # reverse_argsort (assumed to be defined/imported elsewhere in this
        # module) expands F into a ranking over all X.shape[1] features.
        return reverse_argsort(F, size=X.shape[1])
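# Hedged demo (not from the original source): exercise mrmr, mim and mifs on a
# small synthetic discrete dataset. It assumes the standard scikit-feature
# LCSI.lcsi signature; the array names below are placeholders.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    X_demo = rng.randint(0, 3, size=(50, 6))   # 50 samples, 6 discrete features
    y_demo = rng.randint(0, 2, size=50)        # binary labels

    F_mrmr, J_mrmr, MI_mrmr = mrmr(X_demo, y_demo, n_selected_features=3)
    F_mim, J_mim, MI_mim = mim(X_demo, y_demo, n_selected_features=3)
    F_mifs = mifs(X_demo, y_demo, mode="index", n_selected_features=3)

    print("MRMR:", F_mrmr)
    print("MIM: ", F_mim)
    print("MIFS:", F_mifs)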
# Tail end of the fcbf implementation, kept commented out:
#         if su_calculation(fp, fi) >= t1[i, 1]:
#             # construct the mask for features whose su is larger than su(fp, y)
#             idx = s_list[:, 0] != i
#             idx = np.array([idx, idx])
#             idx = np.transpose(idx)
#             # delete the feature by using the mask
#             s_list = s_list[idx]
#             length = len(s_list) // 2
#             s_list = s_list.reshape((length, 2))
#     return np.array(F, dtype=int), np.array(SU)
#
# feat_index, sym_arr = fcbf(X_train_data.iloc[:, :5], X_test_data.iloc[:, :5])

# MIM
from skfeature.function.information_theoretical_based import LCSI

F, J_CMI, MIfy = LCSI.lcsi(X_train_data, y_train_data, beta=0, gamma=0)

# AdaBoost feature importances
from sklearn.ensemble import AdaBoostRegressor

regr = AdaBoostRegressor(random_state=0, n_estimators=100)
regr.fit(X_train_data, y_train_data)
imp = regr.feature_importances_
X_train_data.columns[imp > 0]

# C4.5 decision tree via chefboost
from chefboost import Chefboost as chef
import pandas as pd

config = {'algorithm': 'C4.5'}
df = X_train_data.copy()  # copy so the Decision column is not added to X_train_data in place
df["Decision"] = y_train_data
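# Hedged completion (not from the original source): with the "Decision" column
# attached, the usual Chefboost call would be chef.fit on the combined frame.
# Whether this is what the original script did next is an assumption.
model = chef.fit(df, config=config)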