def problem3_3_5(data):
    """Select features with CFS, using the column labelled 6 as the target.

    ``data`` is first passed through ``NomToNum`` (presumably a
    nominal-to-numeric conversion -- confirm against its definition).
    Column ``6`` of the result is split off as the target and the
    remaining columns are handed to ``cfs.cfs`` as plain arrays.

    Returns:
        A ``(selectedIDX, newdata)`` pair: the value returned by
        ``cfs.cfs`` and the feature table restricted to those columns.
    """
    numeric = NomToNum(data)
    target = numeric[6]
    features = numeric.drop([6], axis=1)

    selectedIDX = cfs.cfs(features.values, target.values)

    # NOTE(review): ``.loc`` is label-based, while ``cfs.cfs`` was given a bare
    # array and so presumably returns positions. The two only coincide when
    # the remaining column labels are 0..k-1 -- confirm.
    return selectedIDX, features.loc[:, selectedIDX]
def fit_interact_cfs(self, X, y, mode='uc', threshold=0, backward=True, look_ahead=1, **kwargs):
    """Store the sorted union of the ``interact`` and ``cfs`` selections.

    ``interact`` is called with ``(X, y, threshold)`` and ``cfs`` with
    ``(X, y, backward, look_ahead, mode)``. Their results are merged,
    de-duplicated and sorted ascending, then saved on ``self.cols_``.
    Extra keyword arguments are accepted and ignored.
    """
    interaction_cols = interact(X, y, threshold)
    cfs_cols = cfs(X, y, backward, look_ahead, mode)
    self.cols_ = sorted(set(interaction_cols) | set(cfs_cols))
def fit_cfs(self, X, y, mode='uc', backward=True, look_ahead=1):
    """Run ``cfs`` on ``X`` and ``y`` and keep its result on ``self.cols_``.

    The options are forwarded positionally in the order
    ``backward, look_ahead, mode``, which differs from the order in
    which this method declares them.
    """
    selected = cfs(X, y, backward, look_ahead, mode)
    self.cols_ = selected
import cfs
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Script: load the dataset, run CFS feature selection, build a reduced
# feature table, and score a random forest on the full feature set.
# NOTE(review): `pd` and `np` are used below but not imported in this
# snippet -- presumably imported earlier in the file; confirm.

data = pd.read_csv('data/mama.csv')
col = data.columns
y = data.diagnosis

# Columns removed from the feature table (the target is one of them).
# Named `drop_cols` rather than `list` so the builtin `list` stays usable.
drop_cols = ['Unnamed: 32', 'id', 'diagnosis']
x = data.drop(drop_cols, axis=1)

# value_counts() orders by descending frequency, so this unpacking assumes
# exactly two classes with the benign one being the more frequent.
B, M = y.value_counts()
print('Number of Benign: ', B)
print('Number of Malignant : ', M)

features = cfs.cfs(np.asarray(x), np.asarray(y))
# Selected features (recorded from a previous run) = [24, 8, 20, 0, 4, 1]

# Drop the attributes that were not selected. The positions are hard-coded
# from the recorded result above, not derived from `features`.
x_1 = x.drop(
    x.columns[[2, 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15,
               16, 17, 18, 19, 21, 22, 23, 25, 26, 27, 28, 29]],
    axis=1,
)

# Original (full) feature set: 70 % train, 30 % test.
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.3, random_state=42)

# Reduced feature set: same 70 % / 30 % split parameters.
x_train, x_test, y_train, y_test = train_test_split(
    x_1, y, test_size=0.3, random_state=42)

# Fit and score on the original feature set.
clf_rf = RandomForestClassifier(random_state=43)
clr_rf = clf_rf.fit(X_train, Y_train)
ac = accuracy_score(Y_test, clf_rf.predict(X_test))