예제 #1
0
def problem3_3_5(data):
    """Run CFS feature selection and return the chosen columns.

    The input is converted with ``NomToNum``; the column labelled ``6`` of
    the converted frame is used as the target and every other column as a
    candidate feature.

    Args:
        data: Raw dataset accepted by ``NomToNum`` (presumably a DataFrame
            with nominal columns -- confirm against ``NomToNum``).

    Returns:
        A tuple ``(selectedIDX, newdata)`` where ``selectedIDX`` is whatever
        ``cfs.cfs`` returned and ``newdata`` is the feature frame restricted
        to those columns.
    """
    X = NomToNum(data)
    y = X[6]
    X = X.drop([6], axis=1)
    # cfs.cfs only receives the bare array, so the indices it returns can
    # only be positions within X.values, never column labels.
    selectedIDX = cfs.cfs(X.values, y.values)
    # Select by position (iloc), not by label (loc): once label 6 has been
    # dropped, labels and positions disagree for every column to its right,
    # so label-based lookup would pick the wrong column or raise KeyError.
    newdata = X.iloc[:, selectedIDX]
    return selectedIDX, newdata
예제 #2
0
	def fit_interact_cfs(self, X, y, mode='uc', threshold=0,
			backward=True, look_ahead=1, **kwargs):
		"""Store the sorted union of the INTERACT and CFS selections.

		``interact(X, y, threshold)`` and
		``cfs(X, y, backward, look_ahead, mode)`` are each called once; the
		distinct columns from both results are sorted in ascending order and
		saved on ``self.cols_``. Extra keyword arguments are accepted but
		ignored. Nothing is returned.
		"""
		interact_cols = interact(X, y, threshold)
		cfs_cols = cfs(X, y, backward, look_ahead, mode)
		# Deduplicate across both selectors, then order deterministically.
		self.cols_ = sorted(set(interact_cols).union(cfs_cols))
예제 #3
0
	def fit_cfs(self, X, y, mode='uc', backward=True, look_ahead=1):
		"""Run ``cfs`` on ``(X, y)`` and keep its result on ``self.cols_``.

		The options are forwarded positionally in the order
		``backward, look_ahead, mode``. Nothing is returned.
		"""
		selected = cfs(X, y, backward, look_ahead, mode)
		self.cols_ = selected
예제 #4
0
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import cfs

# Load the dataset and split it into target (diagnosis) and feature columns.
data = pd.read_csv('data/mama.csv')
col = data.columns
y = data.diagnosis
# Named ``drop_cols`` rather than ``list`` so the builtin ``list`` is not
# shadowed for the rest of the module.
drop_cols = ['Unnamed: 32', 'id', 'diagnosis']
x = data.drop(drop_cols, axis=1)

# value_counts() orders by descending frequency, so B receives the count of
# the most frequent label and M the other one; the unpacking requires exactly
# two distinct labels.
# NOTE(review): assumes benign is the majority class -- confirm on the data.
B, M = y.value_counts()
print('Number of Benign: ', B)
print('Number of Malignant : ', M)

# CFS works on bare arrays, so the result holds column positions of ``x``.
features = cfs.cfs(np.asarray(x), np.asarray(y))
# Selected features on the original run: [24, 8, 20, 0, 4, 1]

# Keep only the selected attributes. The subset is derived from ``features``
# instead of a hard-coded list of columns to drop, so it cannot go stale when
# the data or the selection changes; sorting keeps the original column order.
x_1 = x.iloc[:, sorted(int(i) for i in features)]

# Full feature set: 70 % train / 30 % test.
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.3, random_state=42)

# Reduced (CFS-selected) feature set: same 70 % / 30 % split and seed.
x_train, x_test, y_train, y_test = train_test_split(x_1, y, test_size=0.3, random_state=42)

# Baseline: random forest trained and scored on the full feature set.
clf_rf = RandomForestClassifier(random_state=43)
# ``clr_rf`` is bound to whatever fit() returns; the name is kept as-is in
# case code beyond this section refers to it.
clr_rf = clf_rf.fit(X_train, Y_train)
ac = accuracy_score(Y_test, clf_rf.predict(X_test))