Example #1
# Imports assumed by this snippet; `out` (output directory) and the
# project-local ImportanceSelect transformer are defined elsewhere.
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline


def reduction_cluster_nn(X, y, problem):
    n = len(X[0])  # number of input features
    sm = SMOTE()  # oversample minority classes to balance the data
    rf = RandomForestClassifier(n_estimators=100,
                                class_weight='balanced',
                                random_state=5,
                                n_jobs=7)
    filtr = ImportanceSelect(rf)
    km = KMeans(random_state=5)
    mlp = MLPClassifier(solver='adam',
                        alpha=1e-5,
                        shuffle=True,
                        early_stopping=True,
                        activation='relu',
                        verbose=True)
    X_res, y_res = sm.fit_resample(X, y)  # fit_sample was renamed fit_resample in imbalanced-learn

    parameters = {
        'NN__hidden_layer_sizes': [(n, n, n, n, n)],
        'filtr__n': [2, 5, 10, 15, 20],
        'km__n_clusters': [2, 3, 4, 5, 6],
    }

    sss = StratifiedShuffleSplit(
        n_splits=5,
        test_size=0.2)  # no need for this given 50000 random sample
    # KMeans as a mid-pipeline step maps inputs to cluster-distance space
    pipe = Pipeline([('filtr', filtr), ('km', km), ('NN', mlp)])
    gs = GridSearchCV(pipe, parameters, verbose=10, cv=sss)

    gs.fit(X_res, y_res)
    clf = gs.best_estimator_
    print(clf)
    print(gs.best_score_)

    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + problem + ' dr_cluster_nn.csv')

    return clf, gs.best_score_, gs
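
Every example here routes the data through an `ImportanceSelect` step whose `n` parameter (`filtr__n` / `filter__n` in the grids) sets how many top-ranked features to keep. The class itself is project-local and never shown; the sketch below is a minimal guess at such a transformer, assuming the usual top-n-by-forest-importance behavior (the `n=1` default and attribute names are assumptions):

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin

class ImportanceSelect(BaseEstimator, TransformerMixin):
    """Keep the n features ranked highest by the wrapped forest's importances."""
    def __init__(self, model, n=1):
        self.model = model
        self.n = n

    def fit(self, X, y=None):
        self.model.fit(X, y)
        return self

    def transform(self, X):
        # column indices of the top-n features, by descending importance
        top = np.argsort(self.model.feature_importances_)[::-1][:self.n]
        return X[:, top]

Because `n` is an `__init__` parameter, `GridSearchCV` can tune it through the pipeline prefix (`filtr__n`) exactly as the parameter grids in these examples do.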
Example #2
fi_RF_pageblocks = rfc.fit(blocks_X, blocks_Y).feature_importances_

plt.plot(range(len(fi_RF_pageblocks)), fi_RF_pageblocks, marker='o', markersize=4, linestyle="-")
plt.title("Pageblocks Feature Importance by RF")
plt.xlabel("Feature")
plt.ylabel("Importance")
plt.savefig(out + 'Pageblock_part2_RF.png')
plt.close()

tmp2 = pd.Series(np.sort(fi_RF_pageblocks)[::-1])
tmp2.to_csv(out + 'fi_RF_pageblocks.csv')

#%% Validation for part2

filtr = ImportanceSelect(rfc)

dims1 = [2, 4, 5, 10, 15, 20, 26]
grid = {'filter__n': dims1}
mlp = MLPClassifier(solver='lbfgs',
                    activation='identity',
                    alpha=0.1,
                    hidden_layer_sizes=(50, ),
                    max_iter=2000,
                    early_stopping=True,
                    random_state=5)
pipe = Pipeline([('filter', filtr), ('NN', mlp)])
gs = GridSearchCV(pipe, grid, verbose=10, cv=5)

gs.fit(loans_X, loans_Y)
tmp = pd.DataFrame(gs.cv_results_)
Example #3
    rfc = RandomForestClassifier(n_estimators=100,
                                 class_weight='balanced',
                                 random_state=5,
                                 n_jobs=7)
    fs_madelon = rfc.fit(madelonX, madelonY).feature_importances_
    fs_digits = rfc.fit(digitsX, digitsY).feature_importances_

    tmp = pd.Series(np.sort(fs_madelon)[::-1])
    tmp.to_csv(out + 'madelon scree.csv')

    tmp = pd.Series(np.sort(fs_digits)[::-1])
    tmp.to_csv(out + 'digits scree.csv')

    # raise
    #%% Data for 2
    if flag == 1:
        nn_arch = nn_arch_madelon
    filtr = ImportanceSelect(rfc)
    grid = {
        'filter__n': dims,
        'NN__alpha': nn_reg,
        'NN__hidden_layer_sizes': nn_arch
    }
    mlp = MLPClassifier(activation='relu',
                        max_iter=2000,
                        early_stopping=True,
                        random_state=5)
    pipe = Pipeline([('filter', filtr), ('NN', mlp)])
    gs = GridSearchCV(pipe, grid, n_jobs=num_jobs, verbose=10, cv=5)

    gs.fit(madelonX, madelonY)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'Madelon dim red.csv')
Example #4
def main():
    out = './BASES/'

    np.random.seed(0)
    character = pd.read_hdf('./BASES/datasets.hdf', 'character')
    character_X = character.drop('Class', axis=1).copy().values
    character_Y = character['Class'].copy().values

    madelon = pd.read_hdf('./BASES/datasets.hdf', 'madelon')
    madelon_X = madelon.drop('Class', axis=1).copy().values
    madelon_Y = madelon['Class'].copy().values

    madelon_X = StandardScaler().fit_transform(madelon_X)
    character_X = StandardScaler().fit_transform(character_X)

    # clusters = [2, 5, 10, 15, 20, 25, 30, 35, 40]
    dim_red = [2, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60]
    dims_red_s = [2, 4, 6, 8, 10, 12, 14, 16]

    # %% data for 1

    rfc = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=5, n_jobs=7)
    fs_madelon = rfc.fit(madelon_X, madelon_Y).feature_importances_
    fs_character = rfc.fit(character_X, character_Y).feature_importances_

    tmp = pd.Series(np.sort(fs_madelon)[::-1])
    tmp.to_csv(out + 'madelon scree.csv')

    tmp = pd.Series(np.sort(fs_character)[::-1])
    tmp.to_csv(out + 'character_scree.csv')

    # %% Data for 2
    filtr = ImportanceSelect(rfc)
    grid = {'filter__n': dim_red, 'NN__alpha': nn_reg, 'NN__hidden_layer_sizes': nn_arch}
    mlp = MLPClassifier(activation='relu', max_iter=2000, early_stopping=True, random_state=5)
    pipe = Pipeline([('filter', filtr), ('NN', mlp)])
    gs = GridSearchCV(pipe, grid, verbose=10, cv=5)

    gs.fit(madelon_X, madelon_Y)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'Madelon dim red.csv')

    grid = {'filter__n': dims_red_s, 'NN__alpha': nn_reg, 'NN__hidden_layer_sizes': nn_arch}
    mlp = MLPClassifier(activation='relu', max_iter=2000, early_stopping=True, random_state=5)
    pipe = Pipeline([('filter', filtr), ('NN', mlp)])
    gs = GridSearchCV(pipe, grid, verbose=10, cv=5)

    gs.fit(character_X, character_Y)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'character_dim_red.csv')
    #    raise
    # %% data for 3
    # Set this from chart 2 and dump, use clustering script to finish up
    dim = 10
    filtr = ImportanceSelect(rfc, dim)

    madelon_X2 = filtr.fit_transform(madelon_X, madelon_Y)
    madelon_2 = pd.DataFrame(np.hstack((madelon_X2, np.atleast_2d(madelon_Y).T)))
    cols = list(range(madelon_2.shape[1]))
    cols[-1] = 'Class'
    madelon_2.columns = cols
    madelon_2.to_hdf(out + 'datasets.hdf', 'madelon', complib='blosc', complevel=9)

    dim = 10
    filtr = ImportanceSelect(rfc, dim)
    character_X2 = filtr.fit_transform(character_X, character_Y)
    character_2 = pd.DataFrame(np.hstack((character_X2, np.atleast_2d(character_Y).T)))
    cols = list(range(character_2.shape[1]))
    cols[-1] = 'Class'
    character_2.columns = cols
    character_2.to_hdf(out + 'datasets.hdf', 'character', complib='blosc', complevel=9)
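
The two `to_hdf` calls above hand the reduced datasets to the clustering script referenced in the comment. Note they write back into the same file and keys this script loaded from, so the reduced data replaces the raw data on disk. A downstream consumer would read it back just as the originals were loaded (same path and keys as above):

import pandas as pd

madelon_reduced = pd.read_hdf('./BASES/datasets.hdf', 'madelon')
X2 = madelon_reduced.drop('Class', axis=1).values
y2 = madelon_reduced['Class'].values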
Example #5
    mlp = MLPClassifier(activation='relu',
                        max_iter=1000,
                        early_stopping=True,
                        random_state=5)
    pipe = Pipeline([('rp', rp), ('NN', mlp)])
    gs = GridSearchCV(pipe, grid, verbose=10, cv=5)

    gs.fit(wineX, wineY)
    results[i]['RP'] = 100. * gs.best_score_
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv('./RP/nn.csv')

    rfc = RandomForestClassifier(n_estimators=100,
                                 class_weight='balanced',
                                 random_state=5,
                                 n_jobs=-1)
    filtr = ImportanceSelect(rfc)
    grid = {
        'filter__n': [i],
        'NN__alpha': nn_reg,
        'NN__hidden_layer_sizes': nn_arch
    }
    mlp = MLPClassifier(activation='relu',
                        max_iter=1000,
                        early_stopping=True,
                        random_state=5)
    pipe = Pipeline([('filter', filtr), ('NN', mlp)])
    gs = GridSearchCV(pipe, grid, verbose=10, cv=5)

    gs.fit(wineX, wineY)
    results[i]['RF'] = 100. * gs.best_score_
    tmp = pd.DataFrame(gs.cv_results_)
    indices = np.argsort(fs_contra)[::-1]
    for f in range(contraX.shape[1]):
        print("%d. feature %d (%f)" %
              (f + 1, indices[f], fs_contra[indices[f]]))

    tmp = pd.Series(np.sort(fs_cancer)[::-1])
    tmp.to_csv(out + 'cancer scree.csv')

    indices = np.argsort(fs_cancer)[::-1]
    for f in range(cancerX.shape[1]):
        print("%d. feature %d (%f)" %
              (f + 1, indices[f], fs_cancer[indices[f]]))

    #%% Data for 2
    filtr = ImportanceSelect(rfc)
    grid = {
        'filter__n': dims_contra,
        'NN__alpha': nn_reg,
        'NN__hidden_layer_sizes': nn_arch
    }
    mlp = MLPClassifier(activation='relu',
                        max_iter=2000,
                        early_stopping=True,
                        random_state=5)
    pipe = Pipeline([('filter', filtr), ('NN', mlp)])
    gs = GridSearchCV(pipe, grid, verbose=10, cv=5)

    gs.fit(contraX, contraY)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'contra dim red.csv')
Example #7
    rfc = RandomForestClassifier(n_estimators=100,
                                 class_weight='balanced',
                                 random_state=5,
                                 n_jobs=7)

    fs_wine = rfc.fit(wineX, wineY).feature_importances_
    fs_digit = rfc.fit(digitX, digitY).feature_importances_

    tmp = pd.Series(np.sort(fs_wine)[::-1])
    tmp.to_csv(out + 'wine scree.csv')

    tmp = pd.Series(np.sort(fs_digit)[::-1])
    tmp.to_csv(out + 'digit scree.csv')

    # Data for 2
    filtr = ImportanceSelect(rfc)
    grid = {
        'filter__n': dims_wine,
        'NN__alpha': nn_reg,
        'NN__hidden_layer_sizes': nn_arch
    }
    mlp = MLPClassifier(activation='relu',
                        max_iter=2000,
                        early_stopping=True,
                        random_state=5)
    pipe = Pipeline([('filter', filtr), ('NN', mlp)])
    gs = GridSearchCV(pipe, grid, verbose=10, cv=5)

    gs.fit(wineX, wineY)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'wine dim red.csv')
Example #8
cluster_range = range(1, 11)
dims = range(1, 30)

rfc = RandomForestClassifier(n_estimators=100,
                             class_weight='balanced',
                             random_state=5,
                             n_jobs=-1)
fs_br = rfc.fit(brX, brY).feature_importances_
tmp = pd.Series(np.sort(fs_br)[::-1])
tmp.to_csv('./RF/breast_scree.csv')
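
`barplot_breast` (called just below) is a plotting helper defined elsewhere in the project. Given that it receives the descending-sorted importance Series, a plausible minimal version is sketched here; the body, labels, and output filename are assumptions:

import matplotlib.pyplot as plt

def barplot_breast(importances):
    # bar chart of the sorted RF feature importances for the breast dataset
    plt.bar(range(len(importances)), importances.values)
    plt.title('Breast Feature Importance by RF')
    plt.xlabel('Feature rank')
    plt.ylabel('Importance')
    plt.savefig('./RF/breast_importance.png')  # hypothetical filename
    plt.close()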

barplot_breast(tmp)

dim = 10
filtr = ImportanceSelect(rfc, dim)
brX2 = filtr.fit_transform(brX, brY)
br2 = pd.DataFrame(np.hstack((brX2, np.atleast_2d(brY).T)))
cols = list(range(br2.shape[1]))
cols[-1] = 'Class'
br2.columns = cols
br2.to_csv('./RF/breast.csv')

# Abalone Dataset
abalone = pd.read_csv('./BASE/abalone.csv')
abaloneX = abalone.drop('Class', axis=1).copy().values
abaloneY = abalone['Class'].copy().values
abaloneX = StandardScaler().fit_transform(abaloneX)

cluster_range = range(1, 11)
dims = range(1, 10)
# raise Exception('Remove this line to run code')

#2

rfc = RandomForestClassifier(n_estimators=100,
                             class_weight='balanced',
                             random_state=5,
                             n_jobs=7)
fs_perm = rfc.fit(perm_x, perm_y).feature_importances_
fs_housing = rfc.fit(housing_x, housing_y).feature_importances_

tmp = pd.Series(np.sort(fs_perm)[::-1])
tmp.to_csv(out + 'perm scree.csv')

tmp = pd.Series(np.sort(fs_housing)[::-1])
tmp.to_csv(out + 'housing scree.csv')

#4
filtr = ImportanceSelect(rfc)
grid = {'filter__n': dims, 'NN__alpha': nn_reg, 'NN__hidden_layer_sizes': nn_layers}
mlp = MLPClassifier(activation='relu', max_iter=nn_iter, early_stopping=True, random_state=5)
pipe = Pipeline([('filter', filtr), ('NN', mlp)])
gs = GridSearchCV(pipe, grid, verbose=10, cv=5)

gs.fit(perm_x, perm_y)
tmp = pd.DataFrame(gs.cv_results_)
tmp.to_csv(out + 'perm dim red.csv')

grid = {'filter__n': dims_big, 'NN__alpha': nn_reg, 'NN__hidden_layer_sizes': nn_layers}
mlp = MLPClassifier(activation='relu', max_iter=nn_iter, early_stopping=True, random_state=5)
pipe = Pipeline([('filter', filtr), ('NN', mlp)])
gs = GridSearchCV(pipe, grid, verbose=10, cv=5)
Example #10
    rfc = RandomForestClassifier(n_estimators=100,
                                 class_weight='balanced',
                                 random_state=5,
                                 n_jobs=7)
    fs_biodeg = rfc.fit(biodegX, biodegY).feature_importances_
    fs_digits = rfc.fit(digitsX, digitsY).feature_importances_

    tmp = pd.Series(np.sort(fs_biodeg)[::-1])
    tmp.to_csv(out + 'biodeg scree.csv')

    tmp = pd.Series(np.sort(fs_digits)[::-1])
    tmp.to_csv(out + 'digits scree.csv')

    #%% Data for 2
    filtr = ImportanceSelect(rfc)
    grid = {
        'filter__n': dimsb,
        'NN__alpha': nn_reg,
        'NN__hidden_layer_sizes': nn_arch
    }
    mlp = MLPClassifier(activation='relu',
                        max_iter=2000,
                        early_stopping=True,
                        random_state=5)
    pipe = Pipeline([('filter', filtr), ('NN', mlp)])
    gs = GridSearchCV(pipe, grid, verbose=10, cv=5)

    gs.fit(biodegX, biodegY)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'Biodeg dim red.csv')
Example #11
    tmp.to_csv(out + 'wine scree.csv')

    indices = np.argsort(fs_wine)[::-1]
    for f in range(wineX.shape[1]):
        print("%d. feature %d (%f)" % (f + 1, indices[f], fs_wine[indices[f]]))

    tmp = pd.Series(np.sort(fs_cancer)[::-1])
    tmp.to_csv(out + 'cancer scree.csv')

    indices = np.argsort(fs_cancer)[::-1]
    for f in range(cancerX.shape[1]):
        print("%d. feature %d (%f)" %
              (f + 1, indices[f], fs_cancer[indices[f]]))

    #%% Data for 2
    filtr = ImportanceSelect(rfc)
    grid = {
        'filter__n': dims_wine,
        'NN__alpha': nn_reg,
        'NN__hidden_layer_sizes': nn_arch
    }
    mlp = MLPClassifier(activation='relu',
                        max_iter=2000,
                        early_stopping=True,
                        random_state=5)
    pipe = Pipeline([('filter', filtr), ('NN', mlp)])
    gs = GridSearchCV(pipe, grid, verbose=10, cv=5)

    gs.fit(wineX, wineY)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'wine dim red.csv')
Example #12
    rfc = RandomForestClassifier(n_estimators=100,
                                 class_weight='balanced',
                                 random_state=5,
                                 n_jobs=7)
    fs_faults = rfc.fit(faultsX, faultsY).feature_importances_
    fs_bc = rfc.fit(bcX, bcY).feature_importances_

    tmp = pd.Series(np.sort(fs_faults)[::-1])
    tmp.to_csv(out1 + 'faults scree.csv')

    tmp = pd.Series(np.sort(fs_bc)[::-1])
    tmp.to_csv(out1 + 'bc scree.csv')

    #%% Data for 2
    filtr = ImportanceSelect(rfc)
    grid = {
        'filter__n': dims,
        'NN__alpha': nn_reg,
        'NN__hidden_layer_sizes': nn_arch
    }
    mlp = MLPClassifier(activation='relu',
                        max_iter=200,
                        early_stopping=True,
                        random_state=5,
                        learning_rate_init=0.1,
                        momentum=0.3)
    pipe = Pipeline([('filter', filtr), ('NN', mlp)])
    gs = GridSearchCV(pipe, grid, verbose=10, cv=5)

    gs.fit(faultsX, faultsY)
Example #13
fs_digits = rfc.fit(digitsX, digitsY).feature_importances_
print('Part 2D - Starting RF for segmentation dataset...')
fs_seg = rfc.fit(segX, segY).feature_importances_

tmp = pd.Series(np.sort(fs_digits)[::-1])
tmp.to_csv('./P2_Dimensionality_Reduction/digits_RF_feature_importance.csv')

tmp = pd.Series(np.sort(fs_seg)[::-1])
tmp.to_csv('./P2_Dimensionality_Reduction/seg_RF_feature_importance.csv')

# Run Neural Networks
rfc = RandomForestClassifier(n_estimators=100,
                             class_weight='balanced',
                             random_state=5,
                             n_jobs=7)
filtr = ImportanceSelect(rfc)
grid = {
    'filter__n': dims_digits,
    'NN__learning_rate_init': nn_lr,
    'NN__hidden_layer_sizes': nn_arch
}
mlp = MLPClassifier(activation='relu',
                    max_iter=2000,
                    early_stopping=True,
                    random_state=5)
pipe = Pipeline([('filter', filtr), ('NN', mlp)])
gs = GridSearchCV(pipe, grid, verbose=10, cv=5)
gs.fit(digitsX, digitsY)
nn_results = pd.DataFrame(gs.cv_results_)
nn_results.to_csv('./P4_Neural_Networks_Reduced/digits_RF_nn_results.csv')
Example #14
blocks_balanced = class1
for n in range(2, 6):
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
    blocks_balanced = pd.concat([blocks_balanced, blocks[blocks['Class'] == n]])
blocks_X = blocks_balanced.drop('Class', axis=1).copy().values
blocks_Y = blocks_balanced['Class'].copy().values
blocks_X = StandardScaler().fit_transform(blocks_X)
print(blocks_X.shape)

rfc = RandomForestClassifier(n_estimators=100,
                             class_weight='balanced',
                             random_state=5,
                             n_jobs=7)
#%% Select features by random forests

dim = 5
filtr = ImportanceSelect(rfc, dim)

loansX2 = filtr.fit_transform(loans_X, loans_Y)
loans2 = pd.DataFrame(np.hstack((loansX2, np.atleast_2d(loans_Y).T)))
cols = list(range(loans2.shape[1]))
cols[-1] = 'Class'
loans2.columns = cols
#madelon2.to_hdf(out+'datasets.hdf','madelon',complib='blosc',complevel=9)

#%%Clustering on selected data
km = KMeans(random_state=5)
# the old sklearn.mixture.GMM class was removed; GaussianMixture is its current name
gmm = GaussianMixture(random_state=5)

clusters = [2, 3, 4, 5, 8, 12, 15, 18, 21, 25]

loans_km_acc = []
Example #15
    rfc = RandomForestClassifier(n_estimators=100,
                                 class_weight='balanced',
                                 random_state=5,
                                 n_jobs=7)
    fs_abalone = rfc.fit(abaloneX, abaloneY).feature_importances_
    fs_digits = rfc.fit(digitsX, digitsY).feature_importances_

    tmp = pd.Series(np.sort(fs_abalone)[::-1])
    tmp.to_csv(out + 'abalone scree.csv')

    tmp = pd.Series(np.sort(fs_digits)[::-1])
    tmp.to_csv(out + 'digits scree.csv')

    #%% Data for 2
    filtr = ImportanceSelect(rfc)
    grid = {
        'filter__n': abalone_dims,
        'NN__alpha': nn_reg,
        'NN__hidden_layer_sizes': nn_arch,
        'NN__activation': nn_activation
    }
    mlp = MLPClassifier(max_iter=2000, early_stopping=True, random_state=5)
    pipe = Pipeline([('filter', filtr), ('NN', mlp)])
    gs = GridSearchCV(pipe, grid, verbose=10, cv=5, scoring='f1_macro')

    gs.fit(abaloneX, abaloneY)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'abalone dim red.csv')

    grid = {
Example #16
    rfc = RandomForestClassifier(n_estimators=100,
                                 class_weight='balanced',
                                 random_state=5,
                                 n_jobs=1)
    fs_diamonds = rfc.fit(diamondsX, diamondsY).feature_importances_
    fs_digits = rfc.fit(digitsX, digitsY).feature_importances_

    tmp = pd.Series(np.sort(fs_diamonds)[::-1])
    tmp.to_csv(out + 'diamonds scree.csv')

    tmp = pd.Series(np.sort(fs_digits)[::-1])
    tmp.to_csv(out + 'digits scree.csv')

    #%% task 4
    filtr = ImportanceSelect(rfc)
    grid = {
        'filter__n': dims1,
        'NN__alpha': nn_reg,
        'NN__hidden_layer_sizes': nn_arch
    }
    mlp = MLPClassifier(activation='relu',
                        max_iter=2000,
                        early_stopping=True,
                        random_state=5)
    pipe = Pipeline([('filter', filtr), ('NN', mlp)])
    gs = GridSearchCV(pipe, grid, verbose=10, cv=5)

    gs.fit(diamondsX, diamondsY)
    tmp = pd.DataFrame(gs.cv_results_)
    tmp.to_csv(out + 'diamonds dim red.csv')
Example #17
fs_spam = rfc.fit(spamX, spamY).feature_importances_
print('Part 2D - Starting RF for letter dataset...')
fs_letter = rfc.fit(letterX, letterY).feature_importances_

tmp = pd.Series(np.sort(fs_spam)[::-1])
tmp.to_csv('./P2_Dimensionality_Reduction/spam_RF_feature_importance.csv')

tmp = pd.Series(np.sort(fs_letter)[::-1])
tmp.to_csv('./P2_Dimensionality_Reduction/letter_RF_feature_importance.csv')

# Run Neural Networks
rfc = RandomForestClassifier(n_estimators=100,
                             class_weight='balanced',
                             random_state=5,
                             n_jobs=7)
filtr = ImportanceSelect(rfc)
grid = {
    'filter__n': dims_spam,
    'NN__learning_rate_init': nn_lr,
    'NN__hidden_layer_sizes': nn_arch
}
mlp = MLPClassifier(activation='relu',
                    max_iter=2000,
                    early_stopping=True,
                    random_state=5)
pipe = Pipeline([('filter', filtr), ('NN', mlp)])
gs = GridSearchCV(pipe, grid, verbose=10, cv=5)
gs.fit(spamX, spamY)
nn_results = pd.DataFrame(gs.cv_results_)
nn_results.to_csv('./P4_Neural_Networks_Reduced/spam_RF_nn_results.csv')
Example #18
out = '../results/random_forest/'

cancer_x, cancer_y, housing_x, housing_y = load_data() # cancer, housing

rfc = RandomForestClassifier(n_estimators=100,
                             class_weight='balanced',
                             random_state=5,
                             n_jobs=7)
fs_cancer = rfc.fit(cancer_x, cancer_y).feature_importances_
fs_housing = rfc.fit(housing_x, housing_y).feature_importances_

tmp = pd.Series(np.sort(fs_cancer)[::-1])
tmp.to_csv(out + 'cancer part 2.csv')

tmp = pd.Series(np.sort(fs_housing)[::-1])
tmp.to_csv(out + 'housing part 2.csv')

dims = list(range(1, 31))
filtr = ImportanceSelect(rfc)
grid = {'filter__n': dims, 'NN__alpha': nn_reg, 'NN__hidden_layer_sizes': nn_layers}
mlp = MLPClassifier(activation='relu', max_iter=nn_iter, early_stopping=True, random_state=5)
pipe = Pipeline([('filter', filtr), ('NN', mlp)])
gs = GridSearchCV(pipe, grid, verbose=10, cv=5)

gs.fit(cancer_x, cancer_y)
tmp = pd.DataFrame(gs.cv_results_)
tmp.to_csv(out + 'cancer part 4.csv')

grid = {'filter__n': dims_big, 'NN__alpha': nn_reg, 'NN__hidden_layer_sizes': nn_layers}
mlp = MLPClassifier(activation='relu', max_iter=nn_iter, early_stopping=True, random_state=5)
pipe = Pipeline([('filter', filtr), ('NN', mlp)])
gs = GridSearchCV(pipe, grid, verbose=10, cv=5)