コード例 #1
0
# ANN experiment.  The pipeline contains only the MLP classifier; the
# scaling step is intentionally left out.
pipeM = Pipeline([
    ('MLP',
     MLPClassifier(random_state=55, early_stopping=False, max_iter=2000)),
])

# Candidate hidden-layer layouts: 1, 2 or 3 layers whose width is d, d // 2
# or d * 2, where d is the size of data_x's second axis.
d = data_x.shape[1]
hiddens_data = [(width, ) * depth
                for depth in (1, 2, 3)
                for width in (d, d // 2, d * 2)]
# Values of alpha: 10**1, 10**0, ..., 10**-3.
alphas = [10**-exponent for exponent in np.arange(-1, 3.01, 1)]

params = dict(
    MLP__activation=['relu', 'logistic'],
    MLP__hidden_layer_sizes=hiddens_data,
    MLP__alpha=alphas,
)

data_clf = basicResults(pipeM, data_train_x, data_train_y, data_test_x,
                        data_test_y, params, 'ANN', dataset)
data_final_params = data_clf.best_params_

# Apply the selected parameters before producing the timing curve.
pipeM.set_params(**data_final_params)
makeTimingCurve(data_x, data_y, pipeM, 'ANN', dataset)

# Iteration curve over max_iter = 1, 2, 4, ..., 128.
iterationLC(pipeM, data_train_x, data_train_y, data_test_x, data_test_y,
            {'MLP__max_iter': [1 << k for k in range(8)]},
            'ANN',
            dataset=dataset)
コード例 #2
0
# Run the parameter search for the adult data and keep the object that
# exposes the selected parameters as ``best_params_``.
adult_clf = basicResults(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
                         params_adult, 'ANN', 'adult')

adult_final_params = adult_clf.best_params_
# "OF" variant: the selected parameters with MLP__alpha forced to 0
# (presumably to provoke over-fitting -- confirm against the write-up).
adult_OF_params = adult_final_params.copy()
adult_OF_params['MLP__alpha'] = 0

# Timing curve with the selected parameters and early stopping disabled.
pipeA.set_params(**adult_final_params)
pipeA.set_params(**{'MLP__early_stopping': False})
makeTimingCurve(adultX, adultY, pipeA, 'ANN', 'adult')

# Iteration curve with the selected parameters.  The parameters are
# re-applied in case the previous helper changed the pipeline.
pipeA.set_params(**adult_final_params)
pipeA.set_params(**{'MLP__early_stopping': False})
iterationLC(
    pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY, {
        'MLP__max_iter': [2**x for x in range(12)] +
        [2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000]
    }, 'ANN', 'adult')

# Same iteration curve for the "OF" (alpha = 0) parameter set.
pipeA.set_params(**adult_OF_params)
pipeA.set_params(**{'MLP__early_stopping': False})
iterationLC(
    pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY, {
        'MLP__max_iter': [2**x for x in range(12)] +
        [2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000]
    }, 'ANN_OF', 'adult')

# Load the biodeg table; every column except 'clas' is a feature.
# ``axis`` is passed by keyword: pandas 2.0 no longer accepts it
# positionally in DataFrame.drop.
biodeg = pd.read_hdf('datasets.hdf', 'biodeg')
biodegX = biodeg.drop('clas', axis=1).copy().values
biodegY = biodeg['clas'].copy().values
コード例 #3
0
ファイル: ANN.py プロジェクト: hymntaha/assignment1
def main():
    """Run the MLP ("ANN") experiments on the adult income data set.

    Reads ``data/adult_parsed.csv``, replaces the categorical columns with
    integer codes, splits the rows 70/30 (stratified on ``income``) and
    passes a scaler + ``MLPClassifier`` pipeline to the project helpers
    ``basicResults``, ``makeTimingCurve`` and ``iterationLC`` (defined
    elsewhere).  Nothing is returned.
    """
    adult = pd.read_csv('data/adult_parsed.csv')
    adult['net_capital'] = adult['capital-gain'] - adult['capital-loss']
    adult = adult.drop(["fnlwgt", "capital-gain", "capital-loss", "workclass"],
                       axis=1)

    # Integer-encode the categorical columns.  Series.map yields NaN for any
    # label missing from the mapping.
    adult['income'] = adult['income'].map({'<=50K': 0, '>50K': 1})
    adult['gender'] = adult['gender'].map({'Male': 0, 'Female': 1}).astype(int)
    adult['race'] = adult['race'].map({
        'Black': 0,
        'Asian-Pac-Islander': 1,
        'Other': 2,
        'White': 3,
        'Amer-Indian-Eskimo': 4,
    }).astype(int)
    adult['marital-status'] = adult['marital-status'].map({
        'Never-married': 0,
        'Widowed': 1,
        'Divorced': 2,
        'Separated': 3,
        'Married-spouse-absent': 4,
        'Married-civ-spouse': 5,
        'Married-AF-spouse': 6,
    })
    adult['education'] = adult['education'].map({
        'Preschool': 0,
        '1st-4th': 1,
        '5th-6th': 2,
        '7th-8th': 3,
        '9th': 4,
        '10th': 5,
        '11th': 6,
        '12th': 7,
        'Prof-school': 8,
        'HS-grad': 9,
        'Some-college': 10,
        'Assoc-voc': 11,
        'Assoc-acdm': 12,
        'Bachelors': 13,
        'Masters': 14,
        'Doctorate': 15,
    })
    adult['occupation'] = adult['occupation'].map({
        'Priv-house-serv': 0,
        'Protective-serv': 1,
        'Handlers-cleaners': 2,
        'Machine-op-inspct': 3,
        'Adm-clerical': 4,
        'Farming-fishing': 5,
        'Transport-moving': 6,
        'Craft-repair': 7,
        'Other-service': 8,
        'Tech-support': 9,
        'Sales': 10,
        'Exec-managerial': 11,
        'Prof-specialty': 12,
        'Armed-Forces': 13,
    })
    adult['native-country'] = adult['native-country'].map({
        '?': -1,
        'Puerto-Rico': 0,
        'Haiti': 1,
        'Cuba': 2,
        'Iran': 3,
        'Honduras': 4,
        'Jamaica': 5,
        'Vietnam': 6,
        'Mexico': 7,
        'Dominican-Republic': 8,
        'Laos': 9,
        'Ecuador': 10,
        'El-Salvador': 11,
        'Cambodia': 12,
        'Columbia': 13,
        'Guatemala': 14,
        'South': 15,
        'India': 16,
        'Nicaragua': 17,
        'Yugoslavia': 18,
        'Philippines': 19,
        'Thailand': 20,
        'Trinadad&Tobago': 21,
        'Peru': 22,
        'Poland': 23,
        'China': 24,
        'Hungary': 25,
        'Greece': 26,
        'Taiwan': 27,
        'Italy': 28,
        'Portugal': 29,
        'France': 30,
        'Hong': 31,
        'England': 32,
        'Scotland': 33,
        'Ireland': 34,
        'Holand-Netherlands': 35,
        'Canada': 36,
        'Germany': 37,
        'Japan': 38,
        'Outlying-US(Guam-USVI-etc)': 39,
        'United-States': 40,
    })
    adult['relationship'] = adult['relationship'].map({
        'Unmarried': 0,
        'Other-relative': 1,
        'Not-in-family': 2,
        'Wife': 3,
        'Husband': 4,
        'Own-child': 5,
    })

    # One-hot encode whatever non-numeric columns remain, then separate the
    # target column from the features.  ``axis`` is passed by keyword because
    # pandas 2.0 no longer accepts it positionally in DataFrame.drop.
    adult = pd.get_dummies(adult)
    adult_income_X = adult.drop('income', axis=1).copy().values
    adult_income_Y = adult['income'].copy().values

    adult_trgX, adult_tstX, adult_trgY, adult_tstY = ms.train_test_split(
        adult_income_X,
        adult_income_Y,
        test_size=0.3,
        random_state=0,
        stratify=adult_income_Y)

    pipeA = Pipeline([('Scale', StandardScaler()),
                      ('MLP',
                       MLPClassifier(max_iter=2000,
                                     early_stopping=True,
                                     random_state=55))])

    # Search space: 1-3 hidden layers of width d, d // 2 or d * 2 (d = number
    # of feature columns) and alpha from 10**1 down to 10**-5 in
    # half-decade steps.
    d = adult_income_X.shape[1]
    hiddens_adult = [(h, ) * l for l in [1, 2, 3] for h in [d, d // 2, d * 2]]
    alphas = [10**-x for x in np.arange(-1, 5.01, 1 / 2)]
    params_adult = {
        'MLP__activation': ['relu', 'logistic'],
        'MLP__alpha': alphas,
        'MLP__hidden_layer_sizes': hiddens_adult
    }

    adult_clf = basicResults(pipeA, adult_trgX, adult_trgY, adult_tstX,
                             adult_tstY, params_adult, 'ANN', 'adult')

    # NOTE: parameters recorded from an earlier run, kept for reference:
    # {'MLP__hidden_layer_sizes': (28, 28, 28), 'MLP__activation': 'logistic',
    #  'MLP__alpha': 0.0031622776601683794}
    adult_final_params = adult_clf.best_params_
    # "OF" variant: the selected parameters with alpha forced to 0.
    adult_OF_params = adult_final_params.copy()
    adult_OF_params['MLP__alpha'] = 0

    # max_iter values for the iteration curves: 1, 2, 4, ..., 2048 followed
    # by 2100, 2200, ..., 3000.
    max_iter_grid = [2**x for x in range(12)] + list(range(2100, 3001, 100))

    # Timing curve on the full data set, early stopping disabled.
    pipeA.set_params(**adult_final_params)
    pipeA.set_params(MLP__early_stopping=False)
    makeTimingCurve(adult_income_X, adult_income_Y, pipeA, 'ANN', 'adult')

    # Iteration curve with the selected parameters (re-applied in case the
    # previous helper changed the pipeline).
    pipeA.set_params(**adult_final_params)
    pipeA.set_params(MLP__early_stopping=False)
    iterationLC(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
                {'MLP__max_iter': list(max_iter_grid)}, 'ANN', 'adult')

    # Iteration curve for the alpha = 0 ("OF") parameter set.
    pipeA.set_params(**adult_OF_params)
    pipeA.set_params(MLP__early_stopping=False)
    iterationLC(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
                {'MLP__max_iter': list(max_iter_grid)}, 'ANN_OF', 'adult')
コード例 #4
0
# print("train",np.unique(adult_tstY))
# Linear SVM on the adult data: run the search over params_adult, then reuse
# the selected parameters for the timing and iteration curves.
adult_split = (adult_trgX, adult_trgY, adult_tstX, adult_tstY)
adult_clf = basicResults(pipeA, *adult_split, params_adult, 'SVM_Lin',
                         'adult')

adult_final_params = adult_clf.best_params_
# "OF" variant: the selected parameters with SVM__alpha replaced by 1e-16.
adult_OF_params = adult_final_params.copy()
adult_OF_params.update({'SVM__alpha': 1e-16})

pipeA.set_params(**adult_final_params)
makeTimingCurve(adultX, adultY, pipeA, 'SVM_Lin', 'adult')

# Iteration curves, in order: selected parameters first, then the "OF" set
# with a longer SVM__n_iter range.
for svm_params, n_iter_values, curve_name in (
    (adult_final_params, np.arange(1, 75, 3), 'SVM_Lin'),
    (adult_OF_params, np.arange(1, 200, 5), 'SVM_LinOF'),
):
    pipeA.set_params(**svm_params)
    iterationLC(pipeA, *adult_split, {'SVM__n_iter': n_iter_values},
                curve_name, 'adult')

# RBF SVM: pipeA is rebound to a new scaler + primalSVM_RBF pipeline.
gamma_fracsA = np.arange(0.2, 2.1, 0.2)

pipeA = Pipeline([
    ('Scale', StandardScaler()),
    ('SVM', primalSVM_RBF()),
])

params_adult = {
    'SVM__alpha': alphas,
    'SVM__n_iter': [int((1e6 / N_adult) / .8) + 1],
コード例 #5
0
# Description of the complexity curve: sweep SVM__C (displayed as
# "Penalty") over 0.001, 0.101, ... up to (but excluding) 2.5.
complexity_params = dict(
    name='SVM__C',
    display_name='Penalty',
    values=np.arange(0.001, 2.5, 0.1),
)

# Parameter search scored with F1; also requests the complexity curve.
data_clf = basicResults(
    pipeM, data_train_x, data_train_y, data_test_x, data_test_y,
    params, 'SVM', dataset,
    clf_name='SVM',
    scorer='f1',
    complexity_curve=True,
    complexity_params=complexity_params,
)
data_final_params = data_clf.best_params_

# Apply the selected parameters before producing the timing curve.
pipeM.set_params(**data_final_params)
makeTimingCurve(data_x, data_y, pipeM, 'SVM', dataset)

# Iteration curve over SVM__max_iter = 1, 11, 21, ..., 241.
svm_iter_grid = {'SVM__max_iter': range(1, 250, 10)}
iterationLC(pipeM, data_train_x, data_train_y, data_test_x, data_test_y,
            svm_iter_grid, 'SVM', scorer='f1', dataset=dataset)
コード例 #6
0
    'Boost__n_estimators': [5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110],
    'Boost__learning_rate': [(2**x) / 100 for x in range(8)] + [1]
}
# paramsA= {'Boost__n_estimators':[1,2,5,10,20,30,45,60,80,100],
#           'Boost__base_estimator__alpha':alphas}

# AdaBoost (SAMME) around the externally defined ``base`` estimator.
booster = AdaBoostClassifier(base_estimator=base,
                             algorithm='SAMME',
                             learning_rate=1,
                             random_state=55)

# Single-step pipeline; the scaling and feature-culling steps are disabled.
pipeM = Pipeline([('Boost', booster)])

data_clf = basicResults(pipeM, data_train_x, data_train_y, data_test_x,
                        data_test_y, params, 'Boost', dataset)
data_final_params = data_clf.best_params_

# Apply the selected parameters before producing the timing curve.
pipeM.set_params(**data_final_params)
makeTimingCurve(data_x, data_y, pipeM, 'Boost', dataset)

# Iteration curve over the number of boosting rounds.
boost_rounds = dict(Boost__n_estimators=[1, 2, 5, 10, 20, 30, 40, 50])
iterationLC(pipeM, data_train_x, data_train_y, data_test_x, data_test_y,
            boost_rounds, 'Boost', dataset=dataset)
コード例 #7
0
#madelon_final_params = {'n_estimators': 20, 'learning_rate': 0.02}
#adult_final_params = {'n_estimators': 10, 'learning_rate': 1}
#OF_params = {'learning_rate':1}

madelon_final_params = madelon_clf.best_params_
adult_final_params = adult_clf.best_params_
# "OF" parameter set shared by both data sets: base-estimator alpha of -1
# with 50 boosting rounds.
OF_params = {'Boost__base_estimator__alpha': -1, 'Boost__n_estimators': 50}

# Timing curves with the selected parameters.
pipeM.set_params(**madelon_final_params)
pipeA.set_params(**adult_final_params)
makeTimingCurve(madelonX, madelonY, pipeM, 'Boost', 'madelon')
makeTimingCurve(adultX, adultY, pipeA, 'Boost', 'adult')

# Iteration curves with the selected parameters.
pipeM.set_params(**madelon_final_params)
iterationLC(pipeM, madelon_trgX, madelon_trgY, madelon_tstX, madelon_tstY, {
    'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
}, 'Boost', 'madelon')
pipeA.set_params(**adult_final_params)
iterationLC(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
            {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50]}, 'Boost',
            'adult')

# Iteration curves with the "OF" parameters.
pipeM.set_params(**OF_params)
iterationLC(pipeM, madelon_trgX, madelon_trgY, madelon_tstX, madelon_tstY, {
    'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
}, 'Boost_OF', 'madelon')
pipeA.set_params(**OF_params)
# Fixed: this run is labelled 'adult' and uses the adult pipeline, so it
# must be given the adult split (the madelon split was passed before).
iterationLC(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
            {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50]}, 'Boost_OF',
            'adult')
コード例 #8
0
# Search space for the linear SVM on the adult data.  SVM__n_iter has a
# single candidate derived from N_adult.
params_adult = dict(
    SVM__alpha=alphas,
    SVM__n_iter=[int((1e6 / N_adult) / .8) + 1],
)

adult_clf = basicResults(pipeA, adult_trgX, adult_trgY, adult_tstX,
                         adult_tstY, params_adult, 'SVM_Lin', 'adult')

adult_final_params = adult_clf.best_params_
# "OF" variant: the selected parameters with SVM__alpha replaced by 1e-16.
adult_OF_params = adult_final_params.copy()
adult_OF_params.update({'SVM__alpha': 1e-16})

# Timing curve with the selected parameters.
pipeA.set_params(**adult_final_params)
makeTimingCurve(adultX, adultY, pipeA, 'SVM_Lin', 'adult')

# Iteration curve with the selected parameters.
pipeA.set_params(**adult_final_params)
n_iter_final = {'SVM__n_iter': np.arange(1, 75, 3)}
iterationLC(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
            n_iter_final, 'SVM_Lin', 'adult')

# Iteration curve with the "OF" parameters over a longer n_iter range.
pipeA.set_params(**adult_OF_params)
n_iter_of = {'SVM__n_iter': np.arange(1, 200, 5)}
iterationLC(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
            n_iter_of, 'SVM_LinOF', 'adult')

# RBF SVM
gamma_fracsA = np.arange(0.2, 2.1, 0.2)

pipeA = Pipeline([('Scale',StandardScaler()),
コード例 #9
0
#madelon_final_params = madelon_clf.best_params_
adult_final_params = adult_clf.best_params_
# "OF" parameter set: base-estimator alpha of -1 with 50 boosting rounds.
OF_params = {'Boost__base_estimator__alpha': -1, 'Boost__n_estimators': 50}

# Timing curve with the selected parameters.
pipeA.set_params(**adult_final_params)
makeTimingCurve(adultX, adultY, pipeA, 'Boost', 'adult')

# Iteration curve with the selected parameters.
pipeA.set_params(**adult_final_params)
iterationLC(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
            {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50]}, 'Boost',
            'adult')

# Iteration curve with the "OF" parameters.
pipeA.set_params(**OF_params)
iterationLC(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
            {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50]}, 'Boost_OF',
            'adult')

# Load the biodeg table; every column except 'clas' is a feature.
# ``axis`` is passed by keyword: pandas 2.0 no longer accepts it
# positionally in DataFrame.drop.
biodeg = pd.read_hdf('datasets.hdf', 'biodeg')
biodegX = biodeg.drop('clas', axis=1).copy().values
biodegY = biodeg['clas'].copy().values

alphas = [
    -1, -1e-3, -(1e-3) * 10**-0.5, -1e-2, -(1e-2) * 10**-0.5, -1e-1,
    -(1e-1) * 10**-0.5, 0, (1e-1) * 10**-0.5, 1e-1, (1e-2) * 10**-0.5, 1e-2,
コード例 #10
0
def main():
    """Run the AdaBoost experiments on the cars and madelon data sets.

    Both tables are read from ``data/processed/datasets.hdf`` and split
    70/30 (stratified on ``Class``).  Each data set gets its own boosted
    ``dtclf_pruned`` pipeline, which is passed to the project helpers
    ``basicResults``, ``makeTimingCurve`` and ``iterationLC`` (defined
    elsewhere).  Nothing is returned.
    """
    # ``axis`` is passed by keyword: pandas 2.0 no longer accepts it
    # positionally in DataFrame.drop.
    cars = pd.read_hdf('data/processed/datasets.hdf', 'cars')
    carsX = cars.drop('Class', axis=1).copy().values
    carsY = cars['Class'].copy().values

    madelon = pd.read_hdf('data/processed/datasets.hdf', 'madelon')
    madelonX = madelon.drop('Class', axis=1).copy().values
    madelonY = madelon['Class'].copy().values

    # Candidate values for the base estimator's ``alpha`` parameter.
    alphas = [
        -1, -1e-3, -(1e-3) * 10**-0.5, -1e-2, -(1e-2) * 10**-0.5, -1e-1,
        -(1e-1) * 10**-0.5, 0, (1e-1) * 10**-0.5, 1e-1, (1e-2) * 10**-0.5,
        1e-2, (1e-3) * 10**-0.5, 1e-3
    ]

    cars_trgX, cars_tstX, cars_trgY, cars_tstY = ms.train_test_split(
        carsX, carsY, test_size=0.3, random_state=0, stratify=carsY)
    madelon_trgX, madelon_tstX, madelon_trgY, madelon_tstY = ms.train_test_split(
        madelonX, madelonY, test_size=0.3, random_state=0, stratify=madelonY)

    madelon_base = dtclf_pruned(criterion='gini',
                                class_weight='balanced',
                                random_state=55)
    cars_base = dtclf_pruned(criterion='entropy',
                             class_weight='balanced',
                             random_state=55)

    # Both data sets use the same search space.
    paramsA = {
        'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 45, 60, 80, 100],
        'Boost__base_estimator__alpha': alphas
    }
    paramsM = {
        'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 45, 60, 80, 100],
        'Boost__base_estimator__alpha': alphas
    }

    madelon_booster = AdaBoostClassifier(algorithm='SAMME',
                                         learning_rate=1,
                                         base_estimator=madelon_base,
                                         random_state=55)
    cars_booster = AdaBoostClassifier(algorithm='SAMME',
                                      learning_rate=1,
                                      base_estimator=cars_base,
                                      random_state=55)

    # Madelon pipeline: scaling, then four successive random-forest feature
    # selections ('Cull1'..'Cull4', random_state 1..4, median threshold),
    # then the booster.
    cull_steps = [('Cull%d' % i,
                   SelectFromModel(RandomForestClassifier(random_state=i),
                                   threshold='median'))
                  for i in range(1, 5)]
    pipeM = Pipeline([('Scale', StandardScaler())] + cull_steps +
                     [('Boost', madelon_booster)])

    pipeA = Pipeline([('Scale', StandardScaler()), ('Boost', cars_booster)])

    madelon_clf = basicResults(pipeM, madelon_trgX, madelon_trgY, madelon_tstX,
                               madelon_tstY, paramsM, 'Boost', 'madelon')
    cars_clf = basicResults(pipeA, cars_trgX, cars_trgY, cars_tstX, cars_tstY,
                            paramsA, 'Boost', 'cars')

    madelon_final_params = madelon_clf.best_params_
    cars_final_params = cars_clf.best_params_
    # "OF" parameter set shared by both data sets.
    OF_params = {'Boost__base_estimator__alpha': -1, 'Boost__n_estimators': 50}

    madelon_rounds = [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    cars_rounds = [1, 2, 5, 10, 20, 30, 40, 50]

    # Timing curves with the selected parameters.
    pipeM.set_params(**madelon_final_params)
    pipeA.set_params(**cars_final_params)
    makeTimingCurve(madelonX, madelonY, pipeM, 'Boost', 'madelon')
    makeTimingCurve(carsX, carsY, pipeA, 'Boost', 'cars')

    # Iteration curves with the selected parameters.
    pipeM.set_params(**madelon_final_params)
    iterationLC(pipeM, madelon_trgX, madelon_trgY, madelon_tstX, madelon_tstY,
                {'Boost__n_estimators': list(madelon_rounds)}, 'Boost',
                'madelon')
    pipeA.set_params(**cars_final_params)
    iterationLC(pipeA, cars_trgX, cars_trgY, cars_tstX, cars_tstY,
                {'Boost__n_estimators': list(cars_rounds)}, 'Boost', 'cars')

    # Iteration curves with the "OF" parameters.
    pipeM.set_params(**OF_params)
    iterationLC(pipeM, madelon_trgX, madelon_trgY, madelon_tstX, madelon_tstY,
                {'Boost__n_estimators': list(madelon_rounds)}, 'Boost_OF',
                'madelon')
    pipeA.set_params(**OF_params)
    # Fixed: this run is labelled 'cars' and uses the cars pipeline, so it
    # must be given the cars split (the madelon split was passed before).
    iterationLC(pipeA, cars_trgX, cars_trgY, cars_tstX, cars_tstY,
                {'Boost__n_estimators': list(cars_rounds)}, 'Boost_OF', 'cars')
コード例 #11
0
redwine_clf = basicResults(pipeR, redwine_trgX, redwine_trgY, redwine_tstX,
                           redwine_tstY, params_redwine, 'SVM_Lin', 'redwine')

# Selected parameters per data set, plus a fixed "OF" parameter set
# (n_iter = 55, alpha = 1e-16) that is the same for both.
adult_final_params = adult_clf.best_params_
redwine_final_params = redwine_clf.best_params_
adult_OF_params = dict(SVM__n_iter=55, SVM__alpha=1e-16)
redwine_OF_params = dict(SVM__n_iter=55, SVM__alpha=1e-16)

# One row per data set:
# (pipeline, label, full data, train/test split, selected params, OF params).
svm_runs = (
    (pipeA, 'adult', (adultX, adultY),
     (adult_trgX, adult_trgY, adult_tstX, adult_tstY),
     adult_final_params, adult_OF_params),
    (pipeR, 'redwine', (redwineX, redwineY),
     (redwine_trgX, redwine_trgY, redwine_tstX, redwine_tstY),
     redwine_final_params, redwine_OF_params),
)

# Stage 1: timing curves with the selected parameters.
for pipe, label, full_xy, _, final_params, _ in svm_runs:
    pipe.set_params(**final_params)
    makeTimingCurve(*full_xy, pipe, 'SVM_Lin', label)

# Stage 2: iteration curves with the selected parameters.
for pipe, label, _, split, final_params, _ in svm_runs:
    pipe.set_params(**final_params)
    iterationLC(pipe, *split, {'SVM__n_iter': np.arange(1, 75, 3)},
                'SVM_Lin', label)

# Stage 3: iteration curves with the "OF" parameters, longer n_iter range.
for pipe, label, _, split, _, of_params in svm_runs:
    pipe.set_params(**of_params)
    iterationLC(pipe, *split, {'SVM__n_iter': np.arange(1, 200, 5)},
                'SVM_LinOF', label)

# #RBF SVM
# gamma_fracsA = np.arange(0.2,2.1,0.2)
# gamma_fracsM = np.arange(0.2,2.1,0.2)
# gamma_fracsR = np.arange(0.2,2.1,0.2)
コード例 #12
0
# Timing curves.  pipeA is assumed to be configured by the code preceding
# this point; the mushrooms and redwine pipelines are configured here with
# early stopping switched off.
makeTimingCurve(adultX, adultY, pipeA, 'ANN', 'adult')
pipeM.set_params(**mushrooms_final_params)
pipeM.set_params(MLP__early_stopping=False)
makeTimingCurve(mushroomsX, mushroomsY, pipeM, 'ANN', 'mushrooms')
pipeR.set_params(**redwine_final_params)
pipeR.set_params(MLP__early_stopping=False)
makeTimingCurve(redwineX, redwineY, pipeR, 'ANN', 'redwine')

# max_iter values for the iteration curves: 1, 2, 4, ..., 2048 followed by
# 2100, 2200, ..., 3000.  Each call receives its own copy of the list.
ann_max_iter_grid = [1 << k for k in range(12)] + list(range(2100, 3001, 100))

pipeA.set_params(**adult_final_params)
pipeA.set_params(MLP__early_stopping=False)
iterationLC(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
            {'MLP__max_iter': list(ann_max_iter_grid)}, 'ANN', 'adult')
pipeM.set_params(**mushrooms_final_params)
pipeM.set_params(MLP__early_stopping=False)
iterationLC(pipeM, mushrooms_trgX, mushrooms_trgY, mushrooms_tstX,
            mushrooms_tstY, {'MLP__max_iter': list(ann_max_iter_grid)}, 'ANN',
            'mushrooms')
pipeR.set_params(**redwine_final_params)
pipeR.set_params(MLP__early_stopping=False)
iterationLC(
    pipeR, redwine_trgX, redwine_trgY, redwine_tstX, redwine_tstY, {
        'MLP__max_iter': [2**x for x in range(12)] +
        [2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000]
コード例 #13
0
def main():
    """Run the AdaBoost experiments on the white-wine quality data set.

    Reads ``data/winequality_white.csv``, derives a binary target
    (``quality >= 7``), splits the rows 70/30 (stratified on the target)
    and passes a scaler + boosted ``dtclf_pruned`` pipeline to the project
    helpers ``basicResults``, ``makeTimingCurve`` and ``iterationLC``
    (defined elsewhere).  Nothing is returned.
    """
    wine_data = pd.read_csv('data/winequality_white.csv')
    wine_data['category'] = wine_data['quality'] >= 7

    # Features are the first 11 columns; the target is the boolean
    # 'category' column as integers.  The builtin ``int`` is used because
    # the ``np.int`` alias was removed in NumPy 1.24.
    wineX = wine_data[wine_data.columns[0:11]].values
    wineY = wine_data['category'].values.astype(int)

    # Candidate base-estimator alphas: 0.001 .. 0.049 in steps of 0.001,
    # plus 0.
    alphas = np.append(np.arange(0.001, 0.05, 0.001), 0)

    wine_trgX, wine_tstX, wine_trgY, wine_tstY = ms.train_test_split(
        wineX, wineY, test_size=0.3, random_state=0, stratify=wineY)

    wine_base = dtclf_pruned(criterion='gini',
                             class_weight='balanced',
                             random_state=55)

    paramsA = {
        'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 45, 60, 80, 100],
        'Boost__base_estimator__alpha': alphas
    }

    wine_booster = AdaBoostClassifier(algorithm='SAMME',
                                      learning_rate=1,
                                      base_estimator=wine_base,
                                      random_state=55)

    pipeA = Pipeline([('Scale', StandardScaler()), ('Boost', wine_booster)])

    wine_clf = basicResults(pipeA, wine_trgX, wine_trgY, wine_tstX, wine_tstY,
                            paramsA, 'Boost', 'wine')

    wine_final_params = wine_clf.best_params_
    # "OF" parameter set: base-estimator alpha of -1 with 50 boosting rounds.
    OF_params = {'Boost__base_estimator__alpha': -1, 'Boost__n_estimators': 50}

    # Timing curve with the selected parameters.
    pipeA.set_params(**wine_final_params)
    makeTimingCurve(wineX, wineY, pipeA, 'Boost', 'wine')

    # Iteration curve with the selected parameters.  The parameters are
    # re-applied to pipeA itself; the original re-applied them through a
    # second, otherwise unused pipeline that shared the same booster object.
    pipeA.set_params(**wine_final_params)
    iterationLC(pipeA, wine_trgX, wine_trgY, wine_tstX, wine_tstY,
                {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50]},
                'Boost', 'wine')

    # Iteration curve with the "OF" parameters.
    pipeA.set_params(**OF_params)
    iterationLC(pipeA, wine_trgX, wine_trgY, wine_tstX, wine_tstY,
                {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50]},
                'Boost_OF', 'wine')
コード例 #14
0
ファイル: ANN.py プロジェクト: NarendraGadidasu/Spambase
# Timing curve and iteration curves for the feature-selected spam pipeline
# (pipeS_fs).  The matching runs for the plain pipeline (pipeS) are disabled
# and left in place as comments.  Early stopping is switched off on the
# pipeline before every run.

# --- Timing curve, final parameters ---------------------------------------
#pipeS.set_params(**spam_final_params)
#pipeS.set_params(**{'MLP__early_stopping':False})
#makeTimingCurve(spamX,spamY,pipeS,'ANN','spam')

pipeS_fs.set_params(**spam_fs_final_params)
pipeS_fs.set_params(MLP__early_stopping=False)
makeTimingCurve(spamX, spamY, pipeS_fs, 'ANN', 'spam_fs')

# --- Iteration curve over MLP__max_iter, final parameters -----------------
#pipeS.set_params(**spam_final_params)
#pipeS.set_params(**{'MLP__early_stopping':False})
#iterationLC(pipeS,spam_trgX,spam_trgY,spam_tstX,spam_tstY,{'MLP__max_iter':[2**x for x in range(12)]+[2100,2200,2300,2400,2500,2600,2700,2800,2900,3000]},'ANN','spam')

pipeS_fs.set_params(**spam_fs_final_params)
pipeS_fs.set_params(MLP__early_stopping=False)
# Grid: powers of two 1..2048, then 2100..3000 in steps of 100.
iterationLC(
    pipeS_fs, spam_trgX, spam_trgY, spam_tstX, spam_tstY,
    {'MLP__max_iter': [1 << p for p in range(12)] +
     list(range(2100, 3001, 100))},
    'ANN', 'spam_fs')

# --- Iteration curve over MLP__max_iter, OF parameters --------------------
#pipeS.set_params(**spam_OF_params)
#pipeS.set_params(**{'MLP__early_stopping':False})
#iterationLC(pipeS,spam_trgX,spam_trgY,spam_tstX,spam_tstY,{'MLP__max_iter':[2**x for x in range(12)]+[2100,2200,2300,2400,2500,2600,2700,2800,2900,3000]},'ANN_OF','spam')

pipeS_fs.set_params(**spam_fs_OF_params)
pipeS_fs.set_params(MLP__early_stopping=False)
iterationLC(
    pipeS_fs, spam_trgX, spam_trgY, spam_tstX, spam_tstY,
    {'MLP__max_iter': [1 << p for p in range(12)] +
     list(range(2100, 3001, 100))},
    'ANN_OF', 'spam_fs')
コード例 #15
0
# Grid-search the boosted spam pipeline, then produce its timing curve and an
# iteration curve over the number of boosting estimators.  The madelon/adult
# variants of each step are disabled and kept as comments.
#madelon_clf = basicResults(pipeM,madelon_trgX,madelon_trgY,madelon_tstX,madelon_tstY,paramsM,'Boost','madelon')
spam_clf = basicResults(
    pipeS, spam_trgX, spam_trgY, spam_tstX, spam_tstY, paramsS,
    'Boost', 'spam')

#madelon_final_params = {'n_estimators': 20, 'learning_rate': 0.02}
#adult_final_params = {'n_estimators': 10, 'learning_rate': 1}
#OF_params = {'learning_rate':1}

# Best parameters from the search, plus a fixed "OF" parameter set (not used
# by any active call in this section).
#madelon_final_params = madelon_clf.best_params_
spam_final_params = spam_clf.best_params_
OF_params = dict(Boost__base_estimator__alpha=-1, Boost__n_estimators=50)

# Timing curve with the best parameters.
#pipeM.set_params(**madelon_final_params)
pipeS.set_params(**spam_final_params)
#makeTimingCurve(madelonX,madelonY,pipeM,'Boost','madelon')
makeTimingCurve(spamX, spamY, pipeS, 'Boost', 'spam')

# Iteration curve over Boost__n_estimators with the best parameters.
#pipeM.set_params(**madelon_final_params)
#iterationLC(pipeM,madelon_trgX,madelon_trgY,madelon_tstX,madelon_tstY,{'Boost__n_estimators':[1,2,5,10,20,30,40,50,60,70,80,90,100]},'Boost','madelon')
pipeS.set_params(**spam_final_params)
iterationLC(
    pipeS, spam_trgX, spam_trgY, spam_tstX, spam_tstY,
    {'Boost__n_estimators': [1, 2, 5] + list(range(10, 51, 10))},
    'Boost', 'spam')
#pipeM.set_params(**OF_params)
#iterationLC(pipeM,madelon_trgX,madelon_trgY,madelon_tstX,madelon_tstY,{'Boost__n_estimators':[1,2,5,10,20,30,40,50,60,70,80,90,100]},'Boost_OF','madelon')
#pipeA.set_params(**OF_params)
#iterationLC(pipeA,madelon_trgX,madelon_trgY,madelon_tstX,madelon_tstY,{'Boost__n_estimators':[1,2,5,10,20,30,40,50]},'Boost_OF','adult')
コード例 #16
0
#raise

# Timing curves: apply each pipeline's final parameters, switch early stopping
# off, then call makeTimingCurve.
# NOTE(review): the madelon-named data and pipeline are recorded under the
# dataset label 'cancer' here and below -- confirm that label is intended.
pipeM.set_params(**madelon_final_params)
pipeM.set_params(**{'MLP__early_stopping': False})
makeTimingCurve(madelonX, madelonY, pipeM, 'ANN', 'cancer')
pipeA.set_params(**adult_final_params)
pipeA.set_params(**{'MLP__early_stopping': False})
makeTimingCurve(adultX, adultY, pipeA, 'ANN', 'adult')

# Iteration curves over MLP__max_iter with the final parameters.  The grid is
# the powers of two 1..2048 followed by 2100..3000 in steps of 100.
pipeM.set_params(**madelon_final_params)
pipeM.set_params(**{'MLP__early_stopping': False})
iterationLC(
    pipeM, madelon_trgX, madelon_trgY, madelon_tstX, madelon_tstY, {
        'MLP__max_iter': [2**x for x in range(12)] +
        [2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000]
    }, 'ANN', 'cancer')
pipeA.set_params(**adult_final_params)
pipeA.set_params(**{'MLP__early_stopping': False})
iterationLC(
    pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY, {
        'MLP__max_iter': [2**x for x in range(12)] +
        [2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000]
    }, 'ANN', 'adult')

# Same iteration curve with the OF parameter set (this excerpt is cut off
# partway through the call).
pipeM.set_params(**madelon_OF_params)
pipeM.set_params(**{'MLP__early_stopping': False})
iterationLC(
    pipeM, madelon_trgX, madelon_trgY, madelon_tstX, madelon_tstY, {
        'MLP__max_iter': [2**x for x in range(12)] +
コード例 #17
0
# Grid-search the segmentation ANN pipeline through basicResults.
seg_clf = basicResults(
    pipeSeg, seg_trgX, seg_trgY, seg_tstX, seg_tstY, params_seg,
    'ANN', 'seg')

# Keep the best parameter combination found by the search.
seg_final_params = seg_clf.best_params_

# Timing curve with the best parameters and early stopping switched off.
pipeSeg.set_params(**seg_final_params)
pipeSeg.set_params(MLP__early_stopping=False)
makeTimingCurve(segX, segY, pipeSeg, 'ANN', 'seg')

# Iteration curve over MLP__max_iter = 1, 2, 4, ..., 4096 with the same
# settings.
pipeSeg.set_params(**seg_final_params)
pipeSeg.set_params(MLP__early_stopping=False)
iterationLC(
    pipeSeg, seg_trgX, seg_trgY, seg_tstX, seg_tstY,
    {'MLP__max_iter': [1 << p for p in range(13)]},
    'ANN', 'seg')


## =============================================================================
## SVM  (Linear and RBF)
## =============================================================================

# Linear SVM: rebind pipeSeg to a scaler followed by LinearSVC.
pipeSeg = Pipeline([
    ('Scale', StandardScaler()),
    ('SVM', LinearSVC()),
])

# Candidate C values: 13 log-spaced points from 1e-2 to 1e10.
C_range = np.logspace(-2, 10, 13)

# Parameter grid for the linear SVM search.
params_seg = dict(SVM__C=C_range)
コード例 #18
0
# Feed learning algorithm optimal hyperparameters and output train/test timing curves over various train/test split ratios
# (the madelon run is disabled; adult, mushrooms and redwine are active)
#pipeM.set_params(**madelon_final_params)
#makeTimingCurve(madelonX,madelonY,pipeM,'Boost','madelon')
pipeA.set_params(**adult_final_params)
makeTimingCurve(adultX, adultY, pipeA, 'Boost', 'adult')
pipeM.set_params(**mushrooms_final_params)
makeTimingCurve(mushroomsX, mushroomsY, pipeM, 'Boost', 'mushrooms')
pipeR.set_params(**redwine_final_params)
makeTimingCurve(redwineX, redwineY, pipeR, 'Boost', 'redwine')

# Iteration curves over Boost__n_estimators with each dataset's final
# parameters; the parameters are re-applied before every run.
#pipeM.set_params(**madelon_final_params)
#iterationLC(pipeM,madelon_trgX,madelon_trgY,madelon_tstX,madelon_tstY,{'Boost__n_estimators':[1,2,5,10,20,30,40,50,60,70,80,90,100]},'Boost','madelon')
pipeA.set_params(**adult_final_params)
iterationLC(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
            {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50]}, 'Boost',
            'adult')
pipeM.set_params(**mushrooms_final_params)
iterationLC(pipeM, mushrooms_trgX, mushrooms_trgY, mushrooms_tstX,
            mushrooms_tstY,
            {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50]}, 'Boost',
            'mushrooms')
pipeR.set_params(**redwine_final_params)
iterationLC(pipeR, redwine_trgX, redwine_trgY, redwine_tstX, redwine_tstY,
            {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50]}, 'Boost',
            'redwine')
# Same iteration curve for adult with the OF parameter set (this excerpt is
# cut off partway through the call).
#pipeM.set_params(**OF_params)
#iterationLC(pipeM,madelon_trgX,madelon_trgY,madelon_tstX,madelon_tstY,{'Boost__n_estimators':[1,2,5,10,20,30,40,50,60,70,80,90,100]},'Boost_OF','madelon')
pipeA.set_params(**OF_params)
iterationLC(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
            {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50]}, 'Boost_OF',
コード例 #19
0
#
# Disabled: parameters taken from the grid-search results, the timing curves,
# and the feature-selected (spam_fs) iteration curve.
#spam_fs_final_params = spam_clf_fs.best_params_
#spam_fs_OF_params = spam_fs_final_params.copy()
#spam_fs_OF_params['SVM__alpha'] = 1e-16
#spam_final_params = spam_clf.best_params_
#spam_OF_params = spam_final_params.copy()
#spam_OF_params['SVM__alpha'] = 1e-16
#
#pipeS_fs.set_params(**spam_fs_final_params)
#makeTimingCurve(spamX,spamY,pipeS_fs,'SVM_RBF','spam_fs')
#pipeS.set_params(**spam_final_params)
#makeTimingCurve(spamX,spamY,pipeS,'SVM_RBF','spam')
#
#
#pipeS_fs.set_params(**spam_fs_final_params)
#iterationLC(pipeS_fs,spam_trgX,spam_trgY,spam_tstX,spam_tstY,{'SVM__n_iter':[2**x for x in range(12)]},'SVM_RBF','spam_fs')

# Active run: hard-coded parameters for the spam pipeline, followed by an
# iteration curve over SVM__n_iter = 30, 60, ..., 450.
spam_final_params = dict(
    SVM__alpha=0.01,
    SVM__gamma_frac=1.8,
    SVM__n_iter=389,
)
pipeS.set_params(**spam_final_params)
iterationLC(
    pipeS, spam_trgX, spam_trgY, spam_tstX, spam_tstY,
    {'SVM__n_iter': np.arange(30, 480, 30)},
    'SVM_RBF', 'spam')

# Disabled: OF-parameter iteration curves.
#pipeS.set_params(**spam_OF_params)
#iterationLC(pipeS, spam_trgX,spam_trgY,spam_tstX,spam_tstY,{'SVM__n_iter':np.arange(1,75,3)},'SVM_RBF_OF','spam')
#pipeS_fs.set_params(**spam_fs_OF_params)
#iterationLC(pipeS_fs,spam_trgX,spam_trgY,spam_tstX,spam_tstY,{'SVM__n_iter':np.arange(100,2600,100)},'SVM_RBF_OF','spam_fs')
コード例 #20
0
ファイル: Boosting.py プロジェクト: bjaladanki3/ML
#OF_params = {'learning_rate':1}

# Best parameters from the adult grid search, plus a fixed "OF" parameter set.
#madelon_final_params = madelon_clf.best_params_
adult_final_params = adult_clf.best_params_
OF_params = {'Boost__base_estimator__alpha':-1, 'Boost__n_estimators':50}

# Timing curve for the adult pipeline with its best parameters.
#pipeM.set_params(**madelon_final_params)
pipeA.set_params(**adult_final_params)
#makeTimingCurve(madelonX,madelonY,pipeM,'Boost','madelon')
makeTimingCurve(adultX,adultY,pipeA,'Boost','adult')

# Iteration curves over Boost__n_estimators: first with the best parameters,
# then with the OF parameter set.
#pipeM.set_params(**madelon_final_params)
#iterationLC(pipeM,madelon_trgX,madelon_trgY,madelon_tstX,madelon_tstY,{'Boost__n_estimators':[1,2,5,10,20,30,40,50,60,70,80,90,100]},'Boost','madelon')
pipeA.set_params(**adult_final_params)
iterationLC(pipeA,adult_trgX,adult_trgY,adult_tstX,adult_tstY,{'Boost__n_estimators':[1,2,5,10,20,30,40,50]},'Boost','adult')
#pipeM.set_params(**OF_params)
#iterationLC(pipeM,madelon_trgX,madelon_trgY,madelon_tstX,madelon_tstY,{'Boost__n_estimators':[1,2,5,10,20,30,40,50,60,70,80,90,100]},'Boost_OF','madelon')
pipeA.set_params(**OF_params)
iterationLC(pipeA,adult_trgX,adult_trgY,adult_tstX,adult_tstY,{'Boost__n_estimators':[1,2,5,10,20,30,40,50]},'Boost_OF','adult')



# SPAM
# Load the spam table; the 'clas' column is the label and every other column
# is a feature.  The axis is passed by keyword because pandas >= 2.0 no longer
# accepts it positionally in DataFrame.drop.
spam = pd.read_hdf('spam.hdf','spam')
spamX = spam.drop('clas', axis=1).copy().values
spamY = spam['clas'].copy().values

# Candidate alpha values, negative and positive.
alphas = [-1,-1e-3,-(1e-3)*10**-0.5, -1e-2, -(1e-2)*10**-0.5,-1e-1,-(1e-1)*10**-0.5, 0, (1e-1)*10**-0.5,1e-1,(1e-2)*10**-0.5,1e-2,(1e-3)*10**-0.5,1e-3]


コード例 #21
0
def main():
    """Run the linear-SVM and RBF-SVM experiments on abalone and madelon.

    Both datasets are read from ``data/processed/datasets.hdf`` and split
    70/30 (stratified, ``random_state=0``).  For each of the two SVM variants
    the function:

    1. grid-searches the madelon pipeline (``pipeM``: scaling plus four
       ``SelectFromModel`` culling steps) and the abalone pipeline
       (``pipeA``: scaling only) through ``basicResults``;
    2. records timing curves with ``makeTimingCurve`` using the best
       parameters found;
    3. records iteration curves with ``iterationLC`` for the best parameters
       and for an "OF" parameter set that uses ``SVM__alpha = 1e-16``.
    """
    # Load the data.  'Class' is the label column; all other columns are
    # features.  The axis is passed by keyword because pandas >= 2.0 no longer
    # accepts it positionally in DataFrame.drop.
    abalone = pd.read_hdf('data/processed/datasets.hdf', 'abalone')
    abaloneX = abalone.drop('Class', axis=1).copy().values
    abaloneY = abalone['Class'].copy().values

    madelon = pd.read_hdf('data/processed/datasets.hdf', 'madelon')
    madelonX = madelon.drop('Class', axis=1).copy().values
    madelonY = madelon['Class'].copy().values

    abalone_trgX, abalone_tstX, abalone_trgY, abalone_tstY = ms.train_test_split(
        abaloneX, abaloneY, test_size=0.3, random_state=0, stratify=abaloneY)
    madelon_trgX, madelon_tstX, madelon_trgY, madelon_tstY = ms.train_test_split(
        madelonX, madelonY, test_size=0.3, random_state=0, stratify=madelonY)

    # Training-set sizes; used below to derive the n_iter search value.
    N_abalone = abalone_trgX.shape[0]
    N_madelon = madelon_trgX.shape[0]

    # alpha candidates: 10**-1 down to 10**-9 in half-decade steps.
    alphas = [10**-x for x in np.arange(1, 9.01, 1 / 2)]

    #Linear SVM
    pipeM = Pipeline([('Scale', StandardScaler()),
                      ('Cull1',
                       SelectFromModel(RandomForestClassifier(random_state=1),
                                       threshold='median')),
                      ('Cull2',
                       SelectFromModel(RandomForestClassifier(random_state=2),
                                       threshold='median')),
                      ('Cull3',
                       SelectFromModel(RandomForestClassifier(random_state=3),
                                       threshold='median')),
                      ('Cull4',
                       SelectFromModel(RandomForestClassifier(random_state=4),
                                       threshold='median')),
                      ('SVM',
                       SGDClassifier(loss='hinge',
                                     l1_ratio=0,
                                     penalty='l2',
                                     class_weight='balanced',
                                     random_state=55))])
    pipeA = Pipeline([('Scale', StandardScaler()),
                      ('SVM',
                       SGDClassifier(loss='hinge',
                                     l1_ratio=0,
                                     penalty='l2',
                                     class_weight='balanced',
                                     random_state=55))])

    # One n_iter candidate per dataset, computed from the training-set size.
    params_abalone = {
        'SVM__alpha': alphas,
        'SVM__n_iter': [int((1e6 / N_abalone) / .8) + 1]
    }
    params_madelon = {
        'SVM__alpha': alphas,
        'SVM__n_iter': [int((1e6 / N_madelon) / .8) + 1]
    }

    madelon_clf = basicResults(pipeM, madelon_trgX, madelon_trgY, madelon_tstX,
                               madelon_tstY, params_madelon, 'SVM_Lin',
                               'madelon')
    abalone_clf = basicResults(pipeA, abalone_trgX, abalone_trgY, abalone_tstX,
                               abalone_tstY, params_abalone, 'SVM_Lin',
                               'abalone')

    # Best parameters from the searches, plus hard-coded OF parameter sets.
    #madelon_final_params = {'SVM__alpha': 0.031622776601683791, 'SVM__n_iter': 687.25}
    madelon_final_params = madelon_clf.best_params_
    madelon_OF_params = {'SVM__n_iter': 1303, 'SVM__alpha': 1e-16}
    #abalone_final_params ={'SVM__alpha': 0.0001, 'SVM__n_iter': 428}
    abalone_final_params = abalone_clf.best_params_
    abalone_OF_params = {'SVM__n_iter': 55, 'SVM__alpha': 1e-16}

    # Timing curves with the best parameters.
    pipeM.set_params(**madelon_final_params)
    makeTimingCurve(madelonX, madelonY, pipeM, 'SVM_Lin', 'madelon')
    pipeA.set_params(**abalone_final_params)
    makeTimingCurve(abaloneX, abaloneY, pipeA, 'SVM_Lin', 'abalone')

    # Iteration curves over SVM__n_iter with the best parameters.
    pipeM.set_params(**madelon_final_params)
    iterationLC(pipeM, madelon_trgX, madelon_trgY, madelon_tstX, madelon_tstY,
                {'SVM__n_iter': [2**x for x in range(12)]}, 'SVM_Lin',
                'madelon')
    pipeA.set_params(**abalone_final_params)
    iterationLC(pipeA, abalone_trgX, abalone_trgY, abalone_tstX, abalone_tstY,
                {'SVM__n_iter': np.arange(1, 75, 3)}, 'SVM_Lin', 'abalone')

    # Iteration curves over SVM__n_iter with the OF parameters.
    pipeA.set_params(**abalone_OF_params)
    iterationLC(pipeA, abalone_trgX, abalone_trgY, abalone_tstX, abalone_tstY,
                {'SVM__n_iter': np.arange(1, 200, 5)}, 'SVM_LinOF', 'abalone')
    pipeM.set_params(**madelon_OF_params)
    iterationLC(pipeM, madelon_trgX, madelon_trgY, madelon_tstX, madelon_tstY,
                {'SVM__n_iter': np.arange(100, 2600, 100)}, 'SVM_LinOF',
                'madelon')

    #RBF SVM
    # gamma_frac candidates for abalone (A) and madelon (M).
    gamma_fracsA = np.arange(0.2, 2.1, 0.2)
    gamma_fracsM = np.arange(0.05, 1.01, 0.1)

    # Same pipeline shapes as above, with primalSVM_RBF as the final step.
    pipeM = Pipeline([('Scale', StandardScaler()),
                      ('Cull1',
                       SelectFromModel(RandomForestClassifier(random_state=1),
                                       threshold='median')),
                      ('Cull2',
                       SelectFromModel(RandomForestClassifier(random_state=2),
                                       threshold='median')),
                      ('Cull3',
                       SelectFromModel(RandomForestClassifier(random_state=3),
                                       threshold='median')),
                      ('Cull4',
                       SelectFromModel(RandomForestClassifier(random_state=4),
                                       threshold='median')),
                      ('SVM', primalSVM_RBF())])

    pipeA = Pipeline([('Scale', StandardScaler()), ('SVM', primalSVM_RBF())])

    params_abalone = {
        'SVM__alpha': alphas,
        'SVM__n_iter': [int((1e6 / N_abalone) / .8) + 1],
        'SVM__gamma_frac': gamma_fracsA
    }
    params_madelon = {
        'SVM__alpha': alphas,
        'SVM__n_iter': [int((1e6 / N_madelon) / .8) + 1],
        'SVM__gamma_frac': gamma_fracsM
    }
    #
    madelon_clf = basicResults(pipeM, madelon_trgX, madelon_trgY, madelon_tstX,
                               madelon_tstY, params_madelon, 'SVM_RBF',
                               'madelon')
    abalone_clf = basicResults(pipeA, abalone_trgX, abalone_trgY, abalone_tstX,
                               abalone_tstY, params_abalone, 'SVM_RBF',
                               'abalone')

    # OF sets here are the best parameters with SVM__alpha overridden.
    madelon_final_params = madelon_clf.best_params_
    madelon_OF_params = madelon_final_params.copy()
    madelon_OF_params['SVM__alpha'] = 1e-16
    abalone_final_params = abalone_clf.best_params_
    abalone_OF_params = abalone_final_params.copy()
    abalone_OF_params['SVM__alpha'] = 1e-16

    pipeM.set_params(**madelon_final_params)
    makeTimingCurve(madelonX, madelonY, pipeM, 'SVM_RBF', 'madelon')
    pipeA.set_params(**abalone_final_params)
    # Time the abalone pipeline that was just configured.  The previous code
    # passed pipeM here, which timed the madelon pipeline on abalone data.
    makeTimingCurve(abaloneX, abaloneY, pipeA, 'SVM_RBF', 'abalone')

    pipeM.set_params(**madelon_final_params)
    iterationLC(pipeM, madelon_trgX, madelon_trgY, madelon_tstX, madelon_tstY,
                {'SVM__n_iter': [2**x for x in range(12)]}, 'SVM_RBF',
                'madelon')
    pipeA.set_params(**abalone_final_params)
    iterationLC(pipeA, abalone_trgX, abalone_trgY, abalone_tstX, abalone_tstY,
                {'SVM__n_iter': np.arange(1, 75, 3)}, 'SVM_RBF', 'abalone')

    pipeA.set_params(**abalone_OF_params)
    iterationLC(pipeA, abalone_trgX, abalone_trgY, abalone_tstX, abalone_tstY,
                {'SVM__n_iter': np.arange(1, 75, 3)}, 'SVM_RBF_OF', 'abalone')
    pipeM.set_params(**madelon_OF_params)
    iterationLC(pipeM, madelon_trgX, madelon_trgY, madelon_tstX, madelon_tstY,
                {'SVM__n_iter': np.arange(100, 2600, 100)}, 'SVM_RBF_OF',
                'madelon')
コード例 #22
0
def main():
    """Run the linear-SVM and RBF-SVM experiments on the adult-income data.

    The data is read from ``data/adult_parsed.csv`` and split 70/30
    (stratified, ``random_state=0``).  For each SVM variant the function
    grid-searches ``pipeA`` through ``basicResults``, records a timing curve
    with ``makeTimingCurve`` using the best parameters, and records iteration
    curves with ``iterationLC`` for the best parameters and for an "OF"
    parameter set that uses ``SVM__alpha = 1e-16``.

    The wine experiment is disabled; its statements are kept as comments.
    ``pipeM`` is referenced only by those disabled statements.
    """
    adult = pd.read_csv('data/adult_parsed.csv')

    # 'income' is the label column; all other columns are features.  The axis
    # is passed by keyword because pandas >= 2.0 no longer accepts it
    # positionally in DataFrame.drop.
    adult_income_X = adult.drop('income', axis=1).copy().values
    adult_income_Y = adult['income'].copy().values

    # wine_data = pd.read_csv('data/winequality_white.csv')
    # wine_data['category'] = wine_data['quality'] >= 7
    #
    # wineX = wine_data[wine_data.columns[0:11]].values
    # wineY = wine_data['category'].values.astype(np.int)

    adult_income_trgX, adult_income_tstX, adult_income_trgY, adult_income_tstY = ms.train_test_split(
        adult_income_X,
        adult_income_Y,
        test_size=0.3,
        random_state=0,
        stratify=adult_income_Y)
    # wine_trgX, wine_tstX, wine_trgY, wine_tstY = ms.train_test_split(wineX, wineY, test_size=0.3, random_state=0,stratify=wineY)

    # Training-set size; used below to derive the RBF n_iter search value.
    N_adult_income = adult_income_trgX.shape[0]
    # N_wine = wine_trgX.shape[0]

    # alphas = [10**-x for x in np.arange(1,9.01,1/2)]

    #Linear SVM
    pipeM = Pipeline([('Scale', StandardScaler()),
                      ('Cull1',
                       SelectFromModel(RandomForestClassifier(random_state=1),
                                       threshold='median')),
                      ('Cull2',
                       SelectFromModel(RandomForestClassifier(random_state=2),
                                       threshold='median')),
                      ('SVM',
                       SGDClassifier(loss='hinge',
                                     l1_ratio=0,
                                     penalty='l2',
                                     class_weight='balanced',
                                     random_state=55))])
    pipeA = Pipeline([('Scale', StandardScaler()),
                      ('SVM',
                       SGDClassifier(loss='hinge',
                                     l1_ratio=0,
                                     penalty='l2',
                                     class_weight='balanced',
                                     random_state=55))])

    # NOTE(review): np.arange(0.1, 1, 10) evaluates to the single value
    # [0.1] (start 0.1, stop 1, step 10) -- confirm this is the intended
    # SVM__n_iter grid.
    params_adult_income = {
        'SVM__alpha': [100, 10, 1, 0.1, 0.001, 0.0001],
        'SVM__n_iter': np.arange(0.1, 1, 10)
    }
    # params_wine = {'SVM__alpha':alphas,'SVM__n_iter':[int((1e6/N_wine)/.8)+1]}

    adult_income_clf = basicResults(pipeA, adult_income_trgX,
                                    adult_income_trgY, adult_income_tstX,
                                    adult_income_tstY, params_adult_income,
                                    'SVM_Lin', 'adult_income')
    # wine_clf = basicResults(pipeM,wine_trgX,wine_trgY,wine_tstX,wine_tstY,params_wine,'SVM_Lin','wine')

    # Best parameters from the search, plus a hard-coded OF parameter set.
    #wine_final_params = {'SVM__alpha': 0.031622776601683791, 'SVM__n_iter': 687.25}
    # wine_final_params = wine_clf.best_params_
    # wine_OF_params = {'SVM__n_iter': 1303, 'SVM__alpha': 1e-16}
    #adult_income_final_params ={'SVM__alpha': 0.0001, 'SVM__n_iter': 428}
    adult_income_final_params = adult_income_clf.best_params_
    adult_income_OF_params = {'SVM__n_iter': 55, 'SVM__alpha': 1e-16}

    # Timing curve with the best parameters.
    # pipeM.set_params(**wine_final_params)
    # makeTimingCurve(wineX,wineY,pipeM,'SVM_Lin','wine')
    pipeA.set_params(**adult_income_final_params)
    makeTimingCurve(adult_income_X, adult_income_Y, pipeA, 'SVM_Lin',
                    'adult_income')

    # Iteration curve over SVM__n_iter with the best parameters.
    # pipeM.set_params(**wine_final_params)
    # iterationLC(pipeM,wine_trgX,wine_trgY,wine_tstX,wine_tstY,{'SVM__n_iter':[2**x for x in range(12)]},'SVM_Lin','wine')
    pipeA.set_params(**adult_income_final_params)
    iterationLC(pipeA, adult_income_trgX, adult_income_trgY, adult_income_tstX,
                adult_income_tstY, {'SVM__n_iter': np.arange(1, 75, 3)},
                'SVM_Lin', 'adult_income')

    # Iteration curve over SVM__n_iter with the OF parameters.
    pipeA.set_params(**adult_income_OF_params)
    iterationLC(pipeA, adult_income_trgX, adult_income_trgY, adult_income_tstX,
                adult_income_tstY, {'SVM__n_iter': np.arange(1, 200, 5)},
                'SVM_LinOF', 'adult_income')
    # pipeM.set_params(**wine_OF_params)
    # iterationLC(pipeM,wine_trgX,wine_trgY,wine_tstX,wine_tstY,{'SVM__n_iter':np.arange(100,2600,100)},'SVM_LinOF','wine')

    #RBF SVM
    # gamma_frac candidates; gamma_fracsM is referenced only by the disabled
    # wine grid.
    gamma_fracsA = np.arange(0.2, 2.1, 0.2)
    gamma_fracsM = np.arange(0.05, 1.01, 0.1)

    #
    pipeM = Pipeline([('Scale', StandardScaler()),
                      ('Cull1',
                       SelectFromModel(RandomForestClassifier(random_state=1),
                                       threshold='median')),
                      ('Cull2',
                       SelectFromModel(RandomForestClassifier(random_state=2),
                                       threshold='median')),
                      ('SVM', primalSVM_RBF())])

    pipeA = Pipeline([('Scale', StandardScaler()), ('SVM', primalSVM_RBF())])

    # One n_iter candidate, computed from the training-set size.
    params_adult_income = {
        'SVM__alpha': [100, 10, 1, 0.1, 0.001, 0.0001],
        'SVM__n_iter': [int((1e6 / N_adult_income) / .8) + 1],
        'SVM__gamma_frac': gamma_fracsA
    }
    # params_wine = {'SVM__alpha':alphas,'SVM__n_iter':[int((1e6/N_wine)/.8)+1],'SVM__gamma_frac':gamma_fracsM}
    #
    # wine_clf = basicResults(pipeM,wine_trgX,wine_trgY,wine_tstX,wine_tstY,params_wine,'SVM_RBF','wine')
    adult_income_clf = basicResults(pipeA, adult_income_trgX,
                                    adult_income_trgY, adult_income_tstX,
                                    adult_income_tstY, params_adult_income,
                                    'SVM_RBF', 'adult_income')

    # The OF set here is the best parameters with SVM__alpha overridden.
    # wine_final_params = wine_clf.best_params_
    # wine_OF_params = wine_final_params.copy()
    # wine_OF_params['SVM__alpha'] = 1e-16
    adult_income_final_params = adult_income_clf.best_params_
    adult_income_OF_params = adult_income_final_params.copy()
    adult_income_OF_params['SVM__alpha'] = 1e-16

    # pipeM.set_params(**wine_final_params)
    # makeTimingCurve(wineX,wineY,pipeM,'SVM_RBF','wine')
    pipeA.set_params(**adult_income_final_params)
    # Time the pipeline that was just configured.  The previous code passed
    # pipeM here, which is never fitted or given parameters in this function.
    makeTimingCurve(adult_income_X, adult_income_Y, pipeA, 'SVM_RBF',
                    'adult_income')

    # pipeM.set_params(**wine_final_params)
    # iterationLC(pipeM,wine_trgX,wine_trgY,wine_tstX,wine_tstY,{'SVM__n_iter':[2**x for x in range(12)]},'SVM_RBF','wine')
    pipeA.set_params(**adult_income_final_params)
    iterationLC(pipeA, adult_income_trgX, adult_income_trgY, adult_income_tstX,
                adult_income_tstY, {'SVM__n_iter': np.arange(1, 75, 3)},
                'SVM_RBF', 'adult_income')

    pipeA.set_params(**adult_income_OF_params)
    iterationLC(pipeA, adult_income_trgX, adult_income_trgY, adult_income_tstX,
                adult_income_tstY, {'SVM__n_iter': np.arange(1, 75, 3)},
                'SVM_RBF_OF', 'adult_income')
def main():
    """Run the neural-network (MLP) experiments on the cars and madelon data.

    Both datasets are read from ``data/processed/datasets.hdf`` and split
    70/30 (stratified, ``random_state=0``).  The function grid-searches the
    madelon pipeline (``pipeM``: scaling plus four ``SelectFromModel``
    culling steps) and the cars pipeline (``pipeA``: scaling only) through
    ``basicResults``, then, with early stopping switched off, records timing
    curves (``makeTimingCurve``) and iteration curves over ``MLP__max_iter``
    (``iterationLC``) for the best parameters and for an "OF" parameter set
    that uses ``MLP__alpha = 0``.
    """
    # 'Class' is the label column; all other columns are features.  The axis
    # is passed by keyword because pandas >= 2.0 no longer accepts it
    # positionally in DataFrame.drop.
    cars = pd.read_hdf('data/processed/datasets.hdf', 'cars')
    carsX = cars.drop('Class', axis=1).copy().values
    carsY = cars['Class'].copy().values

    madelon = pd.read_hdf('data/processed/datasets.hdf', 'madelon')
    madelonX = madelon.drop('Class', axis=1).copy().values
    madelonY = madelon['Class'].copy().values

    cars_trgX, cars_tstX, cars_trgY, cars_tstY = ms.train_test_split(
        carsX, carsY, test_size=0.3, random_state=0, stratify=carsY)
    madelon_trgX, madelon_tstX, madelon_trgY, madelon_tstY = ms.train_test_split(
        madelonX, madelonY, test_size=0.3, random_state=0, stratify=madelonY)

    pipeA = Pipeline([('Scale', StandardScaler()),
                      ('MLP',
                       MLPClassifier(max_iter=2000,
                                     early_stopping=True,
                                     random_state=55))])

    pipeM = Pipeline([('Scale', StandardScaler()),
                      ('Cull1',
                       SelectFromModel(RandomForestClassifier(random_state=1),
                                       threshold='median')),
                      ('Cull2',
                       SelectFromModel(RandomForestClassifier(random_state=2),
                                       threshold='median')),
                      ('Cull3',
                       SelectFromModel(RandomForestClassifier(random_state=3),
                                       threshold='median')),
                      ('Cull4',
                       SelectFromModel(RandomForestClassifier(random_state=4),
                                       threshold='median')),
                      ('MLP',
                       MLPClassifier(max_iter=2000,
                                     early_stopping=True,
                                     random_state=55))])

    # Hidden-layer candidates for cars: 1-3 layers of width d, d // 2 or
    # d * 2, where d is the number of feature columns.
    d = carsX.shape[1]
    hiddens_cars = [(h, ) * l for l in [1, 2, 3] for h in [d, d // 2, d * 2]]
    # alpha candidates: 10**1 down to 10**-5 in half-decade steps.
    alphas = [10**-x for x in np.arange(-1, 5.01, 1 / 2)]
    # NOTE(review): alphasM is defined but never used; params_madelon below
    # searches `alphas`.  Confirm which grid the madelon search should use.
    alphasM = [10**-x for x in np.arange(-1, 9.01, 1 / 2)]
    # For madelon the base width is the feature count divided by 2**4.
    d = madelonX.shape[1]
    d = d // (2**4)
    hiddens_madelon = [(h, ) * l for l in [1, 2, 3]
                       for h in [d, d // 2, d * 2]]
    params_cars = {
        'MLP__activation': ['relu', 'logistic'],
        'MLP__alpha': alphas,
        'MLP__hidden_layer_sizes': hiddens_cars
    }
    params_madelon = {
        'MLP__activation': ['relu', 'logistic'],
        'MLP__alpha': alphas,
        'MLP__hidden_layer_sizes': hiddens_madelon
    }
    #
    madelon_clf = basicResults(pipeM, madelon_trgX, madelon_trgY, madelon_tstX,
                               madelon_tstY, params_madelon, 'ANN', 'madelon')
    cars_clf = basicResults(pipeA, cars_trgX, cars_trgY, cars_tstX, cars_tstY,
                            params_cars, 'ANN', 'cars')

    #madelon_final_params = {'MLP__hidden_layer_sizes': (500,), 'MLP__activation': 'logistic', 'MLP__alpha': 10.0}
    #cars_final_params ={'MLP__hidden_layer_sizes': (28, 28, 28), 'MLP__activation': 'logistic', 'MLP__alpha': 0.0031622776601683794}

    # Best parameters from the searches; the OF sets are copies with
    # MLP__alpha overridden to 0.
    madelon_final_params = madelon_clf.best_params_
    cars_final_params = cars_clf.best_params_
    cars_OF_params = cars_final_params.copy()
    cars_OF_params['MLP__alpha'] = 0
    madelon_OF_params = madelon_final_params.copy()
    madelon_OF_params['MLP__alpha'] = 0

    #raise

    # Timing curves with the best parameters and early stopping off.
    pipeM.set_params(**madelon_final_params)
    pipeM.set_params(**{'MLP__early_stopping': False})
    makeTimingCurve(madelonX, madelonY, pipeM, 'ANN', 'madelon')
    pipeA.set_params(**cars_final_params)
    pipeA.set_params(**{'MLP__early_stopping': False})
    makeTimingCurve(carsX, carsY, pipeA, 'ANN', 'cars')

    # Iteration curves over MLP__max_iter with the best parameters.  The grid
    # is the powers of two 1..2048 followed by 2100..3000 in steps of 100.
    pipeM.set_params(**madelon_final_params)
    pipeM.set_params(**{'MLP__early_stopping': False})
    iterationLC(
        pipeM, madelon_trgX, madelon_trgY, madelon_tstX, madelon_tstY, {
            'MLP__max_iter': [2**x for x in range(12)] +
            [2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000]
        }, 'ANN', 'madelon')
    pipeA.set_params(**cars_final_params)
    pipeA.set_params(**{'MLP__early_stopping': False})
    iterationLC(
        pipeA, cars_trgX, cars_trgY, cars_tstX, cars_tstY, {
            'MLP__max_iter': [2**x for x in range(12)] +
            [2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000]
        }, 'ANN', 'cars')

    # Same iteration curves with the OF parameter sets.
    pipeM.set_params(**madelon_OF_params)
    pipeM.set_params(**{'MLP__early_stopping': False})
    iterationLC(
        pipeM, madelon_trgX, madelon_trgY, madelon_tstX, madelon_tstY, {
            'MLP__max_iter': [2**x for x in range(12)] +
            [2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000]
        }, 'ANN_OF', 'madelon')
    pipeA.set_params(**cars_OF_params)
    pipeA.set_params(**{'MLP__early_stopping': False})
    iterationLC(
        pipeA, cars_trgX, cars_trgY, cars_tstX, cars_tstY, {
            'MLP__max_iter': [2**x for x in range(12)] +
            [2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000]
        }, 'ANN_OF', 'cars')
コード例 #24
0
ファイル: SVM.py プロジェクト: NarendraGadidasu/Spambase
# Save the current figure as the linear-SVM ROC curve image, then clear it.
plt.savefig('./output/SVM_Lin_ROC_Curve.png')

plt.clf()

# Confusion matrix of spam_clf's predictions on the test split, written as CSV.
cm = pd.DataFrame(confusion_matrix(spam_tstY, spam_clf.predict(spam_tstX)))

cm.to_csv('./output/SVM_Lin_Confusion_matrix.csv')

# Best parameters from the search, plus a hard-coded OF parameter set.
spam_final_params = spam_clf.best_params_
spam_OF_params = {'SVM__n_iter': 55, 'SVM__alpha': 1e-16}

# Timing curve with the best parameters.
pipeS.set_params(**spam_final_params)
makeTimingCurve(spamX, spamY, pipeS, 'SVM_Lin', 'spam')

# Iteration curve over SVM__n_iter with the best parameters.
pipeS.set_params(**spam_final_params)
iterationLC(pipeS, spam_trgX, spam_trgY, spam_tstX, spam_tstY,
            {'SVM__n_iter': np.arange(1, 75, 3)}, 'SVM_Lin', 'spam')

# Iteration curve over SVM__n_iter with the OF parameters.
pipeS.set_params(**spam_OF_params)
iterationLC(pipeS, spam_trgX, spam_trgY, spam_tstX, spam_tstY,
            {'SVM__n_iter': np.arange(1, 200, 5)}, 'SVM_LinOF', 'spam')

#RBF SVM
# gamma_frac candidates: 0.2 to 2.0 in steps of 0.2.
gamma_fracsS = np.arange(0.2, 2.1, 0.2)

# Feature-selected pipeline (this excerpt is cut off inside the step list).
pipeS_fs = Pipeline([('Scale', StandardScaler()),
                     ('Cull1',
                      SelectFromModel(RandomForestClassifier(random_state=1),
                                      threshold='median')),
                     ('Cull2',
                      SelectFromModel(RandomForestClassifier(random_state=2),
コード例 #25
0
# Search grid for the boosted pipeline: number of estimators and the base
# estimator's alpha.
paramsA = dict(
    Boost__n_estimators=[1, 2, 5, 10, 20, 30, 45, 60, 80, 100],
    Boost__base_estimator__alpha=alphas,
)

# Two SAMME AdaBoost ensembles that differ only in their base estimator.
ab_booster = AdaBoostClassifier(base_estimator=ab_base,
                                algorithm='SAMME',
                                learning_rate=1,
                                random_state=55)
OF_booster = AdaBoostClassifier(base_estimator=OF_base,
                                algorithm='SAMME',
                                learning_rate=1,
                                random_state=55)

# Scale the features, then boost.
pipeA = Pipeline([
    ('Scale', StandardScaler()),
    ('Boost', ab_booster),
])

# Grid-search the pipeline through basicResults.
ab_clf = basicResults(
    pipeA, ab_trgX, ab_trgY, ab_tstX, ab_tstY, paramsA, 'Boost', 'ab')

# Best parameters from the search, plus a fixed OF parameter set.
ab_final_params = ab_clf.best_params_
OF_params = dict(Boost__base_estimator__alpha=-1, Boost__n_estimators=50)

# Timing curve with the best parameters.
pipeA.set_params(**ab_final_params)

makeTimingCurve(abX, abY, pipeA, 'Boost', 'ab')

# Iteration curve over Boost__n_estimators with the best parameters.
pipeA.set_params(**ab_final_params)
iterationLC(
    pipeA, ab_trgX, ab_trgY, ab_tstX, ab_tstY,
    {'Boost__n_estimators': [1, 2, 5] + list(range(10, 51, 10))},
    'Boost', 'ab')
コード例 #26
0
ファイル: Boosting.py プロジェクト: hetpsheth/CS7641--HW1
#
# Grid-search both boosted pipelines through basicResults.
cancer_clf = basicResults(
    pipeM, cancer_trgX, cancer_trgY, cancer_tstX, cancer_tstY, paramsM,
    'Boost', 'cancer')
adult_clf = basicResults(
    pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY, paramsA,
    'Boost', 'adult')

#madelon_final_params = {'n_estimators': 20, 'learning_rate': 0.02}
#adult_final_params = {'n_estimators': 10, 'learning_rate': 1}
#OF_params = {'learning_rate':1}

# Best parameters from each search, plus one OF parameter set shared by both
# pipelines.
cancer_final_params = cancer_clf.best_params_
adult_final_params = adult_clf.best_params_
OF_params = dict(Boost__base_estimator__alpha=-1, Boost__n_estimators=50)

# Timing curves with the best parameters.
pipeM.set_params(**cancer_final_params)
pipeA.set_params(**adult_final_params)
makeTimingCurve(cancerX, cancerY, pipeM, 'Boost', 'cancer')
makeTimingCurve(adultX, adultY, pipeA, 'Boost', 'adult')

# Iteration curves over Boost__n_estimators with the best parameters.
# cancer uses the grid 1, 2, 5, 10, 20, ..., 100; adult stops at 50.
pipeM.set_params(**cancer_final_params)
iterationLC(
    pipeM, cancer_trgX, cancer_trgY, cancer_tstX, cancer_tstY,
    {'Boost__n_estimators': [1, 2, 5] + list(range(10, 101, 10))},
    'Boost', 'cancer')
pipeA.set_params(**adult_final_params)
iterationLC(
    pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
    {'Boost__n_estimators': [1, 2, 5] + list(range(10, 51, 10))},
    'Boost', 'adult')

# Same iteration curves with the OF parameters.
pipeM.set_params(**OF_params)
iterationLC(
    pipeM, cancer_trgX, cancer_trgY, cancer_tstX, cancer_tstY,
    {'Boost__n_estimators': [1, 2, 5] + list(range(10, 101, 10))},
    'Boost_OF', 'cancer')
pipeA.set_params(**OF_params)
iterationLC(
    pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
    {'Boost__n_estimators': [1, 2, 5] + list(range(10, 51, 10))},
    'Boost_OF', 'adult')

             
コード例 #27
0
# OF parameter set for the spam pipeline: MLP__alpha is overridden to 0.
spam_OF_params['MLP__alpha'] = 0

# The feature-selected (spam_fs) variants of every step below are disabled
# and kept as comments.
#spam_fs_final_params = spam_clf_fs.best_params_
#spam_fs_OF_params =spam_fs_final_params.copy()
#spam_fs_OF_params['MLP__alpha'] = 0

# Timing curve with the final parameters and early stopping switched off.
pipeS.set_params(**spam_final_params)
pipeS.set_params(MLP__early_stopping=False)
makeTimingCurve(spamX, spamY, pipeS, 'ANN', 'spam')

#pipeS_fs.set_params(**spam_fs_final_params)
#pipeS_fs.set_params(**{'MLP__early_stopping':False})
#makeTimingCurve(spamX,spamY,pipeS_fs,'ANN','spam_fs')

# Iteration curve over MLP__max_iter = 1, 2, 4, ..., 2048 with the final
# parameters.
pipeS.set_params(**spam_final_params)
pipeS.set_params(MLP__early_stopping=False)
iterationLC(
    pipeS, spam_trgX, spam_trgY, spam_tstX, spam_tstY,
    {'MLP__max_iter': [1 << p for p in range(12)]},
    'ANN', 'spam')

#pipeS_fs.set_params(**spam_fs_final_params)
#pipeS_fs.set_params(**{'MLP__early_stopping':False})
#iterationLC(pipeS_fs,spam_trgX,spam_trgY,spam_tstX,spam_tstY,{'MLP__max_iter':[2**x for x in range(12)]+[2100,2200,2300,2400,2500,2600,2700,2800,2900,3000]},'ANN','spam_fs')

# Same iteration curve with the OF parameters.
pipeS.set_params(**spam_OF_params)
pipeS.set_params(MLP__early_stopping=False)
iterationLC(
    pipeS, spam_trgX, spam_trgY, spam_tstX, spam_tstY,
    {'MLP__max_iter': [1 << p for p in range(12)]},
    'ANN_OF', 'spam')

#pipeS_fs.set_params(**spam_fs_OF_params)
#pipeS_fs.set_params(**{'MLP__early_stopping':False})
#iterationLC(pipeS_fs,spam_trgX,spam_trgY,spam_tstX,spam_tstY,{'MLP__max_iter':[2**x for x in range(12)]+[2100,2200,2300,2400,2500,2600,2700,2800,2900,3000]},'ANN_OF','spam_fs')
コード例 #28
0
ファイル: SVM.py プロジェクト: mcgarrah/RandomizeOptimization
                           madelon_tstY, params_madelon, 'SVM_RBF', 'madelon')
# Baseline run for the adult data; the returned object exposes the selected
# parameters through `best_params_`.
adult_clf = basicResults(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
                         params_adult, 'SVM_RBF', 'adult')

# Tuned parameters per dataset, plus an 'OF' copy of each in which the SVM
# alpha is set to 1e-16.
madelon_final_params = madelon_clf.best_params_
madelon_OF_params = madelon_final_params.copy()
madelon_OF_params['SVM__alpha'] = 1e-16
adult_final_params = adult_clf.best_params_
adult_OF_params = adult_final_params.copy()
adult_OF_params['SVM__alpha'] = 1e-16

# Timing curves: each dataset is timed with its own tuned pipeline.
pipeM.set_params(**madelon_final_params)
makeTimingCurve(madelonX, madelonY, pipeM, 'SVM_RBF', 'madelon')
pipeA.set_params(**adult_final_params)
# The adult curve must use pipeA; pipeM carries the madelon configuration.
makeTimingCurve(adultX, adultY, pipeA, 'SVM_RBF', 'adult')

# Sweeps over the number of iterations with the tuned parameters.
pipeM.set_params(**madelon_final_params)
iterationLC(pipeM, madelon_trgX, madelon_trgY, madelon_tstX, madelon_tstY,
            {'SVM__n_iter': [2**x for x in range(12)]}, 'SVM_RBF', 'madelon')
pipeA.set_params(**adult_final_params)
iterationLC(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
            {'SVM__n_iter': np.arange(1, 75, 3)}, 'SVM_RBF', 'adult')

# Sweeps over the number of iterations with the 'OF' parameter sets.
pipeA.set_params(**adult_OF_params)
iterationLC(pipeA, adult_trgX, adult_trgY, adult_tstX, adult_tstY,
            {'SVM__n_iter': np.arange(1, 75, 3)}, 'SVM_RBF_OF', 'adult')
pipeM.set_params(**madelon_OF_params)
iterationLC(pipeM, madelon_trgX, madelon_trgY, madelon_tstX, madelon_tstY,
            {'SVM__n_iter': np.arange(100, 2600, 100)}, 'SVM_RBF_OF',
            'madelon')
コード例 #29
0
ファイル: ANN.py プロジェクト: NarendraGadidasu/Abalone
    'MLP__alpha': alphas,
    'MLP__hidden_layer_sizes': hiddens_ab
}

# Baseline run on the abalone data; the returned object exposes the selected
# parameters through `best_params_`.
ab_clf = basicResults(
    pipeA, ab_trgX, ab_trgY, ab_tstX, ab_tstY, params_ab, 'ANN', 'ab',
)

# Tuned parameters, plus an 'OF' copy with the MLP alpha set to 0.
ab_final_params = ab_clf.best_params_
ab_OF_params = ab_final_params.copy()
ab_OF_params['MLP__alpha'] = 0

# Timing curve with the tuned parameters and early stopping switched off.
pipeA.set_params(**ab_final_params)
pipeA.set_params(MLP__early_stopping=False)
makeTimingCurve(abX, abY, pipeA, 'ANN', 'ab')

# Sweep max_iter over powers of two (1 .. 2048) followed by 2100 .. 3000 in
# steps of 100, using the tuned parameters.
pipeA.set_params(**ab_final_params)
pipeA.set_params(MLP__early_stopping=False)
iterationLC(
    pipeA, ab_trgX, ab_trgY, ab_tstX, ab_tstY,
    {
        'MLP__max_iter': [2 ** power for power in range(12)]
        + list(range(2100, 3001, 100))
    },
    'ANN', 'ab',
)

# Same max_iter sweep with the 'OF' parameter set.
pipeA.set_params(**ab_OF_params)
pipeA.set_params(MLP__early_stopping=False)
iterationLC(
    pipeA, ab_trgX, ab_trgY, ab_tstX, ab_tstY,
    {
        'MLP__max_iter': [2 ** power for power in range(12)]
        + list(range(2100, 3001, 100))
    },
    'ANN_OF', 'ab',
)