Example #1
# Split the data, reserving 40% of it for testing
X_train, X_test, y_train, y_test = train_test_split(datatrain_array[:, :9],
                                                    datatrain_array[:, 9],
                                                    test_size=0.4)
"""
The input layer has 9 neurons because the phishing dataset has 9 features.
The hidden layer has 10 neurons.
The output layer has 3 neurons because the phishing dataset has 3 classes.
Using the built-in solver sgd (stochastic gradient descent),
learning rate = 0.01,
max iterations = 1500.
"""

from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(hidden_layer_sizes=(10,),
                    activation='logistic',
                    solver='sgd',
                    learning_rate_init=0.01,
                    max_iter=1500,
                    random_state=7,
                    tol=0.001)

# Train the model
mlp.fit(X_train, y_train)

# Test the model
print("testing model score")
print(mlp.score(X_test, y_test))
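
# As a quick sanity check (sketch), the fitted weights should reflect the
# 9-10-3 layout described above; coefs_ and classes_ are attributes of a
# fitted MLPClassifier.
for i, w in enumerate(mlp.coefs_):
    print("layer", i, "weight matrix shape:", w.shape)
print("classes:", mlp.classes_)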
Example #2
from mnist import MNIST
from sklearn.neural_network import MLPClassifier

mndata = MNIST('samples')

images, labels = mndata.load_training()

classifier = MLPClassifier(solver='sgd',
                           alpha=0.0001,
                           verbose=True,
                           hidden_layer_sizes=(70, ),
                           random_state=1)

print('training')
classifier.fit(images, labels)

import pickle

pickle.dump(classifier, open('network.pickle', 'wb'))
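
# The pickled network can later be reloaded and, assuming the MNIST test files are
# also present under 'samples', scored on unseen digits (sketch):
test_images, test_labels = mndata.load_testing()
loaded = pickle.load(open('network.pickle', 'rb'))
print('test accuracy:', loaded.score(test_images, test_labels))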
Example #3
def ann(train_x, train_y, test_x, test_y, msno_df):
    print ("ANN")
    clf = MLPClassifier(hidden_layer_sizes=(100,150,100,50), activation="relu", solver="lbfgs", alpha=1.0, max_iter=500)
    checkResult(clf, "ANN", train_x, train_y, test_x, test_y, msno_df)
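
# checkResult is defined elsewhere in the project; a minimal sketch of what such a
# helper might do (hypothetical body; msno_df presumably carries the member ids used
# when exporting predictions and is ignored here):
def checkResult(clf, name, train_x, train_y, test_x, test_y, msno_df):
    clf.fit(train_x, train_y)
    print(name, "test accuracy:", clf.score(test_x, test_y))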
Example #4
def first_generation(X, y, seed=None):
    mlp_parameters = list(
        itertools.product([1, 2, 4, 8, 16], [0, 0.2, 0.5, 0.9], [0.3, 0.6]))
    mlp_clf = [
        MLPClassifier(hidden_layer_sizes=(h, ),
                      momentum=m,
                      learning_rate_init=a) for (h, m, a) in mlp_parameters
    ]
    mlp_name = ['mlp_{0}_{1}_{2}'.format(*param) for param in mlp_parameters]

    neigbhors_number = [int(i) for i in np.linspace(1, X.shape[0], 20)]
    weighting_methods = ['uniform', 'distance', lambda x: abs(1 - x)]
    knn_clf = [
        KNeighborsClassifier(n_neighbors=nn, weights=w)
        for (nn, w) in itertools.product(neigbhors_number, weighting_methods)
    ]
    knn_name = [
        'knn_{0}_{1}'.format(*param) for param in itertools.product(
            neigbhors_number, ['uniform', 'distance', 'similarity'])
    ]

    C = [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1, 10, 100]
    degree = [2, 3]
    gamma = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2]
    svm_clf_poly = [
        SVC(C=c, kernel='poly', degree=d)
        for (c, d) in itertools.product(C, degree)
    ]
    svm_clf_poly_name = [
        'svm_poly_{0}_{1}'.format(*param)
        for param in itertools.product(C, degree)
    ]
    svm_clf_rbf = [
        SVC(C=c, kernel='rbf', gamma=g)
        for (c, g) in itertools.product(C, gamma)
    ]
    svm_clf_rbf_name = [
        'svm_rbf_{0}_{1}'.format(*param)
        for param in itertools.product(C, gamma)
    ]

    dt_max_depth_params = list(
        itertools.product(['gini', 'entropy'], [1, 2, 3, 4, None]))
    dt_max_depth_clf = [DecisionTreeClassifier(criterion=c, max_depth=d) \
                        for (c, d) in dt_max_depth_params]
    dt_max_depth_name = [
        'dt_max_depth_{0}_{1}'.format(*param) for param in dt_max_depth_params
    ]

    dt_max_features_params = list(
        itertools.product(['gini', 'entropy'], [None, 'sqrt', 'log2', 0.5]))
    dt_max_features_clf = [DecisionTreeClassifier(criterion=c, max_features=f) \
                           for (c, f) in dt_max_features_params]
    dt_max_features_name = [
        'dt_max_features_{0}_{1}'.format(*param)
        for param in dt_max_features_params
    ]

    dt_min_leaf_params = [2, 3]
    dt_min_leaf_clf = [
        DecisionTreeClassifier(min_samples_leaf=l) for l in dt_min_leaf_params
    ]
    dt_min_leaf_name = [
        'dt_min_leaf_{0}'.format(param) for param in dt_min_leaf_params
    ]

    pool = mlp_clf + knn_clf + svm_clf_poly + svm_clf_rbf + dt_max_depth_clf + dt_max_features_clf + \
           dt_min_leaf_clf
    pool_name = mlp_name + knn_name + svm_clf_poly_name + svm_clf_rbf_name + dt_max_depth_name + \
                dt_max_features_name + dt_min_leaf_name

    ensemble = VotingClassifier(estimators=list(zip(pool_name, pool)))
    ensemble.fit(X, y)
    estimators = ensemble.estimators_

    return estimators, pool_name
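
# A sketch of how the returned pool might be used, assuming X_train/y_train and
# X_test/y_test come from an earlier train_test_split:
estimators, names = first_generation(X_train, y_train)
for name, est in zip(names, estimators):
    print(name, est.score(X_test, y_test))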
Example #5
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn import datasets
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

mnist = datasets.load_digits()
X = mnist['data']
y = mnist['target']
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=3116)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
#Initialize Neural Network, set up parameters for your grid search and implement a Random Search procedure
ann = MLPClassifier()
grid_parameters = {
    'hidden_layer_sizes': list(range(100, 450, 50)),
    'activation': ['relu', 'identity', 'logistic', 'tanh'],
    'learning_rate': ['constant', 'adaptive', 'invscaling']
}
ann_grid_search = RandomizedSearchCV(ann, grid_parameters, cv=5, n_iter=10)
ann_grid_search.fit(X_train, y_train)

#Accuracy score
y_pred = ann_grid_search.predict(X_test)
print('Accuracy Score:', accuracy_score(y_test, y_pred))

#Best hyperparameters
print(ann_grid_search.best_estimator_)
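
#The search also exposes the winning configuration and its mean CV accuracy
print(ann_grid_search.best_params_)
print(ann_grid_search.best_score_)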
Example #6
x_data = x_data.loc[:, x_data.columns != "Sequence"]

y_data = data.loc[:, "Type"]
random_state = 100

x_train, x_test, y_train, y_test = train_test_split(x_data,
                                                    y_data,
                                                    test_size=0.7,
                                                    random_state=100,
                                                    stratify=y_data)
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

mlp = MLPClassifier()
mlp.fit(x_train, y_train)

y_pred_train = mlp.predict(x_train)
y_pred_test = mlp.predict(x_test)

print("classifier", mlp)
print("Accuracy on Train Set")
print(mlp.score(x_train, y_train))
print("MLP Classifier")
print("Accuracy on Test Set")
print(mlp.score(x_test, y_test))
print("Report")
print(classification_report(y_test, mlp.predict(x_test)))

param_grid = {
Example #7
def second_generation(X, y, seed=None):
    features = []
    ### 25 x 2 bagged trees
    bag_gini = BaggingClassifier(
        base_estimator=DecisionTreeClassifier(criterion='gini'),
        n_estimators=25,
        random_state=seed)
    bag_gini.fit(X, y)
    bag_gini_names = ['bag_gini_' + str(i) for i in range(25)]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(bag_gini_names))])

    bag_entropy = BaggingClassifier(
        base_estimator=DecisionTreeClassifier(criterion='entropy'),
        n_estimators=25,
        random_state=3 * seed**2)
    bag_entropy.fit(X, y)
    bag_entropy_names = ['bag_entropy_' + str(i) for i in range(25)]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(bag_entropy_names))])

    ### 25 x 2 random subspaces
    rs_gini = BaggingClassifier(
        base_estimator=DecisionTreeClassifier(criterion='gini'),
        n_estimators=25,
        max_features=int(np.sqrt(X.shape[1])),
        bootstrap=False,
        random_state=seed)
    rs_gini.fit(X, y)
    rs_gini_names = ['rs_gini_' + str(i) for i in range(25)]
    features.extend(rs_gini.estimators_features_)

    rs_entropy = BaggingClassifier(
        base_estimator=DecisionTreeClassifier(criterion='entropy'),
        n_estimators=25,
        max_features=int(np.sqrt(X.shape[1])),
        bootstrap=False,
        random_state=3 * seed**2)
    rs_entropy.fit(X, y)
    rs_entropy_names = ['rs_entropy_' + str(i) for i in range(25)]
    features.extend(rs_entropy.estimators_features_)

    ### 14 Ada
    nb_stumps = [2, 4, 8, 16, 32, 64, 128]
    ada_st_gini = [
        AdaBoostClassifier(base_estimator=DecisionTreeClassifier(
            criterion='gini', max_depth=1),
                           n_estimators=st,
                           random_state=seed) for st in nb_stumps
    ]
    ada_st_gini_names = ['ada_st_gini_' + str(i) for i in nb_stumps]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(ada_st_gini_names))])
    for clf in ada_st_gini:
        clf.fit(X, y)

    ada_st_entropy = [
        AdaBoostClassifier(base_estimator=DecisionTreeClassifier(
            criterion='entropy', max_depth=1),
                           n_estimators=st,
                           random_state=3 * seed**2) for st in nb_stumps
    ]
    ada_st_entropy_names = ['ada_st_entropy_' + str(i) for i in nb_stumps]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(ada_st_entropy_names))])
    for clf in ada_st_entropy:
        clf.fit(X, y)

    ### 8 Ada DT
    nb_dt = [2, 4, 8, 16]
    ada_dt_gini = [
        AdaBoostClassifier(base_estimator=DecisionTreeClassifier(
            criterion='gini', max_depth=3),
                           n_estimators=dt,
                           random_state=seed) for dt in nb_dt
    ]
    ada_dt_gini_names = ['ada_dt_gini_' + str(i) for i in nb_dt]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(ada_dt_gini_names))])
    for clf in ada_dt_gini:
        clf.fit(X, y)

    ada_dt_entropy = [
        AdaBoostClassifier(base_estimator=DecisionTreeClassifier(
            criterion='entropy', max_depth=3),
                           n_estimators=dt,
                           random_state=3 * seed**2) for dt in nb_dt
    ]
    ada_dt_entropy_names = ['ada_dt_entropy_' + str(i) for i in nb_dt]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(ada_dt_entropy_names))])
    for clf in ada_dt_entropy:
        clf.fit(X, y)

    ### 24 ANN
    mlp_parameters = list(itertools.product([1, 2, 4, 8, 32, 128],\
                                            [0, 0.2, 0.5, 0.9]))
    mlp_clf = [
        MLPClassifier(hidden_layer_sizes=(h, ), momentum=m)
        for (h, m) in mlp_parameters
    ]
    for clf in mlp_clf:
        clf.fit(X, y)
    mlp_name = ['mlp_{0}_{1}'.format(*param) for param in mlp_parameters]
    features.extend([np.arange(X.shape[1]) for _ in range(len(mlp_name))])

    ### 54 SVM
    C = np.logspace(-3, 2, num=6)
    gamma = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2]

    svm_linear = [SVC(C=c, kernel='poly', degree=1) for c in C]
    for clf in svm_linear:
        clf.fit(X, y)
    svm_linear_names = ['svm_linear_' + str(c) for c in C]
    features.extend(
        [np.arange(X.shape[1]) for _ in range(len(svm_linear_names))])

    svm_rbf = [SVC(C=c, gamma=g) for c, g in itertools.product(C, gamma)]
    for clf in svm_rbf:
        clf.fit(X, y)
    svm_rbf_names = [
        'svm_rbf_{0}_{1}'.format(*param)
        for param in itertools.product(C, gamma)
    ]
    features.extend([np.arange(X.shape[1]) for _ in range(len(svm_rbf_names))])

    pool = bag_gini.estimators_ + bag_entropy.estimators_ + rs_gini.estimators_ + rs_entropy.estimators_ + \
           ada_st_gini + ada_st_entropy + ada_dt_gini + ada_dt_entropy + mlp_clf + svm_linear + svm_rbf

    pool_name = bag_gini_names + bag_entropy_names + rs_gini_names + rs_entropy_names + ada_st_gini_names + \
                ada_st_entropy_names + ada_dt_gini_names + ada_dt_entropy_names + mlp_name + svm_linear_names + \
                svm_rbf_names

    return pool, pool_name, features
Example #8
X, Y = nudge_dataset(X, digits.target)
X = (X - np.min(X, 0)) / (np.max(X, 0) + 0.0001)  # 0-1 scaling
X = 2 * X - 1  # [-1,1] scaling

# plot_sample(X[0,:])
# plot_sample(-X[0,:])
# exit()

X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.2,
                                                    random_state=0)

model1 = MLPClassifier(solver='lbfgs',
                       alpha=1e-5,
                       activation='tanh',
                       hidden_layer_sizes=(20, 10),
                       random_state=1)

model2 = ConstrainedMLPClassifier(solver='lbfgs',
                                  alpha=1e-5,
                                  activation='tanh',
                                  hidden_layer_sizes=(20, 10),
                                  random_state=2,
                                  fit_intercepts=False)

model1.fit(X_train, Y_train)
model2.fit(X_train, Y_train)

result_1 = "NN model with biases test results:\n{}\n".format(
    metrics.classification_report(Y_test, model1.predict(X_test)))
## With K-fold, the training data is divided into 5 parts; the model is fit on 4 parts and tested on the 5th
kfold = KFold(n_splits=5, shuffle=True, random_state=82089)
cv_results = cross_val_score(logreg, x_train, y_train, cv=kfold)
print (cv_results.mean()*100, "%")

## Define regularization parameter
## The lower the value of C, the more strongly we penalize the coefficients of our logistic regression
param_grid = {"C":[0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]}
grid = GridSearchCV(estimator=logreg, param_grid=param_grid, cv=kfold)
grid.fit(x_train,y_train)
print (grid.best_estimator_.C)
print (grid.best_score_*100, "%")

## Generate Neural Network model
from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(solver='lbfgs', random_state=1, activation='logistic', hidden_layer_sizes=(100,))
kfold = KFold(n_splits=5, shuffle=True, random_state=82089)
cv_results = cross_val_score(clf, x_train, y_train, cv=kfold)

print (cv_results.mean()*100, "%")
## Find the regularization parameter
param_grid = {"alpha":10.0 ** -np.arange(-4, 7)}
grid = GridSearchCV(estimator=clf, param_grid=param_grid, cv=kfold)
grid.fit(x_train,y_train)
print (grid.best_estimator_.alpha)
print (grid.best_score_*100, "%")

## Now that we know the optimal alpha and C values, let's check the resulting accuracy.

## For Logistic regression
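## A sketch of that check, assuming the best C reported above was stored as best_C and
## a held-out x_test/y_test split exists alongside x_train/y_train:
best_logreg = LogisticRegression(C=best_C)
best_logreg.fit(x_train, y_train)
print (best_logreg.score(x_test, y_test)*100, "%")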
Example #10
np.random.seed(seed)
np.random.shuffle(X_train) 
np.random.seed(seed)
np.random.shuffle(Y_train)


models = []
models.append(('GLM', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('DT', DecisionTreeClassifier()))
models.append(('BY', GaussianNB()))
models.append(('SVM', SVC(probability=True)))
models.append(('BAG', BaggingClassifier()))
models.append(('NNet', MLPClassifier()))
models.append(('RF', RandomForestClassifier()))
models.append(('BST', AdaBoostClassifier()))

seed = 7

#crossvalidation on trainset and select the best model on validation set, test on test set
import xlwt
sel = []
sel.append(('CHSQ', SelectKBest(chi2, k=num_fea)))
sel.append(('ANOVA', SelectKBest(f_classif, k=num_fea)))
sel.append(('TSCR', SelectKBest(t_score.t_score, k=num_fea)))
sel.append(('FSCR', SelectKBest(fisher_score.fisher_score, k=num_fea)))
sel.append(('RELF', SelectKBest(reliefF.reliefF, k=num_fea)))

Example #11
# Now we check the accuracy, and that's it!
predicciones = logreg.predict(data_x_test)
print("Accuracy (logistic regression): {0}".format(np.mean(predicciones==data_y_test) * 100))



######################################################
# Neural network
######################################################


from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

modelo = MLPClassifier(random_state=1, verbose=False)

param_grid = [
  {
  	'hidden_layer_sizes': [(2,),(4,),(8,),(16,)], 
  	'solver':['lbfgs'],
  	# 'alpha': 10.0 ** -np.arange(1, 7),
  	# 'max_iter': [500,1000,1500]
  },
  {
  	'hidden_layer_sizes': [(2,),(4,),(8,),(16,)], 
  	'solver':['adam'],
  	# 'alpha': 10.0 ** -np.arange(1, 7),
  	# 'max_iter': [500,1000,1500]
  }
]
Example #12
import support
from sklearn.model_selection import KFold, cross_validate
from sklearn.svm import SVC, SVR
from sklearn.gaussian_process import GaussianProcessClassifier, GaussianProcessRegressor
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor

if __name__ == '__main__':  # run only when this file is executed directly, not when imported
    # list of benchmark algorithms and the classifier/regressor models that implement each one;
    # the arguments for each algorithm are specified here
    models = [
        ('SVM', SVC(random_state=1), SVR()),
        ('GaussianProcess', GaussianProcessClassifier(random_state=1),
         GaussianProcessRegressor(normalize_y=True, alpha=1, random_state=1)),
        ('KNeighbors', KNeighborsClassifier(), KNeighborsRegressor()),
        ('MLP', MLPClassifier(random_state=1),
         MLPRegressor(hidden_layer_sizes=(5), solver='lbfgs', random_state=1)),
    ]

    # list of validation dataset files and, for each file, its delimiter, header row position, and index column position
    classifier_files = ['iris.data', 'sonar.all-data', 'glass.data']
    classifier_params = [(',', None, None), (',', None, None), (',', None, 0)]
    regressor_files = [
        'airfoil_self_noise.dat', 'winequality-red.csv',
        'winequality-white.csv'
    ]
    regressor_params = [(r'\t', None, None), (';', 0, None), (';', 0, None)]

    # table that stores the evaluation score for each validation dataset file and algorithm
    result = pd.DataFrame(
        columns=['target', 'function'] + [m[0] for m in models],

# how you can make predictions
predictions = model.predict(X_test)

# what did we get?
predictions

# manually check the accuracy of your predictions
N = len(y_test)
np.sum(predictions == y_test) / N # can also just call np.mean()



# we can even use deep learning to solve the same problem!
from sklearn.neural_network import MLPClassifier

# you'll learn why scaling is needed in a later course
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train2 = scaler.fit_transform(X_train)
X_test2 = scaler.transform(X_test)

model = MLPClassifier(max_iter=500)
model.fit(X_train2, y_train)


# evaluate the model's performance
model.score(X_train2, y_train)
model.score(X_test2, y_test)
Example #14
train_index_list = pickle.load(mfile)
test_index_list = pickle.load(mfile)
mfile.close()

y = yb

cls_names = [
    'SVC', 'LogReg', 'GradBoost', 'NeuralNet', 'RandForest', 'NaiveBayes',
    'K-NN'
]

clss = [
    SVC(gamma='scale'),
    LogisticRegression(solver='lbfgs', max_iter=500),
    GradientBoostingClassifier(),
    MLPClassifier(),
    RandomForestClassifier(),
    GaussianNB(),
    KNeighborsClassifier()
]

accuracy_score_log = {}
for cn, cls in zip(cls_names, clss):
    print("\n\nEvaluating %s now......" % cn)
    accuracy_score_log[cn] = []
    fold_i = 0
    for train_index, test_index in zip(train_index_list, test_index_list):
        fold_i += 1
        print("\t\tfold %d: " % fold_i, end='')

        X_train, X_test = X[train_index], X[test_index]
ap = argparse.ArgumentParser()
ap.add_argument("-m", "--model", type=str, default="knn",
        help="type of python machine learning model to use")
args = vars(ap.parse_args())

# define the dictionary of models our script can use, where the key
# to the dictionary is the name of the model (supplied via command
# line argument) and the value is the model itself
models = {
        "knn": KNeighborsClassifier(n_neighbors=1),
        "naive_bayes": GaussianNB(),
        "logit": LogisticRegression(solver="lbfgs", multi_class="auto"),
        "svm": SVC(kernel="rbf", gamma="auto"),
        "decision_tree": DecisionTreeClassifier(),
        "random_forest": RandomForestClassifier(n_estimators=100),
        "mlp": MLPClassifier()
}

# load the Iris dataset and perform a training and testing split,
# using 75% of the data for training and 25% for evaluation
print("[INFO] loading data...")
dataset = load_iris()
(trainX, testX, trainY, testY) = train_test_split(dataset.data,
        dataset.target, random_state=3, test_size=0.25)

# train the model
print("[INFO] using '{}' model".format(args["model"]))
model = models[args["model"]]
model.fit(trainX, trainY)

# make predictions on our data and show a classification report
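# (sketch, assuming classification_report was imported from sklearn.metrics)
predictions = model.predict(testX)
print(classification_report(testY, predictions,
        target_names=dataset.target_names))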
accuracy=0
hl=[1,3]
act=['logistic', 'tanh', 'relu']
sol=['lbfgs','sgd','adam']
al=[0.0001,0.0005]
bs=[64,128]
lr=['constant','invscaling','adaptive']
best = 0
best_params = [0,0,0,0,0,0]
params = [0,0,0,0,0,0]
for h in hl:
    for a in act:
        for s in sol:
            for a1 in al:
                for b in bs:
                    for l in lr:
                        classifier=MLPClassifier(hidden_layer_sizes=h,activation=a,solver=s,alpha=a1,batch_size=b,learning_rate=l)
                        classifier.fit(train_inp,train_label)
                        ypred=classifier.predict(test_inp)
                        #acc=classifier.score(ypred,test_label)
                        x=0
                        score=0
                        for i in ypred:
                            if(i==test_label[x]):
                                score=score+1
                            x=x+1
                        accuracy=score/len(ypred)
                        params=[h,a,s,a1,b,l]
                        print("=========")
                        print('Accuracy:',accuracy)
                        print('Params:',params)
                        print("=========")
                        if(best<=accuracy):
                            best=accuracy
                            best_params=params
Example #17
from pylab import rcParams
from sklearn import metrics 
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix,accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
url = 'C:/Users/Camila/Documents/Tesis/csv/relative/data.csv'
eeg_dataset = pd.read_csv(url,error_bad_lines=False)
eeg_dataset.head()


X = eeg_dataset[['alpha','betha','delta','gamma','theta']].values
y = eeg_dataset[['class']].values.ravel()

clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                    hidden_layer_sizes=(25, 2), random_state=1, max_iter=2500)

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.7,test_size = .3, random_state=25)

from sklearn.preprocessing import StandardScaler  
scaler = StandardScaler() 

# Don't cheat - fit only on training data
scaler.fit(X_train)  
X_train = scaler.transform(X_train) 

# apply same transformation to test data
X_test = scaler.transform(X_test)

clf.fit(X_train, y_train)
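
# A possible evaluation step (sketch) using the metrics imported above
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))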
h = .02  # step size in the mesh

names = [
    "Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
    "Decision Tree", "Random Forest", "Neural Net", "AdaBoost", "Naive Bayes",
    "QDA"
]

classifiers = [
    KNeighborsClassifier(2),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=50),
    RandomForestClassifier(max_depth=50, n_estimators=100, max_features=10),
    MLPClassifier(alpha=1, max_iter=10000),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()   # "QDA" in names
]

import scipy.io

dataset = scipy.io.loadmat('train_data.mat')
train_data = dataset['train_data']
X_train = train_data[:, 0:-1]
y_train = train_data[:, -1]

dataset = scipy.io.loadmat('test_data.mat')
test_data = dataset['test_data']
X_test = test_data[:, 0:-1]
y_test = test_data[:, -1]
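
# A possible way to run the comparison (sketch): fit each classifier on the training
# split loaded above and report its accuracy on the test split.
for name, clf in zip(names, classifiers):
    clf.fit(X_train, y_train)
    print(name, clf.score(X_test, y_test))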
Example #19
yeast.loc[:, 'SequenceName'] = enc.fit_transform(yeast.loc[:, 'SequenceName'])
yeast.set_index(['SequenceName'], inplace=True)
yeast.loc[:, 'ClassDist'] = enc.fit_transform(yeast.loc[:, 'ClassDist'])

# -- DV/IV Splitting --
X_adult = adult.drop('income_bin', axis=1)
Y_adult = adult.loc[:, 'income_bin']

X_yeast = yeast.drop('ClassDist', axis=1)
Y_yeast = yeast.loc[:, 'ClassDist']

# -- Classifier setup --
default_adult_DTree = DecisionTreeClassifier(random_state=13)
default_adult_knn = KNeighborsClassifier()
default_adult_RFC = RandomForestClassifier(random_state=13)
default_adult_MLP = MLPClassifier(max_iter=5000, random_state=13)
default_adult_SVC = SVC(random_state=13)
default_yeast_DTree = DecisionTreeClassifier(random_state=13)
default_yeast_knn = KNeighborsClassifier()
default_yeast_RFC = RandomForestClassifier(random_state=13)
default_yeast_MLP = MLPClassifier(max_iter=5000, random_state=13)
default_yeast_SVC = SVC(random_state=13)

adult_DTree = DecisionTreeClassifier(criterion="entropy",
                                     max_depth=10,
                                     min_samples_leaf=50,
                                     min_samples_split=500,
                                     random_state=13)
adult_knn = KNeighborsClassifier(n_neighbors=30, n_jobs=4)
adult_RFC = RandomForestClassifier(max_depth=20,
                                   min_samples_split=50,
Example #20
 def set_mlp(self, hidden_layer_sizes):
     self.mlp_hidden_layer_sizes_list.append(hidden_layer_sizes)
     self.mlp_clf = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, random_state=1)
Example #21
def third_generation(X, y, size=200, seed=None):
    mlp_parameters = list(itertools.product([1, 2, 4, 8, 32, 128],\
                                            [0, 0.2, 0.5, 0.9],
                                            [0.1, 0.3, 0.6]))
    mlp_clf = [
        MLPClassifier(hidden_layer_sizes=(h, ),
                      momentum=m,
                      learning_rate_init=a) for (h, m, a) in mlp_parameters
    ]
    mlp_name = ['mlp_{0}_{1}_{2}'.format(*param) for param in mlp_parameters]

    neigbhors_number = [int(i) for i in np.linspace(1, X.shape[0], 40)]
    weighting_methods = ['uniform', 'distance']
    knn_clf = [
        KNeighborsClassifier(n_neighbors=nn, weights=w)
        for (nn, w) in itertools.product(neigbhors_number, weighting_methods)
    ]
    knn_name = [
        'knn_{0}_{1}'.format(*param) for param in itertools.product(
            neigbhors_number, ['uniform', 'distance'])
    ]
    C = np.logspace(-3, 7, num=11)
    degree = [2, 3, 4]
    gamma = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2]
    svm_clf_poly = [
        SVC(C=c, kernel='poly', degree=d)
        for (c, d) in itertools.product(C, degree)
    ]
    svm_clf_poly_name = [
        'svm_poly_{0}_{1}'.format(*param)
        for param in itertools.product(C, degree)
    ]
    svm_clf_rbf = [
        SVC(C=c, kernel='rbf', gamma=g)
        for (c, g) in itertools.product(C, gamma)
    ]
    svm_clf_rbf_name = [
        'svm_rbf_{0}_{1}'.format(*param)
        for param in itertools.product(C, gamma)
    ]

    dt_params = list(itertools.product(['gini', 'entropy'], \
                                       [1, 2, 3, 4, 5, None], \
                                       [None, 'sqrt', 'log2'], \
                                       ['best', 'random']))
    dt_clf = [
        DecisionTreeClassifier(criterion=c,
                               max_depth=d,
                               max_features=f,
                               splitter=s) for (c, d, f, s) in dt_params
    ]
    dt_name = ['dt_{0}_{1}_{2}_{3}'.format(*param) for param in dt_params]

    et_clf = [
        ExtraTreeClassifier(criterion=c,
                            max_depth=d,
                            max_features=f,
                            splitter=s) for (c, d, f, s) in dt_params
    ]
    et_name = ['et_{0}_{1}_{2}_{3}'.format(*param) for param in dt_params]

    ada_params = list(itertools.product([2**i for i in range(1, 14)], \
                                        [1, 2, 3]))
    ada_dt_clf = [
        AdaBoostClassifier(n_estimators=n,
                           base_estimator=DecisionTreeClassifier(max_depth=m))
        for (n, m) in ada_params
    ]
    ada_et_clf = [
        AdaBoostClassifier(n_estimators=n,
                           base_estimator=ExtraTreeClassifier(max_depth=m))
        for (n, m) in ada_params
    ]
    ada_dt_name = ['ada_dt_{0}_{1}'.format(*param) for param in ada_params]
    ada_et_name = ['ada_et_{0}_{1}'.format(*param) for param in ada_params]

    nb_bag_est = 50
    nb_bag_stumps = 200
    bag_dt = BaggingClassifier(n_estimators=nb_bag_est,
                               base_estimator=DecisionTreeClassifier())
    bag_et = BaggingClassifier(n_estimators=nb_bag_est,
                               base_estimator=ExtraTreeClassifier())
    bag_stumps = BaggingClassifier(
        n_estimators=nb_bag_stumps,
        base_estimator=DecisionTreeClassifier(max_depth=1))
    bag_dt.fit(X, y)
    bag_et.fit(X, y)
    bag_stumps.fit(X, y)
    dt_bag_clf = bag_dt.estimators_
    et_bag_clf = bag_et.estimators_
    stump_bag_clf = bag_stumps.estimators_
    dt_bag_name = ['dt_bag_{0}'.format(nb_est) for nb_est in range(nb_bag_est)]
    et_bag_name = ['et_bag_{0}'.format(nb_est) for nb_est in range(nb_bag_est)]
    stump_bag_name = [
        'stump_bag_{0}'.format(nb_est) for nb_est in range(nb_bag_stumps)
    ]

    bag_dt_clf = [bag_dt]
    bag_et_clf = [bag_et]
    bag_stump_clf = [bag_stumps]
    bag_dt_name = ['bag_dt_{0}'.format(str(nb_bag_est))]
    bag_et_name = ['bag_et_{0}'.format(str(nb_bag_est))]
    bag_stump_name = ['bag_stump_{0}'.format(str(200))]

    nb_rf = 15
    rf = RandomForestClassifier(n_estimators=nb_rf)
    rf.fit(X, y)
    dt_rf_clf = rf.estimators_
    dt_rf_name = ['dt_rf_{0}'.format(nb_est) for nb_est in range(nb_rf)]

    log_parameters = list(itertools.product(['l1', 'l2'],\
                                            np.logspace(-5, 9, num=15),
                                            [True, False]))
    log_clf = [
        LogisticRegression(penalty=l, C=c, fit_intercept=f)
        for (l, c, f) in log_parameters
    ]
    log_name = ['log_{0}_{1}_{2}'.format(*param) for param in log_parameters]

    sgd_parameters = list(
        itertools.product([
            'hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron',
            'squared_loss', 'huber', 'epsilon_insensitive',
            'squared_epsilon_insensitive'
        ], ['elasticnet'], [True, False], np.arange(0, 1.1, 0.1)))
    sgd_clf = [
        SGDClassifier(loss=l, penalty=p, fit_intercept=f, l1_ratio=l1)
        for (l, p, f, l1) in sgd_parameters
    ]
    sgd_name = [
        'sgd_{0}_{1}_{2}_{3}'.format(*param) for param in sgd_parameters
    ]

    pool = mlp_clf + knn_clf + svm_clf_poly + svm_clf_rbf + dt_clf + et_clf + ada_dt_clf + ada_et_clf + \
                dt_bag_clf + et_bag_clf + stump_bag_clf + bag_dt_clf + bag_et_clf + bag_stump_clf + dt_rf_clf + \
                log_clf + sgd_clf
    pool_name = mlp_name + knn_name + svm_clf_poly_name + svm_clf_rbf_name + dt_name + et_name + ada_dt_name + \
                ada_et_name + dt_bag_name + et_bag_name + stump_bag_name + bag_dt_name + bag_et_name + \
                bag_stump_name + dt_rf_name + log_name + sgd_name

    for model in pool:
        if not check_model_is_fitted(model, X[0, :].reshape((1, -1))):
            model.fit(X, y)

    np.random.seed(seed)
    order = np.random.permutation(range(len(pool)))
    estimators = [pool[i] for i in order[:size]]

    return estimators, pool_name
Example #22
from sklearn.linear_model import RidgeClassifierCV

model = RidgeClassifierCV()
classifier(X_train_rare, y_train, X_test_rare, y_test, cats, model)
classifier(X_train_freq, y_train, X_test_freq, y_test, cats, model)

from sklearn.svm import SVC

model = SVC()
classifier(X_train_rare, y_train, X_test_rare, y_test, cats, model)
classifier(X_train_freq, y_train, X_test_freq, y_test, cats, model)

from sklearn.neural_network import MLPClassifier

model = MLPClassifier()
classifier(X_train, y_train, X_test, y_test, cats, model)
classifier(X_train_freq, y_train, X_test_freq, y_test, cats, model)

from sklearn.svm import LinearSVC

model = LinearSVC()
classifier(X_train, y_train, X_test, y_test, cats, model)
classifier(X_train_freq, y_train, X_test_freq, y_test, cats, model)

from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier()
classifier(X_train, y_train, X_test, y_test, cats, model)
classifier(X_train_freq, y_train, X_test_freq, y_test, cats, model)
data1 = scipy.io.loadmat('NN_ex4/ex4data1.mat')
data2 = scipy.io.loadmat('NN_ex4/ex4weights.mat')
X = np.array(data1["X"])          # 5000 samples with 400 parameters(20x20 gray scale) 
y = data1["y"]          # target as 0-9 digits
y[y==10] = 0            # convert 10s to 0s


# create test and train variables 


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2)

#create model

clf = MLPClassifier(solver = "lbfgs", activation="relu", hidden_layer_sizes=(20, 20))

#train model

clf.fit(X_train, y_train)

#accuracy 

predicts = clf.predict(X_test)
acc = confusion_matrix(y_test, predicts)

def accuracy(cm):
    diagonal = cm.trace()
    elements = np.sum(cm)
    return diagonal/elements
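
# e.g. report the overall accuracy derived from the confusion matrix computed above
print("Accuracy:", accuracy(acc))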
Example #24
# ])
# Y = pd.DataFrame([
#     1, 1, 1, 1, 0, 0, 0, 0
# ])
train_X = pd.DataFrame(train_X.transpose())
tr_X = train_X.iloc[:, :21600]
tr_Y = train_Y.iloc[:21600, :]
ts_X = train_X.iloc[:, 21600:]
ts_Y = train_Y.iloc[21600:, :]
print(tr_X.shape, tr_Y.shape, ts_X.shape, ts_Y.shape)
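# Note: mini_batch, step_size, load_from_file, dump_file and file_root are not standard
# scikit-learn MLPClassifier parameters; this snippet appears to rely on a customized
# MLPClassifier implementation.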
mlpc = MLPClassifier(verbose=False,
                     hidden_layer_sizes=(200, ),
                     activation='relu',
                     max_iter=20000,
                     learning_rate_init=.2,
                     warm_start=True,
                     mini_batch='auto',
                     step_size=50,
                     load_from_file=True,
                     dump_file=True,
                     file_root='nn-relu-very-wide')
mlpc.fit(tr_X, tr_Y)
# r = mlpc.predict(ts_X)
# correct = 0
# for i in zip(r, ts_Y.values.reshape(-1)):
#     left, truth = i
#     pre, _ = left
#     if pre == int(truth):
#         correct += 1
#     else:
#         print(i)
import pickle
from sklearn.neural_network import MLPClassifier

train = pickle.load(open('train_pca.pickle', 'rb'))
test = pickle.load(open('test_pca.pickle', 'rb'))

train_num = 200
test_num = 50

X_train = train[0][:train_num]
y_train = train[1][:train_num]

X_test = test[0][:test_num]
y_test = test[1][:test_num]

mlp = MLPClassifier(hidden_layer_sizes=(100, ),
                    max_iter=50,
                    alpha=1e-4,
                    solver='sgd',
                    verbose=10,
                    tol=1e-4,
                    random_state=1,
                    learning_rate_init=.1)

mlp.fit(X_train, y_train)
print("Training set score: %f" % mlp.score(X_train, y_train))
print("Test set score: %f" % mlp.score(X_test, y_test))
Example #26
def main():
    data, targets = loadData()
    norm_data = preprocessing.normalize(data)
    train, test, train_t, test_t = processIris(norm_data, targets)

    file_reader = FileReader()
    data_processor = DataProcessor()
    raw_data = file_reader.read_file("health.txt")
    h_data, h_data_norm, p_targets = data_processor.process_health(raw_data)
    p_train, p_test, p_train_t, p_test_t = processIris(h_data, p_targets)

    iris_network = NeuralNet()
    iris_network.create_layer(3)
    iris_network.train_network(train, train_t)
    iris_predictions = iris_network.predict(test)

    correct = 0
    for i in range(len(test_t)):
        if iris_predictions[i] == test_t[i]:
            correct += 1

    print("Iris with 3 nodes in hidden layer")
    print("Iris prediction correct = ", correct, "out of", len(test),
          "\nAccuracy = ", (correct / len(test_t) * 100))

    iris2_network = NeuralNet()
    iris2_network.create_layer(6)
    iris2_network.train_network(train, train_t)
    iris2_predictions = iris2_network.predict(test)

    correct = 0
    for i in range(len(test_t)):
        if iris2_predictions[i] == test_t[i]:
            correct += 1

    print("Iris with 6 nodes in hidden layer")
    print("Iris prediction correct = ", correct, "out of", len(test),
          "\nAccuracy = ", (correct / len(test_t) * 100))

    mlp_class = MLPClassifier(solver='lbfgs',
                              alpha=1e-5,
                              hidden_layer_sizes=(6))
    mlp_class.fit(train, train_t)
    mlp_iris_predict = mlp_class.predict(test)

    correct = 0
    for i in range(len(test_t)):
        if mlp_iris_predict[i] == test_t[i]:
            correct += 1

    print("Iris with 6 nodes in hidden layer SKLEARN MODEL")
    print("Iris prediction correct = ", correct, "out of", len(test_t),
          "\nAccuracy = ", (correct / len(test_t) * 100))

    pima_network = NeuralNet()
    pima_network.create_layer(4)
    pima_network.train_network(p_train, p_train_t)
    pima_predictions = pima_network.predict(p_test)

    correct = 0
    for i in range(len(p_test_t)):
        if pima_predictions[i] == p_test_t[i]:
            correct += 1

    print("Pima with 4 nodes in hidden layer")
    print("Pima prediction correct = ", correct, "out of", len(p_test),
          "\nAccuracy = ", (correct / len(p_test_t) * 100))

    pima2_network = NeuralNet()
    pima2_network.create_layer(6)
    pima2_network.train_network(p_train, p_train_t)
    pima2_predictions = pima2_network.predict(p_test)

    correct = 0
    for i in range(len(p_test_t)):
        if pima2_predictions[i] == p_test_t[i]:
            correct += 1

    print("Pima with 6 nodes in hidden layer")
    print("Pima prediction correct = ", correct, "out of", len(p_test),
          "\nAccuracy = ", (correct / len(p_test_t) * 100))

    mlp_pima_class = MLPClassifier(solver='lbfgs',
                                   alpha=1e-5,
                                   hidden_layer_sizes=(6))
    mlp_pima_class.fit(p_train, p_train_t.ravel())
    mlp_pima_predict = mlp_pima_class.predict(p_test)

    correct = 0
    for i in range(len(p_test_t)):
        if mlp_pima_predict[i] == p_test_t[i]:
            correct += 1

    print("Pima with 6 nodes in hidden layer SKLEARN MODEL")
    print("Pima prediction correct = ", correct, "out of", len(p_test),
          "\nAccuracy = ", (correct / len(p_test_t) * 100))
    data_content = data.content
    data_label = data.label.tolist()

    count_vect = CountVectorizer(stop_words='english')

    csv_file = open(output, "w", newline='')
    writer = csv.writer(csv_file, delimiter=',')
    for clf_name in clf_names:
        if clf_name == 'lr':
            clf = LogisticRegression()
        elif clf_name == 'svm':
            # the kernel can also be 'linear', 'rbf','polynomial','sigmoid', etc.
            clf = svm.SVC(kernel='linear', probability=True)
        elif clf_name == 'mlp':
            clf = MLPClassifier(solver='lbfgs',
                                alpha=1e-5,
                                hidden_layer_sizes=(5, 2),
                                random_state=1)
        elif clf_name == 'nb':
            clf = MultinomialNB()
        elif clf_name == 'rf':
            clf = RandomForestClassifier(oob_score=True, n_estimators=30)
        else:
            print('The classifier name must be one of: lr, svm, mlp, nb, rf')

        # the input data needs to be iterable
        data_content_matrix = count_vect.fit_transform(data_content)
        #    data_content_matrix_dmr = dr.selectFromLinearSVC(data_content,data_label)
        #    data_content_matrix_dmr = dr.selectFromLinearSVC(data_content_matrix,data_label)
        #        train_content_matrix_input_dmr_smt,train_label_input_smt = get_smote_standard(train_content_matrix_input_dmr,train_label_input)
        #    data_content_matrix_dmr_smt,data_label_smt = get_smoteenn(data_content_matrix_dmr,data_label)
print(np.asarray((unique_elements, counts_elements)))

kf = KFold(n_splits=10)  # split the dataset into 10 folds: 9 for training and 1 for testing

a = 0
f = 0
p = 0
r = 0
i = 0

for train_index, test_index in kf.split(X):
    i+=1
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    clf = MLPClassifier(solver='lbfgs').fit(X_train,y_train.ravel())
    y_pred = clf.predict(X_test)
    a += clf.score(X_test,y_test)
    f += metrics.f1_score(y_test, y_pred, average='macro')
    p += metrics.precision_score(y_test, y_pred, average='macro')
    r += metrics.recall_score(y_test, y_pred, average='macro')

average_accuracy = a/i
average_f1_score = f/i
average_precision = p/i
average_recall = r/i

print('Accuracy: ')
print(average_accuracy)

print('F1 - Score:')
Example #29
def class34(filename, i):
    ''' This function performs experiment 3.4
    
    Parameters
       filename : string, the name of the npz file from Task 2
       i: int, the index of the supposed best classifier (from task 3.1)  
        '''
    i = i - 1
    data = np.load(filename)["arr_0"]

    X = []
    y = []

    for d in data:
        X.append(d[0:173])
        y.append(d[173])

    X = np.array(X)
    y = np.array(y)

    classifiers = [
        SVC(kernel='linear', max_iter=1000),
        SVC(gamma=2, max_iter=1000),
        RandomForestClassifier(max_depth=5, n_estimators=10),
        MLPClassifier(alpha=0.05),
        AdaBoostClassifier()
    ]

    kf = KFold(n_splits=5, shuffle=True)

    # global list to store result
    fold_test_result_list = []
    p_values = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        accuracy_list = []
        for clf in classifiers:
            classifier = clone(clf)

            classifier.fit(X_train, y_train)
            prediction = classifier.predict(X_test)
            c_m = confusion_matrix(y_test, prediction)
            accuracy_list.append(accuracy(c_m))

        fold_test_result_list.append(accuracy_list)

    vertical_result = np.transpose(fold_test_result_list)

    # compare the result with the best classifier
    for j in range(len(classifiers)):
        if i != j:
            S = stats.ttest_rel(vertical_result[i], vertical_result[j])
            p_values.append(S[1])

    with open('a1_3.4.csv', 'w', newline='') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        for result in fold_test_result_list:
            spamwriter.writerow(result)
        spamwriter.writerow(p_values)

        spamwriter.writerow([
            "The accuracy of the cross-validation's result may lead different result as part3.1 "
            +
            "It could be caused by the variance of the data. In the 3.1, there are only one set of training"
            " and testing data. The form of the trianing set may lead to bias."
        ])
                    #                     data.append(tuple(["Dataset-" + str(c),"","","","","","","","","","","","","","","","","","","",""]))
                    #                     data2.append(tuple(["Dataset-" + str(c),"","","","","","","","","","","","","","","","","","","",""]))

                    row = ["$P$"]
                    #                     print(" &","$P$", end="")
                    for sampler in samplers_array_all:
                        t = ""
                        #                         precision, recall, f1, rocauc, kappa, gmean = evalSampling(sampler, RandomForestClassifier(max_depth=2, random_state=0), Xtrain, Xtest, ytrain, ytest)
                        #                         print(precision)
                        try:
                            precision, recall, f1, rocauc, kappa, gmean = evalSampling(
                                sampler,
                                MLPClassifier(solver='adam',
                                              alpha=1e-5,
                                              hidden_layer_sizes=(15, 10),
                                              batch_size=18,
                                              max_iter=300,
                                              random_state=1), Xtrain, Xtest,
                                ytrain, ytest)
                            #                             print(" &", round(precision,3), end="")
                            t = str(round(precision, 3))
                        except:
                            #                             print(" &", "N/A", end="")
                            t = "N/A"

                        row.append(t)

#                     print(row)
                    data.append(tuple(row))

                    #                     print("\\\\")