Ejemplo n.º 1
0
# Hold out 20% of the data for testing (seeded for reproducibility).
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2, random_state=1)
cutils.plot_data_2d_classification(X_train, y_train)

#perceptron algorithm
# Pipeline: polynomial feature expansion followed by a linear perceptron.
stages = [
            ('features', preprocessing.PolynomialFeatures()),
            ('clf', linear_model.Perceptron(max_iter=1000))
        ]
perceptron_pipeline = pipeline.Pipeline(stages)
# BUG FIX: PolynomialFeatures has no 'gamma' parameter, so grid-searching
# 'features__gamma' raises an invalid-parameter error. Tune the polynomial
# 'degree' instead (the step's actual hyper-parameter).
perceptron_pipeline_grid  = {'features__degree':[2, 3]}
pipeline_object = comutils.grid_search_best_model(perceptron_pipeline, perceptron_pipeline_grid, X_train, y_train)
final_estimator = pipeline_object.named_steps['clf']
print(final_estimator.intercept_)
print(final_estimator.coef_)
cutils.plot_model_2d_classification(pipeline_object, X_train, y_train)

#logistic regression algorithm
# Pipeline: polynomial feature expansion followed by logistic regression.
stages = [
            ('features', preprocessing.PolynomialFeatures()),
            ('clf', linear_model.LogisticRegression())
        ]

lr_pipeline = pipeline.Pipeline(stages)
# BUG FIX: PolynomialFeatures exposes 'degree', not 'gamma'; searching
# 'features__gamma' would make the grid search fail before fitting anything.
lr_pipeline_grid  = {'features__degree':[2, 3]}
pipeline_object = comutils.grid_search_best_model(lr_pipeline, lr_pipeline_grid, X_train, y_train)
final_estimator = pipeline_object.named_steps['clf']
print(final_estimator.intercept_)
cutils.plot_model_2d_classification(pipeline_object, X_train, y_train)

#linear svm algorithm
Ejemplo n.º 2
0
                                                            weights=[0.5, 0.5],
                                                            class_sep=2)
# Build a non-linear synthetic 2-D classification dataset and visualise it.
X, y = cutils.generate_nonlinear_synthetic_data_classification2(
    n_samples=1000, noise=0.1)
cutils.plot_data_2d_classification(X, y)

# Hold out 20% of the samples for testing; fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, random_state=1, test_size=0.2)
cutils.plot_data_2d_classification(X_train, y_train)

#grid search for parameter values
# Tune a decision tree over the split criterion and the maximum depth (1..8).
dt_estimator = tree.DecisionTreeClassifier()
dt_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [*range(1, 9)],
}
final_estimator = cutils.grid_search_best_model(dt_estimator, dt_grid,
                                                X_train, y_train)
cutils.plot_model_2d_classification(final_estimator, X_train, y_train)

# Tune k-nearest neighbours over neighbourhood size (1..20) and vote weighting.
knn_estimator = neighbors.KNeighborsClassifier()
knn_grid = {'n_neighbors': [*range(1, 21)], 'weights': ['uniform', 'distance']}
final_estimator = cutils.grid_search_best_model(knn_estimator, knn_grid, X_train,
                                                y_train)
cutils.plot_model_2d_classification(final_estimator, X_train, y_train)

# Random-forest search space: depths 5..9 and 1..81 trees in steps of 20.
rf_estimator = ensemble.RandomForestClassifier()
rf_grid = {
    'n_estimators': [*range(1, 100, 20)],
    'max_depth': [*range(5, 10)],
}
Ejemplo n.º 3
0
# Build and train a multi-class classifier with SGD + categorical cross-entropy.
model = getModel3()
model.compile(optimizer='sgd',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(x=X_train,
                    y=y_train1,
                    verbose=3,
                    epochs=100,
                    batch_size=10,
                    validation_split=0.1)
print(model.summary())
print(model.get_weights())
kutils.plot_loss(history)
cutils.plot_model_2d_classification(model, X_train, y_train, use_keras=True)

# BUG FIX: Sequential.predict_classes() was deprecated and removed in
# TensorFlow 2.6. For a multi-class (softmax) model it was equivalent to
# taking the argmax over the predicted class probabilities, so do that.
y_pred = model.predict(X_test).argmax(axis=-1)
cutils.performance_metrics_hard_multiclass_classification(model,
                                                          X_test,
                                                          y_test,
                                                          use_keras=True)

# Retrain the same model, this time with a larger mini-batch (32 instead of 10).
fit_params = {
    'x': X_train,
    'y': y_train1,
    'verbose': 3,
    'epochs': 100,
    'batch_size': 32,
    'validation_split': 0.1,
}
history = model.fit(**fit_params)
print(model.summary())
print(model.get_weights())
# 80/20 train/test split with a fixed seed, then visualise the training data.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, random_state=1, test_size=0.2)
cutils.plot_data_2d_classification(X_train, y_train)

#perceptron algorithm
# RBF kernel feature map followed by a linear perceptron; tune the kernel width.
perceptron_pipeline = pipeline.Pipeline([
    ('features', kutils.KernelTransformer('rbf')),
    ('clf', linear_model.Perceptron(max_iter=1000)),
])
perceptron_pipeline_grid = {'features__gamma': [0.1, 0.01, 0.2]}
pipeline_object = cutils.grid_search_best_model(
    perceptron_pipeline, perceptron_pipeline_grid, X_train, y_train)
final_estimator = pipeline_object.named_steps['clf']
print(final_estimator.intercept_)
print(final_estimator.coef_)
cutils.plot_model_2d_classification(pipeline_object, X_train, y_train)

#logistic regression algorithm
# RBF kernel feature map followed by logistic regression; tune the kernel width.
lr_pipeline = pipeline.Pipeline([
    ('features', kutils.KernelTransformer('rbf')),
    ('clf', linear_model.LogisticRegression()),
])
lr_pipeline_grid = {'features__gamma': [0.1, 1, 5, 10]}
pipeline_object = cutils.grid_search_best_model(
    lr_pipeline, lr_pipeline_grid, X_train, y_train)
final_estimator = pipeline_object.named_steps['clf']
print(final_estimator.intercept_)
cutils.plot_model_2d_classification(pipeline_object, X_train, y_train)

#linear svm algorithm
stages = [('features', kutils.KernelTransformer('poly')),
Ejemplo n.º 5
0
import sys
sys.path.append("E:/utils")

import common_utils as comutils
import classification_utils as cutils
from sklearn import preprocessing, linear_model, pipeline

# Generate two synthetic non-linear 2-D classification datasets.
X, y = cutils.generate_nonlinear_synthetic_data_classification2(n_samples=1000,
                                                                noise=0.1)
# NOTE(review): the X, y produced above are immediately overwritten here, so
# only the classification3 dataset is actually used -- confirm whether the
# first call is intentional (kept for quick switching) or dead code.
X, y = cutils.generate_nonlinear_synthetic_data_classification3(n_samples=1000,
                                                                noise=0.1)

cutils.plot_data_2d_classification(X, y)

# Polynomial-feature perceptron: tune L1 penalty strength and polynomial degree.
perceptron_pipeline = pipeline.Pipeline([
    ('features', preprocessing.PolynomialFeatures()),
    ('perceptron', linear_model.Perceptron(max_iter=1000)),
])
perceptron_pipeline_grid = {
    'features__degree': [2, 3],
    'perceptron__penalty': ['l1'],
    'perceptron__alpha': [0, 0.1, 0.3, 0.5],
}
pipeline_object = comutils.grid_search_best_model(
    perceptron_pipeline, perceptron_pipeline_grid, X, y)
final_estimator = pipeline_object.named_steps['perceptron']
print(final_estimator.intercept_)
print(final_estimator.coef_)
cutils.plot_model_2d_classification(pipeline_object, X, y)
Ejemplo n.º 6
0
    n_samples=1000, n_features=2, n_classes=4, weights=[0.3, 0.3, 0.3, 0.3])
#make_classification X : array of shape [n_samples, n_features] it will features and its values,
#y : array of shape [n_samples]  The integer value or labels for class membership of each sample
# Visualise the full dataset, then split 80/20 (seeded) and visualise each part.
cutils.plot_data_2d_classification(X, y)

# train_test_split randomly partitions the arrays; test_size is the fraction
# held out for testing, random_state seeds the shuffle for reproducibility.
X_train, X_text, Y_train, Y_test = model_selection.train_test_split(
    X, y, random_state=1, test_size=0.2)
cutils.plot_data_2d_classification(X_train, Y_train)
cutils.plot_data_2d_classification(X_text, Y_test)

# Fit a k-nearest-neighbours classifier (default k) and plot decision regions.
knn_estimator = neighbors.KNeighborsClassifier()
knn_estimator.fit(X_train, Y_train)
cutils.plot_model_2d_classification(knn_estimator, X_train, Y_train)

# Score the held-out set.
y_pred = knn_estimator.predict(X_text)
metrics.accuracy_score(Y_test, y_pred)
'''
In multilabel classification, this function computes subset accuracy: 
the set of labels predicted for a sample must exactly match the corresponding set of labels in y_true
Y_test = Ground truth (correct) labels
y_pred = Predicted labels, as returned by a classifier
'''
metrics.confusion_matrix(Y_test, y_pred)
'''
By definition a confusion matrix C is such that C[i, j] is equal to the number of observations
known to be in group i but predicted to be in group j.
Thus in binary classification, the count of true negatives is C[0, 0], false negatives is C[1, 0],
true positives is C[1, 1] and false positives is C[0, 1].
Ejemplo n.º 7
0
# Cross-validated score of the (previously built) voting ensemble.
cv_scores = model_selection.cross_val_score(svoting_estimator, X_train, y_train)
print(cv_scores.mean())

# Soft voting with the random forest weighted 3x the other two members.
svoting_estimator = ensemble.VotingClassifier(
    [('dt', dt_estimator), ('knn', knn_estimator), ('rf', rf_estimator)],
    voting='soft',
    weights=[1, 1, 3])
svoting_estimator.fit(X_train, y_train)
cv_scores = model_selection.cross_val_score(svoting_estimator, X_train, y_train)
print(cv_scores.mean())

# Grid-search the voting mode and member weights with 10-fold CV; refit=True
# retrains the best combination on the full training set.
svoting_estimator = ensemble.VotingClassifier(
    [('dt', dt_estimator), ('knn', knn_estimator), ('rf', rf_estimator)])
svoting_grid = {
    'weights': [(1, 1, 1), (1, 1, 2), (1, 1, 3)],
    'voting': ['hard', 'soft'],
}
svoting_grid_estimator = model_selection.GridSearchCV(
    svoting_estimator, svoting_grid, cv=10, refit=True)
svoting_grid_estimator.fit(X_train, y_train)

print(svoting_grid_estimator.best_params_)
print(svoting_grid_estimator.best_score_)
# NOTE(review): 'hvoting_estimator' is not defined anywhere in this snippet
# (only svoting_* are) -- possibly this should plot the freshly tuned
# svoting_grid_estimator.best_estimator_; confirm against the full file.
cutils.plot_model_2d_classification(hvoting_estimator, X_train, y_train)