def test_multinomial_logistic_regression_early_stop(self, get_multinomial_classification_data):
    """Early stopping should halt training before `epochs`, and every
    history log series must stay consistent with the epochs/batches run."""
    X, y = get_multinomial_classification_data
    es = EarlyStopImprovement(precision=0.001, patience=5)
    clf = MultinomialLogisticRegression(epochs=100, early_stop=es, checkpoint=10)
    clf.fit(X, y)
    # Confirm early stop happened
    assert clf.history.total_epochs < clf.epochs, "Early stop didn't happen."
    # Each epoch-level series must hold exactly one entry per completed epoch.
    epoch_series = [('epoch', 'epochs'), ('learning_rate', 'learning rates'),
                    ('theta', 'thetas'), ('train_cost', 'train costs'),
                    ('val_cost', 'val costs'), ('train_score', 'train score'),
                    ('val_score', 'val score')]
    for key, noun in epoch_series:
        assert clf.history.total_epochs == len(clf.history.epoch_log.get(key)), \
            "number of %s in log doesn't match epochs" % noun
    # FIX: dropped redundant `== True` comparison on np.array_equal's bool result.
    assert np.array_equal(clf.theta, clf.history.epoch_log.get('theta')[-1]), \
        "Last theta in log doesn't equal final theta."
    # Test Performance Trends
    assert clf.history.epoch_log.get('train_cost')[0] > clf.history.epoch_log.get('train_cost')[-1], "Training costs didn't decrease"
    # NOTE(review): score-trend assertions were already disabled upstream; kept disabled.
    # assert clf.history.epoch_log.get('train_score')[0] < clf.history.epoch_log.get('train_score')[-1], "Training score didn't increase"
    assert clf.history.epoch_log.get('val_cost')[0] > clf.history.epoch_log.get('val_cost')[-1], "Validation costs didn't decrease"
    # assert clf.history.epoch_log.get('val_score')[0] < clf.history.epoch_log.get('val_score')[-1], "Validation score didn't increase"
    # Each batch-level series must hold exactly one entry per processed batch.
    batch_series = [('batch', 'batches'), ('batch_size', 'batch sizes'),
                    ('theta', 'thetas'), ('train_cost', 'train_costs')]
    for key, noun in batch_series:
        assert clf.history.total_batches == len(clf.history.batch_log.get(key)), \
            "number of %s in log doesn't match total batches" % noun
 def test_multinomial_logistic_regression_learning_rate_schedules(self, learning_rate_schedules, get_multinomial_classification_data):
     """A learning-rate schedule must decay the rate over the run."""
     X, y = get_multinomial_classification_data
     clf = MultinomialLogisticRegression(epochs=50, checkpoint=10,
                                         learning_rate=learning_rate_schedules)
     clf.fit(X, y)
     rates = clf.history.epoch_log.get('learning_rate')
     # The logged rate should shrink from first to last epoch...
     assert rates[0] > rates[-1], "Learning rate didn't decrease"
     # ...and the final effective eta should differ from the initial rate.
     assert rates[0] != clf.eta, "Learning rate didn't change"
 def test_multinomial_logistic_regression_init_weights(self, get_multinomial_classification_data):
     """After fitting, theta is (n_features + bias term, n_classes)."""
     X, y = get_multinomial_classification_data
     expected_shape = (X.shape[1] + 1, len(np.unique(y)))
     clf = MultinomialLogisticRegression(epochs=50)
     clf.fit(X, y)
     assert clf.theta.shape == expected_shape, "theta shape incorrect for multi classification"
 def test_multinomial_logistic_regression_prep_data(self, get_multinomial_classification_data):
     """Data prep keeps the row count and adds a bias column to the design matrix."""
     X, y = get_multinomial_classification_data
     clf = MultinomialLogisticRegression(epochs=50,
                                         cost='categorical_cross_entropy',
                                         val_size=0,
                                         early_stop=False)
     clf.fit(X, y)
     # Same number of observations after prep...
     assert clf.X.shape[0] == X.shape[0], "X.shape[0] incorrect in prep data"
     # ...and exactly one extra (bias) column in the design matrix.
     assert clf._X_design.shape[1] == X.shape[1] + 1, "X.shape[1] incorrect in prep data"
 def test_multinomial_logistic_regression_validation(self, get_multinomial_classification_data):
     """Fit rejects metrics/costs that don't apply to multinomial classification."""
     X, y = get_multinomial_classification_data
     # 'mse' is a regression metric; a classifier must refuse it.
     bad_metric = MultinomialLogisticRegression(epochs=50, metric='mse')
     with pytest.raises(ValueError):
         bad_metric.fit(X, y)
     # Binary cross-entropy is invalid for a multi-class target.
     bad_cost = MultinomialLogisticRegression(epochs=50, cost='binary_cross_entropy')
     with pytest.raises(ValueError):
         bad_cost.fit(X, y)
# Example #6
def get_classes():
    """Build and return a Classes registry holding one instance of every
    regression estimator in the package."""
    registry = Classes()
    for estimator in (LinearRegression(), LassoRegression(), RidgeRegression(),
                      ElasticNetRegression(), LogisticRegression(),
                      MultinomialLogisticRegression()):
        registry.add_class(estimator)
    return registry
 def test_multinomial_logistic_regression_predict(self, get_multinomial_classification_data):
     """Prediction shapes, cost/score trends, and accuracy bounds all hold."""
     X, y = get_multinomial_classification_data
     clf = MultinomialLogisticRegression(epochs=1000,
                                         cost='categorical_cross_entropy',
                                         patience=40)
     clf.fit(X, y)
     # Internal _predict yields one column per class (3 classes here).
     probabilities = clf._predict(X)
     assert probabilities.shape == (y.shape[0], 3), "Shape of prediction is not correct."
     labels = clf.predict(X)
     score = clf.score(X, y)
     # Public predict collapses to a 1-D label vector.
     assert labels.shape == (y.shape[0],), "Shape of prediction is not correct."
     log = clf.history.epoch_log
     assert log.get('train_cost')[0] > log.get('train_cost')[-1], "Training costs didn't decrease"
     assert log.get('train_score')[0] < log.get('train_score')[-1], "Training score didn't increase"
     assert score >= 0.5, "Accuracy below 0.5"
     assert score < 1, "Accuracy is greater than or equal to 1"
 def test_multinomial_logistic_regression_early_stop_from_estimator_val_score(self, get_multinomial_classification_data):
     """With early_stop=True plus a metric and val split, the convergence
     monitor should track the validation score."""
     X, y = get_multinomial_classification_data
     clf = MultinomialLogisticRegression(epochs=5000, early_stop=True,
                                         val_size=0.3, metric='accuracy')
     clf.fit(X, y)
     assert clf.convergence_monitor.monitor == 'val_score', "Estimator is not sending correct metric"
 def test_multinomial_logistic_regression_early_stop_from_estimator_train_cost(self, get_multinomial_classification_data):
     """With early stopping off and no metric, the convergence monitor
     should fall back to tracking the training cost."""
     X, y = get_multinomial_classification_data
     clf = MultinomialLogisticRegression(epochs=5000, early_stop=False,
                                         val_size=0.3, metric=None)
     clf.fit(X, y)
     assert clf.convergence_monitor.monitor == 'train_cost', "Estimator is not sending correct metric"
# Example #10
 def test_multinomial_logistic_regression_name(self, get_multinomial_classification_data):
     """The estimator's name reflects the gradient-descent variant implied
     by batch_size: batch (None), stochastic (1), or minibatch (>1)."""
     X, y = get_multinomial_classification_data
     variants = (
         ({}, 'Multinomial Logistic Regression with Batch Gradient Descent'),
         ({'batch_size': 1}, 'Multinomial Logistic Regression with Stochastic Gradient Descent'),
         ({'batch_size': 32}, 'Multinomial Logistic Regression with Minibatch Gradient Descent'),
     )
     for extra_kwargs, expected_name in variants:
         clf = MultinomialLogisticRegression(epochs=50,
                                             cost='categorical_cross_entropy',
                                             **extra_kwargs)
         clf.fit(X, y)
         assert clf.name == expected_name
#%%
# --- Demo script: multinomial logistic regression on the wine dataset ---
# Load wine dataset
X, y = datasets.load_wine(return_X_y=True)
# Standardize features (zero mean / unit variance) before gradient descent
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
# Split data into training and test set
# NOTE(review): `seed=` suggests a project-local train_test_split wrapper;
# sklearn's function takes `random_state=` instead — confirm the import.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, seed=5)
# ---------------------------------------------------------------------------- #
#                            LOGISTIC REGRESSION                               #
# ---------------------------------------------------------------------------- #
#%%
# Create Multinomial Logistic Regression classifier tracking accuracy
clf = MultinomialLogisticRegression(epochs=500,
                                    learning_rate=0.001,
                                    metric='accuracy',
                                    seed=5)
# Train the model
clf.fit(X_train, y_train)
# ---------------------------------------------------------------------------- #
#                            LEARNING CURVE                                    #
# ---------------------------------------------------------------------------- #
#%%
# Plot the learning curve: training cost per epoch, read from the fit history
history = clf.history
costs = history.epoch_log['train_cost']
# x-axis is just the epoch index; plotly draws a line through the costs
data = go.Scatter(x=np.linspace(0, len(costs), len(costs)),
                  y=costs,
                  mode='lines',
                  line=dict(color='steelblue'))
layout = go.Layout(title='Wine Dataset Learning Curve',