def test_logistic_regression_learning_rate_schedules(self, learning_rate_schedules, get_binary_classification_data):
     """Fit with a learning-rate schedule and check the logged rate moves.

     The ``learning_rate_schedules`` fixture value is passed straight through
     as the estimator's ``learning_rate`` argument (presumably a schedule
     object; confirm against the fixture definition).
     """
     features, targets = get_binary_classification_data
     model = LogisticRegression(
         epochs=200,
         checkpoint=10,
         learning_rate=learning_rate_schedules,
         patience=40,
     )
     model.fit(features, targets)

     # Confirm learning rates decreased between the first and last logged epoch
     logged_rates = model.history.epoch_log.get('learning_rate')
     first_rate = logged_rates[0]
     last_rate = logged_rates[-1]
     assert first_rate > last_rate, "Learning rate didn't decrease"
     # The first logged rate must also differ from the estimator's ``eta``.
     assert first_rate != model.eta, "Learning rate didn't change"
 def test_logistic_regression_validation(self, get_binary_classification_data):
     """Check that ``fit`` raises ``ValueError`` for unsupported settings.

     Two configurations are exercised: ``metric='mean'`` and
     ``cost='quadratic'``. The estimator is constructed outside the
     ``pytest.raises`` context, so only ``fit`` is expected to raise.
     """
     features, targets = get_binary_classification_data
     invalid_settings = ({'metric': 'mean'}, {'cost': 'quadratic'})
     for bad_kwargs in invalid_settings:
         model = LogisticRegression(epochs=50, **bad_kwargs)
         with pytest.raises(ValueError):
             model.fit(features, targets)
Exemple #3
0
def get_classes():
    """Build a ``Classes`` container holding one instance of each estimator.

    Every estimator is instantiated with its default arguments and registered
    through ``Classes.add_class`` in the order listed below.

    Returns:
        The populated ``Classes`` object.
    """
    registry = Classes()
    estimators = (
        LinearRegression(),
        LassoRegression(),
        RidgeRegression(),
        ElasticNetRegression(),
        LogisticRegression(),
        MultinomialLogisticRegression(),
    )
    for estimator in estimators:
        registry.add_class(estimator)
    return registry
 def test_logistic_regression_predict(self, get_binary_classification_data):
     """Check prediction shapes and that the score lies strictly in (0.3, 1)."""
     features, targets = get_binary_classification_data
     shape_msg = "y_pred has wrong shape for binary problem"

     model = LogisticRegression(epochs=100, learning_rate=0.01, checkpoint=10)
     model.fit(features, targets)
     n_samples = targets.shape[0]

     # Private prediction path: one value per sample.
     raw_output = model._predict(features)
     assert raw_output.shape == (n_samples,), shape_msg

     # Public prediction path and score.
     predictions = model.predict(features)
     accuracy = model.score(features, targets)
     assert predictions.shape == (n_samples,), shape_msg
     assert accuracy > 0.3, "Accuracy below 0.3"
     assert accuracy < 1, "Accuracy is greater than or equal to 1"
 def test_logistic_regression_history_w_early_stop(self, get_binary_classification_data):
     """Check the history logs of a fit run with ``EarlyStopImprovement``.

     Verifies that every epoch-level series has ``total_epochs`` entries, that
     the last logged theta matches the estimator's final theta, that train and
     validation costs end lower than they started, and that every batch-level
     series has ``total_batches`` entries.
     """
     features, targets = get_binary_classification_data
     stopper = EarlyStopImprovement()
     model = LogisticRegression(epochs=10, early_stop=stopper)
     model.fit(features, targets)
     history = model.history

     # Test epoch history: (log key, wording used in the failure message)
     epoch_series = (
         ('epoch', 'epochs'),
         ('learning_rate', 'learning rates'),
         ('theta', 'thetas'),
         ('train_cost', 'train costs'),
         ('val_cost', 'val costs'),
         ('train_score', 'train score'),
         ('val_score', 'val score'),
     )
     for key, label in epoch_series:
         assert history.total_epochs == len(history.epoch_log.get(key)), \
             "number of {} in log doesn't match epochs".format(label)

     final_logged_theta = history.epoch_log.get('theta')[-1]
     assert all(np.equal(model.theta, final_logged_theta)), "Last theta in log doesn't equal final theta."

     # Costs must end lower than they started. The analogous comparisons on
     # train_score / val_score are disabled in this test.
     for cost_key in ('train_cost', 'val_cost'):
         series = history.epoch_log.get(cost_key)
         assert series[0] > series[-1], "{} does not decrease".format(cost_key)

     # Test batch history: (log key, wording used in the failure message)
     batch_series = (
         ('batch', 'batches'),
         ('batch_size', 'batch sizes'),
         ('theta', 'thetas'),
         ('train_cost', 'train_costs'),
     )
     for key, label in batch_series:
         assert history.total_batches == len(history.batch_log.get(key)), \
             "number of {} in log doesn't match total batches".format(label)
 def test_logistic_regression_name(self, get_binary_classification_data):
     """Check the estimator's ``name`` after fitting with each batch setting.

     Fitting without ``batch_size`` is expected to report batch gradient
     descent, ``batch_size=1`` stochastic, and ``batch_size=32`` minibatch.
     """
     features, targets = get_binary_classification_data
     cases = (
         ({}, 'Logistic Regression with Batch Gradient Descent'),
         ({'batch_size': 1}, 'Logistic Regression with Stochastic Gradient Descent'),
         ({'batch_size': 32}, 'Logistic Regression with Minibatch Gradient Descent'),
     )
     for extra_kwargs, expected_name in cases:
         model = LogisticRegression(epochs=50, **extra_kwargs)
         model.fit(features, targets)
         assert model.name == expected_name
 def test_logistic_regression_early_stop_from_estimator_val_score(self, get_binary_classification_data):
     """With ``early_stop=True`` and a metric, the monitor should be 'val_score'."""
     features, targets = get_binary_classification_data
     settings = dict(epochs=5000, early_stop=True, val_size=0.3, metric='accuracy')
     model = LogisticRegression(**settings)
     model.fit(features, targets)
     monitored = model.convergence_monitor.monitor
     assert monitored == 'val_score', "Estimator is not sending correct metric"
 def test_logistic_regression_early_stop_from_estimator_train_cost(self, get_binary_classification_data):
     """With ``early_stop=False`` and no metric, the monitor should be 'train_cost'."""
     features, targets = get_binary_classification_data
     settings = dict(epochs=5000, early_stop=False, val_size=0.3, metric=None)
     model = LogisticRegression(**settings)
     model.fit(features, targets)
     monitored = model.convergence_monitor.monitor
     assert monitored == 'train_cost', "Estimator is not sending correct metric"
# ---------------------------------------------------------------------------- #
#                                   DATA                                       #
# ---------------------------------------------------------------------------- #
#%%
# Data: load the breast cancer dataset as a (features, target) pair.
X, y = datasets.load_breast_cancer(return_X_y=True)
# Data transformation: fit a StandardScaler on the full feature matrix and
# apply it to that same matrix.
# NOTE(review): the scaler is fitted before the train/test split, so the test
# rows contribute to the scaling statistics - confirm this is intended.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
# Split the scaled data with train_test_split's default proportions.
# NOTE(review): no random_state is passed, so the split is presumably not
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y)
# ---------------------------------------------------------------------------- #
#                            LOGISTIC REGRESSION                               #
# ---------------------------------------------------------------------------- #
#%%
# Logistic Regression: fit the classifier on the training split, then keep the
# fit history and the per-epoch training cost series for the learning curve.
clf = LogisticRegression(epochs=500, learning_rate=0.05, metric='accuracy')
clf.fit(X_train, y_train)
history = clf.history
costs = history.epoch_log['train_cost']
# ---------------------------------------------------------------------------- #
#                            LEARNING CURVE                                    #
# ---------------------------------------------------------------------------- #
#%%
# Learning Curve: line trace of training cost against position in the log.
# NOTE(review): np.linspace(0, n, n) yields n evenly spaced points over
# [0, n], so the x values are not exact integer epoch indices; np.arange may
# be what was intended - confirm before changing.
data = go.Scatter(x=np.linspace(0, len(costs), len(costs)),
                  y=costs,
                  mode='lines',
                  line=dict(color='steelblue'))
layout = go.Layout(title='Wisconsin Breast Cancer Dataset Learning Curve',
                   xaxis_title="Epochs",
                   yaxis_title='Cross-Entropy Cost',