Example #1
def estimate_logReg_true_test_error(xs_train,
                                    ys_train,
                                    xs_test,
                                    ys_test,
                                    num_features,
                                    best_c_param_value=1e12,
                                    score_type='brier_score'):

    logReg = skl_logistic_regression(
        C=best_c_param_value, tol=1e-10
    )  # Initialise the Logistic Regression Classifier, with the best regularization C parameter found
    logReg.fit(
        xs_train[:, :num_features], ys_train
    )  # Fit the Logistic Regression Classifier on the Training Set
    ys_logReg_predict_prob = logReg.predict_proba(
        xs_test[:, :num_features]
    )[:, 1]  # Predict the probability that each Sample of the Testing Set belongs to the positive Class
    logReg_predict_classes_xs_test = logReg.predict(
        xs_test[:, :num_features]
    )  # Predict the Classes of the Testing Set, using the same selected Features the Classifier was fitted on

    # Estimate the Testing Error, based on a certain type of Scoring

    # 1) Brier Scoring
    if (score_type == 'brier_score'):
        estimated_true_test_error = skl_brier_score_loss(
            ys_test, ys_logReg_predict_prob
        )  # Estimate the Testing Error, related to its Brier Score

    # 2) Logistic Regression Scoring (Accuracy)
    if (score_type == 'logistic_regression_score'):
        estimated_accuracy_test = logReg.score(
            xs_test[:, :num_features], ys_test
        )  # Compute the Testing Set's Accuracy (Score), for the Logistic Regression
        estimated_true_test_error = (
            1 - estimated_accuracy_test
        )  # Compute the Testing Error, regarding its Accuracy (Score)

    num_samples_test_set = len(
        xs_test)  # The Number of Samples, from the Testing Set
    logReg_num_incorrect_predictions = 0  # The Real Number of Incorrect Predictions, regarding the Logistic Regression Classifier

    # For each Sample of the Testing Set
    for current_sample_test in range(num_samples_test_set):

        # If the predicted Class for the current Sample of the Testing Set differs from its real Class,
        # it is counted as an incorrect Prediction by the Logistic Regression Classifier
        if (logReg_predict_classes_xs_test[current_sample_test] !=
                ys_test[current_sample_test]):
            logReg_num_incorrect_predictions += 1

    # Return the Predictions of the Samples,
    # the Real Number of Incorrect Predictions and the Estimated True/Test Error, for the Logistic Regression Classifier
    return logReg_predict_classes_xs_test, logReg_num_incorrect_predictions, estimated_true_test_error
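A minimal usage sketch for the function above, assuming the aliased scikit-learn imports the snippet relies on and a small synthetic dataset (the aliases, the data, and the chosen parameter values are assumptions, not part of the original example):

# Hypothetical usage of estimate_logReg_true_test_error; imports and data are assumed.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression as skl_logistic_regression
from sklearn.metrics import brier_score_loss as skl_brier_score_loss

# Build a small synthetic binary classification problem and split it
xs, ys = make_classification(n_samples=200, n_features=6, random_state=0)
xs_train, xs_test, ys_train, ys_test = train_test_split(
    xs, ys, test_size=0.3, random_state=0)

# Estimate the testing error with the first 4 features, scored by the Brier Score
predictions, num_errors, test_error = estimate_logReg_true_test_error(
    xs_train, ys_train, xs_test, ys_test,
    num_features=4, best_c_param_value=1e12, score_type='brier_score')
print(num_errors, test_error)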
Example #2
def compute_logReg_errors(xs,
                          ys,
                          train_idx,
                          valid_idx,
                          c_param_value,
                          num_features,
                          score_type='brier_score'):

    # Initialise the Logistic Regression, from SciKit-Learn's Linear Model module
    logReg = skl_logistic_regression(C=c_param_value, tol=1e-10)

    # Fit the Logistic Regression
    logReg.fit(xs[train_idx, :num_features], ys[train_idx])

    # Compute the predicted probability of the positive Class for every sample;
    # the Training and Validation subsets are selected below via train_idx and valid_idx
    ys_logReg_predict_prob = logReg.predict_proba(xs[:, :num_features])[:, 1]

    # Compute the Training and Validation Errors, based on a certain type of Scoring:
    # 1) Based on Brier Score

    if (score_type == 'brier_score'):
        logReg_train_error = skl_brier_score_loss(
            ys[train_idx], ys_logReg_predict_prob[train_idx]
        )  # Compute the Training Error, related to its Brier Score
        logReg_valid_error = skl_brier_score_loss(
            ys[valid_idx], ys_logReg_predict_prob[valid_idx]
        )  # Compute the Validation Error, related to its Brier Score

    # 2) Based on Logistic Regression Score

    if (score_type == 'logistic_regression_score'):
        logReg_accuracy_train = logReg.score(
            xs[train_idx, :num_features], ys[train_idx]
        )  # Compute the Training Set's Accuracy (Score), for the Logistic Regression
        logReg_accuracy_valid = logReg.score(
            xs[valid_idx, :num_features], ys[valid_idx]
        )  # Compute the Validation Set's Accuracy (Score), for the Logistic Regression
        logReg_train_error = (
            1 - logReg_accuracy_train
        )  # Compute the Training Error, regarding its Accuracy (Score)
        logReg_valid_error = (
            1 - logReg_accuracy_valid
        )  # Compute the Validation Error, regarding its Accuracy (Score)

    # Return the Training and Validation Errors, for the Logistic Regression
    return logReg_train_error, logReg_valid_error
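Since compute_logReg_errors receives explicit train/validation index arrays, it is presumably meant to be driven by a cross-validation loop. A hypothetical sketch of such a driver (the fold setup, data, aliases, and parameter values are assumptions, not part of the original example):

# Hypothetical cross-validation driver for compute_logReg_errors; everything below is assumed.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression as skl_logistic_regression
from sklearn.metrics import brier_score_loss as skl_brier_score_loss

xs, ys = make_classification(n_samples=300, n_features=6, random_state=0)

train_errors, valid_errors = [], []
for train_idx, valid_idx in StratifiedKFold(n_splits=5).split(xs, ys):
    train_error, valid_error = compute_logReg_errors(
        xs, ys, train_idx, valid_idx,
        c_param_value=1.0, num_features=4, score_type='brier_score')
    train_errors.append(train_error)
    valid_errors.append(valid_error)

# Average the Training and Validation Errors over the folds
print(np.mean(train_errors), np.mean(valid_errors))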
Example #3
 def fit(self, train_fs, train_labels, valid_fs, valid_labels, test_fs,
         test_labels):
     self.model = skl_logistic_regression(
         penalty=self.params['penalty'],
         dual=self.params['dual'],
         tol=self.params['tol'],
         C=self.params['C'],
         verbose=self.params['verbose'],
         max_iter=self.params['max_iter'],
         solver=self.params['solver'],
         n_jobs=self.params['n_jobs'],
         multi_class=self.params['multi_class'])
     self.model.fit(X=train_fs, y=train_labels)
     train_preds = self.model.predict_proba(train_fs)[:, 1]
     valid_preds = self.model.predict_proba(valid_fs)[:, 1]
     test_preds = self.model.predict_proba(test_fs)[:, 1]
     return train_preds, valid_preds, test_preds
Example #4
 def fit(self,
         train_fs, train_labels,
         valid_fs, valid_labels,
         test_fs, test_labels):
     self.model = skl_logistic_regression(penalty=self.params['penalty'],
                                          dual=self.params['dual'],
                                          tol=self.params['tol'],
                                          C=self.params['C'],
                                          verbose=self.params['verbose'],
                                          max_iter=self.params['max_iter'],
                                          solver=self.params['solver'],
                                          n_jobs=self.params['n_jobs'],
                                          multi_class=self.params['multi_class'])
     self.__lock()
     self.model.fit(X=train_fs, y=train_labels)
     self.__unlock()
     train_preds = self.model.predict_proba(train_fs)[:, 1]
     valid_preds = self.model.predict_proba(valid_fs)[:, 1]
     test_preds = self.model.predict_proba(test_fs)[:, 1]
     return train_preds, valid_preds, test_preds
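Both fit variants expect a wrapper object whose self.params dictionary holds the LogisticRegression keyword arguments, and the [:, 1] slicing implies a binary classification task. A hypothetical params dictionary that would satisfy these constructor calls (the concrete values are assumptions, not taken from the original example):

# Hypothetical contents of self.params for the fit methods above; values are assumed.
params = {
    'penalty': 'l2',        # regularization type
    'dual': False,          # primal formulation
    'tol': 1e-4,            # optimisation stopping tolerance
    'C': 1.0,               # inverse regularization strength
    'verbose': 0,
    'max_iter': 1000,
    'solver': 'lbfgs',      # 'lbfgs' supports the 'l2' penalty
    'n_jobs': None,
    'multi_class': 'auto',
}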
Example #5
# Loop to iterate over all the Iris' Classes
for ys_class_iris in range(3):

    # Create an array of binary Classes,
    # for the current Logistic Regression Classifier
    ys_classes_current_logReg = np.zeros(num_samples)

    # One-vs-Rest labelling: the current Iris' Class is the positive Class (1),
    # and all the remaining Classes form the negative Class (0)
    ys_classes_current_logReg[ys_classes == ys_class_iris] = 1
    ys_classes_current_logReg[ys_classes != ys_class_iris] = 0
   
    
    # Initialise the current Logistic Regression Classifier,
    # for the current Settings
    logReg = skl_logistic_regression(C=c_param_value, tol=1e-10)              
    
    # Fit the Logistic Regression Classifier with the Training Set
    logReg.fit(xs_features, ys_classes_current_logReg)      

    # Append the current Logistic Regression Classifier,
    # for the current Settings
    logRegs_classifiers.append(logReg)                     


# Plot the customized One-vs-Rest scheme of Logistic Regression Classifiers,
# for the Iris Dataset classification problem
plot_logistic_regressions(logRegs_classifiers, "one-vs-rest-logistic-regression-iris-dataset.png")
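Once all three binary classifiers are fitted, a single One-vs-Rest prediction can be obtained by picking, for each sample, the Iris' Class whose classifier assigns it the highest positive-class probability. A hypothetical combination step (not part of the original example; it assumes the positive Class encodes the current Iris' Class, as in the labelling above):

# Hypothetical One-vs-Rest prediction step; this combination is assumed, not original.
import numpy as np

probs_per_class = np.column_stack([
    logReg.predict_proba(xs_features)[:, 1] for logReg in logRegs_classifiers
])
ovr_predicted_classes = np.argmax(probs_per_class, axis=1)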

# Initialise the One-vs-Rest Algorithm,
# from SciKit-Learn's Python Library, for Support Vector Machines,