def estimate_logReg_true_test_error(xs_train, ys_train, xs_test, ys_test, num_features, best_c_param_value=1e12, score_type='brier_score'):
    """Fit a Logistic Regression Classifier and estimate its True/Test Error.

    The Classifier is fitted with the first ``num_features`` columns of the
    Training Set, and every prediction/scoring call on the Testing Set uses
    that same column slice, so the number of Features always matches the
    fitted model.

    Args:
        xs_train: Features of the Training Set (sliced as ``xs[:, :k]``).
        ys_train: Classes of the Training Set.
        xs_test: Features of the Testing Set (sliced as ``xs[:, :k]``).
        ys_test: Real Classes of the Testing Set.
        num_features: Number of leading Feature columns to use.
        best_c_param_value: Regularization C Parameter given to the Classifier.
        score_type: ``'brier_score'`` (Brier Score of the probabilities in
            column 1 of ``predict_proba``) or ``'logistic_regression_score'``
            (``1 - accuracy`` as reported by the Classifier's ``score``).

    Returns:
        A tuple ``(predicted_classes, num_incorrect_predictions,
        estimated_true_test_error)`` for the Testing Set.

    Raises:
        ValueError: If ``score_type`` is not one of the supported values.
    """
    # Fail early with a clear message, instead of an unbound local at return
    if score_type not in ('brier_score', 'logistic_regression_score'):
        raise ValueError("Unsupported score_type: {!r}".format(score_type))

    # Use the same Feature columns for fitting, predicting and scoring
    xs_train_selected = xs_train[:, :num_features]
    xs_test_selected = xs_test[:, :num_features]

    # Initialise and fit the Logistic Regression Classifier,
    # for the Best Regularization C Parameter found
    logReg = skl_logistic_regression(C=best_c_param_value, tol=1e-10)
    logReg.fit(xs_train_selected, ys_train)

    # Predict and Classify the Samples of the Testing Set
    logReg_predict_classes_xs_test = logReg.predict(xs_test_selected)

    # Estimate the Testing Error, based on the requested type of Scoring
    if score_type == 'brier_score':
        # 1) Brier Scoring, over the predicted probabilities (column 1)
        ys_logReg_predict_prob = logReg.predict_proba(xs_test_selected)[:, 1]
        estimated_true_test_error = skl_brier_score_loss(ys_test, ys_logReg_predict_prob)
    else:
        # 2) Logistic Regression Scoring: the error is (1 - accuracy)
        estimated_accuracy_test = logReg.score(xs_test_selected, ys_test)
        estimated_true_test_error = (1 - estimated_accuracy_test)

    # Count the Samples of the Testing Set whose predicted Class
    # differs from the Real Class
    logReg_num_incorrect_predictions = sum(
        1
        for predicted_class, real_class in zip(logReg_predict_classes_xs_test, ys_test)
        if predicted_class != real_class
    )

    # Return the Predictions of the Samples, the Real Number of Incorrect
    # Predictions and the Estimated True/Test Error
    return logReg_predict_classes_xs_test, logReg_num_incorrect_predictions, estimated_true_test_error
def compute_logReg_errors(xs, ys, train_idx, valid_idx, c_param_value, num_features, score_type='brier_score'):
    """Fit a Logistic Regression on one fold and return its Training/Validation Errors.

    The Classifier is fitted with the first ``num_features`` columns of the
    Samples selected by ``train_idx``; the same column slice is used for
    every prediction/scoring call, so the number of Features always matches
    the fitted model.

    Args:
        xs: Features of all the Samples (indexed as ``xs[idx, :k]``).
        ys: Classes of all the Samples (indexed as ``ys[idx]``).
        train_idx: Index selecting the Samples of the Training Set.
        valid_idx: Index selecting the Samples of the Validation Set.
        c_param_value: Regularization C Parameter given to the Classifier.
        num_features: Number of leading Feature columns to use.
        score_type: ``'brier_score'`` (Brier Score of the probabilities in
            column 1 of ``predict_proba``) or ``'logistic_regression_score'``
            (``1 - accuracy`` as reported by the Classifier's ``score``).

    Returns:
        A tuple ``(logReg_train_error, logReg_valid_error)``.

    Raises:
        ValueError: If ``score_type`` is not one of the supported values.
    """
    # Fail early with a clear message, instead of an unbound local at return
    if score_type not in ('brier_score', 'logistic_regression_score'):
        raise ValueError("Unsupported score_type: {!r}".format(score_type))

    # Initialise and fit the Logistic Regression with the Training Samples
    logReg = skl_logistic_regression(C=c_param_value, tol=1e-10)
    logReg.fit(xs[train_idx, :num_features], ys[train_idx])

    if score_type == 'brier_score':
        # 1) Based on Brier Score: predict the probabilities (column 1)
        #    for all the Samples once, then select each subset
        ys_logReg_predict_prob = logReg.predict_proba(xs[:, :num_features])[:, 1]
        logReg_train_error = skl_brier_score_loss(ys[train_idx], ys_logReg_predict_prob[train_idx])
        logReg_valid_error = skl_brier_score_loss(ys[valid_idx], ys_logReg_predict_prob[valid_idx])
    else:
        # 2) Based on Logistic Regression Score: the error is (1 - accuracy),
        #    scored on the same Feature columns used for fitting
        logReg_accuracy_train = logReg.score(xs[train_idx, :num_features], ys[train_idx])
        logReg_accuracy_valid = logReg.score(xs[valid_idx, :num_features], ys[valid_idx])
        logReg_train_error = (1 - logReg_accuracy_train)
        logReg_valid_error = (1 - logReg_accuracy_valid)

    # Return the Training and Validation Errors, for the Logistic Regression
    return logReg_train_error, logReg_valid_error
def fit(self, train_fs, train_labels, valid_fs, valid_labels, test_fs, test_labels):
    """Train the logistic regression model and score the three feature sets.

    The model is built from the hyper-parameters stored in ``self.params``,
    kept on ``self.model`` and fitted with the training data only.
    ``valid_labels`` and ``test_labels`` are accepted but not used here.

    Returns:
        A tuple ``(train_preds, valid_preds, test_preds)`` holding column 1
        of ``predict_proba`` for each feature set.
    """
    # Hyper-parameters forwarded to the estimator, read from self.params
    hyperparameter_names = (
        'penalty', 'dual', 'tol', 'C', 'verbose',
        'max_iter', 'solver', 'n_jobs', 'multi_class',
    )
    hyperparameters = {name: self.params[name] for name in hyperparameter_names}

    self.model = skl_logistic_regression(**hyperparameters)
    self.model.fit(X=train_fs, y=train_labels)

    def second_column_probabilities(features):
        # Probability column 1 of the fitted model for the given features
        return self.model.predict_proba(features)[:, 1]

    train_preds = second_column_probabilities(train_fs)
    valid_preds = second_column_probabilities(valid_fs)
    test_preds = second_column_probabilities(test_fs)
    return train_preds, valid_preds, test_preds
def fit(self, train_fs, train_labels, valid_fs, valid_labels, test_fs, test_labels):
    """Train the logistic regression model and score the three feature sets.

    The model is built from the hyper-parameters stored in ``self.params``,
    kept on ``self.model`` and fitted with the training data only, between
    calls to ``self.__lock()`` and ``self.__unlock()``. ``valid_labels`` and
    ``test_labels`` are accepted but not used here.

    Returns:
        A tuple ``(train_preds, valid_preds, test_preds)`` holding column 1
        of ``predict_proba`` for each feature set.

    Raises:
        Whatever the estimator's ``fit`` raises; ``self.__unlock()`` is still
        called in that case before the exception propagates.
    """
    self.model = skl_logistic_regression(
        penalty=self.params['penalty'],
        dual=self.params['dual'],
        tol=self.params['tol'],
        C=self.params['C'],
        verbose=self.params['verbose'],
        max_iter=self.params['max_iter'],
        solver=self.params['solver'],
        n_jobs=self.params['n_jobs'],
        multi_class=self.params['multi_class'],
    )

    self.__lock()
    try:
        self.model.fit(X=train_fs, y=train_labels)
    finally:
        # Always release, so a failing fit cannot leave the lock held
        self.__unlock()

    train_preds = self.model.predict_proba(train_fs)[:, 1]
    valid_preds = self.model.predict_proba(valid_fs)[:, 1]
    test_preds = self.model.predict_proba(test_fs)[:, 1]
    return train_preds, valid_preds, test_preds
# Customized One vs. Rest: train one binary Logistic Regression
# Classifier per Iris' Class
for ys_class_iris in range(3):

    # Binary Classes for the current Classifier:
    # 0 for the Samples of the current Iris' Class, 1 for all the others
    ys_classes_current_logReg = np.ones(num_samples)
    ys_classes_current_logReg[ys_classes == ys_class_iris] = 0

    # Initialise and fit the current Logistic Regression Classifier,
    # for the current Settings
    logReg = skl_logistic_regression(C=c_param_value, tol=1e-10)
    logReg.fit(xs_features, ys_classes_current_logReg)

    # Keep the fitted Classifier, for the plot below
    logRegs_classifiers.append(logReg)

# Plot the customized One vs. Rest Algorithm, for the Logistic Regression
# Classifiers, in the classification of the Iris' Dataset problem
plot_logistic_regressions(logRegs_classifiers, "one-vs-rest-logistic-regression-iris-dataset.png")

# Initialise the One vs. Rest Algorithm,
# from the SciKit-Learn's Python Library, for Support Vector Machines,