def cross_validation_decision_boundary_fishers(inputs, targets, cv_folds,
                                               num_folds, decision_boundaries,
                                               num_decision_boundaries,
                                               robust=0):
    """
    Cross-validates Fisher's linear discriminant (or its robust variant when
    robust is nonzero) over a sweep of candidate decision boundaries. For each
    fold the projection is fitted on the training partition, both partitions
    are projected onto 1 dimension, and each candidate boundary is used as a
    threshold classifier. Returns the per-boundary train and validation
    accuracies (averaged over folds) and the boundary with the highest mean
    validation accuracy.
    """
    train_accuracy_array = np.zeros(num_decision_boundaries)
    test_accuracy_array = np.zeros(num_decision_boundaries)
    for f in range(num_folds):
        training_filter, validation_filter = cv_folds[f]
        training_data, training_targets, validation_data, validation_targets = \
            train_and_test_partition(
                inputs, targets, training_filter, validation_filter)
        # fit the projection weights on the training partition only
        if robust == 0:
            fisher_weights = fisher_linear_discriminant_projection(
                training_data, training_targets)
        else:
            fisher_weights = robust_fisher_linear_discriminant_projection(
                training_data, training_targets, 1e-6)
        projected_inputs_train = project_data(training_data, fisher_weights)
        projected_inputs_test = project_data(validation_data, fisher_weights)
        for j, boundary in enumerate(decision_boundaries):
            # threshold the 1d projections and accumulate fold-averaged
            # accuracies for this candidate boundary
            predicted_train = (projected_inputs_train > boundary).astype(int)
            predicted_test = (projected_inputs_test > boundary).astype(int)
            train_accuracy_array[j] += np.mean(
                predicted_train == training_targets) / num_folds
            test_accuracy_array[j] += np.mean(
                predicted_test == validation_targets) / num_folds
    best_boundary = decision_boundaries[np.argmax(test_accuracy_array)]
    return train_accuracy_array, test_accuracy_array, best_boundary

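# Illustrative usage sketch (not part of the original module): it assumes the
# helpers called above (train_and_test_partition, project_data and the Fisher
# projection functions) are available in this namespace, and builds the
# cv_folds structure expected by cross_validation_decision_boundary_fishers as
# a sequence of (training_filter, validation_filter) boolean-mask pairs.
def _example_cross_validate_boundaries(inputs, targets, num_folds=5):
    N = targets.size
    # assign each data-point to a fold in round-robin fashion
    fold_ids = np.arange(N) % num_folds
    cv_folds = [(fold_ids != f, fold_ids == f) for f in range(num_folds)]
    # the boundary grid here is a placeholder; in practice it should cover
    # the span of the projected training data
    decision_boundaries = np.linspace(-1.0, 1.0, 101)
    return cross_validation_decision_boundary_fishers(
        inputs, targets, cv_folds, num_folds,
        decision_boundaries, decision_boundaries.size)
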
def ROC_values_and_AUC(train_inputs, train_targets, test_inputs, test_targets,
                       robust):
    """
    Fits Fisher's linear discriminant (or its robust variant when robust is
    nonzero) on the training data, projects the test data onto 1 dimension,
    and sweeps the projected values as decision thresholds to build an ROC
    curve. Returns the false positive rates, true positive rates and the area
    under the curve.
    """
    if robust == 0:
        weights = fisher_linear_discriminant_projection(
            train_inputs, train_targets)
    else:
        weights = robust_fisher_linear_discriminant_projection(
            train_inputs, train_targets, 1e-6)
    projected_inputs = project_data(test_inputs, weights)
    # sort the projected test points and reorder the targets to match
    new_ordering = np.argsort(projected_inputs)
    projected_inputs = projected_inputs[new_ordering]
    plot_targets = np.copy(test_targets[new_ordering])
    N = test_targets.size
    num_neg = np.sum(1 - test_targets)
    num_pos = np.sum(test_targets)
    false_positive_rates = np.empty(N)
    true_positive_rates = np.empty(N)
    # each projected value is used in turn as the threshold: points at or
    # above index i are predicted positive
    for i in range(N):
        false_positive_rates[i] = np.sum(1 - plot_targets[i:]) / num_neg
        true_positive_rates[i] = np.sum(plot_targets[i:]) / num_pos
    # the sweep traces the curve from right to left, hence the sign flip
    AUC = -np.trapz(true_positive_rates, false_positive_rates)
    return false_positive_rates, true_positive_rates, AUC

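# Illustrative usage sketch (not part of the original module): it assumes the
# plot_roc helper used in construct_and_plot_roc below accepts the false and
# true positive rate arrays returned by ROC_values_and_AUC.
def _example_roc(train_inputs, train_targets, test_inputs, test_targets):
    fpr, tpr, auc = ROC_values_and_AUC(
        train_inputs, train_targets, test_inputs, test_targets, robust=0)
    fig, ax = plot_roc(fpr, tpr)
    ax.set_title("ROC curve (AUC = %.3f)" % auc)
    return fig, ax
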
def construct_and_plot_roc(inputs, targets, method='maximum_separation',
                           **kwargs):
    """
    Takes input and target data for classification, projects it down onto
    1 dimension according to the given method, then plots the ROC curve for
    the projected data.

    parameters
    ----------
    inputs - a 2d input matrix (array-like), each row is a data-point
    targets - 1d target vector (array-like) -- can have at most 2 classes,
        with ids 0 and 1
    method - name of the projection method passed to get_projection_weights
    """
    weights = get_projection_weights(inputs, targets, method)
    projected_inputs = project_data(inputs, weights)
    # sort the projected points and reorder the targets to match
    new_ordering = np.argsort(projected_inputs)
    projected_inputs = projected_inputs[new_ordering]
    targets = np.copy(targets[new_ordering])
    N = targets.size
    num_neg = np.sum(1 - targets)
    num_pos = np.sum(targets)
    false_positive_rates = np.empty(N)
    true_positive_rates = np.empty(N)
    # each projected value is used in turn as the decision threshold
    for i in range(N):
        false_positive_rates[i] = np.sum(1 - targets[i:]) / num_neg
        true_positive_rates[i] = np.sum(targets[i:]) / num_pos
    fig, ax = plot_roc(false_positive_rates, true_positive_rates, **kwargs)
    return fig, ax

def fit_and_plot_prc_fisher(train_inputs, train_targets, test_inputs,
                            test_targets, fig_ax=None, colour=None, type=None):
    """
    Fits Fisher's linear discriminant on the training data and plots the
    precision-recall curve for either the training or the testing data,
    selected with type='training' or type='testing'.
    """
    weights = fisher_linear_discriminant_projection(train_inputs, train_targets)
    if type == 'training':
        inputs = train_inputs
        targets = train_targets
    elif type == 'testing':
        inputs = test_inputs
        targets = test_targets
    projected_inputs = project_data(inputs, weights)
    # sort projected_inputs in ascending order and sort targets accordingly
    new_ordering = np.argsort(projected_inputs)
    projected_inputs = projected_inputs[new_ordering]
    targets = np.copy(targets[new_ordering])
    N = targets.size
    precision_values = np.empty(N)
    recall_values = np.empty(N)
    # each projected value is used in turn as the decision threshold:
    # points at or above index i are predicted positive
    for i in range(N):
        num_false_positives = np.sum(1 - targets[i:])
        num_true_positives = np.sum(targets[i:])
        num_false_negatives = np.sum(targets[:i])
        precision_values[i] = num_true_positives / (
            num_true_positives + num_false_positives)
        recall_values[i] = num_true_positives / (
            num_true_positives + num_false_negatives)
    fig, ax = plot_prc(
        recall_values, precision_values, fig_ax=fig_ax, colour=colour)
    # recall decreases along the sweep, so flip both arrays before integrating
    auc = np.trapz(np.flip(precision_values), np.flip(recall_values))
    print("FISHER'S LINEAR DISCRIMINANT ON", type, "DATA")
    print("AREA UNDER CURVE: ", auc)
    print(" ")
    return fig, ax

def main(ifname, input_cols=None, target_col=None, classes=None):
    """
    Imports the data, fits Fisher's linear discriminant, and histograms the
    projected data by class.
    """
    # import data
    inputs, targets, field_names, classes = import_for_classification(
        ifname, input_cols=input_cols, target_col=target_col, classes=classes)
    # plot fisher's projection
    weights = fisher_linear_discriminant_projection(inputs, targets)
    projected_data = project_data(inputs, weights)
    plot_class_histograms(projected_data, targets)
    plt.show()
    # proportion of positive (class 1) examples
    print(np.mean(targets))

def project_and_histogram_data(inputs, targets, method, title=None,
                               classes=None):
    """
    Takes input and target data for classification, projects it down onto
    1 dimension according to the given method, then histograms the projected
    data.

    parameters
    ----------
    inputs - a 2d input matrix (array-like), each row is a data-point
    targets - 1d target vector (array-like) -- can have at most 2 classes,
        with ids 0 and 1
    method - name of the projection method passed to get_projection_weights
    """
    weights = get_projection_weights(inputs, targets, method)
    projected_inputs = project_data(inputs, weights)
    ax = plot_class_histograms(projected_inputs, targets)
    # label x axis
    ax.set_xlabel(r"$\mathbf{w}^T\mathbf{x}$")
    ax.set_title("Projected Data: %s" % method)
    if classes is not None:
        ax.legend(classes)

def predict(inputs, weights, decision_boundary):
    """
    Projects the inputs onto 1 dimension with the given weights and classifies
    each point as 1 if its projection exceeds the decision boundary, else 0.
    """
    projected_inputs = project_data(inputs, weights)
    return (projected_inputs > decision_boundary).astype(int)

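# Illustrative end-to-end sketch (not part of the original module): load data
# with import_for_classification as in main above, pick a decision boundary by
# cross-validation, then classify with predict. The fold construction and
# boundary grid are placeholder choices.
def _example_full_pipeline(ifname, input_cols=None, target_col=None,
                           num_folds=5):
    inputs, targets, field_names, classes = import_for_classification(
        ifname, input_cols=input_cols, target_col=target_col)
    weights = fisher_linear_discriminant_projection(inputs, targets)
    projected = project_data(inputs, weights)
    # candidate boundaries spanning the projected data
    decision_boundaries = np.linspace(projected.min(), projected.max(), 101)
    fold_ids = np.arange(targets.size) % num_folds
    cv_folds = [(fold_ids != f, fold_ids == f) for f in range(num_folds)]
    _, _, best_boundary = cross_validation_decision_boundary_fishers(
        inputs, targets, cv_folds, num_folds,
        decision_boundaries, decision_boundaries.size)
    return predict(inputs, weights, best_boundary)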