Example #1
def cross_validation_decision_boundary_fishers(inputs,
                                               targets,
                                               cv_folds,
                                               num_folds,
                                               decision_boundaries,
                                               num_decision_boundaries,
                                               robust=0):

    train_accuracy_array = np.zeros(num_decision_boundaries)
    test_accuracy_array = np.zeros(num_decision_boundaries)

    for f in range(num_folds):
        fold_filters = cv_folds[f]
        training_filter = fold_filters[0]
        validation_filter = fold_filters[1]
        training_data, training_targets, validation_data, validation_targets = train_and_test_partition(
            inputs, targets, training_filter, validation_filter)
        if robust == 0:
            fisher_weights = fisher_linear_discriminant_projection(
                training_data, training_targets)
        elif robust == 1:
            fisher_weights = robust_fisher_linear_discriminant_projection(
                training_data, training_targets, 1e-6)

        projected_inputs_train = project_data(training_data, fisher_weights)
        projected_inputs_test = project_data(validation_data, fisher_weights)
        new_ordering_train = np.argsort(projected_inputs_train)
        new_ordering_test = np.argsort(projected_inputs_test)
        projected_inputs_train = projected_inputs_train[new_ordering_train]
        projected_inputs_test = projected_inputs_test[new_ordering_test]

        training_targets = np.copy(training_targets[new_ordering_train])
        validation_targets = np.copy(validation_targets[new_ordering_test])

        for j, boundary in enumerate(decision_boundaries):
            # classify by thresholding the projected values at this boundary
            predicted_train = (projected_inputs_train > boundary).astype(int)
            predicted_test = (projected_inputs_test > boundary).astype(int)
            # accumulate per-fold accuracy, averaged over all folds
            train_accuracy_array[j] += np.mean(
                predicted_train == training_targets) / num_folds
            test_accuracy_array[j] += np.mean(
                predicted_test == validation_targets) / num_folds

    # return the accuracy curves and the boundary with the best validation accuracy
    best_boundary = decision_boundaries[np.argmax(test_accuracy_array)]
    return train_accuracy_array, test_accuracy_array, best_boundary
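The routine above assumes cv_folds is a sequence of (training_filter, validation_filter) mask pairs understood by train_and_test_partition, which is not shown here. A minimal sketch of how such folds could be built and the call wired up; make_cv_folds and the boundary grid are assumptions, not part of the original code:

import numpy as np

def make_cv_folds(num_points, num_folds, seed=0):
    """Sketch: build (train_mask, validation_mask) boolean pairs for each fold."""
    rng = np.random.default_rng(seed)
    fold_ids = rng.permutation(num_points) % num_folds
    return [(fold_ids != f, fold_ids == f) for f in range(num_folds)]

# hypothetical call, assuming inputs and targets are already loaded
# boundaries = np.linspace(-5, 5, 101)   # placeholder threshold grid
# folds = make_cv_folds(inputs.shape[0], num_folds=5)
# train_acc, valid_acc, best_boundary = cross_validation_decision_boundary_fishers(
#     inputs, targets, folds, 5, boundaries, len(boundaries))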
Example #2
def ROC_values_and_AUC(train_inputs, train_targets, test_inputs, test_targets,
                       robust):
    if robust == 0:
        weights = fisher_linear_discriminant_projection(
            train_inputs, train_targets)
    elif robust == 1:
        weights = robust_fisher_linear_discriminant_projection(
            train_inputs, train_targets, 1e-6)

    projected_inputs = project_data(test_inputs, weights)
    new_ordering = np.argsort(projected_inputs)
    projected_inputs = projected_inputs[new_ordering]
    plot_targets = np.copy(test_targets[new_ordering])
    N = test_targets.size
    num_neg = np.sum(1 - test_targets)
    num_pos = np.sum(test_targets)
    false_positive_rates = np.empty(N)
    true_positive_rates = np.empty(N)
    # sweep the decision threshold over each sorted projected value:
    # everything from index i onwards is treated as predicted positive
    for i in range(N):
        false_positive_rates[i] = np.sum(1 - plot_targets[i:]) / num_neg
        true_positive_rates[i] = np.sum(plot_targets[i:]) / num_pos

    # false_positive_rates decreases as i grows, so np.trapz is negated
    # to give a positive area under the ROC curve
    AUC = -np.trapz(true_positive_rates, false_positive_rates)

    return false_positive_rates, true_positive_rates, AUC
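Every snippet here leans on fisher_linear_discriminant_projection and project_data, neither of which is shown. A minimal sketch of what they plausibly compute, assuming the standard Fisher criterion w ∝ Sw^-1 (m1 - m0); this is a reconstruction, not the original helpers:

import numpy as np

def fisher_linear_discriminant_projection(inputs, targets):
    """Sketch: Fisher weight vector w ~ Sw^-1 (m1 - m0) for class ids 0 and 1."""
    X0, X1 = inputs[targets == 0], inputs[targets == 1]
    m0, m1 = X0.mean(axis=0), X1.mean(axis=0)
    # pooled within-class scatter matrix
    Sw = (X0 - m0).T @ (X0 - m0) + (X1 - m1).T @ (X1 - m1)
    weights = np.linalg.solve(Sw, m1 - m0)
    return weights / np.linalg.norm(weights)

def project_data(inputs, weights):
    """Project each row of inputs onto the 1-d discriminant direction."""
    return np.asarray(inputs) @ weights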
Example #3
def construct_and_plot_roc(inputs,
                           targets,
                           method='maximum_separation',
                           **kwargs):
    """
    Takes input and target data for classification and projects
    this down onto 1 dimension according to the given method,
    then plots roc curve for the data.

    parameters
    ----------
    inputs - a 2d input matrix (array-like), each row is a data-point
    targets - 1d target vector (array-like) -- can be at most 2 classes ids
        0 and 1
    """
    weights = get_projection_weights(inputs, targets, method)
    projected_inputs = project_data(inputs, weights)
    new_ordering = np.argsort(projected_inputs)
    projected_inputs = projected_inputs[new_ordering]
    targets = np.copy(targets[new_ordering])
    N = targets.size
    num_neg = np.sum(1 - targets)
    num_pos = np.sum(targets)
    false_positive_rates = np.empty(N)
    true_positive_rates = np.empty(N)
    # sweep the decision threshold over each sorted projected value
    for i in range(N):
        false_positive_rates[i] = np.sum(1 - targets[i:]) / num_neg
        true_positive_rates[i] = np.sum(targets[i:]) / num_pos
    fig, ax = plot_roc(false_positive_rates, true_positive_rates, **kwargs)
    return fig, ax
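A hedged usage sketch; the keyword arguments forwarded to plot_roc and the returned matplotlib objects are assumptions about that helper:

# hypothetical call on already-loaded data
fig, ax = construct_and_plot_roc(inputs, targets, method='maximum_separation')
ax.set_title("ROC curve: maximum-separation projection")
fig.savefig("roc_maximum_separation.png")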
Example #4
def fit_and_plot_prc_fisher(train_inputs, train_targets, test_inputs, test_targets, fig_ax=None, colour=None, type=None):
    weights = fisher_linear_discriminant_projection(train_inputs, train_targets)
    if type == 'training':
        inputs = train_inputs
        targets = train_targets
    elif type == 'testing':
        inputs = test_inputs
        targets = test_targets
    projected_inputs = project_data(inputs, weights)
    # sort projected_inputs in ascending order and reorder targets to match
    new_ordering = np.argsort(projected_inputs)
    projected_inputs = projected_inputs[new_ordering]
    targets = np.copy(targets[new_ordering])
    N = targets.size
    precision_values = np.empty(N)
    recall_values = np.empty(N)
    # sweep the decision threshold: points from index i onwards are predicted
    # positive, points before i are predicted negative
    for i in range(N):
        num_false_positives = np.sum(1 - targets[i:])
        num_true_positives = np.sum(targets[i:])
        num_false_negatives = np.sum(targets[:i])
        precision_values[i] = num_true_positives / (num_true_positives + num_false_positives)
        recall_values[i] = num_true_positives / (num_true_positives + num_false_negatives)
    fig, ax = plot_prc(
        recall_values, precision_values, fig_ax=fig_ax, colour=colour)
    auc = np.trapz(np.flip(precision_values), np.flip(recall_values))
    print("FISHER'S LINEAR DISCRIMINANT ON", type, " DATA")
    print("AREA UNDER CURVE: ", auc)
    print(" ")
    return fig, ax
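A hedged usage sketch overlaying the training and testing precision-recall curves on one figure; it assumes plot_prc's fig_ax parameter accepts the (fig, ax) tuple this function returns:

# hypothetical usage on already-loaded train/test splits
fig, ax = fit_and_plot_prc_fisher(train_inputs, train_targets,
                                  test_inputs, test_targets,
                                  colour='b', type='training')
fit_and_plot_prc_fisher(train_inputs, train_targets,
                        test_inputs, test_targets,
                        fig_ax=(fig, ax), colour='r', type='testing')
ax.legend(['training', 'testing'])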
Example #5
def main(ifname, input_cols=None, target_col=None, classes=None):
    """
    Import data and set aside test data
    """

    # import data
    inputs, targets, field_names, classes = import_for_classification(
        ifname, input_cols=input_cols, target_col=target_col, classes=classes)
    # plot fisher's projection
    weights = fisher_linear_discriminant_projection(inputs, targets)
    projected_data = project_data(inputs, weights)
    plot_class_histograms(projected_data, targets)

    plt.show()

    # fraction of positive (class 1) examples in the data
    print(np.mean(targets))
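A hedged sketch of how main might be invoked as a script; the file name and argument handling are placeholders, not taken from the source:

if __name__ == '__main__':
    import sys
    # e.g. python fisher_projection.py datafile.csv
    main(sys.argv[1], input_cols=None, target_col=None, classes=None)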
Example #6
def project_and_histogram_data(inputs,
                               targets,
                               method,
                               title=None,
                               classes=None):
    """
    Takes input and target data for classification and projects
    this down onto 1 dimension according to the given method,
    then histograms the projected data.

    parameters
    ----------
    inputs - a 2d input matrix (array-like), each row is a data-point
    targets - 1d target vector (array-like) -- assumed to contain at most
        two class ids, 0 and 1
    """
    weights = get_projection_weights(inputs, targets, method)
    projected_inputs = project_data(inputs, weights)
    ax = plot_class_histograms(projected_inputs, targets)
    # label x axis
    ax.set_xlabel(r"$\mathbf{w}^T\mathbf{x}$")
    ax.set_title("Projected Data: %s" % method)
    if classes is not None:
        ax.legend(classes)
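A hedged usage sketch comparing two projections side by side; 'fisher' as a second method string is an assumption about what get_projection_weights supports:

import matplotlib.pyplot as plt

# hypothetical comparison of two projection methods on the same data
for method in ('maximum_separation', 'fisher'):
    project_and_histogram_data(inputs, targets, method, classes=classes)
plt.show()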
Example #7
def predict(inputs, weights, decision_boundary):
    # project onto the discriminant direction, then threshold at the boundary
    projected_inputs = project_data(inputs, weights)
    return (projected_inputs > decision_boundary).astype(int)
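A hedged end-to-end sketch tying the pieces together: fit the Fisher direction on training data, take the boundary selected by the cross-validation routine in Example #1, then score held-out data. Variable names are placeholders:

# hypothetical end-to-end use on already-loaded train/test splits
weights = fisher_linear_discriminant_projection(train_inputs, train_targets)
predicted_labels = predict(test_inputs, weights, best_boundary)
print("test accuracy at the cross-validated boundary:",
      np.mean(predicted_labels == test_targets))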