コード例 #1
0
def regression_with_cross_validation_and_c_index(inputs, outputs, num_folds,
                                                 k):
    '''
    Perform k-nearest-neighbor regression with cross-validation over
    consecutive folds and print the C-index of the predictions for the
    c_total, cd and pb outputs.

    Each fold in turn is used as the test set and all remaining rows as the
    training set. The prediction for a test object is the mean of the output
    values of its nearest neighbors in the training set.

    If the number of rows is not divisible by num_folds, the leftover rows at
    the end of the data are only ever used for training. They get no
    prediction and are left out of the C-index computation.

    @param  inputs      List of input rows (sliced and concatenated with "+")
    @param  outputs     List of output rows corresponding to inputs. Each row
                        is indexed with IX_C_TOTAL, IX_CD and IX_PB
    @param  num_folds   Number of folds
    @param  k           Number of nearest neighbors used for a prediction

    @return None. The three C-index values are printed.
    '''

    c_total_predictions = []
    cd_predictions = []
    pb_predictions = []

    for fold in range(num_folds):

        #How many data objects in each fold
        len_fold = len(inputs) // num_folds

        #Where is the test set placed in the data
        ix_test_first = fold * len_fold
        ix_test_one_past_last = ix_test_first + len_fold

        test_set = inputs[ix_test_first:ix_test_one_past_last]

        #Training data is every row outside the test fold. The slices start
        #from the first row; a column index constant must not be used as the
        #row offset here.
        training_set = (inputs[:ix_test_first] +
                        inputs[ix_test_one_past_last:])
        training_set_outputs = (outputs[:ix_test_first] +
                                outputs[ix_test_one_past_last:])

        #Get list of nearest neighbor indices for each object in the test set
        neighbors = knn.compute_nearest_neighbors(test_set, training_set, k)

        for i in range(len_fold):

            #Output rows of the neighbors of the i-th test object only
            neighbor_rows = [training_set_outputs[n] for n in neighbors[i]]

            neighbors_c_totals = [
                float(row[IX_C_TOTAL]) for row in neighbor_rows
            ]
            neighbors_cds = [float(row[IX_CD]) for row in neighbor_rows]
            neighbors_pbs = [float(row[IX_PB]) for row in neighbor_rows]

            #Mean value of the neighbors is the prediction
            c_total_predictions.append(mean(neighbors_c_totals))
            cd_predictions.append(mean(neighbors_cds))
            pb_predictions.append(mean(neighbors_pbs))

    #Folds are consecutive and start from the first row, so the predictions
    #line up with the first len(predictions) output rows. Rows that were
    #never in a test fold are dropped to keep both lists the same length.
    predicted_outputs = outputs[:len(c_total_predictions)]

    c_ix_c_total = knn.c_index(
        [row[IX_C_TOTAL] for row in predicted_outputs], c_total_predictions)
    c_ix_cd = knn.c_index([row[IX_CD] for row in predicted_outputs],
                          cd_predictions)
    c_ix_pb = knn.c_index([row[IX_PB] for row in predicted_outputs],
                          pb_predictions)

    print("C-index for c_totals: " + str(c_ix_c_total))
    print("C-index for cd: " + str(c_ix_cd))
    print("C-index for pb: " + str(c_ix_pb))
コード例 #2
0
def knn_classification_and_c_index(test_data, training_data, subject_id, k):
    '''
    Classify every object of the test data with k-nearest-neighbors using
    the training data, print the C-index of the classifications together
    with the subject id and return the C-index.

    Labels and features are extracted from both data sets with get_labels
    and get_features.

    @param  test_data       Data to classify
    @param  training_data   Data the neighbors are searched from
    @param  subject_id      Identifier printed next to the C-index
    @param  k               Number of nearest neighbors

    @return C-index of the predictions against the test labels
    '''
    expected_labels = get_labels(test_data)
    known_labels = get_labels(training_data)

    query_features = get_features(test_data)
    known_features = get_features(training_data)

    #One collection of neighbors per object in the test set
    neighbors_per_object = knn.compute_nearest_neighbors(
        query_features, known_features, k)

    predicted = []
    observed = []
    for position, object_neighbors in enumerate(neighbors_per_object):
        #The most common class among the neighbors is the prediction
        predicted.append(knn.majority_class(object_neighbors, known_labels))
        observed.append(expected_labels[position])

    c_ix = knn.c_index(observed, predicted)
    print("({0},{1})".format(str(subject_id), str(c_ix)))

    return c_ix
コード例 #3
0
def spatial_loo_cv(inputs, outputs, coordinates, delta, f_predict):
    '''
    Run spatial leave-one-out cross-validation: every row is predicted in
    turn with a training set that excludes the row itself and every row
    reported as nearby by get_nearby_pnt_indices.

    @param  inputs      2d list of the input data
    @param  outputs     1d list of the corresponding outputs
    @param  coordinates List of 2d coordinates of where the measurements were made
    @param  delta       Size of the dead zone. Same unit as in coordinates
    @param  f_predict   Function used to make the predictions. Called as
                        f_predict([test_row], training_inputs, training_outputs)

    @return C-index value of the predictions
    '''

    predictions = []
    for held_out_ix, held_out_inputs in enumerate(inputs):
        #Rows inside the dead zone of the held-out row
        dead_zone_ixs = get_nearby_pnt_indices(inputs, held_out_ix,
                                               coordinates, delta)

        #Training data is everything except the held-out row and its
        #nearby rows
        kept_inputs = []
        kept_outputs = []
        for ix in range(len(inputs)):
            if ix == held_out_ix or ix in dead_zone_ixs:
                continue
            kept_inputs.append(inputs[ix])
            kept_outputs.append(outputs[ix])

        #The held-out row is passed as a test set of one row
        predictions.append(
            f_predict([held_out_inputs], kept_inputs, kept_outputs))

    return knn.c_index(outputs, predictions)
コード例 #4
0
def loo_cv_with_pairwise_filtering(features, labels, pairs, f_predict):
    '''
    Leave-one-out cross-validation in which training rows are additionally
    filtered with pairs_disjoint, so that rows whose pairs clash with the
    pairs of the test row are not used for training.
    Classifies every row, prints the misclassification statistics and
    returns the c-index.

    @param  features    2d list of feature values
    @param  labels      Correct labels corresponding to rows in features.
                        The first element of a label row is the label
    @param  pairs       List of pairs of each row in features. These are just names, not eg. row indices
    @param  f_predict   Prediction function used for classifications. Called as
                        f_predict([test_row], training_features, training_labels)

    @return Concordance-index for the made predictions
    '''

    def _all_but(rows, skip_ix):
        #Copy of rows without the row at position skip_ix
        return [row for pos, row in enumerate(rows) if pos != skip_ix]

    predictions = []
    wrong = 0
    for held_out_ix, held_out_features in enumerate(features):
        #Candidate training data: everything but the held-out row
        candidate_features = _all_but(features, held_out_ix)
        candidate_labels = _all_but(labels, held_out_ix)

        #Predicate with the held-out index and the pairs list bound to it.
        #It tells whether a training object may be kept.
        may_train_on = partial(pairs_disjoint, pairs=pairs, ix_1=held_out_ix)

        #NOTE(review): the candidate lists no longer contain the held-out
        #row, so positions after it are shifted by one relative to pairs.
        #Confirm that filter_list / pairs_disjoint account for this.
        usable_features = filter_list(candidate_features, may_train_on)
        usable_labels = filter_list(candidate_labels, may_train_on)

        #Classify the held-out row with the given prediction function
        prediction = f_predict([held_out_features], usable_features,
                               usable_labels)
        predictions.append([prediction])

        #Count the wrong guesses
        if labels[held_out_ix][0] != prediction:
            wrong += 1

    num_rows = len(features)
    if num_rows:
        report = (
            ("Classifications: ", num_rows),
            ("Misclassifications: ", wrong),
            ("Misclassification rate: ", float(wrong) / num_rows),
        )
        for caption, value in report:
            print(caption + str(value))

    return knn.c_index(labels, predictions)
コード例 #5
0
def loo_cv(features, labels, f_predict):
    '''
    Leave-one-out cross-validation: every row is classified in turn with
    all the other rows as training data. Prints the misclassification
    statistics and returns the c-index.

    @param  features    2d list of feature values
    @param  labels      Correct labels corresponding to rows in features.
                        The first element of a label row is the label
    @param  f_predict   Prediction function used for classifications. Called as
                        f_predict([test_row], training_features, training_labels)

    @return Concordance-index for the made predictions
    '''

    def _all_but(rows, skip_ix):
        #Copy of rows without the row at position skip_ix
        return [row for pos, row in enumerate(rows) if pos != skip_ix]

    predictions = []
    wrong = 0
    for held_out_ix, held_out_features in enumerate(features):
        prediction = f_predict([held_out_features],
                               _all_but(features, held_out_ix),
                               _all_but(labels, held_out_ix))
        predictions.append([prediction])

        #Count the wrong guesses
        if labels[held_out_ix][0] != prediction:
            wrong += 1

    num_rows = len(features)
    if num_rows:
        report = (
            ("Classifications: ", num_rows),
            ("Misclassifications: ", wrong),
            ("Misclassification rate: ", float(wrong) / num_rows),
        )
        for caption, value in report:
            print(caption + str(value))

    return knn.c_index(labels, predictions)