Example #1
0
def test_classifier(data):
    """Smoke-test ``KNNAlgorithmM``: defaults, fitted attributes, output shape.

    ``data`` is unpacked into a feature matrix and a label vector
    (presumably supplied by a pytest fixture -- confirm in the test setup).
    """
    features, labels = data

    # A freshly constructed estimator exposes the default neighbourhood sizes.
    knn = KNNAlgorithmM()
    assert knn.k_neighbours == (3, 5, 7)

    # Fitting has to attach each of these attributes to the estimator.
    knn.fit(features, labels)
    for fitted_attr in ('classes_', 'X_', 'y_'):
        assert hasattr(knn, fitted_attr)

    # Exactly one prediction per input row.
    predictions = knn.predict(features)
    assert predictions.shape == (features.shape[0], )
Example #2
0
def test_main_class_probability():
    """Compare ``_classic_knn`` with hand-computed values for k = 3, 5, 7.

    Each row of the neighbour-order matrix lists training indexes for one
    test object; the expected table holds, per row and per k, the share of
    label ``1`` among the first k listed indexes.
    """
    labels = np.array([0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1])
    neighbour_order = np.array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
                                [5, 6, 1, 2, 3, 0, 7, 9, 8, 10, 11, 4],
                                [3, 7, 8, 9, 10, 11, 2, 1, 4, 0, 6, 5]])
    expected_p = np.array([[1 / 3, 2 / 5, 3 / 7], [1 / 3, 3 / 5, 4 / 7],
                           [1, 3 / 5, 5 / 7]])

    # Only ``y_`` is set by hand; the estimator is never fitted here.
    knn = KNNAlgorithmM()
    knn.y_ = labels

    neighbourhood_sizes = (3, 5, 7)
    computed = np.zeros(shape=(3, 3))
    for row in range(3):
        for col, k in enumerate(neighbourhood_sizes):
            computed[row, col] = knn._classic_knn(neighbour_order, k, row)

    assert_allclose(computed, expected_p, rtol=0.1, atol=0.1)
Example #3
0
def test_measure():
    """Fit ``KNNAlgorithmM`` on a tiny in-memory table and check its score.

    The first two columns (height, weight) are the features and the third
    column ('bmi') is the class label. Intermediate values are printed to
    help debugging.
    """
    # Build the in-memory dataset.
    bmi = pd.DataFrame.from_dict({
        'height': [1.6, 1.6, 1.62, 1.75, 1.7, 1.8, 1.9],
        'weight': [60, 80, 80, 90, 80, 85, 82],
        'bmi': [1, 0, 0, 0, 0, 1, 1]
    })
    print(bmi)

    # Train on the two feature columns against the flattened label column.
    aggregatedKNN = KNNAlgorithmM(k_neighbours=(1, 3))
    train_features = bmi.iloc[:, :2]
    train_labels = np.ravel(bmi.iloc[:, 2:3])
    model = aggregatedKNN.fit(train_features, train_labels)
    print(bmi.iloc[:, :2])
    print(np.ravel(bmi.iloc[:, 2:3]))

    # Decision for the first training object; the exact-value check on it
    # is currently disabled:
    # assert pred == [1]
    pred = model.predict([[1.6, 60]])
    print(pred)

    # Score on three hand-picked points (accuracy, per the original note).
    eval_points = [[1.6, 60], [1.59, 61], [1.6, 80]]
    eval_labels = [1, 1, 0]
    score = model.score(eval_points, eval_labels)
    print(score)
    assert score == 1.0
Example #4
0
def test_m_dist_equivalence():
    """Check the optimized distance routine against the per-pair routine.

    The iris data is used as both the "test" and the "train" set. The matrix
    returned by ``euclidean_distance_with_missing_values_optimized`` must
    equal, element for element, the matrix assembled by calling
    ``euclidean_distance_with_missing_values`` on every pair of rows.
    """
    # ``return_X_y`` has to be passed by keyword: scikit-learn made the
    # loader's parameters keyword-only, so ``load_iris(True)`` raises a
    # TypeError on current releases.
    X, y = load_iris(return_X_y=True)
    m = KNNAlgorithmM()
    m.fit(X, y)

    def full_distance(X, X_):
        """Build the distance matrix one (test row, train row) pair at a time."""
        distance = np.empty(shape=(X.shape[0], X_.shape[0]), dtype=np.float64)
        for i, test in enumerate(X):
            for j, train in enumerate(X_):
                distance[i][j] = m.euclidean_distance_with_missing_values(
                    test, train)
        return distance

    assert_array_equal(
        m.euclidean_distance_with_missing_values_optimized(X, X),
        full_distance(X, X))
Example #5
0
def test_computing_euclidean_distance_with_missing_values():
    """Check pairwise distances between small arrays that contain ``-1``.

    The distance matrix is laid out as ``distance[test_row][train_row]``.
    It is first filled with plain ``euclidean_distances``, then the rows
    reported by ``get_missing_values_indexes`` for the test set are patched,
    and finally every cell is recomputed with
    ``euclidean_distance_with_missing_values`` before the comparison.
    Intermediate matrices are printed to help debugging.
    """
    dummy_train = np.array([[0.2, 0.5, 1.0], [-1, 0.4, 0.1], [0.2, 0.4, 0.4]])

    dummy_test = np.array([[0.8, 0.1, -1], [0.7, 0.2, 0.4], [0.6, -1, 0.8]])

    # A single estimator is enough; the calls below only need its methods.
    knn = KNNAlgorithmM()

    distance = euclidean_distances(dummy_test, dummy_train)
    print(distance)

    missing_indexes = knn.get_missing_values_indexes(dummy_test)
    print(missing_indexes)
    missing_train_indexes = knn.get_missing_values_indexes(dummy_train)
    print(missing_train_indexes)

    # Patch the rows of test objects that have missing values.
    # NOTE(review): assumes the keys are row indexes into ``dummy_test`` --
    # confirm against get_missing_values_indexes. Under that reading, cell
    # [row][col] must pair dummy_test[row] with dummy_train[col]; the
    # previous code paired dummy_test[col] with dummy_train[row].
    for k in missing_indexes.keys():
        test_row = int(k)
        for train_row in range(dummy_train.shape[0]):
            distance[test_row][train_row] = (
                knn.euclidean_distance_with_missing_values(
                    dummy_test[test_row], dummy_train[train_row]))
    print(distance)
    # Columns for training rows with missing values are not patched
    # separately; the full recomputation below covers every cell anyway.

    for i, test in enumerate(dummy_test):
        for j, train in enumerate(dummy_train):
            distance[i][j] = knn.euclidean_distance_with_missing_values(
                test, train)
    print(distance)

    # NOTE(review): the expected values carry three decimals and are
    # compared exactly -- confirm the distance routine yields such values.
    expected_distance = np.array([[1.232, 1.240, 0.9], [0.836, 0.787, 0.538],
                                  [0.67, 1.191, 0.824]])
    assert_array_equal(expected_distance, distance)
Example #6
0
                    .fit_transform(X)
                multiclassF = OneVsRestClassifierForRandomBinaryClassifier(KNNAlgorithmF(missing_representation=-1, r=r, aggregation=agg, k_neighbours=k))
                f_result = cross_validate(multiclassF, X_missing, y, scoring='roc_auc_ovo', return_estimator=True, cv=10)
                w = pd.DataFrame({'algorithm': 'f', 'k': str(k), 'r': f_result['estimator'][0].estimator.r,
                              'agg': Aggregation.change_aggregation_to_name(f_result['estimator'][0].estimator.aggregation), 'missing': miss,
                                  'auc': np.mean(f_result['test_score']), 'stddev': stdev(f_result['test_score'])}, index=[ind])
                print(w)
                dfs.append(w)
                ind += 1
                concatenated = pd.concat(dfs)

# Save every result frame collected in ``dfs`` so far.
concatenated = pd.concat(dfs)
concatenated.to_excel('concatenated_vertebral.xlsx')

# Cross-validate the one-vs-rest KNNAlgorithmM for each combination of
# missing-value percentage and neighbourhood setting, appending one
# single-row frame per combination to ``dfs``.
for miss in missing:
    for k in ks:
        # Insert missing values (encoded as -1) into every column of X.
        X_missing = MissingValuesInserterColumnsIndependent(
            columns=range(X.shape[1]),
            nan_representation=-1,
            percentage=miss,
        ).fit_transform(X)

        multiclassM = OneVsRestClassifier(
            KNNAlgorithmM(missing_representation=-1, k_neighbours=k))
        m_result = cross_validate(
            multiclassM,
            X_missing,
            y,
            scoring='roc_auc_ovo',
            return_estimator=True,
            cv=10,
        )

        # Mean and standard deviation of the per-fold scores; 'r' and 'agg'
        # are left blank for this algorithm.
        fold_scores = m_result['test_score']
        w2 = pd.DataFrame(
            {
                'algorithm': 'm',
                'k': str(k),
                'r': '',
                'agg': '',
                'missing': miss,
                'auc': np.mean(fold_scores),
                'stddev': stdev(fold_scores),
            },
            index=[ind],
        )
        print(w2)
        dfs.append(w2)
        ind += 1

# ``dfs`` still holds the frames saved above, so this workbook contains
# those rows as well as the new ones.
concatenated = pd.concat(dfs)
concatenated.to_excel('concatenated_vertebral_m.xlsx')
                        stdev(f_result['test_score'])
                    },
                    index=[ind])
                print(w)
                dfs.append(w)
                ind += 1

# Combine every result frame collected in ``dfs`` into one table and write
# it to an Excel workbook.
concatenated = pd.concat(dfs)
concatenated.to_excel('concatenated_iris.xlsx')

for miss in missing:
    for k in ks:
        X_missing = MissingValuesInserterColumnsIndependent(columns=range(X.shape[1]), nan_representation=-1, percentage=miss) \
            .fit_transform(X)
        multiclassM = OneVsRestClassifier(
            KNNAlgorithmM(missing_representation=-1, k_neighbours=k))
        m_result = cross_validate(multiclassM,
                                  X_missing,
                                  y,
                                  scoring='roc_auc_ovo',
                                  return_estimator=True,
                                  cv=10)

        w2 = pd.DataFrame(
            {
                'algorithm': 'm',
                'k': str(k),
                'r': '',
                'agg': '',
                'missing': miss,
                'auc': np.mean(m_result['test_score']),
Example #8
0
def test_sorting():
    """Check ``take_k_smallest_and_sort_distances`` against ``np.argsort``.

    For every row of the distance matrix the method must return the same
    indexes as the first ``k`` columns of a full ascending argsort.
    """
    k = 5
    distance = np.array([[0.8653, 0.6731, 0.9863, 0.3042, 0.9, 0.452],
                         [0.1231, 0.8763, 0.0112, 0.7633, 0.912, 0.341]])

    # Call the method under test first, then build the reference result.
    tested = KNNAlgorithmM().take_k_smallest_and_sort_distances(distance, k)
    reference = np.argsort(distance)[:, :k]

    assert_array_equal(tested, reference)