Example #1
def knn_cross_val_score(X, y, k_list, score='accuracy', cv=None, **kwargs):
    """
    :param X: train samples
    :param y: targets for train
    :param k_list: list of neighbor counts to evaluate, in ascending order
    :param score: metric name ('accuracy' must be supported)
    :param cv: list of tuples with indices of train and validation samples
    :param kwargs: parameters for __init__ of KNNClassifier
    :return: dict where keys are the neighbor counts from k_list and values
        are numpy arrays of size len(cv) with the accuracy on each fold
    """

    if cv is None:
        cv = kfold(X.shape[0])
    metric_per_k = {k: np.empty(0) for k in k_list}
    for fold in cv:
        # Fit with the largest k so smaller values can reuse its neighbors.
        if 'k' not in kwargs:
            knn = KNNClassifier(k_list[-1], **kwargs)
        else:
            knn = KNNClassifier(**kwargs)
        knn.fit(X[fold[0]], y[fold[0]])
        # Presumably caches the neighbor distances/indices for predict_for_cv.
        knn.find_kneighbors(X[fold[1]], return_distance=True)
        for k in k_list:
            knn.k = k
            y_valid = knn.predict_for_cv(X[fold[1]])
            if score == "accuracy":
                metric_per_k[k] = np.append(metric_per_k[k],
                                            accuracy(y_valid,
                                                   y[fold[1]]))
    return metric_per_k
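These snippets rely on hand-rolled helpers — kfold(n, n_folds) returning a list of (train_indices, validation_indices) tuples, and an accuracy function — that are not shown on this page. A minimal sketch consistent with how they are called:

import numpy as np

def kfold(n, n_folds=3):
    # Split range(n) into n_folds validation blocks; each fold is a
    # (train_indices, validation_indices) pair of index arrays.
    parts = np.array_split(np.arange(n), n_folds)
    return [(np.concatenate(parts[:i] + parts[i + 1:]), parts[i])
            for i in range(n_folds)]

def accuracy(y_pred, y_true):
    # Fraction of exactly matching labels.
    return np.mean(np.asarray(y_pred) == np.asarray(y_true))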
Example #2
def knn_cross_val_score(X, y, k_list, score, cv=None, **kwargs):
    res = {}
    # The `score` argument is unused here; accuracy is assumed.
    # Assumes: from sklearn.model_selection import KFold
    kf = KFold(n_splits=3) if cv is None else cv
    for k in k_list:
        knn = KNNClassifier(k, **kwargs)
        res[k] = []
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            knn.fit(X_train, y_train)
            my_y = knn.predict(X_test)
            # Compare by value; `is` identity checks fail for numpy scalars.
            correct = sum(1 for i, v in enumerate(my_y) if y_test[i] == v)
            res[k].append(correct / len(my_y))

    return res
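For reference, a hypothetical call to the function above (X, y and the KNNClassifier implementation are assumed to come from the caller's code):

scores = knn_cross_val_score(X, y, k_list=[1, 3, 5, 7], score='accuracy')
for k in sorted(scores):
    print(k, np.mean(scores[k]))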
Example #3
def knn_cross_val_score(X, y, k_list, score='accuracy', cv=None, **kwargs):
    k_list = list(k_list)
    X = X.astype(float)
    y = y.astype(int)
    if cv is None:
        cv = kfold(len(X), 3)

    res = defaultdict(lambda: np.zeros(len(cv), dtype=float))
    for t, (train, test) in enumerate(cv):
        model = KNNClassifier(k=k_list[-1], **kwargs)
        model.fit(X[train], y[train])
        if not kwargs.get('weights', False):
            neighbors = model.find_kneighbors(X[test], return_distance=False)
            weights = np.ones_like(neighbors, dtype=float)
        else:
            dists, neighbors = model.find_kneighbors(X[test],
                                                     return_distance=True)
            weights = 1 / (dists + 1e-5)
        labels = y[train][neighbors]
        # Extend the vote counts incrementally from one k to the next, so
        # each object's neighbor list is scanned only once up to max(k_list).
        for i, (cur_labels, cur_w) in enumerate(zip(labels, weights)):
            cnt = np.zeros(cur_labels.max() + 1)
            maxa, maxy = 0, cur_labels[0]
            for prev_k, cur_k in zip([0] + k_list, k_list):
                for label, w in zip(cur_labels[prev_k:cur_k],
                                    cur_w[prev_k:cur_k]):
                    cnt[label] += w
                    if cnt[label] > maxa:
                        maxa, maxy = cnt[label], label
                res[cur_k][t] += (maxy == y[test][i])
        for k in k_list:
            res[k][t] /= len(test)
    return dict(res)
Example #4
def knn_cross_val_score(X, y, k_list, score='accuracy', cv=None, **kwargs):
    if cv is None:
        cv = kfold(X.shape[0], 3)
    ans = {}
    for k in k_list:
        ans[k] = np.empty(len(cv))
    knn = KNNClassifier(k=k_list[-1], **kwargs)
    for index, fold in enumerate(cv):
        knn.fit(X[fold[0], :], y[fold[0]])
        curr_dist, curr_neighbors = knn.find_kneighbors(X=X[fold[1]], return_distance=True)
        if knn.weights:
            weights = 1 / (curr_dist + 1e-5)
        else:
            weights = np.ones(curr_neighbors.shape)
        classes = np.unique(y[fold[0]])
        # Walk k_list in descending order, truncating the cached neighbor
        # columns instead of re-querying for every k.
        for k in k_list[::-1]:
            curr_weights = weights[:, :k]
            curr_neighbors = curr_neighbors[:, :k]
            res = online_predict(curr_weights, y[fold[0]], k, curr_neighbors, classes, fold)
            ans[k][index] = (res == y[fold[1]]).mean()
    return ans
Example #5
def knn_cross_val_score(X, y, k_list, score, cv, **kwargs):
    if cv is None:
        cv = kfold(np.size(X, 0), 3)
    score_dict = dict()
    curr_ind = list()
    weight_flag = kwargs.get('weights', False)
    if weight_flag:
        curr_dists = list()
    curr_score = np.empty(len(cv))
    # Fit once per fold with the largest k; the smaller k values below reuse
    # the cached neighbor indices (and distances) by dropping columns.
    for j in range(len(cv)):
        classifier = KNNClassifier(k_list[-1], **kwargs)
        classifier.fit(X[cv[j][0]], y[cv[j][0]])
        y_knn = classifier.predict(X[cv[j][1]])
        if weight_flag:
            curr_d, curr_i = classifier.find_kneighbors(X[cv[j][1]], True)
            curr_dists.append(curr_d)
            curr_ind.append(curr_i)
        else:
            curr_ind.append(classifier.find_kneighbors(X[cv[j][1]], False))
        if score == 'accuracy':
            curr_score[j] = np.sum(y_knn == y[cv[j][1]]) / np.size(y_knn)
    score_dict[k_list[-1]] = curr_score
    for i in range(len(k_list) - 2, -1, -1):
        k_diff = k_list[i + 1] - k_list[i]
        curr_score = np.empty(len(cv))
        for j in range(len(cv)):
            curr_ind[j] = curr_ind[j][:, :-k_diff]
            y_ind = y[cv[j][0]][curr_ind[j]]
            y_knn = np.empty(np.size(y_ind, 0))
            if weight_flag:
                curr_dists[j] = curr_dists[j][:, :-k_diff]
                for k in range(np.size(y_ind, 0)):
                    y_knn[k] = np.argmax(
                        np.bincount(y_ind[k],
                                    weights=1 / (curr_dists[j][k] + 1e-5)))
            else:
                for k in range(np.size(y_ind, 0)):
                    y_knn[k] = np.argmax(np.bincount(y_ind[k]))
            if score == 'accuracy':
                curr_score[j] = np.sum(y_knn == y[cv[j][1]]) / np.size(y_knn)
        score_dict[k_list[i]] = curr_score
    return score_dict
Example #6
def knn_cross_val_score(X, y, k_list, score, cv, **kwargs):
    if cv is None:
        cv = kfold(X.shape[0], 2)
    result = {}

    max_k = k_list[-1]
    for i, (train, test) in enumerate(cv):
        clf = KNNClassifier(k=max_k, **kwargs)
        clf.fit(X[train], y[train])

        if clf.weights:
            dist, idx = clf.find_kneighbors(X[test], True)
        else:
            idx = clf.find_kneighbors(X[test], False)
        nearest_labels = y[train][idx]

        # Keep a running vote tally per test object: adding the k-th
        # neighbor's vote updates the prediction for every k in k_list
        # without recomputing the tallies from scratch.
        scores = [{} for _ in range(test.shape[0])]
        answers = np.zeros(test.shape[0], dtype=np.int32)
        for k in range(1, max_k + 1):
            for j in range(test.shape[0]):
                label = nearest_labels[j, k - 1]
                vote = 1 / (dist[j, k - 1] + 1e-5) if clf.weights else 1
                scores[j][label] = scores[j].get(label, 0) + vote
                answers[j] = max(scores[j], key=scores[j].get)

            if k in k_list:
                if k not in result:
                    result[k] = np.zeros(len(cv))
                result[k][i] = _accuracy(answers, y[test])
    return result
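_accuracy is another helper left undefined here; assuming it mirrors the accuracy metric used in the other examples, a one-line sketch:

def _accuracy(y_pred, y_true):
    return np.mean(y_pred == y_true)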
Example #7
def knn_cross_val_score_with_aug_for_train(X,
                                 y,
                                 new_objects_amount=10000,
                                 type_of_transformation='rotation',
                                 param_of_transformation=0,
                                 score='accuracy',
                                 k_list=None,
                                 cv=None,
                                 metric='cosine',
                                 strategy='brute',
                                 weights=True,
                                 k_folds=3
                                 ):
    if cv is None:
        cv = kfold(X.shape[0], k_folds)
    metric_per_k = {k: np.empty(0) for k in k_list}
    for fold in cv:
        knn = KNNClassifier(k_list[-1], metric=metric, strategy=strategy, weights=weights)
        X_train_aug, y_train_aug = \
            augmentation_tools.made_augmentation(X[fold[0]],
                                                 y[fold[0]],
                                                 new_objects_amount=new_objects_amount,
                                                 type_of_transformation=type_of_transformation,
                                                 param_of_transformation=param_of_transformation
                                                 )
        knn.fit(X_train_aug, y_train_aug)
        knn.find_kneighbors(X[fold[1]], return_distance=True)
        for k in k_list:
            knn.k = k
            y_valid = knn.predict_for_cv(X[fold[1]])
            if score == "accuracy":
                metric_per_k[k] = np.append(metric_per_k[k],
                                            accuracy(y_valid.astype(int),
                                                     y[fold[1]].astype(int)
                                                     )
                                            )
    return metric_per_k
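augmentation_tools.made_augmentation is external to this snippet. A rough sketch of what a rotation-based version might look like for flat 28x28 images, assuming scipy is available (the real module may differ):

import numpy as np
from scipy.ndimage import rotate

def made_augmentation(X, y, new_objects_amount=10000,
                      type_of_transformation='rotation',
                      param_of_transformation=0):
    # Append transformed copies of randomly chosen training images.
    idxs = np.random.choice(len(X), new_objects_amount)
    imgs = X[idxs].reshape(-1, 28, 28)
    if type_of_transformation == 'rotation':
        imgs = np.stack([rotate(img, param_of_transformation, reshape=False)
                         for img in imgs])
    X_aug = np.vstack([X, imgs.reshape(new_objects_amount, -1)])
    y_aug = np.concatenate([y, y[idxs]])
    return X_aug, y_aug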
Example #8
def knn_cross_val_score(x, y, k_list, score, cv, **kwargs):
    if cv is None:
        cv = kfold(x.shape[0], 3)
    answer = {}
    dop_answer = np.zeros((len(k_list), len(cv)))
    model = KNNClassifier(k=max(k_list), **kwargs)
    for j, (train_idx, test_idx) in enumerate(cv):
        train_set, train_target = x[train_idx], y[train_idx]
        test_set, test_target = x[test_idx], y[test_idx]
        model.fit(train_set, train_target)
        if kwargs.get('weights', False):
            dist, ind = model.find_kneighbors(test_set, return_distance=True)
            votes = 1 / (dist + 1e-5)
        else:
            ind = model.find_kneighbors(test_set, return_distance=False)
            votes = np.ones_like(ind, dtype=float)
        for i, k in enumerate(k_list):
            sub_answer = np.zeros(test_set.shape[0])
            k_votes = votes[:, :k]
            k_ind = ind[:, :k]
            for q in range(len(test_set)):
                # 10 classes are hard-coded here (digit labels 0-9).
                ind_array = np.zeros(10)
                for num in range(k_ind.shape[1]):
                    ind_array[train_target[k_ind[q][num]].astype(int)] += k_votes[q][num]
                sub_answer[q] = np.argmax(ind_array)
            dop_answer[i][j] = np.mean(test_target == sub_answer)
    for i, k in enumerate(k_list):
        answer[k] = dop_answer[i]
    return answer
Example #9
# Assumed imports for this snippet:
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import accuracy_score as ac_s
# from skimage.filters import gaussian
# from math import sqrt
trX, teX, trY, teY = train_test_split(data, target, test_size=1/7, random_state=666)

teX = teX.reshape(len(teX), 28, 28)
trX = trX.reshape(len(trX), 28, 28)

# Apply a Gaussian blur (sigma = sqrt(1.1)) to every image.
teX_blurred = np.empty(teX.shape)
trX_blurred = np.empty(trX.shape)

for enum in range(len(teX)):
    teX_blurred[enum] = gaussian(teX[enum], sqrt(1.1), preserve_range=True)

for enum in range(len(trX)):
    trX_blurred[enum] = gaussian(trX[enum], sqrt(1.1), preserve_range=True)

teX = teX_blurred.reshape(len(teX_blurred), 28 * 28)
trX = trX_blurred.reshape(len(trX_blurred), 28 * 28)

model = KNNClassifier(4, "my_own", "cosine", True)

model.fit(trX, trY)
result = model.predict(teX)

print("Accuracy of best method is:", ac_s(teY, result))

with open("save_file_4.txt", "w") as f:
    for item in result:
        f.write(str(item))
Example #10
def knn_cross_val_score(X, Y, k_list, score, cv, **kwargs):

    if len(X) != len(Y):
        raise ValueError("X and Y must have the same length")

    if cv is None:
        cv = kfold(len(X), 3)

    if score == "accuracy":
        score_func = accuracy_score
    else:
        raise ValueError("only 'accuracy' is supported")

    max_k = max(k_list)
    use_weights = kwargs.get("weights", False)

    result = {}
    each_acc = np.empty([len(k_list), len(cv)])

    for enumer, fold in enumerate(cv):

        trX, trY = X[fold[0]], Y[fold[0]].astype(int)
        teX, teY = X[fold[1]], Y[fold[1]].astype(int)

        model = KNNClassifier(k=max_k, **kwargs)
        model.fit(trX, trY)
        clusters = np.sort(np.unique(trY))
        clusters_amount = len(clusters)

        # Query once with the largest k; each k in k_list reuses a prefix
        # of the neighbor columns.
        distances, nearest = model.find_kneighbors(teX)

        test_target = np.empty(len(teX), dtype=int)

        for ite, it in enumerate(k_list):

            new_distances = distances[:, :it]
            new_nearest = nearest[:, :it]

            for enum in range(len(teX)):
                cluster_nb = np.zeros(clusters_amount)
                for numb in range(new_nearest.shape[1]):
                    vote = (weight_function(new_distances[enum, numb])
                            if use_weights else 1)
                    cluster_nb[trY[new_nearest[enum, numb]]] += vote

                test_target[enum] = clusters[np.argmax(cluster_nb)]

            each_acc[ite, enumer] = score_func(teY, test_target)

    for i in range(len(k_list)):
        result[k_list[i]] = each_acc[i]

    return result
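weight_function is not defined in this example; given the 1 / (dist + 1e-5) weighting used throughout the other examples, it is presumably:

def weight_function(distance):
    return 1 / (distance + 1e-5)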
fea_subsample_sizes = (10, 20, 100)
strategies = ('my_own', 'brute', 'kd_tree', 'ball_tree')
metrics = ('euclidean', )

times_fit = defaultdict(list)
times_predict = defaultdict(list)
times_fit_predict = defaultdict(list)
for fea_subsample_size in fea_subsample_sizes:
    fea_idxs = np.random.randint(0, x.shape[1], [fea_subsample_size])
    x_train_tmp, x_test_tmp = x_train[:, fea_idxs], x_test[:, fea_idxs]
    for strategy in strategies:
        for metric in metrics:
            delta_1 = []
            delta_2 = []
            for _ in range(1):  # a single timed run; increase for averaging
                model = KNNClassifier(k=5, strategy=strategy, metric=metric)
                st = time.time()
                model.fit(x_train_tmp, y_train)
                st_predict = time.time()
                model.find_kneighbors(x_test_tmp, return_distance=False)
                en = time.time()
                delta_1.append(st_predict - st)
                delta_2.append(en - st_predict)
            print("Stratery: ", strategy, " Metric: ", metric)
            print("Fit: ", list_mean(delta_1))
            print("Predict: ", list_mean(delta_2))
            print("Fit + Predict: ", list_mean(delta_1) + list_mean(delta_2))
            times_fit[fea_subsample_size].append(list_mean(delta_1))
            times_predict[fea_subsample_size].append(list_mean(delta_2))
            times_fit_predict[fea_subsample_size].append(
                list_mean(delta_1) + list_mean(delta_2))
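list_mean here is presumably a trivial averaging helper along these lines:

def list_mean(values):
    return sum(values) / len(values)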
shuffle(all_descriptors)
selected_10 = np.copy(all_descriptors[:10])

shuffle(all_descriptors)
selected_20 = np.copy(all_descriptors[:20])

shuffle(all_descriptors)
selected_100 = np.copy(all_descriptors[:100])

shuffle(all_descriptors)
selected_200 = np.copy(all_descriptors[:200])

print(trX[:, selected_10].shape)
print(trY.shape)

print("10 selected descriptors are {}\n20 - {}\n100 - {}".format(
    selected_10, selected_20, selected_100))

model = KNNClassifier(5, "my_own", "euclidean", False, 100)
model.fit(trX[:, selected_10], trY)
result_10 = model.predict(teX[:, selected_10])

model.fit(trX[:, selected_20], trY)
result_20 = model.predict(teX[:, selected_20])

model.fit(trX[:, selected_100], trY)
result_100 = model.predict(teX[:, selected_100])

model.fit(trX[:, selected_200], trY)
result_200 = model.predict(teX[:, selected_200])

print("Accuracy for: \n10 - {}\n20 - {}\n100 - {}\n200 - {}".format(
    ac_s(teY, result_10), ac_s(teY, result_20), ac_s(teY, result_100),
    ac_s(teY, result_200)))