Example #1
0
        if y_true[i] == pos:
            TP += 1
        else:
            FP += 1
    FPR = FP / num_neg
    TPR = TP / num_pos
    if verbose: print("{},{}".format(FPR, TPR))


if __name__ == "__main__":

    # Command line: <k> <train_json> <test_json>.
    # With no arguments at all, fall back to the built-in defaults below.
    if len(sys.argv) == 1:
        k = 30
        train = "datasets/votes_train.json"
        test = "datasets/votes_test.json"
    elif len(sys.argv) >= 4:
        # Extra trailing arguments are ignored, as before.
        k = int(sys.argv[1])
        train = sys.argv[2]
        test = sys.argv[3]
    else:
        # A partial argument list used to crash with a bare IndexError;
        # exit with a usage message instead.
        sys.exit("usage: {} [k train_json test_json]".format(sys.argv[0]))

    # parse the json files for data
    X_train, y_train, meta_train = parse_json(train)
    X_test, y_test, meta_test = parse_json(test)

    # fit KNN on the training set and request confidences (rather than plain
    # predictions) for the test set
    knn = KNNClassifier(k=k)
    knn.fit(X_train, y_train, meta_train)
    y_conf = knn.predict(X_test, verbose=False, confidence=True)

    # hand the confidences to roc_curve with verbose output enabled
    roc_curve(y_test, y_conf, meta_test, verbose=True)
        max_k = int(sys.argv[1])
        train = str(sys.argv[2])
        val = str(sys.argv[3])
        test = str(sys.argv[4])

    # parse the json files for data
    X_train, y_train, meta_train = parse_json(train)
    X_val, y_val, meta_val = parse_json(val)
    X_test, y_test, meta_test = parse_json(test)

    # train classifier on TRAIN, predict on VAL (for k=1,2,...,max_k)
    acc = {}
    for k in range(1, max_k + 1):
        knn = KNNClassifier(k=k)
        knn.fit(X_train, y_train, meta_train)
        y_pred = knn.predict(X_val, verbose=False)

        acc[k] = accuracy_score(y_val, y_pred)
        print("{},{}".format(k, acc[k]))

    best_k = max(acc, key=lambda key: acc[
        key])  # note that 'max' always returns first value in case of ties
    print(best_k)

    # train on TRAIN + VAL, predict on TEST
    knn_best = KNNClassifier(k=best_k)

    X_train_val = pd.concat([X_train, X_val], ignore_index=True)
    y_train_val = pd.concat([y_train, y_val], ignore_index=True)

    knn_best.fit(X_train_val, y_train_val, meta_train)
Example #3
0
    # Create the k-NN object.
    knn = KNNClassifier(train_X[:, 1:], train_y[:, 1:], metric='euclidean')

    # Iterate through all possible values of k:
    for k in range(min_k, max_k + 1):

        knn.set_k(k)

        # 1. Perform KNN training and classify all the test points. In this step, you will
        # obtain a prediction for each test point.

        y_pred = []

        for i in range(test_X.shape[0]):
            result = knn.predict(test_X[i, 1:])
            if result:
                y_pred.append(result)
            else:
                knn.set_k(k - 1)
                y_pred.append(knn.predict(test_X[i, 1:]))
                knn.set_k(k)

        y_pred = np.array(y_pred)

        # 2. Compute performance metrics given the true-labels vector and the predicted-
        # labels vector (consider using the obtain_performance_metrics() function)

        perf = obtain_performance_metrics(test_y[:, 1], y_pred)

        # 3. Write the performance results to the output file, as indicated in the homework
import pandas as pd

if __name__ == "__main__":

    # Command line: <k> <train_json> <test_json>.
    # With no arguments at all, fall back to the built-in defaults below.
    if len(sys.argv) == 1:
        k = 10
        train = "datasets/votes_train.json"
        test = "datasets/votes_test.json"
    elif len(sys.argv) >= 4:
        # Extra trailing arguments are ignored, as before.
        k = int(sys.argv[1])
        train = sys.argv[2]
        test = sys.argv[3]
    else:
        # A partial argument list used to crash with a bare IndexError;
        # exit with a usage message instead.
        sys.exit("usage: {} [k train_json test_json]".format(sys.argv[0]))

    # parse the json files for data
    X_train, y_train, meta_train = parse_json(train)
    X_test, y_test, meta_test = parse_json(test)

    # The training-set size does not change between iterations.
    N = X_train.shape[0]

    # Learning curve: train on the first 10%, 20%, ..., 100% of the training
    # rows and print "<rows used>,<test accuracy>" for each fraction.
    for i in range(10):
        # Number of leading rows to use, rounded down.  This equals the old
        # inclusive end label `floor((i + 1) * N / 10 - 1)` plus one.
        n_rows = (i + 1) * N // 10

        # `.ix` was removed from pandas; take the leading rows positionally.
        # NOTE(review): matches the old label-based `.ix[0:ind]` slice provided
        # parse_json returns frames with the default 0..N-1 index -- confirm.
        X_subset = X_train.iloc[:n_rows]
        y_subset = y_train.iloc[:n_rows]

        knn = KNNClassifier(k=k)
        knn.fit(X_subset, y_subset, meta_train)
        y_pred = knn.predict(X_test, verbose=False)

        acc = accuracy_score(y_test, y_pred)

        print("{},{}".format(X_subset.shape[0], acc))