Beispiel #1
0
 def test_minkowski_distance(self):
     """Verify the Minkowski distance (p=5) against precomputed values."""
     classifier = Knn(n_neighbors=3, p=5)
     classifier.fit(np.array(little_X), little_Y)
     distances = classifier._minkowski_distance(np.array([3, 4]))
     expected = [2.01234, 6.419382]
     assert np.allclose(distances, expected), "Minkowski Distance is not correct"
Beispiel #2
0
def knn_train_test(k, xTrain, yTrain, xTest, yTest):
    """Train a k-NN model on the training data and score it on the test set.

    Parameters
    ----------
    k : int
        The number of neighbors.
    xTrain : nd-array with shape n x d
        Training data.
    yTrain : structure with a 'label' column of length n
        Labels associated with the training data.
    xTest : nd-array with shape m x d
        Test data.
    yTest : structure with a 'label' column of length m
        Labels associated with the test data.

    Returns
    -------
    acc : float
        Accuracy of the trained k-NN model on the test data.
    """
    model = Knn(k)
    model.train(xTrain, yTrain['label'])
    # score the model on the held-out test set
    predicted = model.predict(xTest)
    return accuracy(predicted, yTest['label'])
Beispiel #3
0
def loopPcaKnn(loop=1):
    """Average 3-NN accuracy over `loop` PCA-projected iris splits.

    Each round: random 35-sample split, z-score normalisation, PCA down
    to two components, projection of both sets, then k-NN classification.
    Returns the mean accuracy across all rounds.
    """
    total = 0
    for _ in range(loop):
        data = IrisPCA('iris_data_set/iris.data')
        data.randomSplit(35)
        # normalisation statistics come from the training data
        data.calTrainMeanSd(data.train_data)
        data.zScoreNormalize(data.train_data)
        # project onto the top two principal components
        # (keyword spelled 'conponent' by the IrisPCA API)
        data.calProjectionMatrixW(number_of_conponent=2)
        data.zScoreNormalize(data.test_data)
        projected_train = data.getProjectedData(data.train_data)
        projected_test = data.getProjectedData(data.test_data)
        total += Knn().kNearestNeighbors(projected_train, projected_test)
    return total / loop
Beispiel #4
0
class PublicApi():
    """Small Flask API exposing a k-NN match-outcome predictor.

    POST /api/predict expects query args 'radiant_score', 'dire_score',
    'duration' and 'k'; responds with a JSON-ish result string.
    """

    def __init__(self):
        # Flask app with CORS enabled so browser clients can call the API.
        self.app = flask.Flask(__name__)
        CORS(self.app)
        self.app.config["DEBUG"] = True
        self.knn = Knn(3)

    def start(self):
        """Register the predict route and run the server (blocking call)."""
        @self.app.route('/api/predict', methods=['POST'])
        def api_id():
            logging.info(request.args)
            # BUG FIX: 'k' was read unconditionally below while only the
            # three score fields were checked, so a request that omitted
            # 'k' raised a KeyError (HTTP 500). Require all four fields.
            required = ('radiant_score', 'dire_score', 'duration', 'k')
            if all(field in request.args for field in required):
                radiant_score = int(request.args['radiant_score'])
                dire_score = int(request.args['dire_score'])
                duration = int(request.args['duration'])
                self.knn.set_k(int(request.args['k']))
                result = self.knn.predict([[radiant_score, dire_score, duration]])
                logging.info(result[0])
                logging.error(self.knn.k)
                if result[0] == 1:
                    text_result = 'Radiant wins!'
                else:
                    text_result = 'Dire wins!'

                self.knn.plot2D(radiant_score - dire_score, duration, text_result)
                return str('{"result": \"' + text_result + '\"}')
            else:
                return "Missed a field"
        self.app.run(host='0.0.0.0', port=8080)
Beispiel #5
0
def main():
    """Train a 3-NN classifier on the iris CSV and print its test accuracy."""
    col_names = [
        'sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'
    ]
    iris = pd.read_csv('./iris.data', header=None, names=col_names)
    # map each species name to an integer class label
    iris_class = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
    iris['species_num'] = [iris_class[name] for name in iris.species]
    features = iris.drop(['species', 'species_num'], axis=1).to_numpy()
    labels = iris.species_num.to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(features, labels,
                                                        test_size=0.2)
    classifier = Knn(3)
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)

    # count exact label matches on the held-out split
    correct = 0
    total = 0
    for guess, truth in zip(predictions, y_test):
        if guess == truth:
            correct += 1
        total += 1
    print("acc :", correct / total)
Beispiel #6
0
 def test_uniform_weight_with_0(self):
     """_distance_weights must pair each distance with 1/d, using 1 when d=0."""
     classifier = Knn(n_neighbors=3)
     dists = np.array([0, .3, 4])
     result = classifier._distance_weights(dists)
     expected = np.array([[1, 0], [1 / .3, .3], [1 / 4, 4]])
     assert np.allclose(
         result,
         expected), "distance_weights are not correct when we have distances of 0"
Beispiel #7
0
 def test_distance_weight(self):
     """_distance_weights must pair every nonzero distance d with weight 1/d."""
     classifier = Knn(n_neighbors=3)
     dists = np.array([2, .3, 4])
     result = classifier._distance_weights(dists)
     expected = np.array([[1 / 2, 2], [1 / .3, .3], [1 / 4, 4]])
     assert np.allclose(result, expected), "distance_weights are not correct"
Beispiel #8
0
def main():
    """Smoke-test the Metro graph helpers on the Moscow station lists."""
    metro = Metro("list_of_moscow_metro_stations.csv",
                  "list_of_moscow_metro_stations_changes.csv")

    # list the stations of one line, then fetch a line by its short code
    sokolnicheskaya = metro.get_line('Сокольническая')
    print([station.name for station in sokolnicheskaya.stations])
    print(metro.get_line('D3'))

    knn = Knn(metro, ['Окружная', 'Кофе'], 0, 0)
    print(knn.compute('Окружная'))
    print(metro.get_length('Ховрино', 'Сокол'))
    return 0
Beispiel #9
0
    def __init__(self):
        """Build the Tk window for the interactive KNN demo.

        Creates an empty Knn model, a clickable canvas, and a control row
        (reset / class / mode buttons plus the K entry), then enters the
        Tk main loop -- so this constructor blocks until the window closes.
        """
        # collected training points, their class labels, and drawn helper lines
        self.points = []
        self.labels = []
        self.lines = []
        self.knn = Knn()

        # fixed-size main window
        self.root = Tk()
        self.root.title("KNN Demo")
        self.root.geometry('800x520')
        self.root.resizable(False, False)

        # drawing surface; left-clicks are routed to clickCallback
        self.axis = Canvas(self.root, width=720, height=370)
        self.axis.grid(column=0, row=0, columnspan=5)
        self.axis.configure(bg='white')
        self.axis.bind('<Button 1>', self.clickCallback)
        # canvas item handles for plotted points and predictions
        self.axis_points = []
        self.axis_predictions = []

        # status message row
        # NOTE(review): debug_lbl and debug_msg are both gridded at
        # column=0, row=1, so the message overlays the label -- confirm
        # this overlap is intended.
        self.debug_lbl = Label(self.root, text='Mensaje ==>')
        self.debug_lbl.grid(column=0, row=1)

        self.debug_msg = Label(self.root, text='-')
        self.debug_msg.grid(column=0, row=1, columnspan=5)

        # reset button -> cleanHandler
        self.init_btn = Button(self.root,
                               text='Reiniciar',
                               command=self.cleanHandler)
        self.init_btn.grid(column=5, row=2)

        self.class_lbl = Label(self.root, text='Clase ==>')
        self.class_lbl.grid(column=0, row=2)

        # current class mark; classButtonHandler presumably cycles through
        # self.MARKS (defined elsewhere on the class) -- verify
        self.current_mark = self.MARKS[0]
        self.class_btn = Button(self.root,
                                text=self.current_mark,
                                command=self.classButtonHandler)
        self.class_btn.grid(column=1, row=2)

        self.mode_lbl = Label(self.root, text='MODO ==>')
        self.mode_lbl.grid(column=2, row=2)

        # current mode; modeButtonHandler presumably cycles self.MODES -- verify
        self.current_mode = self.MODES[0]
        self.mode_btn = Button(self.root,
                               text=self.current_mode,
                               command=self.modeButtonHandler)
        self.mode_btn.grid(column=3, row=2)

        self.k_lbl = Label(self.root, text='K ==>')
        self.k_lbl.grid(column=2, row=3)

        # NOTE(review): tkinter's Entry widget does not document a 'text'
        # option; this may not set a default of '3' and can raise TclError
        # on some Tk versions -- confirm on the target platform.
        self.k_txt = Entry(self.root, width=2, text='3')
        self.k_txt.grid(column=3, row=3)

        # blocks until the window is closed
        self.root.mainloop()
Beispiel #10
0
def icaKnnTest():
    """Project iris onto its two highest-energy ICA components and run k-NN.

    Plots the data before and after the ICA projection, prints the k-NN
    accuracy, and shows the figures.
    """
    data = IrisICA('iris_data_set/iris.data')
    data.plotIrisData('iris data before ica')
    data.applyIcaFromFullIris(number_components=4)
    # keep only the two most energetic components for classification
    energies = data.getSortedComponentEnergy()
    train_data, test_data = data.getTrainTestSet(energies[:2], train_size=0.7)
    data.plotIrisData('iris data after ica')
    print(Knn().kNearestNeighbors(train_data, test_data))
    plt.show()
Beispiel #11
0
    def test_k_5(self):
        """Our Knn must match sklearn's classifier for k=5 (euclidean)."""
        reference = KNeighborsClassifier(n_neighbors=5)
        reference.fit(X_train, y_train)
        expected = reference.predict(X_test)

        ours = Knn(n_neighbors=5)
        ours.fit(X_train, y_train)
        actual = ours.predict(X_test)

        assert np.alltrue(expected == actual), "Error testing knn with k=5"
Beispiel #12
0
    def test_k_5_distance_minkowski(self):
        """Our Knn must match sklearn for k=5 with minkowski distance, p=3."""
        reference = KNeighborsClassifier(n_neighbors=5, metric="minkowski", p=3)
        reference.fit(X_train, y_train)
        expected = reference.predict(X_test)

        ours = Knn(n_neighbors=5, metric="minkowski", p=3)
        ours.fit(X_train, y_train)
        actual = ours.predict(X_test)

        assert np.alltrue(
            expected == actual), "Error testing knn (minkowski) with k=5 and p=3"
Beispiel #13
0
    def test_distance_weight_2(self):
        """Our Knn must match sklearn for k=5 with inverse-distance weights."""
        reference = KNeighborsClassifier(n_neighbors=5, weights='distance')
        reference.fit(X_train, y_train)
        expected = reference.predict(X_test)

        ours = Knn(n_neighbors=5, weights='distance')
        ours.fit(X_train, y_train)
        actual = ours.predict(X_test)

        assert np.alltrue(
            expected == actual), "Error testing knn with k=5 and weights=distance"
Beispiel #14
0
def icaKnnLoop(loop=10):
    """Average k-NN accuracy over `loop` independent ICA projections.

    Each round re-runs ICA on the full iris data, keeps the two most
    energetic components, classifies, and prints the round's accuracy.
    Returns the mean accuracy.
    """
    total = 0
    for round_number in range(loop):
        data = IrisICA('iris_data_set/iris.data')
        data.applyIcaFromFullIris(number_components=4)
        energies = data.getSortedComponentEnergy()
        train_data, test_data = data.getTrainTestSet(
            energies[:2], train_size=0.7)
        round_accuracy = Knn().kNearestNeighbors(train_data, test_data)
        total += round_accuracy
        print('round ', round_number + 1, ' accuracy: ', round_accuracy)
    return total / loop
def loopFaKnn(loop=1):
    """Average k-NN accuracy over `loop` factor-analysis projections.

    Each round: random 35-sample split, 2-component FA projection of both
    sets, then k-NN classification. Returns the mean accuracy.
    """
    total = 0
    for _ in range(loop):
        data = IrisFA('iris_data_set/iris.data')
        data.randomSplit(35)
        data.getProjectionMatrixW(k=2)
        projected_train = data.getProjectedData(data.train_data)
        projected_test = data.getProjectedData(data.test_data)
        total += Knn().kNearestNeighbors(projected_train, projected_test)
    return total / loop
Beispiel #16
0
def run_knn(n, p, k):
    """Time a Knn run on random data and print "n, p, k, cost, time".

    Parameters: n total examples (split 80/20 into train/dev), p features,
    k neighbors. Prints a CSV-style line with the theoretical cost model
    and the measured wall-clock time; returns None.
    """
    # initialize random data here
    # (left undefined when n or p is not positive, matching the original
    # behaviour -- the Knn call below would then raise NameError)
    if n > 0 and p > 0:
        X_train = np.random.rand(int(n * .8), p)
        y_train = np.concatenate((np.zeros(int(
            (n / 2) * .8)), np.ones(int((n / 2) * .8))))
        X_dev = np.random.rand(int(n * .2), p)

    # BUG FIX: the elapsed time used to be computed by splitting
    # str(datetime.now().time()) on ':' and subtracting the pieces, which
    # breaks across an hour or midnight rollover (negative results).
    # Subtracting datetime objects yields a timedelta directly.
    start = datetime.datetime.now()
    knn = Knn(k, X_dev, X_train, y_train)
    end = datetime.datetime.now()
    time_diff = (end - start).total_seconds()

    # KNN cost = run() --> get_neighbors() + get_majority_vote()
    # = (# of X_dev examples) *
    #   { [ (# of X_train * ) + (sort # of X_train) + (k)] + [ (k*2) + (k) ] }
    len_X_train = len(X_train)
    cost = len(X_dev) * (((len_X_train * (2 + 4 + p * 2)) +
                          (len_X_train * math.log(len_X_train)) + (k)) +
                         (k * 2 + k))

    print(
        str(n) + ", " + str(p) + ", " + str(k) + ", " + str(int(cost)) + ", " +
        str(time_diff))
Beispiel #17
0
def get_best_k_for_data_error_rate_normal(data_set_path, number_of_runs):
    """Search k in 1..10 for classification k-NN and plot the error curve.

    Runs the cross-validation experiment `number_of_runs` times, sums the
    per-k error rates across runs, averages them, and plots the resulting
    validation curve.
    """
    k_list = range(1, 11)
    summed_error = [0] * len(k_list)

    for _ in range(number_of_runs):
        # cross-validated error rate for every candidate k in this run
        run_errors = []
        for k in k_list:
            model = Knn(k, True)
            run_errors.append(Experiments.run_classification_experiment(
                data_set_path, k, model))
        summed_error = [previous + current
                        for previous, current in zip(summed_error, run_errors)]

    averaged_error = [value / number_of_runs for value in summed_error]

    generate_validation_curves(k_list,
                               averaged_error,
                               "Average Error Rate",
                               title="Number of K's vs Average Error Rate",
                               x_axis_label="# of k's",
                               y_axis_label="Error Rate")
Beispiel #18
0
def main():
    """Classify MNIST with Knn, print the accuracy, and plot 20 predictions."""
    X_train, y_train, X_test, y_test = load_mnist()

    model = Knn()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # a zero difference means the prediction matched the label
    correct = sum((y_test - y_pred) == 0)

    print('==> correct:', correct)
    print('==> total:', len(X_test))
    print('==> acc:', correct / len(X_test))

    # show the first 20 test digits annotated with their predicted label
    axes = plt.subplots(nrows=4, ncols=5, sharex='all',
                        sharey='all')[1].flatten()
    for idx in range(20):
        axes[idx].set_title(y_pred[idx])
        axes[idx].imshow(X_test[idx], cmap='Greys', interpolation='nearest')
    axes[0].set_xticks([])
    axes[0].set_yticks([])
    plt.tight_layout()
    plt.show()
def processImage(request):
    """Django view (Python 2): classify a fixed sample image with several
    models (KNN, SVM, ANN) and render the ensemble's decision.

    NOTE(review): `request` is unused beyond the view signature and the
    image path is hard-coded -- confirm whether an uploaded image was
    meant to be processed instead.
    """
    # normalise path separators so the path also works on Windows
    source = str(
        os.path.join(os.path.dirname(__file__),
                     '../testsamples/cleantha.png').replace('\\', '/'))
    imagefilter = ImageFilter(source)
    # features WITHOUT contour extraction
    vectorTarget = imagefilter.getVectorNormal()
    arrayTarget = imagefilter.getArrayNormal()
    # features WITH contour extraction (alternative, currently disabled)
    #vectorTarget = imagefilter.getVectorNew()
    #arrayTarget = imagefilter.getArrayNew()

    knnmachine = Knn(3)
    knnmachine.test = vectorTarget
    knnresult, flag = knnmachine.getNumber()

    svmresult = svmdecision(arrayTarget)
    annresult = anndecision(vectorTarget)
    #pcaresult = pcadecision(vectorTarget)
    #ldaresult = ldadecision(vectorTarget)

    # perceived reliability ranking: svm > knn > ann

    #resultList = [knnresult, svmresult, annresult, pcaresult, ldaresult]
    resultList = [knnresult, svmresult, annresult]
    print resultList
    print flag
    result = -1
    # ensemble vote over the three model outputs
    if len(list(set(resultList))) == len(resultList):
        # all three disagree: fall back to the integer mean
        #result = resultList[0]
        result = int(sum(resultList) / len(resultList))
    elif resultList[0] != resultList[1] and resultList[1] == resultList[2]:
        # svm and ann agree against knn; trust knn only when its flag is set
        if flag:
            result = resultList[0]
        else:
            result = max(set(resultList), key=resultList.count)
    elif resultList[0] == resultList[2] and resultList[0] != resultList[1]:
        # knn and ann agree against svm
        if flag:
            result = resultList[0]
        else:
            result = resultList[1]
    else:
        # at least a majority agrees: take the most common answer
        result = max(set(resultList), key=resultList.count)
    return render_to_response("uploadnew.html", {'result': '%d' % int(result)})
Beispiel #20
0
def loopLdaKnn(loop=1):
    """Average 3-NN accuracy over `loop` LDA-projected iris splits.

    Each round: random 35-sample split, LDA transform built from the
    training data, projection of both sets, then k-NN classification.
    Prints each round's accuracy and returns the mean.
    """
    accuracy = 0
    for i in range(loop):
        iris_data = IrisLDA('iris_data_set/iris.data')
        iris_data.randomSplit(35)
        iris_data.getMeansForEachClass(iris_data.train_data)
        iris_data.getScatterMatrices()
        iris_data.getTranformMatrixW()
        new_train_data = iris_data.getProjectedData(iris_data.train_data)
        new_test_data = iris_data.getProjectedData(iris_data.test_data)
        knn = Knn()
        # BUG FIX: kNearestNeighbors was called twice per round (once for
        # the printout and once for the accumulator), doubling the work and
        # -- should the classifier not be deterministic -- printing a value
        # different from the one summed. Compute it once and reuse it.
        round_accuracy = knn.kNearestNeighbors(new_train_data, new_test_data)
        print("Round ", i + 1, " 3-NN accuracy: ",
              format(round_accuracy, ".3f"))
        accuracy += round_accuracy
    return accuracy / loop
def processImage(request):
    """Django view (Python 2): classify a fixed sample image with several
    models (KNN, SVM, ANN) and render the ensemble's decision.

    NOTE(review): `request` is unused beyond the view signature and the
    image path is hard-coded -- confirm whether an uploaded image was
    meant to be processed instead.
    """
    # normalise path separators so the path also works on Windows
    source = str(os.path.join(os.path.dirname(__file__), '../testsamples/cleantha.png').replace('\\', '/'))
    imagefilter = ImageFilter(source)
    # features WITHOUT contour extraction
    vectorTarget = imagefilter.getVectorNormal()
    arrayTarget = imagefilter.getArrayNormal()
    # features WITH contour extraction (alternative, currently disabled)
    #vectorTarget = imagefilter.getVectorNew()
    #arrayTarget = imagefilter.getArrayNew()

    knnmachine = Knn(3)
    knnmachine.test = vectorTarget
    knnresult, flag = knnmachine.getNumber()

    svmresult = svmdecision(arrayTarget)
    annresult = anndecision(vectorTarget)
    #pcaresult = pcadecision(vectorTarget)
    #ldaresult = ldadecision(vectorTarget)
    
    # perceived reliability ranking: svm > knn > ann

    #resultList = [knnresult, svmresult, annresult, pcaresult, ldaresult]
    resultList = [knnresult, svmresult, annresult]
    print resultList
    print flag
    result = -1
    # ensemble vote over the three model outputs
    if len(list(set(resultList))) == len(resultList):
        # all three disagree: fall back to the integer mean
        #result = resultList[0]
        result = int(sum(resultList)/len(resultList))
    elif resultList[0] != resultList[1] and resultList[1] == resultList[2]:
        # svm and ann agree against knn; trust knn only when its flag is set
        if flag:
            result = resultList[0]
        else:
            result = max(set(resultList), key=resultList.count)
    elif resultList[0] == resultList[2] and resultList[0] != resultList[1]:
        # knn and ann agree against svm
        if flag:
            result = resultList[0]
        else:
            result = resultList[1]
    else:
        # at least a majority agrees: take the most common answer
        result = max(set(resultList), key=resultList.count)
    return render_to_response("uploadnew.html", {'result': '%d' % int(result)})
Beispiel #22
0
def main():
    """Build a Knn from the diabetes CSV, set k=20, and request its
    confusion matrix."""
    classifier = Knn("diabetes.csv")
    # evaluate with 20 neighbours
    classifier.set_k(20)
    classifier.get_confusion_matrix()
    def clasificate_iris_to_test(self, sl, sw, pl, pw, n_neighbours):
        """Classify a flower whose measurements arrive as strings.

        Each measurement must parse as a non-negative decimal number no
        greater than 8, and n_neighbours as an integer; otherwise False
        is returned without running the classifier. On success the k-NN
        test and plot are run and True is returned.
        """
        def is_number(text):
            # a decimal string with at most one '.' (e.g. '1.5' or '7')
            return text.replace('.', '', 1).isdigit()

        if not (is_number(pw) and is_number(pl) and is_number(sw)
                and is_number(sl) and n_neighbours.isdigit()):
            return False

        pw, pl, sw, sl = float(pw), float(pl), float(sw), float(sl)
        n_neighbours = int(n_neighbours)

        # all measurements must lie within [0, 8]
        if not all(0 <= value <= 8 for value in (sl, sw, pl, pw)):
            return False

        sample = np.array([[pw, pl, sw, sl]])
        knn = Knn(n_neighbours)
        knn.knn_test(sample, self.data, self.target)
        knn.save_plot(sample, self.data, self.target)
        return True
Beispiel #24
0
def main():
    """Fit Knn on MNIST, report the test accuracy, and show 20 predictions."""
    X_train, y_train, X_test, y_test = load_mnist()

    classifier = Knn()
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    # a zero difference means the prediction matched the label
    correct = sum((y_test - y_pred) == 0)

    print('==> correct:', correct)
    print('==> total:', len(X_test))
    print('==> acc:', correct / len(X_test))

    # plot the first 20 test images labelled with their predictions
    panels = plt.subplots(nrows=4, ncols=5, sharex='all',
                          sharey='all')[1].flatten()
    for position in range(20):
        panels[position].set_title(y_pred[position])
        panels[position].imshow(X_test[position], cmap='Greys',
                                interpolation='nearest')
    panels[0].set_xticks([])
    panels[0].set_yticks([])
    plt.tight_layout()
    plt.show()
Beispiel #25
0
Datei: main.py Projekt: mles2/UM
def display_f_test(inputs_from_feature_selection, dataset):
    """Print pairwise combined 5x2cv F-test results (f, p) for NN/KNN/SVM.

    Each classifier pair is built fresh right before its test, and every
    test uses random_seed=1 so the folds are reproducible.
    """
    print("ftest results: (f, p) =")
    comparisons = (
        (" NN, KNN: ", lambda: NeuralNet(10000).clf, lambda: Knn(2).clf),
        (" NN, SVM: ", lambda: NeuralNet(10000).clf, lambda: Svm().clf),
        (" KNN, SVM: ", lambda: Knn(2).clf, lambda: Svm().clf),
    )
    for label, make_first, make_second in comparisons:
        print(label,
              combined_ftest_5x2cv(make_first(),
                                   make_second(),
                                   inputs_from_feature_selection,
                                   dataset.target,
                                   random_seed=1))
    def run(self, x, nbit, resolution, error):
        """Load the pre-generated image arrays for `nbit`/`resolution`,
        optionally inject pixel bit errors, and dispatch to the model
        selected by `x` (1=CNN, 2=SVM, 3=KNN, 4=Caliente).

        Interactive: prompts on stdin for generations / batch size / k.
        NOTE(review): values of `x` outside 1-4 silently do nothing.
        """
        # dataset files follow ./data/<n>bit/<n>bit_<r>x<r>_{test,train}_images.npy
        nbits = str(nbit)
        test_data = np.load("./data/" + nbits + "bit" + "/" + nbits + "bit" +
                            "_" + str(resolution) + "x" + str(resolution) +
                            "_test_images.npy")
        train_data = np.load("./data/" + nbits + "bit" + "/" + nbits + "bit" +
                             "_" + str(resolution) + "x" + str(resolution) +
                             "_train_images.npy")

        x = int(x)
        # test data always gets the error injection; training data only for
        # the SVM/KNN branches below (the CNN branch trains on clean data)
        if (error != 0):
            test_data = np.array(
                [self.pixel_bit_error(error, i, nbit) for i in test_data])

        if (x == 1):
            generations = input("enter the number of generations")
            batchSize = input("enter the size of each batch")
            generations = int(generations)
            batchSize = int(batchSize)
            Jesus = Cnn()
            Jesus.run(train_data, test_data, resolution, error, generations,
                      batchSize)
        if (x == 2):
            if (error != 0):
                train_data = np.array(
                    [self.pixel_bit_error(error, i, nbit) for i in train_data])
            Jesus = Svm()
            Jesus.run(train_data, test_data, resolution)
        if (x == 3):
            if (error != 0):
                train_data = np.array(
                    [self.pixel_bit_error(error, i, nbit) for i in train_data])
            k = input("k ?")
            k = int(k)
            Jesus = Knn(k)
            Jesus.run(train_data, test_data, resolution)
        if (x == 4):
            Jesus = Caliente([], error)
            batchSize = input("enter the size of each batch")
            generations = input("enter the number of generations")
            generations = int(generations)
            batchSize = int(batchSize)
            Jesus.run(train_data, test_data, resolution, generations,
                      batchSize)
Beispiel #27
0
class CondensedNN:
    """Condensed nearest-neighbour model builder (Hart's condensing)."""

    def __init__(self):
        """
        Use k-NN with k=1 to find the closest point in Z for condensing.
        """
        self.knn = Knn(1, True)

    def condense(self, training_set):
        """
        Create the condensed model from the training set.

        Start with an empty set Z.
        While Z continues to change, repeat the following:
            For each data point x' in the training set, find the point x''
            in Z that is closest to x'.
            If the labels of x' and x'' differ, add x' to Z.

        :param training_set: data points to select the condensed model from.
        :return: list of data points, the condensed model
        """
        # shuffle so the condensed set does not depend on input order
        random.shuffle(training_set)

        z = []
        previous_z = None

        # iterate until a full pass adds no new points to Z
        while z != previous_z:
            # snapshot Z so changes made this pass are detected
            previous_z = copy.deepcopy(z)
            for datapoint in training_set:
                if not z:
                    # seed Z with the first point; the nearest-neighbour
                    # query below then finds the point itself (same label),
                    # so it is not appended twice
                    z.append(datapoint)

                nearest_z_class = self.find_nearest_z_class(z, datapoint)
                # datapoint[-1] is the label; keep points Z misclassifies
                if datapoint[-1] != nearest_z_class:
                    z.append(datapoint)
        return z

    def find_nearest_z_class(self, z, datapoint):
        """
        Use the k-NN code with k = 1 to find the closest point in Z to the data point in question.
        :param z: Selected data points for condensed model
        :param datapoint: data point to query in Z
        :return: returns the label of the closest point
        """
        return self.knn.learn(z, datapoint)
Beispiel #28
0
def _print_resultados(nombre, auc, t_entrenamiento, t_clasificacion):
    """Print the standard results block for one classifier, matching the
    original Spanish output format exactly."""
    print("-------------------------------------------")
    print("Resultados de " + nombre + ":")
    print("")
    print("AUC = " + str(auc))
    print("Tiempo promedio de entrenamiento = " + str(t_entrenamiento))
    print("Tiempo promedio de clasificacion = " + str(t_clasificacion))
    print("-------------------------------------------")
    print("")


def main():
    """Evaluate KNN, MLP, KMeans and OCKRA with Examen2 and print each
    classifier's AUC plus average training/classification times.

    Refactored: the four byte-identical print blocks are now a single
    private helper; each classifier is constructed right before it runs,
    preserving the original construction order.
    """
    examen = Examen2()
    for nombre, fabrica in (("KNN", Knn), ("MLP", MLP),
                            ("KMeans", KM), ("OCKRA", OCKRA)):
        auc, tEntrenamiento, tClasificacion = examen.runClasificator(fabrica())
        _print_resultados(nombre, auc, tEntrenamiento, tClasificacion)
    def run(self, alg, bean_list, filename="img.png", weighted=False):
        """Train and test `alg` for every configured k, then plot hit rate.

        For each k in self.k_values: trains `alg` on each train fold,
        tests it on each test fold, records the accuracy, and updates the
        running mean; finally plots hit-rate-per-k to `filename`.

        NOTE(review): `accuracy` is never cleared between k values, so
        mean_accuracy() sees results from earlier k's as well -- confirm
        this accumulation is intended.
        """
        accuracy = []
        #self.set_train_test_set(generator,bean_list)
        for k in self.k_values:
            for train in self.train_set:
                # Train
                alg.train([bean_list[a] for a in train])
            for test in self.test_set:
                # Test
                result = alg.teste([bean_list[a] for a in test], k=k)

                # Result
                accuracy.append(Knn.accuracy(result))
                print("acuracy knn {0} with k {1}".format(accuracy[-1], k))

            self.mean_accuracy(accuracy)

        # Plot the graphic
        Visualization.hit_rate_per_k(self.mean_accuracy_list, self.k_values,
                                     filename, weighted)
Beispiel #30
0
def run_knn(n, p, k, parallel, num_procs):
    """Benchmark serial vs parallel Knn on random data and print a CSV row.

    Generates an 80/20 train/dev split of n random p-dimensional examples
    (seeded for reproducibility), times the classifier construction, then
    prints: n, p, k, theoretical cost, wall time, parallel flag,
    process count, and dev accuracy.
    """
    # initialize random data here
    # (left undefined when n or p is not positive, as in the original)
    if n > 0 and p > 0:
        np.random.seed(42)
        X_train = np.random.rand(int(n * .8), p)
        y_train = np.concatenate((np.zeros(int((n / 2) * .8)),
                                  np.ones(int((n / 2) * .8))))
        X_dev = np.random.rand(int(n * .2), p)
        y_dev = np.concatenate((np.zeros(int((n / 2) * .2)),
                                np.ones(int((n / 2) * .2))))

    start = time.time()
    if parallel:
        knn = KnnParallel(k, X_dev, X_train, y_train, num_procs)
    else:
        knn = Knn(k, X_dev, X_train, y_train)
    end = time.time()
    time_diff = end - start

    # dev-set accuracy of the precomputed predictions
    predictions = knn.predictions
    correct = sum(1 for guess, actual in zip(predictions, y_dev)
                  if int(guess) == int(actual))
    accuracy = correct / len(predictions)

    # theoretical KNN cost = run() --> get_neighbors() + get_majority_vote()
    # = (# of X_dev examples) *
    #   { [ (# of X_train * ) + (sort # of X_train) + (k)] + [ (k*2) + (k) ] }
    len_X_train = len(X_train)
    cost = int(len(X_dev) * (((len_X_train * (2 + 4 + p * 2)) +
                              (len_X_train * math.log(len_X_train)) + k) +
                             (k * 2 + k)))

    print("{}, {}, {}, {}, {}, {}, {}, {}".format(n, p, k, cost, time_diff,
                                                  parallel, num_procs,
                                                  accuracy))
Beispiel #31
0
def get_best_k_for_data_mse(data_set_path, number_of_runs):
    """Search k in 1..10 for regression k-NN and plot the average MSE curve.

    Runs the cross-validation experiment `number_of_runs` times, sums the
    per-k mean squared errors across runs, averages them, and hands the
    resulting curve to generate_validation_curves.
    """
    k_list = range(1, 11)
    summed_mse = [0] * len(k_list)

    for _ in range(number_of_runs):
        # cross-validated MSE for every candidate k in this run
        run_mse = []
        for k in k_list:
            model = Knn(k, False)
            run_mse.append(Experiments.run_experiment_regression(
                data_set_path, k, model))
        # fold this run's results into the running totals
        summed_mse = [previous + current
                      for previous, current in zip(summed_mse, run_mse)]

    # average each k's total over the number of runs
    averaged_mse = [value / number_of_runs for value in summed_mse]

    generate_validation_curves(k_list,
                               averaged_mse,
                               "Average Mean Squared Error",
                               title="Number of K's vs AMSE",
                               x_axis_label="# of k's",
                               y_axis_label="MSE")
Beispiel #32
0
     ignore_closest = False
     
 debug = False
 dummy = False
 
 print('Reading model..')
 if not dummy:
     model_reader = ModelReader(model_params_filename)
     model = model_reader.model
 else:
     model = DummyContextModel()
 dataset_reader = DatasetReader(model)
 
 print('Reading train dataset..')
 train_set, train_key2ind, train_ind2key = dataset_reader.read_dataset(train_filename, train_filename+'.key', True, isolate_target_sentence)
 knn = Knn(k, train_set, train_key2ind)
 
 print('Reading test dataset..')
 test_set, test_key2ind, test_ind2key = dataset_reader.read_dataset(test_filename, test_filename+'.key', False, isolate_target_sentence)
 
 print('Starting to classify test set:')
 with open(result_filename, 'w') as o:
     for ind, key_set in enumerate(test_set):
         key = test_ind2key[ind]
         if debug:
             print('KEY:', key)
             print()
         for instance_id, vec, text in zip(key_set.instance_ids, key_set.context_m, key_set.contexts_str):
             if debug:
                 print('QUERY:', text.strip())
             result = knn.classify(key, vec, ignore_closest, debug)