Esempio n. 1
0
    def k_nearest_neighbours(self):
        """
        Fit a k-nearest-neighbours model on the training data, searching over:
            1) n_neighbors,
            2) weights

        The candidates are passed to ``K_nearest_neighbours`` with
        ``grid_search=True`` and ``cv=5``.

        :return: tuple ``(train_result, test_result)`` holding whatever
                 ``knn.evaluate`` returns for the training split and for the
                 test split, in that order
        """
        # A previous, coarser search used
        #     n_neighbors = np.logspace(start=2, stop=6, base=2, num=5, dtype=int)
        #     weights = ('distance', 'uniform')
        # and reported n_neighbors=32 and weights='distance' as the best values.

        # Narrow the search around that best result: draw three candidates
        # from a normal distribution centred on 32. The seed is fixed so the
        # same candidates are drawn on every run.
        np.random.seed(0)
        # The builtin ``int`` replaces the ``np.int`` alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        n_neighbors = norm.rvs(loc=32, scale=10, size=3).astype(int)
        weights = ('distance', 'uniform')

        # get the best validated model
        knn = K_nearest_neighbours(x_train=self.x_train,
                                   y_train=self.y_train,
                                   cv=5,
                                   n_neighbors=n_neighbors,
                                   weights=weights,
                                   grid_search=True)

        # To inspect the search, call knn.print_parameter_candidates() and
        # knn.print_best_estimator() here.

        # evaluate on the training split first, then on the test split
        return (knn.evaluate(data=self.x_train, targets=self.y_train),
                knn.evaluate(data=self.x_test, targets=self.y_test))
Esempio n. 2
0
    def k_nearest_neighbours(self):
        """
        Fit a k-nearest-neighbours model over a range of neighbour counts.

        Every integer from 1 to 99 is offered as an ``n_neighbors`` candidate
        to ``K_nearest_neighbours`` with ``grid_search=True``, ``cv=3`` and
        ``n_jobs=-1``. The parameter candidates and the best estimator are
        reported through the model's ``print_*`` helpers before scoring.

        :return: tuple ``(train_result, test_result)`` holding whatever
                 ``knn.evaluate`` returns for the training split and for the
                 test split, in that order
        """
        candidate_ks = range(1, 100)  # every integer k in [1, 99]

        model = K_nearest_neighbours(
            x_train=self.x_train,
            y_train=self.y_train,
            cv=3,
            n_jobs=-1,
            n_neighbors=candidate_ks,
            grid_search=True,
        )

        model.print_parameter_candidates()
        model.print_best_estimator()

        # score the training split first, then the test split
        train_result = model.evaluate(data=self.x_train, targets=self.y_train)
        test_result = model.evaluate(data=self.x_test, targets=self.y_test)
        return train_result, test_result
    def k_nearest_neighbours(self):
        """
        Fit a k-nearest-neighbours model over neighbour counts and weightings.

        ``n_neighbors`` candidates are the integers 3 to 14 and ``weights``
        candidates are ``'uniform'`` and ``'distance'``; both are handed to
        ``K_nearest_neighbours`` with ``grid_search=True`` and ``cv=3``.

        :return: tuple ``(train_result, test_result)`` holding whatever
                 ``knn.evaluate`` returns for the training split and for the
                 test split, in that order
        """
        weighting_options = ['uniform', 'distance']
        candidate_ks = range(3, 15)  # 3, 4, ..., 14

        model = K_nearest_neighbours(
            x_train=self.x_train,
            y_train=self.y_train,
            cv=3,
            n_neighbors=candidate_ks,
            weights=weighting_options,
            grid_search=True,
        )

        # To inspect the search, call model.print_parameter_candidates() and
        # model.print_best_estimator() here.

        # score the training split first, then the test split
        train_result = model.evaluate(data=self.x_train, targets=self.y_train)
        test_result = model.evaluate(data=self.x_test, targets=self.y_test)
        return train_result, test_result
Esempio n. 4
0
    def k_nearest_neighbours(self):
        """
        Fit a k-nearest-neighbours model using the wrapper's random search.

        Every integer from 1 to 99 is offered as an ``n_neighbors`` candidate
        to ``K_nearest_neighbours`` with ``random_search=True`` and ``cv=3``.
        Both splits are then evaluated with ``average='micro'``.

        :return: tuple ``(train_result, test_result)`` holding whatever
                 ``knn.evaluate`` returns for the training split and for the
                 test split, in that order
        """
        candidate_ks = range(1, 100)  # every integer k in [1, 99]

        model = K_nearest_neighbours(
            x_train=self.x_train,
            y_train=self.y_train,
            cv=3,
            n_neighbors=candidate_ks,
            random_search=True,
        )

        # To inspect the search, call model.print_parameter_candidates() and
        # model.print_best_estimator() here.

        # score the training split first, then the test split
        train_result = model.evaluate(
            data=self.x_train,
            targets=self.y_train,
            average='micro',
        )
        test_result = model.evaluate(
            data=self.x_test,
            targets=self.y_test,
            average='micro',
        )
        return train_result, test_result
Esempio n. 5
0
    def k_nearest_neighbours(self):
        """
        Fit a k-nearest-neighbours model on the training data, searching over:
            1) n_neighbors
            2) weights

        The candidates are passed to ``K_nearest_neighbours`` with
        ``grid_search=True`` and ``cv=5``.

        :return: ((accuracy_train, recall_train, precision_train),
                  (accuracy_test,  recall_test,  precision_test))
                 per the original author's note; the exact contents are
                 whatever ``knn.evaluate`` returns -- TODO confirm
        """
        # Search history recorded by the author:
        #
        # Initial coarse search
        #     n_neighbors = np.logspace(start=1, stop=9, base=2, num=9, dtype=int)
        #     weights = ('distance', 'uniform')
        #   -> best n_neighbors: 64, best weights: 'distance'
        #
        # First refinement around that result
        #     n_neighbors = norm.rvs(loc=64, scale=32, size=20).astype(int)
        #     weights = ('distance', 'uniform')
        #   -> best n_neighbors: 69, best weights: 'distance'

        # Second refinement: scan the integers around 69. The interval is
        # half-open, so the candidates are 64, 65, ..., 73.
        scale = 5
        # The builtin ``int`` replaces the ``np.int`` alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        n_neighbors = np.arange(start=69 - scale,
                                stop=69 + scale,
                                step=1,
                                dtype=int)
        weights = ('distance', 'uniform')

        # get the best validated model
        knn = K_nearest_neighbours(x_train=self.x_train,
                                   y_train=self.y_train,
                                   cv=5,
                                   n_neighbors=n_neighbors,
                                   weights=weights,
                                   grid_search=True)

        # To inspect the search, call knn.print_parameter_candidates() and
        # knn.print_best_estimator() here.

        # evaluate on the training split first, then on the test split
        return (knn.evaluate(data=self.x_train, targets=self.y_train),
                knn.evaluate(data=self.x_test, targets=self.y_test))
Esempio n. 6
0
    def k_nearest_neighbours(self):
        """
        Fit a k-nearest-neighbours model over a range of neighbour counts.

        Every integer from 1 to 99 is offered as an ``n_neighbors`` candidate
        to ``K_nearest_neighbours`` with ``grid_search=True``, ``cv=23`` and
        ``n_jobs=-1``. Both splits are then evaluated with
        ``average='macro'``.

        :return: tuple ``(train_result, test_result)`` holding whatever
                 ``knn.evaluate`` returns for the training split and for the
                 test split, in that order
        """
        # candidate values handed to the wrapper's grid search
        candidate_ks = range(1, 100)  # every integer k in [1, 99]

        # build the cross-validated model
        model = K_nearest_neighbours(
            x_train=self.x_train,
            y_train=self.y_train,
            cv=23,
            n_jobs=-1,
            n_neighbors=candidate_ks,
            grid_search=True,
        )

        # To inspect the search, call model.print_parameter_candidates() and
        # model.print_best_estimator() here.

        # score the training split first, then the test split
        train_result = model.evaluate(
            data=self.x_train,
            targets=self.y_train,
            average='macro',
        )
        test_result = model.evaluate(
            data=self.x_test,
            targets=self.y_test,
            average='macro',
        )
        return train_result, test_result