def decision_tree_classifier(self):
        """
        Grid-search a decision tree over split criterion and tree depth,
        then score the resulting model on the train and test splits.

        :return: (train_score, test_score), each being whatever
                 ``dtc.evaluate`` yields with ``average='macro'``
        """
        # every keyword handed to the wrapper, in call order; the two
        # list-like entries are the candidate values to search over
        settings = {
            'x_train': self.x_train,
            'y_train': self.y_train,
            'cv': 3,
            'n_jobs': 6,
            'criterion': ['gini', 'entropy'],
            'max_depth': range(1, 100, 2),  # odd depths 1, 3, ..., 99
            'grid_search': True,
        }
        dtc = Decision_tree_classifier(**settings)

        # diagnostics intentionally left switched off:
        #   dtc.print_parameter_candidates()
        #   dtc.print_best_estimator()

        # training split is scored first, then the held-out split
        train_score = dtc.evaluate(data=self.x_train,
                                   targets=self.y_train,
                                   average='macro')
        test_score = dtc.evaluate(data=self.x_test,
                                  targets=self.y_test,
                                  average='macro')
        return train_score, test_score
    def decision_tree_classifier(self):
        """
        Grid-search a decision tree over depth and minimum leaf size, then
        score the resulting model on the train and test splits.

        :return: (train_score, test_score) as produced by ``dtc.evaluate``
        """
        depth_candidates = range(1, 14)     # 1 .. 13
        leaf_size_candidates = range(1, 9)  # 1 .. 8

        dtc = Decision_tree_classifier(
            x_train=self.x_train,
            y_train=self.y_train,
            cv=3,
            max_depth=depth_candidates,
            min_samples_leaf=leaf_size_candidates,
            grid_search=True,
        )

        # diagnostics intentionally left switched off:
        #   dtc.print_parameter_candidates()
        #   dtc.print_best_estimator()

        # training split is scored first, then the held-out split
        train_score = dtc.evaluate(data=self.x_train, targets=self.y_train)
        test_score = dtc.evaluate(data=self.x_test, targets=self.y_test)
        return train_score, test_score
    def decision_tree_classifier(self):
        """
        Fit the dtc on the training data, searching over:
            1) criterion  ('gini' or 'entropy')
            2) max_depth  (odd values 1, 3, ..., 99)

        The candidate values and the selected estimator are printed before
        the score is returned.

        :return: test accuracy of the dtc best model
        """
        # candidate values for the two searched hyper-parameters
        split_criteria = ['gini', 'entropy']
        depth_candidates = range(1, 100, 2)

        # build the validated model (cv=3, n_jobs=-1 go to the wrapper)
        dtc = Decision_tree_classifier(
            x_train=self.x_train,
            y_train=self.y_train,
            cv=3,
            n_jobs=-1,
            criterion=split_criteria,
            max_depth=depth_candidates,
            grid_search=True,
        )

        # report what was searched and what was selected
        dtc.print_parameter_candidates()
        dtc.print_best_estimator()

        # score on the held-out split
        test_accuracy = dtc.accuracy_score(x_test=self.x_test,
                                           y_test=self.y_test)
        return test_accuracy
    # Example #4
    # 0
    def decision_tree_classifier(self):
        """
        Fit the dtc on the training data, searching over:
            1) criterion
            2) max_depth

        Tuning history (from the original author's notes): a first, coarse
        pass searched criterion in ('gini', 'entropy') and max_depth in the
        powers of two 2, 4, ..., 64; it selected 'entropy' with max_depth 2.
        The grid used here is the narrowed one around that result.

        The candidate values and the selected estimator are printed before
        the score is returned.

        :return: test accuracy of the dtc best model
        """
        # narrowed grid around the coarse-search optimum (max_depth == 2)
        criterion = ('gini', 'entropy')
        best_depth = 2
        scale = 1
        # The builtin ``int`` replaces the ``np.int`` alias, which was
        # removed in NumPy 1.24 and raised AttributeError; the resulting
        # dtype is the same.
        # NOTE(review): ``stop`` is exclusive, so the candidates are [1, 2]
        # and depth 3 is never tried -- confirm this asymmetry is intended.
        max_depth = np.arange(start=best_depth - scale,
                              stop=best_depth + scale,
                              step=1,
                              dtype=int)

        # get the best validated model
        dtc = Decision_tree_classifier(x_train=self.x_train,
                                       y_train=self.y_train,
                                       cv=5,
                                       criterion=criterion,
                                       max_depth=max_depth,
                                       grid_search=True)

        # print all possible parameter values and the best parameters
        dtc.print_parameter_candidates()
        dtc.print_best_estimator()

        # return the accuracy score
        return dtc.accuracy_score(x_test=self.x_test, y_test=self.y_test)
    def decision_tree_classifier(self):
        """
        Grid-search a decision tree over split criterion and tree depth,
        then score the resulting model on the train and test splits.

        :return: (train_score, test_score), each being whatever
                 ``dtc.evaluate`` yields with ``average='micro'``
        """
        split_criteria = ['gini', 'entropy']
        depth_candidates = range(1, 100, 2)  # odd depths 1, 3, ..., 99

        dtc = Decision_tree_classifier(
            x_train=self.x_train,
            y_train=self.y_train,
            cv=3,
            n_jobs=-1,
            criterion=split_criteria,
            max_depth=depth_candidates,
            grid_search=True,
        )

        # diagnostics intentionally left switched off:
        #   dtc.print_parameter_candidates()
        #   dtc.print_best_estimator()

        # training split is scored first, then the held-out split
        train_score = dtc.evaluate(data=self.x_train,
                                   targets=self.y_train,
                                   average='micro')
        test_score = dtc.evaluate(data=self.x_test,
                                  targets=self.y_test,
                                  average='micro')
        return train_score, test_score
    # Example #6
    # 0
    def decision_tree_classifier(self):
        """
        Fit the dtc wrapper without any explicit search grid and score it
        on the train and test splits.

        Tuning history kept from the original notes:
            * coarse search -- criterion in ('gini', 'entropy'), max_depth
              in the powers of two 2 .. 64: best was 'entropy', depth 8
            * fine search -- max_depth in 4 .. 11 (8 +/- 4, upper bound
              exclusive): best was 'entropy', depth 7
        The tuned parameters scored much lower than the raw ones, so cv,
        criterion and max_depth are deliberately no longer passed.

        :return: ((accuracy_train, recall_train, precision_train),
                  (accuracy_test,  recall_test,  precision_test))
                 -- layout as documented by the original author; it is
                 whatever ``evaluate`` returns for each split
        """
        # only the training data and the grid_search flag are supplied
        model = Decision_tree_classifier(x_train=self.x_train,
                                         y_train=self.y_train,
                                         grid_search=True)

        # diagnostics intentionally left switched off:
        #   model.print_parameter_candidates()
        #   model.print_best_estimator()

        # training split is scored first, then the held-out split
        train_metrics = model.evaluate(data=self.x_train,
                                       targets=self.y_train)
        test_metrics = model.evaluate(data=self.x_test,
                                      targets=self.y_test)
        return train_metrics, test_metrics
    # Example #7
    # 0
    def decision_tree_classifier(self):
        """
        Fit the dtc on the training data with a single pinned setting
        (criterion 'entropy', max_depth 10), print its diagnostics plus
        test precision and recall, and return its test accuracy.

        Tuning history (from the original author's notes): a coarse search
        over criterion in ('gini', 'entropy') and max_depth in the powers
        of two 2 .. 64, followed by a narrowed search over max_depth 3 .. 4,
        both selected 'gini' with max_depth 4. Those grids are not used by
        the call below, so they are no longer built here. Building them
        also relied on ``np.int``, which was removed in NumPy 1.24 and
        made this method raise AttributeError.

        :return: test accuracy of the dtc best model
        """
        # get the best validated model; each tuple holds exactly one
        # candidate, so nothing is actually compared
        # NOTE(review): cv=1 is rejected by scikit-learn's k-fold splitters
        # if the wrapper forwards it to GridSearchCV -- confirm how the
        # wrapper interprets it.
        dtc = Decision_tree_classifier(
            x_train=self.x_train,
            y_train=self.y_train,
            cv=1,
            criterion=('entropy', ),
            max_depth=(10, ),
            # class_weight=({1: 8, 0: 2},) was tried and is left disabled
            grid_search=True)

        # print all possible parameter values and the best parameters
        dtc.print_parameter_candidates()
        dtc.print_best_estimator()

        # report precision and recall on the test split as percentages
        print('dtc precision: %.2f %%' %
              (dtc.precision(self.x_test, self.y_test) * 100))
        print('dtc recall: %.2f %%' %
              (dtc.recall(self.x_test, self.y_test) * 100))

        # return the accuracy score
        return dtc.accuracy_score(x_test=self.x_test, y_test=self.y_test)