def decision_tree_classifier(self):
    """Grid-search a decision tree and report macro-averaged scores.

    Searches criterion ('gini'/'entropy') and max_depth (odd values 1..99)
    with 3-fold cross-validation on 6 parallel jobs.

    :return: (train score, test score), each macro-averaged via evaluate().
    """
    # Candidate grid handed to GridSearchCV inside the wrapper.
    model = Decision_tree_classifier(
        x_train=self.x_train,
        y_train=self.y_train,
        cv=3,
        n_jobs=6,
        criterion=['gini', 'entropy'],
        max_depth=range(1, 100, 2),
        grid_search=True)

    # Uncomment to inspect the searched grid and the winning estimator:
    # model.print_parameter_candidates()
    # model.print_best_estimator()

    train_score = model.evaluate(data=self.x_train, targets=self.y_train,
                                 average='macro')
    test_score = model.evaluate(data=self.x_test, targets=self.y_test,
                                average='macro')
    return (train_score, test_score)
def decision_tree_classifier(self):
    """Grid-search a decision tree over max_depth and min_samples_leaf.

    Uses 3-fold cross-validation; depth candidates 1..13 and leaf-size
    candidates 1..8.

    :return: (train score, test score) from evaluate().
    """
    clf = Decision_tree_classifier(
        x_train=self.x_train,
        y_train=self.y_train,
        cv=3,
        max_depth=range(1, 14),
        min_samples_leaf=range(1, 9),
        grid_search=True)

    # Uncomment to inspect candidates / best estimator:
    # clf.print_parameter_candidates()
    # clf.print_best_estimator()

    return (clf.evaluate(data=self.x_train, targets=self.y_train),
            clf.evaluate(data=self.x_test, targets=self.y_test))
def decision_tree_classifier(self):
    """Grid-search a decision tree over criterion and max_depth.

    3-fold cross-validation across 6 parallel jobs; depth candidates are
    the odd values 1..99. Prints the candidate grid and best estimator
    before scoring.

    :return: (train score, test score) from evaluate().
    """
    search_grid = {
        'criterion': ['gini', 'entropy'],
        'max_depth': range(1, 100, 2),
    }
    best = Decision_tree_classifier(
        x_train=self.x_train,
        y_train=self.y_train,
        cv=3,
        n_jobs=6,
        criterion=search_grid['criterion'],
        max_depth=search_grid['max_depth'],
        grid_search=True)

    # Show what was searched and what won.
    best.print_parameter_candidates()
    best.print_best_estimator()

    return (best.evaluate(data=self.x_train, targets=self.y_train),
            best.evaluate(data=self.x_test, targets=self.y_test))
def decision_tree_classifier(self):
    """Fit a decision tree with the wrapper's default parameters.

    Earlier experiments tuned criterion and max_depth (first a coarse
    logspace search, then a narrow pass around max_depth=8 / criterion
    'entropy'), but the tuned settings scored worse than the defaults,
    so the tuning is left disabled below.

    :return: ((accuracy_train, recall_train, precision_train),
              (accuracy_test, recall_test, precision_test))
    """
    # Disabled tuning from previous runs — kept for reference:
    # criterion = ('gini', 'entropy')
    # max_depth = np.logspace(start=1, stop=6, base=2, num=6, dtype=np.int)
    # refinement: scale = 4; max_depth = np.arange(8-scale, 8+scale, 1)
    # best found: criterion='entropy', max_depth=7 — but worse than defaults.

    tree_model = Decision_tree_classifier(
        x_train=self.x_train,
        y_train=self.y_train,
        # cv=5,
        # criterion=criterion,
        # max_depth=max_depth,
        grid_search=True)

    # Uncomment to inspect candidates / best estimator:
    # tree_model.print_parameter_candidates()
    # tree_model.print_best_estimator()

    return (tree_model.evaluate(data=self.x_train, targets=self.y_train),
            tree_model.evaluate(data=self.x_test, targets=self.y_test))
def decision_tree_classifier(self):
    """Grid-search a decision tree over criterion and max_depth.

    A first coarse pass (criterion x logspace depths 2^1..2^6) found
    ('gini', 4) best; this pass narrows max_depth to [3, 4] around that
    result and re-searches with 5-fold cross-validation.

    :return: (train score, test score) of the best model from evaluate().
    """
    criterion = ('gini', 'entropy')
    scale = 1
    # FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24 —
    # it raised AttributeError on modern NumPy. The builtin `int` is the
    # documented replacement and yields the same integer dtype.
    max_depth = np.arange(start=4 - scale, stop=4 + scale, step=1,
                          dtype=int)

    # Fit the grid-searched model.
    dtc = Decision_tree_classifier(x_train=self.x_train,
                                   y_train=self.y_train,
                                   cv=5,
                                   criterion=criterion,
                                   max_depth=max_depth,
                                   grid_search=True)

    # Uncomment to inspect candidates / best estimator:
    # dtc.print_parameter_candidates()
    # dtc.print_best_estimator()

    # Return the accuracy scores on the train and test splits.
    return (dtc.evaluate(data=self.x_train, targets=self.y_train),
            dtc.evaluate(data=self.x_test, targets=self.y_test))