def perform(self):
    """Run the AdaBoost (SAMME) experiment pair for this dataset.

    Grid-searches estimator count and base-tree depth for a regularized
    booster, then runs a deliberately overfit booster (very deep base
    trees) to produce iteration learning curves only.
    """
    # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/Boosting.py
    # Fix: removed unused local `alphas` (computed but never referenced) and
    # its stale trailing comment.
    max_depths = np.arange(1, 41, 1)

    # Same seed is shared so the regular and overfit runs are comparable.
    base = learners.DTLearner(criterion='gini', class_weight='balanced',
                              random_state=self._details.seed)
    of_base = learners.DTLearner(criterion='gini', class_weight='balanced',
                                 random_state=self._details.seed)
    booster = learners.BoostingLearner(algorithm='SAMME', learning_rate=1,
                                       base_estimator=base,
                                       random_state=self._details.seed)
    of_booster = learners.BoostingLearner(algorithm='SAMME', learning_rate=1,
                                          base_estimator=of_base,
                                          random_state=self._details.seed)

    # TODO: No 90 here?
    params = {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 45, 60, 80, 100],
              'Boost__base_estimator__max_depth': max_depths}
    iteration_params = {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]}
    # Overfit configuration: effectively uncapped tree depth, fixed estimator count.
    of_params = {'Boost__base_estimator__max_depth': 100, 'Boost__n_estimators': 50}
    complexity_param = {'name': 'Boost__n_estimators', 'display_name': 'Estimator count',
                        'x_scale': 'log',
                        'values': [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]}

    experiments.perform_experiment(self._details.ds, self._details.ds_name,
                                   self._details.ds_readable_name, booster, 'Boost', 'Boost',
                                   params, complexity_param=complexity_param,
                                   seed=self._details.seed, threads=self._details.threads,
                                   verbose=self._verbose)
    experiments.perform_experiment(self._details.ds, self._details.ds_name,
                                   self._details.ds_readable_name, of_booster, 'Boost_OF',
                                   'Boost', of_params, seed=self._details.seed,
                                   iteration_params=iteration_params,
                                   threads=self._details.threads, verbose=self._verbose,
                                   iteration_lc_only=True)
def perform(self):
    """Run the AdaBoost (SAMME) experiments: a tuned run plus an overfit run.

    Grid-searches estimator count, learning rate and base-tree depth, then
    re-runs a 'Boost_OF' variant with no depth cap to expose overfitting on
    the iteration learning curve only.
    """
    # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/Boosting.py
    max_depths = np.arange(1, 11, 1)
    # NOTE: Criterion may need to be adjusted here depending on the dataset
    base = learners.DTLearner(criterion='entropy', class_weight='balanced', max_depth=10,
                              random_state=self._details.seed)
    of_base = learners.DTLearner(criterion='entropy', class_weight='balanced',
                                 random_state=self._details.seed)
    booster = learners.BoostingLearner(algorithm='SAMME', learning_rate=1,
                                       base_estimator=base,
                                       random_state=self._details.seed)
    of_booster = learners.BoostingLearner(algorithm='SAMME', learning_rate=1,
                                          base_estimator=of_base,
                                          random_state=self._details.seed)

    params = {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 45, 60, 80, 90, 100],
              'Boost__learning_rate': [(2**x)/100 for x in range(7)]+[1],
              'Boost__base_estimator__max_depth': max_depths}
    iteration_details = {
        'params': {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]}
    }
    # None removes the depth cap entirely for the overfit run.
    of_params = {'Boost__base_estimator__max_depth': None}
    #complexity_param = {'name': 'Boost__learning_rate', 'display_name': 'Learning rate', 'x_scale': 'log',
    #                    'values': [(2**x)/100 for x in range(7)]+[1]}
    complexity_param = {'name': 'Boost__n_estimators', 'display_name': 'N_estimators',
                        'x_scale': 'linear',
                        'values': [1, 2, 5, 10, 20, 30, 45, 60, 80, 90, 100]}

    best_params = None
    # Uncomment to select known best params from grid search. This will skip the grid search
    # and just rebuild the various graphs
    #
    # Dataset 1:
    # best_params = {'base_estimator__max_depth': 4, 'learning_rate': 0.32, 'n_estimators': 20}
    #
    # Dataset 2:
    # NOTE(review): this assignment appears to be live (not commented out), i.e. the
    # file was last configured for dataset 2 and the grid search is always skipped —
    # confirm before re-running on another dataset.
    best_params = {'base_estimator__max_depth': 5, 'learning_rate': 0.64, 'n_estimators': 45}
    if best_params is not None:
        booster.set_params(**best_params)
        of_booster.set_params(**best_params)

    experiments.perform_experiment(self._details.ds, self._details.ds_name,
                                   self._details.ds_readable_name, booster, 'Boost', 'Boost',
                                   params, complexity_param=complexity_param,
                                   iteration_details=iteration_details,
                                   best_params=best_params, seed=self._details.seed,
                                   threads=self._details.threads, verbose=self._verbose)

    # TODO: This should turn OFF regularization
    experiments.perform_experiment(self._details.ds, self._details.ds_name,
                                   self._details.ds_readable_name, of_booster, 'Boost_OF',
                                   'Boost', of_params, seed=self._details.seed,
                                   iteration_details=iteration_details,
                                   best_params=best_params, threads=self._details.threads,
                                   verbose=self._verbose, iteration_lc_only=True)
def perform(self):
    """Grid-search a decision tree over depth, split criterion and class weighting.

    Known best parameters for 'DT' are taken from the dataset description when
    available, which lets the experiment skip the search and rebuild graphs.
    """
    # TODO: Clean up the older alpha stuff?
    depth_values = np.arange(1, 51, 1)

    params = {
        'DT__criterion': ['gini', 'entropy'],
        'DT__max_depth': depth_values,
        'DT__class_weight': ['balanced', None]
    }  # , 'DT__max_leaf_nodes': max_leaf_nodes}
    complexity_param = {
        'name': 'DT__max_depth',
        'display_name': 'Max Depth',
        'values': depth_values
    }

    learner = learners.DTLearner(random_state=self._details.seed)

    # Reuse previously-found best params for 'DT' when the dataset provides them.
    known_best = self._details.ds_best_params
    best_params = known_best['DT'] if known_best is not None and 'DT' in known_best else None
    if best_params is not None:
        learner.set_params(**best_params)

    experiments.perform_experiment(self._details.ds, self._details.ds_name,
                                   self._details.ds_readable_name, learner, 'DT', 'DT',
                                   params, complexity_param=complexity_param,
                                   seed=self._details.seed, threads=self._details.threads,
                                   best_params=best_params, verbose=self._verbose)
def perform(self):
    """Run the decision-tree grid search (depths 1-24, both criteria, balanced weights)."""
    # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/DT.py
    depth_grid = np.arange(1, 25, 1)

    params = {'DT__criterion': ['gini', 'entropy'],
              'DT__max_depth': depth_grid,
              'DT__class_weight': ['balanced']}
    # Model-complexity curve is drawn over the same depth grid.
    complexity_param = {'name': 'DT__max_depth',
                        'display_name': 'Max Depth',
                        'values': depth_grid}

    tree = learners.DTLearner(random_state=self._details.seed)
    experiments.perform_experiment(self._details.ds, self._details.ds_name,
                                   self._details.ds_readable_name, tree, 'DT', 'DT',
                                   params, complexity_param=complexity_param,
                                   seed=self._details.seed, threads=self._details.threads,
                                   verbose=self._verbose)
def perform(self):
    """Grid-search a decision tree over depth, criterion and class weighting.

    Hard-coded best params (currently the dataset-2 values) are applied to the
    learner and forwarded so the experiment skips the search and rebuilds graphs.
    """
    # TODO: Clean up the older alpha stuff?
    max_depths = np.arange(1, 51, 1)
    params = {'DT__criterion': ['gini', 'entropy'],
              'DT__max_depth': max_depths,
              'DT__class_weight': ['balanced', None]}  # , 'DT__max_leaf_nodes': max_leaf_nodes}
    complexity_param = {'name': 'DT__max_depth', 'display_name': 'Max Depth',
                        'values': max_depths}

    best_params = None
    # Uncomment to select known best params from grid search. This will skip the grid search
    # and just rebuild the various graphs
    #
    # Dataset 1:
    # best_params = {'criterion': 'gini', 'max_depth': 5, 'class_weight': 'balanced'}
    #
    # Dataset 2:
    # NOTE(review): this assignment is live (not commented out), so the grid search is
    # always skipped in favor of the dataset-2 values — confirm this is intended.
    best_params = {'criterion': 'entropy', 'max_depth': 14, 'class_weight': 'balanced'}

    learner = learners.DTLearner(random_state=self._details.seed)
    if best_params is not None:
        learner.set_params(**best_params)

    experiments.perform_experiment(self._details.ds, self._details.ds_name,
                                   self._details.ds_readable_name, learner, 'DT', 'DT',
                                   params, complexity_param=complexity_param,
                                   seed=self._details.seed, threads=self._details.threads,
                                   best_params=best_params, verbose=self._verbose)
def perform(self):
    """Run the AdaBoost (SAMME) experiments: a tuned run plus an overfit run.

    Grid-searches estimator count, learning rate and base-tree depth; the
    'Boost_OF' run removes the depth cap so the iteration learning curve
    shows overfitting. Known best params come from the dataset description.
    """
    # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/Boosting.py
    max_depths = np.arange(1, 11, 1)
    # NOTE: Criterion may need to be adjusted here depending on the dataset
    base = learners.DTLearner(criterion='gini', class_weight='balanced', max_depth=7,
                              random_state=self._details.seed)
    of_base = learners.DTLearner(criterion='gini', class_weight='balanced',
                                 random_state=self._details.seed)
    booster = learners.BoostingLearner(algorithm='SAMME', learning_rate=1,
                                       base_estimator=base,
                                       random_state=self._details.seed)
    of_booster = learners.BoostingLearner(algorithm='SAMME', learning_rate=1,
                                          base_estimator=of_base,
                                          random_state=self._details.seed)

    params = {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 45, 60, 80, 90, 100],
              'Boost__learning_rate': [(2**x)/100 for x in range(7)]+[1],
              'Boost__base_estimator__max_depth': max_depths}
    iteration_details = {
        'params': {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]}
    }
    # None removes the depth cap entirely for the overfit run.
    of_params = {'Boost__base_estimator__max_depth': None}
    complexity_param = {'name': 'Boost__learning_rate', 'display_name': 'Learning rate',
                        'x_scale': 'log',
                        'values': [(2**x)/100 for x in range(7)]+[1]}

    # Reuse previously-found best params for 'Boost' when the dataset provides them.
    best_params = None
    if self._details.ds_best_params is not None and 'Boost' in self._details.ds_best_params:
        best_params = self._details.ds_best_params['Boost']
    if best_params is not None:
        booster.set_params(**best_params)
        of_booster.set_params(**best_params)

    experiments.perform_experiment(self._details.ds, self._details.ds_name,
                                   self._details.ds_readable_name, booster, 'Boost', 'Boost',
                                   params, complexity_param=complexity_param,
                                   iteration_details=iteration_details,
                                   best_params=best_params, seed=self._details.seed,
                                   threads=self._details.threads, verbose=self._verbose)

    # TODO: This should turn OFF regularization
    experiments.perform_experiment(self._details.ds, self._details.ds_name,
                                   self._details.ds_readable_name, of_booster, 'Boost_OF',
                                   'Boost', of_params, seed=self._details.seed,
                                   iteration_details=iteration_details,
                                   best_params=best_params, threads=self._details.threads,
                                   verbose=self._verbose, iteration_lc_only=True)
def perform(self):
    """Grid-search a decision tree over depth, criterion and class weighting.

    Logs whether the grid search is skipped (pre-set best params) or scheduled,
    then delegates to experiments.perform_experiment.
    """
    # TODO: Clean up the older alpha stuff?
    max_depths = np.arange(1, 51, 1)
    params = {
        "DT__criterion": ["gini", "entropy"],
        "DT__max_depth": max_depths,
        "DT__class_weight": ["balanced", None]
    }  # , "DT__max_leaf_nodes": max_leaf_nodes}
    complexity_param = {
        "name": "DT__max_depth",
        "display_name": "Max Depth",
        "values": max_depths
    }
    # Alternative complexity sweep over leaf count, kept for reference:
    # max_leaf_nodes = np.arange(10, 200, 10)
    # params = {"DT__criterion": ["gini", "entropy"],
    #           "DT__class_weight": ["balanced", None], "DT__max_leaf_nodes": max_leaf_nodes}
    # complexity_param = {
    #     "name": "DT__max_leaf_nodes", "display_name": "Max Leaf Nodes", "values": max_leaf_nodes}

    best_params = None
    # Uncomment to select known best params from grid search. This will skip the grid search
    # and just rebuild the various graphs
    #
    # Dataset 1:
    # Seed: 2702306879, 3882803657
    # best_params = {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 11}
    #
    # Dataset 2:
    # best_params = {"criterion": "entropy", "max_depth": 4, "class_weight": "balanced"}

    learner = learners.DTLearner(random_state=self._details.seed)
    if best_params is not None:
        learner.set_params(**best_params)
        # Fix: log message previously read "GridSearchCV will is skipped".
        self.log("Best parameters are provided, GridSearchCV is skipped")
    else:
        self.log("Best parameters are not provided, GridSearchCV is scheduled")

    experiments.perform_experiment(self._details.ds, self._details.ds_name,
                                   self._details.ds_readable_name, learner, "DT", "DT",
                                   params, complexity_param=complexity_param,
                                   seed=self._details.seed, threads=self._details.threads,
                                   best_params=best_params, verbose=self._verbose)
def perform(self):
    """Grid-search a decision tree (gini criterion, depths 1-49) per dataset.

    Supported datasets share one grid; unknown dataset names leave the grid
    as None, which is forwarded unchanged to the experiment runner.
    """
    # TODO: Clean up the older alpha stuff?
    max_depths = np.arange(1, 50, 1)

    # Fix: the original if/elif branches for the two datasets had byte-identical
    # bodies — merged into one membership test (behavior unchanged).
    params = None
    complexity_param = None
    if self._details.ds_name in ("poisonous_mushrooms", "spam"):
        params = {
            "DT__criterion": ["gini"],
            "DT__max_depth": max_depths,
        }  # , 'DT__max_leaf_nodes': max_leaf_nodes}
        complexity_param = {
            "name": "DT__max_depth",
            "display_name": "Max Depth",
            "values": max_depths,
        }

    best_params = None
    # if self._details.ds_name == "poisonous_mushrooms":
    #     best_params = {"criterion": "gini", "max_depth": 7}
    # elif self._details.ds_name == "spam":
    #     best_params = {"criterion": "gini", "max_depth": 50}

    learner = learners.DTLearner(random_state=self._details.seed)
    if best_params is not None:
        learner.set_params(**best_params)

    experiments.perform_experiment(
        self._details.ds,
        self._details.ds_name,
        self._details.ds_readable_name,
        learner,
        "DT",
        "DT",
        params,
        complexity_param=complexity_param,
        seed=self._details.seed,
        threads=self._details.threads,
        best_params=best_params,
        verbose=self._verbose,
    )
def perform(self):
    """Run the AdaBoost (SAMME.R) grid search over learning rate, alpha and
    estimator count, with per-dataset narrowed grids when bparams is set.
    """
    # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/Boosting.py
    alphas = [x / 1000 for x in range(-10, 40, 4)]
    crit = "entropy"
    lr = [(2**x) / 100 for x in range(7)] + [1]
    n_estimators = [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    n_estimators_iter = [
        1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 140, 160, 200, 240, 300
    ]
    # /output-ew2
    if 'enhancer-b' == self._details.ds_name and self._details.bparams:
        alphas = [0.05]
        crit = "gini"
        # Fix: removed dead stores that were immediately overwritten —
        # lr = [0.16] + [10**(x / 8) for x in range(-32, 16)] and
        # n_estimators = n_estimators_iter (earlier experiment settings).
        lr = [0.32]
        n_estimators = [5]
    if 'wine-qual' == self._details.ds_name and self._details.bparams:
        alphas = [0.014]
        crit = "gini"
        lr = [0.16]
        # use old lr range here
        n_estimators = [20]

    # NOTE: Criterion may need to be adjusted here depending on the dataset
    base = learners.DTLearner(criterion=crit, class_weight='balanced',
                              random_state=self._details.seed)
    of_base = learners.DTLearner(criterion=crit, class_weight='balanced',
                                 random_state=self._details.seed)
    booster = learners.BoostingLearner(algorithm='SAMME.R', learning_rate=1,
                                       base_estimator=base,
                                       random_state=self._details.seed)
    # NOTE(review): of_booster/of_params are built but no 'Boost_OF' experiment is
    # run below — the overfitting run appears to have been removed; confirm.
    of_booster = learners.BoostingLearner(algorithm='SAMME.R', learning_rate=1,
                                          base_estimator=of_base,
                                          random_state=self._details.seed)

    params = {
        'Boost__n_estimators': n_estimators,
        'Boost__learning_rate': lr,
        'Boost__base_estimator__alpha': alphas,
        'Boost__random_state': [self._details.seed],
        'Boost__base_estimator__random_state': [self._details.seed]
    }
    iteration_details = {
        'params': {
            'Boost__n_estimators': n_estimators_iter
        }
    }
    of_params = {'Boost__base_estimator__alpha': -1}
    complexity_param = {
        'name': 'Boost__learning_rate',
        'display_name': 'Learning rate',
        'x_scale': 'log',
        'values': [10**(x / 8) for x in range(-32, 16)]
    }

    best_params = None
    # Uncomment to select known best params from grid search. This will skip the grid search
    # and just rebuild the various graphs
    #
    # Dataset 1:
    # best_params = {'base_estimator__max_depth': 8, 'learning_rate': 0.32, 'n_estimators': 90}
    #
    # Dataset 2:
    # best_params = {'base_estimator__max_depth': 6, 'learning_rate': 0.16, 'n_estimators': 20}
    if best_params is not None:
        booster.set_params(**best_params)
        of_booster.set_params(**best_params)

    experiments.perform_experiment(self._details.ds, self._details.ds_name,
                                   self._details.ds_readable_name, booster, 'Boost', 'Boost',
                                   params, complexity_param=complexity_param,
                                   iteration_details=iteration_details,
                                   best_params=best_params, seed=self._details.seed,
                                   threads=self._details.threads, verbose=self._verbose)
def perform(self):
    """Run the AdaBoost (SAMME) grid search over estimator count, learning
    rate and base-tree depth for this dataset.
    """
    # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/Boosting.py
    max_depths = np.arange(1, 15, 1)
    # NOTE: Criterion may need to be adjusted here depending on the dataset
    base = learners.DTLearner(
        criterion="entropy",
        class_weight="balanced",
        max_depth=5,
        random_state=self._details.seed,
    )
    of_base = learners.DTLearner(
        criterion="entropy",
        class_weight="balanced",
        random_state=self._details.seed,
    )
    booster = learners.BoostingLearner(
        algorithm="SAMME",
        learning_rate=1,
        base_estimator=base,
        random_state=self._details.seed,
    )
    # NOTE(review): of_booster is built but no 'Boost_OF' experiment is run below —
    # the overfitting run appears to have been removed; confirm this is intentional.
    of_booster = learners.BoostingLearner(
        algorithm="SAMME",
        learning_rate=1,
        base_estimator=of_base,
        random_state=self._details.seed,
    )
    params = {
        "Boost__n_estimators": [1, 2, 5, 10, 20, 30, 45, 60, 80, 90, 100],
        "Boost__learning_rate": [(2**x) / 100 for x in range(7)] + [1],
        "Boost__base_estimator__max_depth": max_depths,
    }
    iteration_details = {
        "params": {
            "Boost__n_estimators": [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
        }
    }
    # NOTE(review): of_params is likewise unused for the same reason.
    of_params = {"Boost__base_estimator__max_depth": None}
    complexity_param = {
        "name": "Boost__learning_rate",
        "display_name": "Learning rate",
        "x_scale": "log",
        "values": [(2**x) / 100 for x in range(7)] + [1],
    }
    best_params = None
    # # Uncomment to select known best params from grid search. This will skip the grid search
    # # and just rebuild the various graphs
    # #
    # if self._details.ds_name == "spam":
    #     best_params = {
    #         "base_estimator__max_depth": 10,
    #         "learning_rate": 0.32,
    #         "n_estimators": 30,
    #     }
    # elif self._details.ds_name == "poisonous_mushrooms":
    #     best_params = {
    #         "base_estimator__max_depth": 10,
    #         "learning_rate": 0.08,
    #         "n_estimators": 60,
    #     }
    # # Dataset 1:
    # # best_params = {'base_estimator__max_depth': 8, 'learning_rate': 0.32, 'n_estimators': 90}
    # #
    # # Dataset 2:
    # # best_params = {'base_estimator__max_depth': 6, 'learning_rate': 0.16, 'n_estimators': 20}
    # if best_params is not None:
    #     booster.set_params(**best_params)
    #     of_booster.set_params(**best_params)
    experiments.perform_experiment(
        self._details.ds,
        self._details.ds_name,
        self._details.ds_readable_name,
        booster,
        "Boost",
        "Boost",
        params,
        complexity_param=complexity_param,
        iteration_details=iteration_details,
        best_params=best_params,
        seed=self._details.seed,
        threads=self._details.threads,
        verbose=self._verbose,
    )
def perform(self):
    """Grid-search a decision tree over the pruning parameter alpha.

    When bparams is set for a known dataset, the grid is narrowed to the
    dataset's known-best single values. Runs with apply_pruning=True and
    captures the best params returned by the experiment runner.
    """
    # TODO: Clean up the older alpha stuff?
    # NOTE(review): max_depths is computed but unused — the grid sweeps alpha,
    # not depth; confirm whether the depth sweep was intentionally dropped.
    max_depths = np.arange(1, 21, 1)
    #alphas = [-1,-1e-3,-(1e-3)*10**-0.5, -1e-2, -(1e-2)*10**-0.5,-1e-1,-(1e-1)*10**-0.5, 0, (1e-1)*10**-0.5,1e-1,(1e-2)*10**-0.5,1e-2,(1e-3)*10**-0.5,1e-3]
    alphas = [x / 1000 for x in range(-40, 40, 4)]
    #params = {'DT__criterion': ['gini', 'entropy'],
    #          'DT__max_depth': max_depths,
    #          'alpha' : alphas,
    #          'DT__class_weight': ['balanced', None]
    #}  # , 'DT__max_leaf_nodes': max_leaf_nodes}
    params = {
        'DT__criterion': ['gini', 'entropy'],
        'DT__alpha': alphas,
        'DT__class_weight': ['balanced'],
        'DT__random_state': [self._details.seed]
    }
    complexity_param = {
        'name': 'DT__alpha',
        'display_name': 'alpha',
        'values': alphas
    }
    # NOTE(review): best_params is never reassigned before the check below, so the
    # set_params branch is dead; known-best values flow through the params grid instead.
    best_params = None
    # Uncomment to select known best params from grid search. This will skip the grid search
    # and just rebuild the various graphs
    #
    # Dataset 1:
    params_wine = {
        'DT__criterion': 'gini',
        'DT__alpha': 0.008,
        'DT__class_weight': 'balanced'
    }
    # Narrow each overlapping grid entry to the single known-best value.
    if self._details.ds_name == "wine-qual" and self._details.bparams:
        for k in params.keys():
            if k in params_wine.keys():
                params[k] = [params_wine.get(k)]
    #
    # Dataset 2:
    params_enhancer = {
        'DT__criterion': 'gini',
        'DT__alpha': 0.008,
        'DT__class_weight': 'balanced'
    }
    if self._details.ds_name == "enhancer-b" and self._details.bparams:
        for k in params.keys():
            if k in params_enhancer.keys():
                params[k] = [params_enhancer.get(k)]

    learner = learners.DTLearner(random_state=self._details.seed)
    if best_params is not None:
        learner.set_params(**best_params)

    best_params = experiments.perform_experiment(
        self._details.ds,
        self._details.ds_name,
        self._details.ds_readable_name,
        learner, 'DT', 'DT',
        params,
        complexity_param=complexity_param,
        seed=self._details.seed,
        threads=self._details.threads,
        best_params=best_params,
        verbose=self._verbose,
        apply_pruning=True)