def perform(self):
    """Run the RBF SVM experiment, then a second pass with a near-zero alpha.

    The first call to ``experiments.perform_experiment`` (experiment name
    ``'SVM_RBF'``) is given a grid over ``SVM__alpha``, ``SVM__max_iter`` and
    ``SVM__gamma_frac`` and its return value is taken as the best parameters.
    The second call (``'SVM_RBF_OF'``) reuses a copy of those parameters with
    ``SVM__alpha`` forced to ``1e-16`` and ``iteration_lc_only=True``.

    Returns:
        None.
    """
    # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/SVM.py
    details = self._details
    n_samples = details.ds.features.shape[0]

    # Exponents 1.0, 1.5, ..., 9.0 give alphas from 1e-1 down to 1e-9.
    alpha_grid = [10**-exponent for exponent in np.arange(1, 9.01, 1 / 2)]
    gamma_frac_grid = np.arange(0.2, 2.1, 0.2)
    # Single iteration cap, inversely proportional to the number of samples.
    iteration_cap = int((1e6 / n_samples) / .8) + 1

    search_grid = {
        'SVM__alpha': alpha_grid,
        'SVM__max_iter': [iteration_cap],
        'SVM__gamma_frac': gamma_frac_grid,
    }
    complexity = {
        'name': 'SVM__gamma_frac',
        'display_name': 'Gamma Fraction',
        'values': gamma_frac_grid,
    }
    iteration_grid = {'SVM__max_iter': [2**power for power in range(12)]}

    # Keyword arguments common to both experiment invocations.
    shared_kwargs = {
        'seed': details.seed,
        'iteration_params': iteration_grid,
        'threads': details.threads,
        'verbose': self._verbose,
    }

    search_learner = learners.SVMLearner(tol=None)
    winner = experiments.perform_experiment(
        details.ds,
        details.ds_name,
        details.ds_readable_name,
        search_learner,
        'SVM_RBF',
        'SVM',
        search_grid,
        complexity_param=complexity,
        **shared_kwargs)

    # Second pass: same best parameters, but with alpha pushed to ~0.
    # NOTE(review): the "_OF" suffix presumably stands for "overfit" - confirm.
    near_zero_alpha_params = winner.copy()
    near_zero_alpha_params['SVM__alpha'] = 1e-16
    followup_learner = learners.SVMLearner(n_jobs=details.threads)
    experiments.perform_experiment(
        details.ds,
        details.ds_name,
        details.ds_readable_name,
        followup_learner,
        'SVM_RBF_OF',
        'SVM',
        near_zero_alpha_params,
        iteration_lc_only=True,
        **shared_kwargs)
def perform(self):
    """Run the linear and RBF SVM experiments on a reduced parameter grid.

    For each kernel two calls to ``experiments.perform_experiment`` are made:
    a main run with a ``SVM__C`` complexity sweep whose return value is taken
    as the best parameters, and a follow-up run (name suffixed ``_OF``) that
    receives a copy of those parameters with ``iteration_lc_only=True``.

    When ``best_params_linear`` / ``best_params_rbf`` is set (see the
    commented-out presets below) the learner is pre-configured with it via
    ``set_params`` and the same dict is forwarded as ``best_params``.

    Returns:
        None.
    """
    # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/SVM.py
    details = self._details
    n_samples = details.ds.features.shape[0]
    n_features = details.ds.features.shape[1]

    # original
    # gamma_fracs = np.arange(1/features, 2.1, 0.2)
    # tols = np.arange(1e-8, 1e-1, 0.01)
    # C_values = np.arange(0.001, 2.5, 0.25)
    # iters = [-1, int((1e6/samples)/.8)+1]

    # YS changed
    gamma_grid = np.arange(1 / n_features, 1.1, 0.25)
    tol_grid = np.arange(1e-3, 1e-1, 0.03)
    penalty_grid = np.arange(0.1, 2.5, 0.5)
    max_iter_grid = [-1, int((1e6 / n_samples) / .8) + 1]

    best_params_linear = None
    best_params_rbf = None
    # Uncomment to select known best params from grid search. This will skip the grid search and just rebuild
    # the various graphs
    #
    # Dataset 1:
    # best_params_linear = {'C': 0.5, 'class_weight': 'balanced', 'loss': 'squared_hinge',
    #                       'max_iter': 1478, 'tol': 0.06000001}
    # best_params_rbf = {'C': 2.0, 'class_weight': 'balanced', 'decision_function_shape': 'ovo',
    #                    'gamma': 0.05555555555555555, 'max_iter': -1, 'tol': 1e-08}
    # Dataset 2:
    # best_params_linear = {'C': 1.0, 'class_weight': 'balanced', 'loss': 'hinge', 'dual': True,
    #                       'max_iter': 70, 'tol': 0.08000001}
    # best_params_rbf = {'C': 1.5, 'class_weight': 'balanced', 'decision_function_shape': 'ovo',
    #                    'gamma': 0.125, 'max_iter': -1, 'tol': 0.07000001}

    iteration_details = {
        'x_scale': 'log',
        # original
        # 'params': {'SVM__max_iter': [2**x for x in range(12)]},
        # YS changed from range(12)
        'params': {
            'SVM__max_iter': [4**power for power in range(6)]
        },
    }

    def run(learner, experiment_name, grid, known_best, **extra):
        """Forward one experiment to the runner with the arguments shared by all four runs."""
        return experiments.perform_experiment(
            details.ds,
            details.ds_name,
            details.ds_readable_name,
            learner,
            experiment_name,
            'SVM',
            grid,
            seed=details.seed,
            iteration_details=iteration_details,
            best_params=known_best,
            threads=details.threads,
            verbose=self._verbose,
            **extra)

    # ---- Linear SVM ----
    linear_grid = {
        'SVM__max_iter': max_iter_grid,
        'SVM__tol': tol_grid,
        'SVM__class_weight': ['balanced'],
        'SVM__C': penalty_grid,
    }
    # original
    # complexity_param = {'name': 'SVM__C', 'display_name': 'Penalty', 'values': np.arange(0.001, 2.5, 0.1)}
    # YS changed
    linear_complexity = {
        'name': 'SVM__C',
        'display_name': 'Penalty',
        'values': np.arange(0.1, 2.5, 0.5),
    }

    # NOTE: If this is causing issues, try the RBFSVMLearner. Passing use_linear=True will use a linear kernel
    #       and passing use_linear=False will use the RBF kernel. This method is slower but if libsvm is not
    #       available it may be your only option
    linear_learner = learners.LinearSVMLearner(dual=False)
    if best_params_linear is not None:
        linear_learner.set_params(**best_params_linear)
    linear_best = run(linear_learner, 'SVMLinear', linear_grid, best_params_linear,
                      complexity_param=linear_complexity)

    # The follow-up run uses the dual formulation, unlike the main run.
    linear_followup_params = linear_best.copy()
    linear_followup_learner = learners.LinearSVMLearner(dual=True)
    if best_params_linear is not None:
        linear_followup_learner.set_params(**best_params_linear)
    run(linear_followup_learner, 'SVMLinear_OF', linear_followup_params, best_params_linear,
        iteration_lc_only=True)

    # ---- RBF SVM ----
    rbf_grid = {
        'SVM__max_iter': max_iter_grid,
        'SVM__tol': tol_grid,
        'SVM__class_weight': ['balanced'],
        'SVM__C': penalty_grid,
        'SVM__decision_function_shape': ['ovo', 'ovr'],
        'SVM__gamma': gamma_grid,
    }
    # original
    # complexity_param = {'name': 'SVM__C', 'display_name': 'Penalty', 'values': np.arange(0.001, 2.5, 0.1)}
    # YS changed
    rbf_complexity = {
        'name': 'SVM__C',
        'display_name': 'Penalty',
        'values': np.arange(0.1, 2.5, 0.5),
    }

    rbf_learner = learners.SVMLearner(kernel='rbf')
    if best_params_rbf is not None:
        rbf_learner.set_params(**best_params_rbf)
    rbf_best = run(rbf_learner, 'SVM_RBF', rbf_grid, best_params_rbf,
                   complexity_param=rbf_complexity)

    rbf_followup_params = rbf_best.copy()
    rbf_followup_learner = learners.SVMLearner(kernel='rbf')
    if best_params_rbf is not None:
        rbf_followup_learner.set_params(**best_params_rbf)
    run(rbf_followup_learner, 'SVM_RBF_OF', rbf_followup_params, best_params_rbf,
        iteration_lc_only=True)
reducer names -> functions that take reduced dim as an arg """ REDUCERS = { "select": reduction.SelectKBestReduction, "pca-linear": lambda dim: reduction.KernelPCAReduction(dim, kernel='linear'), "pca-cosine": lambda dim: reduction.KernelPCAReduction(dim, kernel='cosine'), "none": lambda dim: reduction.NoopReduction(), } """ learner names -> functions that return learner instances """ LEARNERS = { "nb": learners.GaussianNBLearner, "svm-linear": lambda: learners.SVMLearner(kernel='linear'), "svm-rbf": lambda: learners.SVMLearner(kernel='rbf'), "svm-poly": lambda: learners.SVMLearner(kernel='poly'), "knn": lambda: learners.KNeighborsLearner(), "tree": lambda: learners.DecisionTreeLearner(), } """ Data format: { "body": string "post_ups": int "subreddit_id": string "created": float (timestamp) "downs": int "author": string
def perform(self):
    """Run the RBF-kernel SVM experiment and its follow-up iteration-curve pass.

    Only the RBF kernel is evaluated by this variant. The main run
    (``'SVM_RBF'``) is given a grid over ``max_iter``, ``tol``,
    ``class_weight``, ``C``, ``decision_function_shape`` and ``gamma`` plus a
    ``SVM__C`` complexity sweep; its return value is taken as the best
    parameters. The follow-up run (``'SVM_RBF_OF'``) receives a copy of those
    parameters with ``iteration_lc_only=True``.

    If ``self._details.ds_best_params`` contains an ``'SVM_RBF'`` entry, the
    learner is pre-configured with it via ``set_params`` and the same dict is
    forwarded to the runner as ``best_params``.

    Returns:
        None.
    """
    # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/SVM.py
    details = self._details
    samples = details.ds.features.shape[0]
    features = details.ds.features.shape[1]

    gamma_fracs = np.arange(1 / features, 2.1, 0.2)
    tols = np.arange(1e-8, 1e-1, 0.01)
    C_values = np.arange(0.001, 2.5, 0.25)
    # -1 plus a cap inversely proportional to the number of samples.
    iters = [-1, int((1e6 / samples) / .8) + 1]

    # Previously found best parameters for this dataset, if the caller supplied any.
    best_params_rbf = None
    if details.ds_best_params is not None and 'SVM_RBF' in details.ds_best_params:
        best_params_rbf = details.ds_best_params['SVM_RBF']

    iteration_details = {
        'x_scale': 'log',
        'params': {
            'SVM__max_iter': [2**x for x in range(12)]
        },
    }

    # NOTE(review): scikit-learn documents decision_function_shape as ignored
    # for binary classification, so searching both values only in the
    # two-class case looks inverted. Behaviour is kept as-is; confirm intent.
    if len(np.unique(details.ds.classes)) > 2:
        decision_functions = ['ovo']
    else:
        decision_functions = ['ovo', 'ovr']

    params = {
        'SVM__max_iter': iters,
        'SVM__tol': tols,
        'SVM__class_weight': ['balanced'],
        'SVM__C': C_values,
        'SVM__decision_function_shape': decision_functions,
        'SVM__gamma': gamma_fracs,
    }
    complexity_param = {
        'name': 'SVM__C',
        'display_name': 'Penalty',
        'values': np.arange(0.001, 2.5, 0.1),
    }

    learner = learners.SVMLearner(kernel='rbf')
    if best_params_rbf is not None:
        learner.set_params(**best_params_rbf)
    best_params = experiments.perform_experiment(
        details.ds,
        details.ds_name,
        details.ds_readable_name,
        learner,
        'SVM_RBF',
        'SVM',
        params,
        complexity_param=complexity_param,
        seed=details.seed,
        iteration_details=iteration_details,
        best_params=best_params_rbf,
        threads=details.threads,
        verbose=self._verbose)

    # Follow-up run on a fresh learner using the parameters selected above.
    of_params = best_params.copy()
    learner = learners.SVMLearner(kernel='rbf')
    if best_params_rbf is not None:
        learner.set_params(**best_params_rbf)
    experiments.perform_experiment(
        details.ds,
        details.ds_name,
        details.ds_readable_name,
        learner,
        'SVM_RBF_OF',
        'SVM',
        of_params,
        seed=details.seed,
        iteration_details=iteration_details,
        best_params=best_params_rbf,
        threads=details.threads,
        verbose=self._verbose,
        iteration_lc_only=True)
def perform(self):
    """Run the linear and RBF SVM experiments.

    For each kernel two calls to ``experiments.perform_experiment`` are made:
    a main run with a ``SVM__C`` complexity sweep (no ``iteration_details``)
    whose return value is taken as the best parameters, and a follow-up run
    (name suffixed ``_OF``) that receives a copy of those parameters together
    with ``iteration_details`` and ``iteration_lc_only=True``.

    The linear grid weights class ``1`` ten times via ``class_weight``; the
    RBF grid uses ``'balanced'`` and does not search ``max_iter`` or ``gamma``.

    When ``best_params_linear`` / ``best_params_rbf`` is set (see the
    commented-out presets below) the learner is pre-configured with it via
    ``set_params`` and the same dict is forwarded as ``best_params``.

    Returns:
        None.
    """
    # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/SVM.py
    details = self._details
    samples = details.ds.features.shape[0]

    tols = np.arange(1e-8, 1e-1, 0.01)
    C_values = np.arange(0.001, 2.5, 0.25)
    # -1 plus a cap inversely proportional to the number of samples.
    iters = [-1, int((1e6 / samples) / 0.8) + 1]

    best_params_linear = None
    best_params_rbf = None
    # Uncomment to select known best params from grid search. This will skip the grid search and just rebuild
    # the various graphs
    #
    # if self._details.ds_name == "spam":
    #     best_params_linear = {
    #         "C": 0.101,
    #         "class_weight": "balanced",
    #         "loss": "squared_hinge",
    #         "max_iter": 33,
    #         "tol": 1.00e-08,
    #     }
    #     best_params_rbf = {
    #         "C": 0.251,
    #         "class_weight": "balanced",
    #         "decision_function_shape": "ovo",
    #         "tol": 0.07000001,
    #     }
    # elif self._details.ds_name == "poisonous_mushrooms":
    #     best_params_linear = {
    #         "C": 0.001,
    #         "class_weight": "balanced",
    #         "loss": "squared_hinge",
    #         "max_iter": 42,
    #         "tol": 1.00e-08,
    #     }
    #     best_params_rbf = {
    #         "C": 0.251,
    #         "class_weight": "balanced",
    #         "decision_function_shape": "ovo",
    #         "tol": 0.06000001,
    #     }

    # Linear SVM
    params = {
        "SVM__max_iter": iters,
        "SVM__tol": tols,
        "SVM__class_weight": [{
            1: 10
        }],
        "SVM__C": C_values,
    }
    complexity_param = {
        "name": "SVM__C",
        "display_name": "Penalty",
        "values": np.arange(0.001, 2.5, 0.1),
    }

    iteration_details = {
        "x_scale": "log",
        "params": {
            "SVM__max_iter": [2**x for x in range(12)]
        },
    }

    # NOTE: If this is causing issues, try the RBFSVMLearner. Passing use_linear=True will use a linear kernel
    #       and passing use_linear=False will use the RBF kernel. This method is slower but if libsvm is not
    #       available it may be your only option
    learner = learners.LinearSVMLearner(dual=False)
    if best_params_linear is not None:
        learner.set_params(**best_params_linear)

    best_params = experiments.perform_experiment(
        details.ds,
        details.ds_name,
        details.ds_readable_name,
        learner,
        "SVMLinear",
        "SVM",
        params,
        complexity_param=complexity_param,
        seed=details.seed,
        best_params=best_params_linear,
        threads=details.threads,
        verbose=self._verbose,
    )

    # The follow-up run uses the dual formulation, unlike the main run.
    of_params = best_params.copy()
    learner = learners.LinearSVMLearner(dual=True)
    if best_params_linear is not None:
        learner.set_params(**best_params_linear)
    experiments.perform_experiment(
        details.ds,
        details.ds_name,
        details.ds_readable_name,
        learner,
        "SVMLinear_OF",
        "SVM",
        of_params,
        seed=details.seed,
        iteration_details=iteration_details,
        best_params=best_params_linear,
        threads=details.threads,
        verbose=self._verbose,
        iteration_lc_only=True,
    )

    # RBF SVM
    # Only C, class_weight, tol and decision_function_shape are searched;
    # max_iter and gamma are left at whatever the learner defaults to.
    params = {
        "SVM__C": C_values,
        "SVM__class_weight": ["balanced"],
        "SVM__tol": tols,
        "SVM__decision_function_shape": ["ovo", "ovr"],
    }
    complexity_param = {
        "name": "SVM__C",
        "display_name": "Penalty",
        "values": np.arange(0.001, 2.5, 0.25),
    }

    learner = learners.SVMLearner(kernel="rbf")
    if best_params_rbf is not None:
        learner.set_params(**best_params_rbf)
    best_params = experiments.perform_experiment(
        details.ds,
        details.ds_name,
        details.ds_readable_name,
        learner,
        "SVM_RBF",
        "SVM",
        params,
        complexity_param=complexity_param,
        seed=details.seed,
        best_params=best_params_rbf,
        threads=details.threads,
        verbose=self._verbose,
    )

    of_params = best_params.copy()
    learner = learners.SVMLearner(kernel="rbf")
    if best_params_rbf is not None:
        learner.set_params(**best_params_rbf)
    experiments.perform_experiment(
        details.ds,
        details.ds_name,
        details.ds_readable_name,
        learner,
        "SVM_RBF_OF",
        "SVM",
        of_params,
        seed=details.seed,
        iteration_details=iteration_details,
        best_params=best_params_rbf,
        threads=details.threads,
        verbose=self._verbose,
        iteration_lc_only=True,
    )
def perform(self):
    """Run the linear and RBF SVM experiments on the full parameter grid.

    The two kernels are processed in order (linear, then RBF). For each one a
    main call to ``experiments.perform_experiment`` with a ``SVM__C``
    complexity sweep yields the best parameters, and a follow-up call (name
    suffixed ``_OF``) receives a copy of them with ``iteration_lc_only=True``.

    When ``best_params_linear`` / ``best_params_rbf`` is set (see the
    commented-out presets below) the learner is pre-configured with it via
    ``set_params`` and the same dict is forwarded as ``best_params``.

    Returns:
        None.
    """
    # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/SVM.py
    details = self._details
    n_samples = details.ds.features.shape[0]
    n_features = details.ds.features.shape[1]

    gamma_grid = np.arange(1 / n_features, 2.1, 0.2)
    tol_grid = np.arange(1e-8, 1e-1, 0.01)
    penalty_grid = np.arange(0.001, 2.5, 0.25)
    max_iter_grid = [-1, int((1e6 / n_samples) / .8) + 1]

    best_params_linear = None
    best_params_rbf = None
    # Uncomment to select known best params from grid search. This will skip the grid search and just rebuild
    # the various graphs
    #
    # Dataset 1 (credit default):
    # best_params_linear = {
    #     "C": 0.251, "class_weight": "balanced", "dual": False, "fit_intercept": True,
    #     "intercept_scaling": 1, "loss": "squared_hinge", "max_iter": 42, "multi_class": "ovr",
    #     "penalty": "l2", "tol": 0.020000010000000002, "verbose": False
    # }
    # best_params_rbf = {
    #     "C": 0.751, "cache_size": 200, "class_weight": "balanced", "coef0": 0,
    #     "decision_function_shape": "ovo", "degree": 3, "gamma": 0.043478260869565216,
    #     "kernel": "rbf", "max_iter": -1, "probability": False, "shrinking": True,
    #     "tol": 0.08000001, "verbose": False
    # }
    # Dataset 2:
    # best_params_linear = {
    #     "C": 0.251, "class_weight": "balanced", "dual": False, "fit_intercept": True,
    #     "intercept_scaling": 1, "loss": "squared_hinge", "max_iter": 42, "multi_class": "ovr",
    #     "penalty": "l2", "tol": 0.020000010000000002, "verbose": False
    # }
    # best_params_rbf = {
    #     "C": 1.501, "cache_size": 200, "class_weight": "balanced", "coef0": 0,
    #     "decision_function_shape": "ovo", "degree": 3, "gamma": 0.0056179775280898875,
    #     "kernel": "rbf", "max_iter": -1, "probability": False, "shrinking": True,
    #     "tol": 0.09000000999999999, "verbose": False
    # }

    def penalty_sweep():
        """Build a fresh complexity-curve spec over SVM__C for one main run."""
        return {
            'name': 'SVM__C',
            'display_name': 'Penalty',
            'values': np.arange(0.001, 2.5, 0.1),
        }

    iteration_details = {
        'x_scale': 'log',
        'params': {
            'SVM__max_iter': [2**power for power in range(12)]
        },
    }

    linear_grid = {
        'SVM__max_iter': max_iter_grid,
        'SVM__tol': tol_grid,
        'SVM__class_weight': ['balanced'],
        'SVM__C': penalty_grid,
    }
    rbf_grid = {
        'SVM__max_iter': max_iter_grid,
        'SVM__tol': tol_grid,
        'SVM__class_weight': ['balanced'],
        'SVM__C': penalty_grid,
        'SVM__decision_function_shape': ['ovo', 'ovr'],
        'SVM__gamma': gamma_grid,
    }

    # NOTE: If this is causing issues, try the RBFSVMLearner. Passing use_linear=True will use a linear kernel
    #       and passing use_linear=False will use the RBF kernel. This method is slower but if libsvm is not
    #       available it may be your only option
    #
    # Each entry: (experiment name, grid, preset best params,
    #              learner factory for the main run, learner factory for the follow-up run).
    kernel_runs = (
        ('SVMLinear', linear_grid, best_params_linear,
         lambda: learners.LinearSVMLearner(dual=False),
         lambda: learners.LinearSVMLearner(dual=True)),
        ('SVM_RBF', rbf_grid, best_params_rbf,
         lambda: learners.SVMLearner(kernel='rbf'),
         lambda: learners.SVMLearner(kernel='rbf')),
    )

    for experiment_name, grid, known_best, make_main, make_followup in kernel_runs:
        # Arguments that are identical for the main and the follow-up run.
        shared_kwargs = {
            'seed': details.seed,
            'iteration_details': iteration_details,
            'best_params': known_best,
            'threads': details.threads,
            'verbose': self._verbose,
        }

        main_learner = make_main()
        if known_best is not None:
            main_learner.set_params(**known_best)
        chosen = experiments.perform_experiment(
            details.ds,
            details.ds_name,
            details.ds_readable_name,
            main_learner,
            experiment_name,
            'SVM',
            grid,
            complexity_param=penalty_sweep(),
            **shared_kwargs)

        followup_params = chosen.copy()
        followup_learner = make_followup()
        if known_best is not None:
            followup_learner.set_params(**known_best)
        experiments.perform_experiment(
            details.ds,
            details.ds_name,
            details.ds_readable_name,
            followup_learner,
            experiment_name + '_OF',
            'SVM',
            followup_params,
            iteration_lc_only=True,
            **shared_kwargs)