def preliminary_search(self, dataset, to_fix, repetitions):
    """
    Performs a preliminary search that fixes some hyperparameters, one at
    a time, to their current best values, so that the remaining ones can
    then be searched over a narrowed grid.

    Parameters
    ----------
    dataset: int
        the dataset's index in self.monks

    to_fix: list
        a list of hyperparameters' names to fix

    repetitions: int
        cross validation's repetitions
    """
    fname = '../data/model_selection_results_monk_{}.json'.format(dataset + 1)

    for prm in to_fix:
        # search with the current grid, logging which hyperparameter is
        # under inspection
        self.selection.search(
            self.train_set[:, 1:],
            self.train_set[:, 0].reshape(-1, 1),
            save_results=True,
            fname=fname,
            par_name=prm.upper())

        # collapse the hyperparameter's range to its best value, then
        # rebuild the grid and the selector around the narrowed ranges
        best_hyps = self.selection.select_best_hyperparams(fname=fname)
        self.param_ranges[prm] = best_hyps[0]['hyperparams'][prm]

        self.grid = val.HyperGrid(self.param_ranges, size=self.grid_size,
                                  seed=datetime.now())
        self.selection = val.ModelSelectionCV(self.grid,
                                              repetitions=repetitions)
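# A minimal, self-contained sketch of the narrowing step above, using plain
# dicts instead of the project's classes; the 'best' values below are
# made-up stand-ins, not results from any actual search.
param_ranges_sketch = {'eta': (0.02, 2.0), 'alpha': (0.0, 0.9)}
best_found = {'eta': 0.8, 'alpha': 0.3}          # hypothetical best values
for prm in ['eta', 'alpha']:
    param_ranges_sketch[prm] = best_found[prm]   # fix the range to a scalar
    print 'fixed {} -> {}'.format(prm, param_ranges_sketch[prm])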
import time
from pprint import pprint

# `val` (the project's validation module), plus `X` and `y`, are assumed to
# be defined earlier in the session.

# defining grid
param_ranges = dict()
param_ranges['eta'] = (0.02, 2.0)
param_ranges['alpha'] = 0.001
param_ranges['batch_size'] = (1, 100)
param_ranges['hidden_sizes'] = [(1, 100), (10, 20)]
param_ranges['reg_lambda'] = (0.0, 0.1)
param_ranges['reg_method'] = 'l2'
param_ranges['epochs'] = 200

reload(val)  # builtin reload in Python 2 (the imp module has no reload here)

# uniform grid
grid_size = 3
grid = val.HyperGrid(param_ranges, size=grid_size, random=False)

i = 0
for hyperparam in grid:
    pprint(hyperparam)
    i += 1

# random grid
grid_size = 10
grid = val.HyperGrid(param_ranges, size=grid_size, random=True)
print len(grid)

selection = val.ModelSelectionCV(grid=grid, repetitions=2)

start = time.time()
selection.search(X, y, nfolds=3)
print 'SEARCH TIME: {:.2f} seconds'.format(time.time() - start)
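# Optional peek at the random grid above: itertools.islice pulls just the
# first few sampled configurations (this assumes `grid` is iterable, as the
# loop over the uniform grid already shows).
from itertools import islice

for hyperparam in islice(grid, 3):
    pprint(hyperparam)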
def test(self, dataset, repetitions=1, preliminary_search=False,
         to_fix=[]):
    """
    Implements the testing procedure for the MONK's datasets. It permits
    two kinds of search for the best hyperparameters. The first kind
    simply searches one HyperGrid and selects the set of hyperparameters
    which returns the best result. The second kind performs a deeper
    search, by first searching the best values for some hyperparameters,
    fixing them, and then searching again for the remaining ones.

    Parameters
    ----------
    dataset: int or list
        either a single index or a list of indexes, each one representing
        a dataset in self.monks

    repetitions: int
        cross validation's repetitions
        (Default value = 1)

    preliminary_search: bool
        whether or not to execute a preliminary search for the best
        values of some hyperparameters, fix them, and search again for
        the remaining ones
        (Default value = False)

    to_fix: list
        a list of hyperparameters that must be fixed during the
        preliminary search
        (Default value = [])
    """
    if isinstance(dataset, int):
        assert 0 <= dataset <= 2
        dataset = [dataset]
    else:
        assert 0 < len(dataset) <= 3

    for ds in dataset:
        print 'TESTING MONK DATASET {}\n'.format(ds + 1)

        self.train_set = pd.read_csv(
            self.monks[ds][0],
            names=['class'] + ['x{}'.format(j) for j in range(17)]).values
        self.test_set = pd.read_csv(
            self.monks[ds][1],
            names=['class'] + ['x{}'.format(j) for j in range(17)]).values

        self.grid = val.HyperGrid(self.param_ranges, size=self.grid_size,
                                  seed=datetime.now())
        self.selection = val.ModelSelectionCV(self.grid,
                                              repetitions=repetitions)

        fname = '../data/model_selection_results_monk_{}.json'.format(ds + 1)

        # PRELIMINARY SEARCH FOR SOME OF THE PARAMETERS ###################

        if preliminary_search:
            assert len(to_fix) != 0
            self.preliminary_search(ds, to_fix, repetitions)

        # SEARCHING FOR THE OTHER PARAMETERS ##############################

        self.selection.search(
            self.train_set[:, 1:],
            self.train_set[:, 0].reshape(-1, 1),
            save_results=True,
            fname=fname,
            par_name='REMAINING PARAMETERS')

        best_model = self.selection.select_best_model(
            self.train_set[:, 1:],
            self.train_set[:, 0].reshape(-1, 1),
            fname=fname)

        # threshold the network's outputs at 0.5 to get binary labels
        y_pred = best_model.predict(self.test_set[:, 1:])
        y_pred = np.apply_along_axis(lambda x: 0 if x < .5 else 1,
                                     1, y_pred).reshape(-1, 1)

        print '\n\n\n'

        bca = metrics.BinaryClassifierAssessment(
            self.test_set[:, 0].reshape(-1, 1), y_pred)

        self.save_best_result(ds, best_model, bca)
        self.plot_best_result(ds, best_model)

        # restore the full ranges before testing the next dataset
        self.param_ranges = self.param_ranges_backup.copy()
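# Hedged usage sketch: the enclosing benchmark object is called `benchmark`
# here as an assumption (its class name does not appear in this excerpt).
# This would run the deeper two-stage search on all three MONK's datasets,
# fixing 'eta' and 'alpha' first:
#
#   benchmark.test(dataset=[0, 1, 2], repetitions=3,
#                  preliminary_search=True, to_fix=['eta', 'alpha'])
#
# The 0.5 thresholding inside test() can also be written without
# apply_along_axis; both forms produce the same binary labels:
import numpy as np

y_scores = np.array([[.1], [.7], [.5], [.49]])   # stand-in network outputs
print np.where(y_scores < .5, 0, 1).ravel()      # -> [0 1 1 0]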
print 'STARTED WITH LOSS {}, ENDED WITH {}'.format(
    neural_net.error_per_epochs[0], neural_net.error_per_epochs[-1])

u.plot_learning_curve(
    neural_net,
    fname='/Users/gianmarco/Desktop/learning_curve.pdf')

if raw_input('TESTING K-FOLD CROSS VALIDATION?[Y/N] ') == 'Y':
    cross_val = val.KFoldCrossValidation(X, y, neural_net)
    tqdm.write('AGGREGATED RESULTS: \n')
    pprint(cross_val.aggregated_results)

if raw_input('TESTING GRID SEARCH?[Y/N] ') == 'Y':
    param_ranges = dict()
    param_ranges['eta'] = (0.02, 2.0)
    param_ranges['alpha'] = 0.001
    param_ranges['batch_size'] = (1, 100)
    param_ranges['hidden_sizes'] = [(1, 100), (10, 20)]
    param_ranges['reg_lambda'] = (0.0, 0.1)
    param_ranges['reg_method'] = 'l2'
    param_ranges['epochs'] = 200

    grid_size = 10
    grid = val.HyperGrid(param_ranges, size=grid_size)
    selection = val.ModelSelectionCV(grid, repetitions=2)
    selection.search(X, y)

    results = selection.load_results()
    best_model = selection.select_best_model(X, y)
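    # Hedged follow-up: inspect the winning configuration found above.
    # select_best_hyperparams is used elsewhere in this codebase and returns
    # records whose 'hyperparams' entry holds the chosen configuration; the
    # exact shape of `results` is not shown in this excerpt.
    best_hyps = selection.select_best_hyperparams()
    pprint(best_hyps[0]['hyperparams'])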
if beta_m == 'mhs':
    param_ranges['rho'] = (0., 1.)
else:
    param_ranges['rho'] = 0.0

if beta_m == 'dl':
    param_ranges['t'] = (0., 1.)
else:
    param_ranges['t'] = 0.0

param_ranges['optimizer'] = opt
param_ranges['hidden_sizes'] = [4, 8]
param_ranges['activation'] = 'sigmoid'
param_ranges['task'] = 'classifier'

grid = val.HyperGrid(param_ranges, grid_size, random=True)
selection = val.ModelSelectionCV(
    grid,
    fname=fpath + 'monks_{}_experiment_{}_results.json.gz'.format(
        ds, experiment))
selection.search(X_design, y_design, nfolds=nfolds)
best_hyperparameters = selection.select_best_hyperparams()

if opt == 'SGD':
    json_name = '../data/final_setup/{}/{}/'.format(opt, type_m) + \
        'monks_{}_best_hyperparameters_{}.json'.format(ds, opt.lower())
else:
    json_name = '../data/final_setup/{}/'.format(opt) + \
        'monks_{}_best_hyperparameters_{}_{}.json'.format(ds, opt.lower(),
                                                          beta_m)
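# Hedged sketch: the actual dump of `best_hyperparameters` to `json_name`
# falls outside this excerpt, so the round trip below only mirrors what
# presumably follows; it assumes the selected record is JSON-serializable,
# as the .json naming here suggests.
import json

with open(json_name, 'w') as f:
    json.dump(best_hyperparameters, f)
with open(json_name) as f:
    best_reloaded = json.load(f)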