def _run_language_bayesian(lang, train_loader, val_loader, test_loader, token_map, args, max_order=3):
    """Tune the n-gram interpolation weights for one language with Bayesian optimisation.

    Returns (full_avg_len, avg_len, test_loss, val_loss, opt_params, xp, yp),
    where xp/yp are the sampled parameter points and their losses.
    """
    vocab_size = len(token_map)
    # Average sequence length over all three splits, minus 2
    # (presumably discounting boundary symbols — confirm against the data loader).
    all_lengths = [len(seq) for seq in train_loader]
    all_lengths += [len(seq) for seq in val_loader]
    all_lengths += [len(seq) for seq in test_loader]
    full_avg_len = np.mean(all_lengths) - 2
    avg_len = np.mean([len(seq) for seq in train_loader]) - 2
    # Search space: the unigram weight is kept in [0.1, 1]; each higher-order
    # weight (one per order up to max_order) may go to zero.
    search_bounds = np.array([[0.1, 1.]] + [[0.0, 1.]] * (max_order - 1))
    sample_loss = sample_loss_getter(lang, train_loader, val_loader, test_loader, token_map, args)
    xp, yp = bayesian_optimisation(45, sample_loss, search_bounds, n_pre_samples=5)
    test_loss, val_loss, opt_params = get_optimal_loss(train_loader, val_loader, test_loader, vocab_size, xp, yp, args)
    print('Test loss: %.4f Val loss: %.4f' % (test_loss, val_loss))
    return full_avg_len, avg_len, test_loss, val_loss, opt_params, xp, yp
def optimize_languages(args):
    """Tune shared-model hyper-parameters over all languages and write a results CSV.

    The CSV contains a header row, the overall optimum, a blank spacer row,
    and then one row per language with that language's test results.
    """
    print('------------------- Start -------------------')
    languages, token_map, data_split, concept_ids = read_info()
    print('Model %s' % args.model)
    print('Train %d, Val %d, Test %d' % (len(data_split[0]), len(data_split[1]), len(data_split[2])))
    # Search space: embedding size, hidden size, number of layers, dropout.
    bounds = np.array([[4, 256], [32, 256], [1, 2.95], [0.0, 0.5]])
    sample_loss = sample_loss_getter(languages, token_map, concept_ids, args)
    xp, yp = bayesian_optimisation(45, sample_loss, bounds, n_pre_samples=5)
    opt_results, test_results = get_optimal_loss(languages, token_map, xp, yp, concept_ids, args)
    header = [
        'lang', 'test_loss', 'test_acc', 'best_epoch', 'val_loss', 'val_acc',
        'embedding_size', 'hidden_size', 'nlayers', 'dropout'
    ]
    rows = [header, opt_results, []]
    for lang, result in test_results.items():
        rows.append([lang] + list(result))
    write_csv(
        rows,
        '%s/%s__bayesian-shared-results.csv' % (args.rfolder, args.model))
def gamma_tuning():
    """Search the C parameter over [0, 1] and return the best value found."""
    print('-- Beginning C Search')
    params, scores = bayesian_optimisation(
        n_iters=5,
        sample_loss=sample_loss_gamma,
        bounds=np.array([[0, 1]]),
        gp_params={'kernel': Matern(), 'alpha': 1e-5, 'n_restarts_optimizer': 10, 'normalize_y': True})
    # Locate the first occurrence of the best score and pull out its parameter.
    best_score = max(scores)
    best_c = params[list(scores).index(best_score)][0]
    print('Best C: %s, Best score: %s' % (best_c, best_score))
    return best_c
def find_feats():
    """Search for the best number of features in [1, n_features].

    Returns the best feature count as an ``int``; the search is seeded with
    the full feature set.
    """
    print('Searching for best number of features...')
    n_features = len(list(x_data))
    bounds = np.array([[1, n_features]])
    start = [[n_features]]
    results = bayesian_optimisation(n_iters=5,
                                    sample_loss=sample_loss_n_feats,
                                    bounds=bounds,
                                    x0=start,
                                    gp_params={'kernel': Matern(),
                                               'alpha': 1e-5,
                                               'n_restarts_optimizer': 10,
                                               'normalize_y': True})
    best_idx = list(results[1]).index(max(results[1]))
    # Index the single parameter explicitly: calling int() on a size-1 ndarray
    # is deprecated since NumPy 1.25 (an error in 2.x), and this matches the
    # explicit [0] used by gamma_tuning.
    return int(results[0][best_idx][0])
def optimize_languages(args):
    """Per-language, per-condition (artificial vs. default) hyper-parameter search.

    For every language and both conditions, runs Bayesian optimisation over
    (embedding_size, hidden_size, nlayers, dropout) and appends the optimum to
    ``opt_results``, checkpointing CSVs after each run.

    NOTE(review): ``results`` (used by two write_csv calls) is not defined in
    this function — presumably a module-level accumulator filled as a side
    effect of the loss returned by sample_loss_getter; verify, otherwise those
    calls raise NameError. The loop nesting of the checkpoint writes is also an
    assumption recovered from the collapsed source — confirm against history.
    """
    print('------------------- Start -------------------')
    _, token_map, data_split, concept_ids, ipa_to_concepts = read_info()
    languages = get_languages(is_devoicing=args.is_devoicing)
    # Extend the vocabulary with the symbols the artificial languages introduce.
    token_map = add_new_symbols_to_vocab(token_map)
    print('Model %s' % args.model)
    print('Train %d, Val %d, Test %d' % (len(data_split[0]), len(data_split[1]), len(data_split[2])))
    n_iters = 45
    # Search space: embedding size, hidden size, number of layers, dropout.
    bounds = np.array([[4, 256], [32, 256], [1, 2.95], [0.0, 0.5]])
    n_pre_samples = 5
    # First row is the CSV header; one result row is appended per (lang, condition).
    opt_results = [[
        'lang', 'artificial', 'avg_len', 'shannon', 'test_shannon', 'test_loss',
        'test_acc', 'best_epoch', 'val_loss', 'val_acc', 'embedding_size',
        'hidden_size', 'nlayers', 'dropout'
    ]]
    for i, lang in enumerate(languages):
        for artificial in [True, False]:
            print()
            print('%d. %s %s' % (i, lang, 'artificial' if artificial else 'default'))
            sample_loss = sample_loss_getter(lang, args.is_devoicing, token_map, ipa_to_concepts, args, artificial=artificial)
            xp, yp = bayesian_optimisation(n_iters, sample_loss, bounds, n_pre_samples=n_pre_samples)
            opt_results += [
                get_optimal_loss(lang, args.is_devoicing, artificial, token_map, concept_ids, ipa_to_concepts, xp, yp, args)
            ]
            # Checkpoint after every (lang, condition) run so partial results survive crashes.
            write_csv(
                results,
                '%s/artificial__%s__baysian-results.csv' % (args.rfolder, args.model))
            write_csv(
                opt_results,
                '%s/artificial__%s__opt-results.csv' % (args.rfolder, args.model))
    # Final dump once all languages have been processed.
    write_csv(
        results,
        '%s/artificial__%s__baysian-results-final.csv' % (args.rfolder, args.model))
def leaf_tuning():
    """Tune (n_neighbors, leaf_size) and return the best parameter pair."""
    print('-- Beginning leaf Search')
    params, scores = bayesian_optimisation(
        n_iters=12,
        sample_loss=sample_loss_leaf,
        bounds=np.array([[2, 100], [10, 200]]),
        gp_params={
            'kernel': Matern(),
            'alpha': 1e-5,
            'n_restarts_optimizer': 10,
            'normalize_y': True
        })
    # First occurrence of the best score picks the winning parameter pair.
    top_score = max(scores)
    best_pair = params[list(scores).index(top_score)]
    print('Best Neighbors: %s, Best Leafs: %s, Best score: %s' %
          (best_pair[0], best_pair[1], top_score))
    return best_pair
def hyper_parameter_tuning():
    """Run the GP search once per candidate kernel.

    Returns (result_list, params_list): per kernel, the sampled scores and the
    sampled parameter points, in the order the kernels were tried.
    """
    print('Searching hyper parameters')
    all_scores = []
    all_params = []
    for kernel in [RBF(), RationalQuadratic(), Matern()]:  # ExpSineSquared(),
        print('-- Beginning Gaussian Search with %s' % (kernel))
        search_bounds = np.array([[.6, 1], [1000, 2000], [1, 200], [10, 200], [.4, 1]])
        sampled_params, sampled_scores = bayesian_optimisation(
            n_iters=15,
            sample_loss=sample_loss_hyperparameters,
            bounds=search_bounds,
            gp_params={
                'kernel': kernel,
                'alpha': 1e-5,
                'n_restarts_optimizer': 10,
                'normalize_y': True
            })
        print('Kernel: %s, Best score: %s' % (kernel, max(sampled_scores)))
        all_scores.append(sampled_scores)
        all_params.append(sampled_params)
    return all_scores, all_params
# model.fit(trainx, trainy) # pred = model.predict(testx) # acc = accuracy_score(testy, pred) # f1 = f1_score(testy, pred) # roc = roc_auc_score(testy, pred) # prec = precision_score(testy, pred) # rec = recall_score(testy, pred) # combination = np.mean([acc, f1, roc, prec, rec]) # print(combination) bounds = np.array([[0, 2], [0, 2], [0, 2], [0, 2], [0, 2], [0, 2], [0, 2], [0, 2], [0, 2]]) start = [[1, 1, 1, 1, 1, 1, 1, 1, 1]] results = bayesian_optimisation(n_iters=100, sample_loss=sample_loss, bounds=bounds, x0=start) scores = results[1] #max(scores) = 0.81404958677685946 #index = list(scores).index(max(scores)) #results[0][index] = [ 0.43409737, 1.73946525, 0.22288511, 0.66346655, 1.99013394, 1.74626956, 0.19906775, 1.86856138, 0.10190556] #max(scores) = 0.81404958677685946 #index = list(scores).index(max(scores)) #results[0][index] = [ 0.43409737, 1.73946525, 0.22288511, 0.66346655, 1.99013394, 1.74626956, 0.19906775, 1.86856138, 0.10190556] scores = pd.DataFrame() scores['threshold'] = threshold scores['accuracy'] = acc scores['f1'] = f1
# Map each hyper-parameter's raw max into search space via its transform
# (transforms/itransforms/mins/names are presumably parallel lists defined
# earlier in the file — verify).
transformed_maxes = [t(v) for t, v in zip(transforms, maxes)]
# One [min, max] row per hyper-parameter.
bounds = np.array([transformed_mins, transformed_maxes]).T


def itransform(params):
    """Map points from search space back to raw hyper-parameter space."""
    return [it(v) for it, v in zip(itransforms, params)]


def params_to_hyperparams(params):
    """Pair the inverse-transformed parameter values with their names."""
    return {name: value for name, value in zip(names, itransform(params))}


def sample_loss(params):
    """Train a fresh DDPG agent with the given parameters; return its best reward.

    Returned as-is (not negated), so the optimiser is maximizing reward.
    """
    hyperparameters = params_to_hyperparams(params)
    print("Parameters:", hyperparameters)
    task = Task(target_pos=np.array([0., 0., 10.]))
    agent = ddpg.create(task, hyperparameters=hyperparameters)
    results = runner.train(task, agent, nb_epochs=N_EPOCHS, nb_train_steps_per_epoch=1)
    best_reward = results.max()
    print("Reward:", best_reward)
    return best_reward


xp, yp = gp.bayesian_optimisation(n_iters=N_ITERS, sample_loss=sample_loss, bounds=bounds, n_pre_samples=PRE_SAMPLES)
# Synthetic binary-classification problem for exercising the GP optimiser.
data, target = make_classification(n_samples=2500, n_features=45, n_informative=15, n_redundant=5)


def sample_loss(params):
    """Mean 3-fold ROC-AUC of an RBF SVM; params hold log10(C) and log10(gamma)."""
    return cross_val_score(SVC(C=10 ** params[0], gamma=10 ** params[1], random_state=12345),
                           X=data, y=target, scoring='roc_auc', cv=3).mean()


lambdas = np.linspace(1, -4, 5)
gammas = np.linspace(1, -4, 4)
# We need the cartesian combination of these two vectors
param_grid = np.array([[C, gamma] for gamma in gammas for C in lambdas])
# Brute-force the grid so the true optimum is known for comparison.
real_loss = [sample_loss(params) for params in param_grid]
# Fixed: these were Python 2 print statements, inconsistent with the rest of
# the file and a SyntaxError under Python 3.
print(len(real_loss))
# The maximum is at:
print(param_grid[np.array(real_loss).argmax(), :])
bounds = np.array([[-4, 1], [-4, 1]])
xp, yp = gp.bayesian_optimisation(n_iters=30, sample_loss=sample_loss, bounds=bounds, n_pre_samples=3, random_search=100000)
print(xp, yp)
# rc('text', usetex=True) # # C, G = np.meshgrid(lambdas, gammas) # plt.figure() # cp = plt.contourf(C, G, np.array(real_loss).reshape(C.shape)) # plt.colorbar(cp) # plt.title('Filled contours plot of loss function $\mathcal{L}$($\gamma$, $C$)') # plt.xlabel('$C$') # plt.ylabel('$\gamma') # plt.savefig('/Users/ilham/Documents/gp-optimisation/figures/real_loss_contour.png', bbox_inches='tight') # plt.show() from matplotlib import rc bounds = np.array([[-4, 1], [-4, 1]]) xp, yp = gp.bayesian_optimisation(n_iters=30, sample_loss=sample_loss, bounds=bounds, n_pre_samples=3, random_search=100000) print(xp) print(yp) rc('text', usetex=False) plotters.plot_iteration(lambdas, xp, yp, first_iter=3, second_param_grid=gammas, optimum=[0.58333333, -2.15789474])
#weights = [start1, start2] #for weight in weights: # model = VotingClassifier(estimators = keeplist, weights = weight, voting = 'soft') # model.fit(trainx, trainy) # pred = model.predict(testx) # acc = accuracy_score(testy, pred) # f1 = f1_score(testy, pred) # roc = roc_auc_score(testy, pred) # prec = precision_score(testy, pred) # rec = recall_score(testy, pred) # combination = np.mean([acc, f1, roc, prec, rec]) # print(combination) bounds = np.array([[0, 5], [0, 5], [0, 5], [0, 5], [0, 5], [0, 5]]) start = [[1, 1, 1, 1, 1, 1]] bayesian_optimisation(n_iters=30, sample_loss=sample_loss, bounds=bounds, x0=start) scores = pd.DataFrame() scores['threshold'] = threshold scores['accuracy'] = acc scores['f1'] = f1 scores['roc_auc'] = roc scores['precision'] = prec scores['recall'] = rec scores['logloss'] = logloss print(scores)