Example #1
0
def _run_language_bayesian(lang,
                           train_loader,
                           val_loader,
                           test_loader,
                           token_map,
                           args,
                           max_order=3):
    """Tune per-order n-gram weights for one language via Bayesian optimisation.

    Returns the full/train average sequence lengths, the test and validation
    losses at the optimum, the optimal parameters, and the full set of sampled
    points (xp) with their losses (yp).
    """
    vocab_size = len(token_map)

    # Average sequence length; the -2 presumably strips start/end symbols
    # from each sequence -- TODO confirm against the data pipeline.
    all_lengths = ([len(seq) for seq in train_loader] +
                   [len(seq) for seq in val_loader] +
                   [len(seq) for seq in test_loader])
    full_avg_len = np.mean(all_lengths) - 2
    avg_len = np.mean([len(seq) for seq in train_loader]) - 2

    # Search box: unigram weight in [0.1, 1], each higher-order weight in [0, 1].
    n_iters = 45
    n_pre_samples = 5
    bounds = np.array([[0.1, 1.]] + [[0.0, 1.]] * (max_order - 1))

    sample_loss = sample_loss_getter(lang, train_loader, val_loader,
                                     test_loader, token_map, args)
    xp, yp = bayesian_optimisation(n_iters,
                                   sample_loss,
                                   bounds,
                                   n_pre_samples=n_pre_samples)

    test_loss, val_loss, opt_params = get_optimal_loss(train_loader,
                                                       val_loader, test_loader,
                                                       vocab_size, xp, yp,
                                                       args)
    print('Test loss: %.4f  Val loss: %.4f' % (test_loss, val_loss))

    return full_avg_len, avg_len, test_loss, val_loss, opt_params, xp, yp
Example #2
0
def optimize_languages(args):
    """Bayesian-optimise shared model hyperparameters across all languages.

    Writes the optimal configuration plus per-language test results to a CSV
    under args.rfolder.
    """
    print('------------------- Start -------------------')
    languages, token_map, data_split, concept_ids = read_info()
    print('Model %s' % args.model)
    print('Train %d, Val %d, Test %d' %
          (len(data_split[0]), len(data_split[1]), len(data_split[2])))

    n_iters = 45
    n_pre_samples = 5
    # Search box: embedding size, hidden size, nlayers, dropout.
    bounds = np.array([[4, 256], [32, 256], [1, 2.95], [0.0, 0.5]])

    sample_loss = sample_loss_getter(languages, token_map, concept_ids, args)
    xp, yp = bayesian_optimisation(n_iters,
                                   sample_loss,
                                   bounds,
                                   n_pre_samples=n_pre_samples)

    opt_results, test_results = get_optimal_loss(languages, token_map, xp, yp,
                                                 concept_ids, args)

    # Header row, optimal configuration, blank spacer, then one row per language.
    header = [
        'lang', 'test_loss', 'test_acc', 'best_epoch', 'val_loss', 'val_acc',
        'embedding_size', 'hidden_size', 'nlayers', 'dropout'
    ]
    log_results = [header, opt_results, []]
    for lang, result in test_results.items():
        log_results.append([lang] + list(result))

    write_csv(
        log_results,
        '%s/%s__bayesian-shared-results.csv' % (args.rfolder, args.model))
Example #3
0
def gamma_tuning():
    """Bayesian-optimise the C parameter over [0, 1].

    Returns the sampled value of C that achieved the best score.
    """
    print('-- Beginning C Search')
    bounds = np.array([[0, 1]])
    results = bayesian_optimisation(n_iters=5,
                                    sample_loss=sample_loss_gamma,
                                    bounds=bounds,
                                    gp_params={'kernel': Matern(),
                                               'alpha': 1e-5,
                                               'n_restarts_optimizer': 10,
                                               'normalize_y': True})
    # results[0] holds the sampled parameter vectors, results[1] their scores.
    # Derive the best index once; the original recomputed
    # list(scores).index(max(scores)) three separate times.
    sampled_params, scores = results[0], results[1]
    best_score = max(scores)
    best_idx = list(scores).index(best_score)
    best_c = sampled_params[best_idx][0]
    print('Best C: %s, Best score: %s' % (best_c, best_score))
    return best_c
Example #4
0
def find_feats():
    """Bayesian-optimise the number of features to keep.

    Searches [1, n_features] starting from the full feature count and returns
    the sampled count with the best score, as an int.
    """
    print('Searching for best number of features...')
    # Hoist the column count of the module-level x_data frame; the original
    # computed len(list(x_data)) twice.
    n_total = len(list(x_data))
    bounds = np.array([[1, n_total]])
    start = [[n_total]]
    results = bayesian_optimisation(n_iters=5,
                                    sample_loss=sample_loss_n_feats,
                                    bounds=bounds,
                                    x0=start,
                                    gp_params={'kernel': Matern(),
                                               'alpha': 1e-5,
                                               'n_restarts_optimizer': 10,
                                               'normalize_y': True})
    # Derive the best index once instead of re-evaluating max()/index() inline.
    sampled_params, scores = results[0], results[1]
    best_idx = list(scores).index(max(scores))
    return int(sampled_params[best_idx])
Example #5
0
def optimize_languages(args):
    """Bayesian-optimise hyperparameters per language, for both the artificial
    and the default variant of each language.

    Writes intermediate and final CSVs (raw Bayesian samples plus optimal
    results) under args.rfolder after every language/variant pair.
    """
    print('------------------- Start -------------------')
    _, token_map, data_split, concept_ids, ipa_to_concepts = read_info()
    languages = get_languages(is_devoicing=args.is_devoicing)
    token_map = add_new_symbols_to_vocab(token_map)
    print('Model %s' % args.model)
    print('Train %d, Val %d, Test %d' %
          (len(data_split[0]), len(data_split[1]), len(data_split[2])))

    n_iters = 45
    # Search box: embedding size, hidden size, nlayers, dropout.
    bounds = np.array([[4, 256], [32, 256], [1, 2.95], [0.0, 0.5]])
    n_pre_samples = 5

    # BUG FIX: `results` was passed to write_csv below but never defined,
    # raising NameError on the first iteration. Log every Bayesian sample
    # (language, variant, hyperparameters, loss) here instead.
    results = [[
        'lang', 'artificial', 'embedding_size', 'hidden_size', 'nlayers',
        'dropout', 'loss'
    ]]
    opt_results = [[
        'lang', 'artificial', 'avg_len', 'shannon', 'test_shannon',
        'test_loss', 'test_acc', 'best_epoch', 'val_loss', 'val_acc',
        'embedding_size', 'hidden_size', 'nlayers', 'dropout'
    ]]
    for i, lang in enumerate(languages):
        for artificial in [True, False]:
            print()
            print('%d. %s %s' %
                  (i, lang, 'artificial' if artificial else 'default'))

            sample_loss = sample_loss_getter(lang,
                                             args.is_devoicing,
                                             token_map,
                                             ipa_to_concepts,
                                             args,
                                             artificial=artificial)
            xp, yp = bayesian_optimisation(n_iters,
                                           sample_loss,
                                           bounds,
                                           n_pre_samples=n_pre_samples)
            # One row per sampled point: params and the observed loss.
            results += [[lang, artificial] + list(x) + [y]
                        for x, y in zip(xp, yp)]

            opt_results += [
                get_optimal_loss(lang, args.is_devoicing, artificial,
                                 token_map, concept_ids, ipa_to_concepts, xp,
                                 yp, args)
            ]

            # Checkpoint both CSVs after every language/variant pair so a
            # crash mid-run does not lose completed work.
            write_csv(
                results, '%s/artificial__%s__baysian-results.csv' %
                (args.rfolder, args.model))
            write_csv(
                opt_results, '%s/artificial__%s__opt-results.csv' %
                (args.rfolder, args.model))

    write_csv(
        results, '%s/artificial__%s__baysian-results-final.csv' %
        (args.rfolder, args.model))
Example #6
0
def leaf_tuning():
    """Bayesian-optimise (neighbors, leaf size) over [2, 100] x [10, 200].

    Returns the sampled parameter pair with the best score.
    """
    print('-- Beginning leaf Search')
    bounds = np.array([[2, 100], [10, 200]])
    results = bayesian_optimisation(n_iters=12,
                                    sample_loss=sample_loss_leaf,
                                    bounds=bounds,
                                    gp_params={
                                        'kernel': Matern(),
                                        'alpha': 1e-5,
                                        'n_restarts_optimizer': 10,
                                        'normalize_y': True
                                    })
    # Derive the index of the best score once; the original re-computed
    # list(results[1]).index(max(results[1])) four times, which is both
    # wasteful and hard to read.
    scores = results[1]
    best_score = max(scores)
    best_params = results[0][list(scores).index(best_score)]
    print('Best Neighbors: %s, Best Leafs: %s, Best score: %s' %
          (best_params[0], best_params[1], best_score))
    return best_params
Example #7
0
def hyper_parameter_tuning():
    """Run Bayesian optimisation under several GP kernels.

    Tries RBF, RationalQuadratic and Matern over the same five-dimensional
    search box; returns the per-kernel score arrays and sampled parameter
    arrays, in kernel order.
    """
    print('Searching hyper parameters')
    result_list = []
    params_list = []
    # The search box is kernel-independent, so build it once up front.
    bounds = np.array([[.6, 1], [1000, 2000], [1, 200], [10, 200], [.4, 1]])
    for kernel in [RBF(), RationalQuadratic(), Matern()]:  #ExpSineSquared(),
        print('-- Beginning Gaussian Search with %s' % (kernel))
        sampled, scores = bayesian_optimisation(
            n_iters=15,
            sample_loss=sample_loss_hyperparameters,
            bounds=bounds,
            gp_params={
                'kernel': kernel,
                'alpha': 1e-5,
                'n_restarts_optimizer': 10,
                'normalize_y': True
            })
        print('Kernel: %s, Best score: %s' % (kernel, max(scores)))
        result_list.append(scores)
        params_list.append(sampled)
    return result_list, params_list
#    model.fit(trainx, trainy)
#    pred = model.predict(testx)
#    acc = accuracy_score(testy, pred)
#    f1 = f1_score(testy, pred)
#    roc = roc_auc_score(testy, pred)
#    prec = precision_score(testy, pred)
#    rec = recall_score(testy, pred)
#    combination = np.mean([acc, f1, roc, prec, rec])
#    print(combination)

# Weight search over nine parameters, each in [0, 2], starting from uniform
# weights -- presumably soft-voting ensemble weights (cf. the commented-out
# VotingClassifier code nearby); confirm against the full file.
bounds = np.array([[0, 2], [0, 2], [0, 2], [0, 2], [0, 2], [0, 2], [0, 2],
                   [0, 2], [0, 2]])
start = [[1, 1, 1, 1, 1, 1, 1, 1, 1]]

# NOTE(review): `sample_loss` here is whichever definition is in scope at
# this point in the file -- verify which one is intended.
results = bayesian_optimisation(n_iters=100,
                                sample_loss=sample_loss,
                                bounds=bounds,
                                x0=start)

# results is (sampled parameter vectors, scores); keep the scores.
scores = results[1]
#max(scores) = 0.81404958677685946
#index = list(scores).index(max(scores))
#results[0][index] = [ 0.43409737,  1.73946525,  0.22288511,  0.66346655,  1.99013394, 1.74626956,  0.19906775,  1.86856138,  0.10190556]

#max(scores) = 0.81404958677685946
#index = list(scores).index(max(scores))
#results[0][index] = [ 0.43409737,  1.73946525,  0.22288511,  0.66346655,  1.99013394, 1.74626956,  0.19906775,  1.86856138,  0.10190556]

# NOTE(review): rebinding `scores` to a DataFrame clobbers the score array
# above, and `threshold`, `acc`, `f1` are not defined anywhere in this
# chunk -- they presumably come from an evaluation loop elsewhere; verify
# before running.
scores = pd.DataFrame()
scores['threshold'] = threshold
scores['accuracy'] = acc
scores['f1'] = f1
Example #9
0
# Map the raw parameter maxima into the optimiser's search space and stack
# them with the (already transformed) minima into a (n_params, 2) bounds
# array. `transforms`, `maxes` and `transformed_mins` are defined earlier in
# the file, outside this chunk.
transformed_maxes = [t(v) for t, v in zip(transforms, maxes)]
bounds = np.array([transformed_mins, transformed_maxes]).T


def itransform(params):
    """Map *params* back through the module-level inverse transforms.

    Applies each function in `itransforms` (defined elsewhere in the file)
    to the corresponding entry of *params* and returns the results as a list.
    """
    inverted = []
    for inverse_fn, value in zip(itransforms, params):
        inverted.append(inverse_fn(value))
    return inverted


def params_to_hyperparams(params):
    """Build a hyperparameter dict from an optimiser parameter vector.

    Pairs the module-level `names` with the inverse-transformed values of
    *params* (via `itransform`).
    """
    return dict(zip(names, itransform(params)))


def sample_loss(params):
    """Train a DDPG agent under the given parameter vector.

    Decodes *params* into hyperparameters, trains for N_EPOCHS on a hover
    task, and returns the best per-epoch reward observed.
    """
    hyperparameters = params_to_hyperparams(params)
    print("Parameters:", hyperparameters)
    hover_task = Task(target_pos=np.array([0., 0., 10.]))
    agent = ddpg.create(hover_task, hyperparameters=hyperparameters)
    history = runner.train(hover_task,
                           agent,
                           nb_epochs=N_EPOCHS,
                           nb_train_steps_per_epoch=1)
    best_reward = history.max()
    print("Reward:", best_reward)
    return best_reward


# Run the optimisation over the transformed bounds; N_ITERS and PRE_SAMPLES
# are module-level constants defined outside this chunk. xp holds the sampled
# parameter vectors, yp their rewards.
xp, yp = gp.bayesian_optimisation(n_iters=N_ITERS,
                                  sample_loss=sample_loss,
                                  bounds=bounds,
                                  n_pre_samples=PRE_SAMPLES)
Example #10
0
# Synthetic binary-classification dataset: 2500 samples with 45 features, of
# which 15 are informative and 5 redundant.
data, target = make_classification(n_samples=2500,
    n_features=45,
    n_informative=15,
    n_redundant=5)

def sample_loss(params):
    """Mean 3-fold CV ROC-AUC of an SVC with log10-scaled C and gamma.

    *params* is a 2-vector of exponents: C = 10**params[0],
    gamma = 10**params[1].
    """
    c_value = 10 ** params[0]
    gamma_value = 10 ** params[1]
    classifier = SVC(C=c_value, gamma=gamma_value, random_state=12345)
    scores = cross_val_score(classifier, X=data, y=target,
                             scoring='roc_auc', cv=3)
    return scores.mean()

# Evaluate the SVC loss on an exhaustive C x gamma grid, then run Bayesian
# optimisation over the same box for comparison.
lambdas = np.linspace(1, -4, 5)
gammas = np.linspace(1, -4, 4)

# We need the cartesian combination of these two vectors
param_grid = np.array([[C, gamma] for gamma in gammas for C in lambdas])
real_loss = []
for params in param_grid:
    real_loss.append(sample_loss(params))
    # BUG FIX: this section used Python 2 print statements, which are a
    # SyntaxError on Python 3 -- the rest of this file uses print(...) calls.
    print(len(real_loss))

# The maximum is at:
print(param_grid[np.array(real_loss).argmax(), :])
bounds = np.array([[-4, 1], [-4, 1]])
xp, yp = gp.bayesian_optimisation(n_iters=30,
                                  sample_loss=sample_loss,
                                  bounds=bounds,
                                  n_pre_samples=3,
                                  random_search=100000)

print(xp, yp)

# rc('text', usetex=True)
#
# C, G = np.meshgrid(lambdas, gammas)
# plt.figure()
# cp = plt.contourf(C, G, np.array(real_loss).reshape(C.shape))
# plt.colorbar(cp)
# plt.title('Filled contours plot of loss function $\mathcal{L}$($\gamma$, $C$)')
# plt.xlabel('$C$')
# plt.ylabel('$\gamma')
# plt.savefig('/Users/ilham/Documents/gp-optimisation/figures/real_loss_contour.png', bbox_inches='tight')
# plt.show()

# NOTE(review): mid-file import; convention is to keep imports at the top of
# the file.
from matplotlib import rc
# Search box for (log10 C, log10 gamma).
bounds = np.array([[-4, 1], [-4, 1]])

xp, yp = gp.bayesian_optimisation(n_iters=30,
                                  sample_loss=sample_loss,
                                  bounds=bounds,
                                  n_pre_samples=3,
                                  random_search=100000)

print(xp)
print(yp)
# Disable TeX rendering before plotting.
rc('text', usetex=False)
# Plot the optimisation trajectory over the (lambdas, gammas) grid;
# `optimum` looks like a previously found grid-search maximum -- TODO
# confirm those coordinates are still current.
plotters.plot_iteration(lambdas,
                        xp,
                        yp,
                        first_iter=3,
                        second_param_grid=gammas,
                        optimum=[0.58333333, -2.15789474])
#weights = [start1, start2]
#for weight in weights:
#    model = VotingClassifier(estimators = keeplist, weights = weight, voting = 'soft')
#    model.fit(trainx, trainy)
#    pred = model.predict(testx)
#    acc = accuracy_score(testy, pred)
#    f1 = f1_score(testy, pred)
#    roc = roc_auc_score(testy, pred)
#    prec = precision_score(testy, pred)
#    rec = recall_score(testy, pred)
#    combination = np.mean([acc, f1, roc, prec, rec])
#    print(combination)

# Weight search over six parameters, each in [0, 5], starting from uniform
# weights -- presumably ensemble weights (cf. the commented-out
# VotingClassifier code above); confirm against the full file.
bounds = np.array([[0, 5], [0, 5], [0, 5], [0, 5], [0, 5], [0, 5]])
start = [[1, 1, 1, 1, 1, 1]]

# NOTE(review): the optimiser's return value (sampled points, scores) is
# discarded here, unlike every other call in this file -- verify whether
# the result should be captured.
bayesian_optimisation(n_iters=30,
                      sample_loss=sample_loss,
                      bounds=bounds,
                      x0=start)

# NOTE(review): `threshold`, `acc`, `f1`, `roc`, `prec`, `rec` and
# `logloss` are not defined in this chunk; they presumably come from an
# evaluation loop elsewhere -- verify before running.
scores = pd.DataFrame()
scores['threshold'] = threshold
scores['accuracy'] = acc
scores['f1'] = f1
scores['roc_auc'] = roc
scores['precision'] = prec
scores['recall'] = rec
scores['logloss'] = logloss

print(scores)