# Example 1
def grid_search():
    """
    Grid-search over the regularization parameter lambda.

    Trains an SGD CRF model for each candidate lambda on a 100-example
    subset and appends the validation score (``scores[-2]``) for each run
    to ``gridsearch_results.txt``, one line per lambda.

    Side effects: reads the dataset files under ``./dataset/`` and
    overwrites ``gridsearch_results.txt``.
    """
    # Candidate regularization strengths to sweep.
    lambdas = [1e2, 1e3, 1e4, 1e5]

    # load data
    train_labels_og = "./dataset/trainingLabels.txt"
    train_sentences_og = "./dataset/trainingSentences.txt"

    train_labels, train_sentences, validation_labels, validation_sentences = sr.shuffle_examples(
        train_labels_og, train_sentences_og, pct_train=0.5
    )

    # Context manager guarantees the results file is closed even if a
    # training run raises.
    with open("gridsearch_results.txt", "w") as gridfile:
        # run over values of lambda
        for lam in lambdas:
            print(lam)
            # Only the first 100 examples are used so each sweep point
            # finishes quickly; 20 is the epoch budget per run.
            weights, scores, epoch, ept_avg = SGD_train(
                train_labels[:100],
                train_sentences[:100],
                20,
                validation_labels[:100],
                validation_sentences[:100],
                "word",
                lam,
            )
            print(scores)
            # scores[-2] is the recorded validation score for this lambda
            # — presumably the last validation entry; TODO confirm against
            # SGD_train's return layout.
            gridfile.write(str(scores[-2]) + "\n")
# Example 2
# Training script: shuffle the dataset into train/validation halves,
# train an SGD CRF model, and persist weights/scores/epoch stats as .npy.
import SGD_CRF
import subroutines as sr
import dataproc as dp
import numpy as np

# first load and shuffle the data
train_labels_og = './dataset/trainingLabels.txt'
train_sentences_og = './dataset/trainingSentences.txt'

# pct_train=0.5 gives an even 50/50 train/validation split.
train_labels, train_sentences, validation_labels, validation_sentences = sr.shuffle_examples(train_labels_og, train_sentences_og, pct_train=0.5)

# Train for 20 epochs using 'word' features.
# NOTE(review): exact meaning of the 'word' flag and of ept_avg is defined
# in SGD_CRF.SGD_train — confirm there.
weights, scores, epoch, ept_avg = SGD_CRF.SGD_train(train_labels, train_sentences, 20,
                                                    validation_labels, validation_sentences, 'word')

# Persist results; the '0p5' suffix encodes the pct_train=0.5 split used above.
np.save('sgd_w_0p5.npy', np.array(weights))
np.save('sgd_s_0p5.npy', np.array(scores))
np.save('sgd_ep_0p5.npy', np.array([epoch,ept_avg]))