Beispiel #1
0
    params: list of parameters (initial values)
    """
    for e_i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:

            x = feats_to_vec(features)  # convert features to a vector.
            y = L2I[label]  # convert the label to number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            params[0] -= (learning_rate * grads[0])
            params[1] -= (learning_rate * grads[1])

        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(e_i, train_loss, train_accuracy, dev_accuracy)
    return params


if __name__ == '__main__':
    # Hyper-parameters for this run.
    num_iterations = 100
    learning_rate = 10 ** -4

    # The data splits are taken from the TRAIN / DEV names already in scope.
    train_data, dev_data = TRAIN, DEV

    # The classifier is sized by the number of entries in F2I and L2I.
    params = ll.create_classifier(len(F2I), len(L2I))

    trained_params = train_classifier(
        train_data, dev_data, num_iterations, learning_rate, params)
Beispiel #2
0
    return params

if __name__ == '__main__':
    # Load the training split as (label, text_to_bigrams(text)) pairs. The
    # list is fully built inside the `with`, so the file handle is closed as
    # soon as the data has been read.
    with open('train', 'r') as train_file:
        TRAIN = [(l, text_to_bigrams(t)) for l, t in read_data(train_file)]

    # Build the vocabulary once and reuse it for the feature index.
    train_vocab = vocabu(TRAIN)

    # Sorted order keeps the label / feature indices stable between runs.
    L2I = {l: i for i, l in enumerate(sorted({l for l, t in TRAIN}))}
    F2I = {f: i for i, f in enumerate(sorted(train_vocab))}

    # The features are the last element of every TRAIN entry.
    features = [example[-1] for example in TRAIN]
    features_vec = np.array(feats_to_vec(features, F2I))
    y = labels_to_y(TRAIN, L2I)

    in_dim = len(F2I)
    out_dim = len(L2I)

    # Labels and feature vectors are joined column-wise into a single array.
    # NOTE(review): axis=1 needs `y` to be 2-D with one row per example --
    # confirm against labels_to_y.
    train_data = np.concatenate((y, features_vec), axis=1)

    params = create_classifier(in_dim, out_dim)
    trained_params = train_classifier(train_data, 5, 0.01, params)

Beispiel #3
0
    test classifier with test data - no labels

    params - the trained params
    """
    fd = open("test.pred.ll", 'w')
    counter = 0
    test_ans = ''
    test_data = ut.read_data('test')
    for label, feature in test_data:
        pred = ll.predict(feats_to_vec(feature), parameters)
        for l, i in ut.L2I.items():
            if i == pred:
                test_ans = l
        counter += 1
        fd.write(test_ans + "\n")
    fd.close()


if __name__ == '__main__':
    # Load the train and dev sets, build the classifier and train it.

    train_data = ut.read_data('train')
    dev_data = ut.read_data('dev')

    # The classifier is sized by the number of entries in ut.F2I and ut.L2I.
    params = ll.create_classifier(len(ut.F2I), len(ut.L2I))
    trained_params = train_classifier(train_data, dev_data, EPOCH, LR, params)

    # A single pre-formatted string passed to print(...) is valid both as a
    # Python 2 print statement and as a Python 3 print() call, whereas a
    # comma-separated print statement is a SyntaxError on Python 3.
    print('the final params are:\nW =\n{}\nb =\n{}'.format(
        trained_params[0], trained_params[1]))
    # test(trained_params)
Beispiel #4
0
    params: list of parameters (initial values)
    """
    for I in xrange(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = label  # convert the label to number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            params = params - learning_rate * grads
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print I, train_loss, train_accuracy, dev_accuracy
    return params


if __name__ == '__main__':
    from collections import Counter

    def _load_bigrams(split):
        """Read one data split and pair each label with its bigram features."""
        return [(label, ut.text_to_bigrams(text))
                for label, text in ut.read_data(split)]

    TRAIN = _load_bigrams("train")
    DEV = _load_bigrams("dev")

    # Fold the features of every training example into one Counter.
    fc = Counter()
    for _, bigrams in TRAIN:
        fc.update(bigrams)

    params = ll.create_classifier(ut.vocab, 12)
    trained_params = train_classifier(TRAIN, DEV, 10000, 0.5, params)
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params

if __name__ == '__main__':
    from utils import (
        TRAIN as train_data,
        DEV as dev_data,
        TEST as test_data,
        I2L,
    )

    # Training configuration; the classifier is sized by len(F2I) x len(L2I).
    num_iterations, learning_rate = 10, 1e-3
    in_dim, out_dim = len(F2I), len(L2I)

    params = ll.create_classifier(in_dim, out_dim)
    trained_params = train_classifier(
        train_data, dev_data, num_iterations, learning_rate, params)

    # Run the trained classifier on every test example.
    preds = [ll.predict(feats_to_vec(features), trained_params)
             for features in test_data]

    # Disabled export of the predictions (the only user of I2L):
    # with open('test.pred', 'w') as f:
    #     for y_hat in preds:
    #         f.write(f'{I2L[y_hat]}\n')