Code Example #1
import numpy as np
import utils

def feats_to_vec(features):
    # Count the text's character bigrams and normalize to a histogram.
    bigrams = utils.text_to_bigrams(features)
    feature_vector = np.zeros(len(utils.vocab))
    for b in bigrams:
        if b in utils.vocab:
            feature_vector[utils.F2I[b]] += 1
    return feature_vector / len(bigrams)
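
As a quick illustration of what this function returns, here is a self-contained sketch with a toy vocabulary standing in for the assignment's utils module (text_to_bigrams is assumed to yield character bigrams):

import numpy as np

# Toy stand-ins for utils.vocab and utils.F2I, for illustration only.
vocab = {"he", "el", "ll", "lo"}
F2I = {f: i for i, f in enumerate(sorted(vocab))}

def text_to_bigrams(text):
    # Character bigrams, as utils.text_to_bigrams presumably produces them.
    return [text[i:i + 2] for i in range(len(text) - 1)]

bigrams = text_to_bigrams("hello")  # ['he', 'el', 'll', 'lo']
vec = np.zeros(len(vocab))
for b in bigrams:
    if b in vocab:
        vec[F2I[b]] += 1
print(vec / len(bigrams))  # [0.25 0.25 0.25 0.25]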
Code Example #2
from collections import defaultdict
import numpy as np
import utils as ut

def feats_to_vec(features):
    counters = defaultdict(int)
    for bigram in ut.text_to_bigrams(features):
        if bigram in ut.F2I:  # F2I is a dict: index it, don't call it
            counters[ut.F2I[bigram]] += 1
    # Return a numpy vector of features, not the raw counter dict.
    feat_vec = np.zeros(len(ut.F2I))
    for idx, count in counters.items():
        feat_vec[idx] = count
    return feat_vec
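
Returning a numpy vector rather than the counter dict matters downstream: the training loops in the later examples pass this value straight to ll.predict and ll.loss_and_gradients, which do array arithmetic on it.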
Code Example #3
File: train_mlp1.py  Project: greensd4/DLAss1
import numpy as np
import utils as ut

def feats_to_vec(features):
    bigrams = ut.text_to_bigrams(features)
    feat_vec = np.zeros(len(ut.F2I))
    matches_counter = 0
    for bigram in bigrams:
        if bigram in ut.F2I:
            feat_vec[ut.F2I[bigram]] += 1
            matches_counter += 1
    # Normalize by the number of in-vocabulary bigrams; guard against
    # dividing by zero when nothing matched.
    return feat_vec / matches_counter if matches_counter else feat_vec
Code Example #4
File: train_loglin.py  Project: greensd4/DLAss1
import numpy as np
import utils as ut

def feats_to_vec(features):
    # Should return a numpy vector of features.
    feats = ut.text_to_bigrams(features)
    feat_vec = np.zeros(len(ut.F2I))
    matches = 0
    for bigram in feats:
        if bigram in ut.F2I:
            feat_vec[ut.F2I[bigram]] += 1
            matches += 1
    # Guard against dividing by zero when nothing matched the vocabulary.
    return feat_vec / matches if matches else feat_vec
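
Note the normalization difference between these examples: #1 and #5 divide by the total number of bigrams in the text, while #3 and #4 divide only by the number of bigrams found in the vocabulary, so out-of-vocabulary bigrams dilute the former histogram but not the latter.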
Code Example #5
import numpy as np
import utils

def feats_to_vec(features):
    """
    Calculates a normalized histogram over the vocabulary's features.
    features: the raw text; its character bigrams are the features.
    """
    bigrams = utils.text_to_bigrams(features)
    feat_vec = np.zeros(len(utils.vocab))
    for b in bigrams:
        if b in utils.vocab:
            feat_vec[utils.F2I[b]] += 1
    return feat_vec / len(bigrams)
Code Example #6
def create_test_pred_file(test_data, params):
    """
    Predicts a label for every test example and writes it to "test.pred".
    :return: None
    """
    with open("test.pred", 'w') as f:
        for label, features in test_data:
            x = feats_to_vec(features)
            y_hat = ll.predict(x, params)
            # Map the predicted index back to its label string.
            for l, i in utils.L2I.items():
                if y_hat == i:
                    label = l
                    break
            f.write(label + "\n")
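
A shorter alternative to the index-to-label scan above, assuming utils.L2I maps label strings to indices: invert the dictionary once, then look predictions up directly.

I2L = {i: l for l, i in utils.L2I.items()}
# ...inside the loop:
f.write(I2L[y_hat] + "\n")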


LR = 0.001
NUM_ITERATIONS = 30

if __name__ == '__main__':
    # YOUR CODE HERE
    # write code to load the train and dev sets, set up whatever you need,
    # and call train_classifier.

    # ...

    params = ll.create_classifier(len(utils.F2I), len(utils.L2I))
    trained_params = train_classifier(utils.TRAIN, utils.DEV, NUM_ITERATIONS,
                                      LR, params)

    TEST = [(l, utils.text_to_bigrams(t)) for l, t in utils.read_data("test")]
    create_test_pred_file(TEST, trained_params)
Code Example #7
            W, b = params
            # Gradient descent: step against the gradient.
            params[0] = W - learning_rate * grad_W
            params[1] = b - learning_rate * grad_b


        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        #dev_accuracy = accuracy_on_dataset(dev_data, params)
        #print (I, train_loss, train_accuracy, dev_accuracy)
    return params

if __name__ == '__main__':
    # YOUR CODE HERE
    # write code to load the train and dev sets, set up whatever you need,
    # and call train_classifier.
    TRAIN = [(l, text_to_bigrams(t)) for l, t in read_data(open('train', 'r'))]
    train_vocab = vocabu(TRAIN)
    # Map each label and each vocabulary feature to a stable integer index.
    L2I = {l: i for i, l in enumerate(sorted({l for l, t in TRAIN}))}
    F2I = {f: i for i, f in enumerate(sorted(train_vocab))}


    # Take the features out of TRAIN.
    features = [t for l, t in TRAIN]
    features_vec = feats_to_vec(features, F2I)
    y = labels_to_y(TRAIN, L2I)
    in_dim = len(F2I)
    out_dim = len(L2I)
    params = create_classifier(in_dim, out_dim)
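
labels_to_y is called above but not defined in the snippet. A minimal reconstruction, assuming it simply maps each training example's label to its L2I index (name and behavior inferred from the call site):

import numpy as np

def labels_to_y(data, L2I):
    # Map every (label, text) pair's label string to its integer index.
    return np.array([L2I[l] for l, t in data])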
Code Example #8
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Train a classifier with SGD and return the trained parameters.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):  # xrange is Python 2 only
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = ut.L2I[label]  # convert the label string to its index.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # SGD step: move each parameter against its gradient.
            params = [p - learning_rate * g for p, g in zip(params, grads)]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params
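
accuracy_on_dataset is used above but not shown. A sketch of what it presumably computes, assuming ll.predict returns a class index and ut.L2I maps labels to indices:

def accuracy_on_dataset(dataset, params):
    # Fraction of examples whose predicted index matches the gold label.
    good = 0
    for label, features in dataset:
        if ll.predict(feats_to_vec(features), params) == ut.L2I[label]:
            good += 1
    return good / len(dataset)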


if __name__ == '__main__':
    TRAIN = [(l, ut.text_to_bigrams(t)) for l, t in ut.read_data("train")]
    DEV = [(l, ut.text_to_bigrams(t)) for l, t in ut.read_data("dev")]

    from collections import Counter

    fc = Counter()
    for l, feats in TRAIN:
        fc.update(feats)

    # create_classifier expects dimensions, not the vocabulary set itself.
    params = ll.create_classifier(len(ut.vocab), 12)
    trained_params = train_classifier(TRAIN, DEV, 10000, 0.5, params)
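
The Counter fc is built above but never used in this snippet; presumably it was meant to prune the feature set to the most frequent bigrams, along the lines of the sketch below (the 600 cutoff is a guess, not taken from the source):

vocab = set(f for f, c in fc.most_common(600))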