예제 #1
0
def test_predictions(dataset, params):
    """Write one predicted label per dataset entry to ``test.pred``.

    Each ``(label, features)`` pair is vectorised with ``feats_to_vec`` and
    classified with ``model.predict``; the predicted index is mapped to its
    label through ``ut.I2L``.  Labels are separated by a newline and no
    newline follows the last one.
    """
    with open('test.pred', 'w') as out:
        separator = ''  # nothing precedes the first label
        for _gold, features in dataset:
            predicted = model.predict(feats_to_vec(features), params)
            out.write(separator)
            out.write(ut.I2L[predicted])
            separator = '\n'
예제 #2
0
def test_classifier(test_data, test_raw, trained_params):
    """Predict a label for every test example and write them to ``test.pred``.

    Args:
        test_data: iterable of feature collections, one per example.
        test_raw: unused; kept so existing callers keep working.
        trained_params: parameters passed through to ``mlpn.predict``.

    One label (looked up in ``I2L``) is written per line.
    """
    # The context manager closes the file even if prediction raises midway.
    with open("test.pred", "w") as f:
        for features in test_data:
            x = feats_to_vec(features)
            category = mlpn.predict(x, trained_params)
            f.write(I2L[category] + "\n")
예제 #3
0
def accuracy_on_dataset(dataset, params):
    """Return the fraction of examples that ``mlpn.predict`` gets right.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Features are
            handed to the model as-is and the prediction is compared
            directly with ``label``.
        params: model parameters forwarded to ``mlpn.predict``.

    Returns:
        correct / total as a float, or 0.0 when the dataset is empty
        (this used to raise ZeroDivisionError).
    """
    good = total = 0.0
    for label, features in dataset:
        total += 1
        if mlpn.predict(features, params) == label:
            good += 1
    return good / total if total else 0.0
def predict_on_test(params):
    """Classify the 'test' bigram set and print one result per line to ``test.pred``.

    The output file is opened before the test set is loaded. Every example is
    vectorised with ``feats_to_vec``, classified with ``mlp.predict`` and the
    prediction is converted with ``index_to_lang`` before being printed.
    """
    with open('test.pred', 'w') as out:
        for _unused, features in load_test_set('test', 'bigrams'):
            vector = feats_to_vec(features)
            language = index_to_lang(mlp.predict(vector, params))
            print(language, file=out)
예제 #5
0
def test(test_data, params):
    """Write the predicted label of every test example to ``test.pred``.

    Args:
        test_data: iterable of ``(label, features)`` pairs; the label is
            ignored.
        params: model parameters forwarded to ``ml.predict``.

    Raises:
        KeyError: if the model predicts an index that has no entry in
            ``ut.L2I``.  (The old code silently wrote the example's own
            dataset label in that case.)
    """
    # Invert L2I once instead of scanning it for every example.  setdefault
    # keeps the first label seen for an index, like the old scan-and-break.
    index_to_label = {}
    for key, val in ut.L2I.items():
        index_to_label.setdefault(val, key)

    # The context manager closes the file even if prediction raises midway.
    with open("test.pred", 'w') as prediction_file:
        for _, features in test_data:
            x = feats_to_vec(features)  # convert features to a vector.
            pred = ml.predict(x, params)
            prediction_file.write(str(index_to_label[pred]) + "\n")
예제 #6
0
def accuracy_on_dataset(dataset, params):
    """Compute the accuracy of ``model.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Features go
            through ``feats_to_vec`` and labels through ``ut.L2I`` before
            the comparison.
        params: model parameters forwarded to ``model.predict``.

    Returns:
        correct / total as a float, or 0.0 when the dataset is empty
        (this used to raise ZeroDivisionError).
    """
    correct = seen = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)
        y = ut.L2I[label]
        seen += 1
        if model.predict(x, params) == y:
            correct += 1
    if not seen:
        return 0.0
    return correct / seen
예제 #7
0
def accuracy_on_dataset(dataset, params, F2I):
    """Compute the accuracy of ``mn.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.
        params: model parameters forwarded to ``mn.predict``.
        F2I: passed as the second argument to ``feats_to_vec``.

    Returns:
        correct / total as a float, or 0.0 when the dataset is empty
        (this used to raise ZeroDivisionError).
    """
    good = bad = 0.0
    for label, features in dataset:
        x = feats_to_vec(features, F2I)  # convert features to a vector.
        y = ut.L2I[label]                # convert the label to a number.
        if mn.predict(x, params) == y:
            good += 1
        else:
            bad += 1

    total = good + bad
    # Guard the division: an empty dataset has no defined accuracy.
    return good / total if total else 0.0
예제 #8
0
def run_test(test_data, params):
    """Write the predicted label of every test example to ``test.pred``.

    Args:
        test_data: iterable of ``(label, features)`` pairs; the label is
            ignored.
        params: model parameters forwarded to ``ml.predict``.

    Raises:
        KeyError: if the model predicts an index that has no entry in
            ``ut.L2I``.  (The old code silently wrote the example's own
            dataset label in that case.)
    """
    # Build the index -> label map once; setdefault keeps the first label
    # for an index, matching the old per-example scan that stopped at the
    # first hit.
    index_to_label = {}
    for key, val in ut.L2I.items():
        index_to_label.setdefault(val, key)

    # The context manager closes the file even if prediction raises midway.
    with open("test.pred", 'w') as pred_file:
        for _, features in test_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y_hat = ml.predict(x, params)
            pred_file.write(str(index_to_label[y_hat]) + "\n")
예제 #9
0
def accuracy_on_dataset(dataset, params):
    """Compute the accuracy of ``mlpn.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Features go
            through ``feats_to_vec``; labels are mapped with ``L2I``.
        params: model parameters forwarded to ``mlpn.predict``.

    Returns:
        correct_predictions / all_predictions as a float, or 0.0 when the
        dataset is empty (this used to raise ZeroDivisionError).
    """
    good = total = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)
        y_pred = mlpn.predict(x, params)
        total += 1
        # Look the label up once instead of once per counter.
        if y_pred == L2I[label]:
            good += 1
    return good / total if total else 0.0
예제 #10
0
def accuracy_on_dataset(dataset, params):
    """Compute the accuracy of ``mlpn.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Labels are mapped
            with ``utils.L2I`` and features with ``feats_to_vec``.
        params: model parameters forwarded to ``mlpn.predict``.

    Returns:
        correct_predictions / all_predictions as a float, or 0.0 when the
        dataset is empty (this used to raise ZeroDivisionError).
    """
    good = bad = 0.0
    for label, features in dataset:
        expected = utils.L2I[label]
        if expected == mlpn.predict(feats_to_vec(features), params):
            good += 1
        else:
            bad += 1
    examples = good + bad
    if examples == 0:
        return 0.0
    return good / examples
def pred(pred_data, params):
    """Write the predicted label of each example in ``pred_data`` to ``test.pred``.

    The index -> label table is derived from ``utils.L2I``; one label is
    written per line.
    """
    index_to_label = dict((index, name) for name, index in utils.L2I.items())

    with open("test.pred", "w+") as out:
        for features in pred_data:
            vector = feats_to_vec(features)
            predicted = mlpn.predict(vector, params)
            out.write(index_to_label[predicted])
            out.write("\n")
def accuracy_on_dataset(dataset, params):
    """Return the share of ``(label, features)`` pairs predicted correctly.

    A falsy dataset (for example an empty list) yields 0 without touching
    the model.  Features are passed to ``mlpn.predict`` unchanged and the
    prediction is compared directly with the label.
    """
    if not dataset:
        # nothing to evaluate
        return 0

    hits = misses = 0.0
    for gold, features in dataset:
        is_correct = mlpn.predict(features, params) == gold
        if is_correct:
            hits += 1
        else:
            misses += 1
    return hits / (hits + misses)
예제 #13
0
def accuracy_on_dataset(dataset, params):
    """Compute the accuracy of ``mlpn.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Features go
            through ``feats_to_vec``; labels are mapped with ``L2I``.
        params: model parameters forwarded to ``mlpn.predict``.

    Returns:
        correct_predictions / all_predictions as a float, or 0.0 when the
        dataset is empty (this used to raise ZeroDivisionError).
    """
    good = bad = 0.0
    for label, features in dataset:
        feats_vec = feats_to_vec(features)
        prediction = mlpn.predict(feats_vec, params)
        if prediction == L2I[label]:
            good += 1
        else:
            bad += 1
    total = good + bad
    return good / total if total else 0.0
예제 #14
0
def accuracy_on_dataset(dataset, params):
    """Compute the accuracy of ``mlpn.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Features go
            through ``feats_to_vec``; labels are mapped with ``utils.L2I``.
        params: model parameters forwarded to ``mlpn.predict``.

    Returns:
        correct / total as a float, or 0.0 when the dataset is empty
        (this used to raise ZeroDivisionError).
    """
    good = seen = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)
        y = utils.L2I[label]
        y_hat = mlpn.predict(x, params)
        seen += 1
        if y == y_hat:
            good += 1
    # An empty dataset has no defined accuracy; report 0.0 instead of failing.
    return good / seen if seen else 0.0
예제 #15
0
def accuracy_on_dataset(dataset, params):
    """Compute the accuracy of ``ml.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Features go
            through ``feats_to_vec``; labels are mapped with ``ut.L2I``.
        params: model parameters forwarded to ``ml.predict``.

    Returns:
        correct_predictions / all_predictions as a float, or 0.0 when the
        dataset is empty (this used to raise ZeroDivisionError).
    """
    good = bad = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)  # convert features to a vector.
        y = ut.L2I[label]  # convert the label to a number.
        pred = ml.predict(x, params)
        if y == pred:
            good += 1
        else:
            bad += 1
    total = good + bad
    if not total:
        return 0.0
    return good / total
예제 #16
0
def accuracy_on_dataset(dataset, params):
    """Compute the accuracy of ``mlp_n.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Features go
            through ``feats_to_vec``; labels are mapped with ``ut.L2I``.
        params: model parameters forwarded to ``mlp_n.predict``.

    Returns:
        correct_predictions / all_predictions as a float, or 0.0 when the
        dataset is empty (this used to raise ZeroDivisionError).
    """
    good = total = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)  # convert features to a vector.
        y_hat = mlp_n.predict(x, params)
        total += 1
        if y_hat == ut.L2I[label]:
            good += 1
    return good / total if total else 0.0
예제 #17
0
파일: train_mlpn.py 프로젝트: machouz/DL1
def accuracy_on_dataset(dataset, params):
    """Compute the accuracy of ``mlpn.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Features go
            through ``feats_to_vec``; labels are mapped with ``L2I.get``,
            so an unknown label becomes ``None`` and counts as a miss
            unless the model also returns ``None``.
        params: model parameters forwarded to ``mlpn.predict``.

    Returns:
        correct_predictions / all_predictions as a float, or 0.0 when the
        dataset is empty (this used to raise ZeroDivisionError).
    """
    good = total = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)  # convert features to a vector.
        y = L2I.get(label)  # convert the label to a number if known.
        if mlpn.predict(x, params) == y:
            good += 1
        total += 1
    if total == 0:
        return 0.0
    return good / total
예제 #18
0
def accuracy_on_dataset(dataset, params):
    """Compute the accuracy of ``mlpn.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Features are
            handed to the model as-is and the prediction is compared
            directly with ``label``.
        params: model parameters forwarded to ``mlpn.predict``.

    Returns:
        correct_predictions / all_predictions as a float, or 0.0 when the
        dataset is empty (this used to raise ZeroDivisionError).
    """
    good = bad = 0.0
    for label, features in dataset:
        pred_label = mlpn.predict(features, params)
        if pred_label == label:
            good += 1
        else:
            bad += 1

    total = good + bad
    return good / total if total else 0.0
def accuracy_on_dataset(dataset, params):
    """Compute the accuracy of ``mlp.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Features go
            through ``feats_to_vec``; the prediction is compared directly
            with ``label``.
        params: model parameters forwarded to ``mlp.predict``.

    Returns:
        correct_predictions / all_predictions as a float, or 0.0 when the
        dataset is empty (this used to raise ZeroDivisionError).
    """
    good = seen = 0.0
    for label, features in dataset:
        feat_vec = feats_to_vec(features)
        y_hat = mlp.predict(feat_vec, params)
        seen += 1
        if label == y_hat:
            good += 1
    if not seen:
        return 0.0
    return good / seen
예제 #20
0
def test(parameters):
    """Predict a label for every example of the 'test' data (labels unused).

    Args:
        parameters: the trained params, forwarded to ``mlpn.predict``.

    Returns:
        list of predicted labels in dataset order.  The previous version
        computed each label and then discarded it, returning None; callers
        that ignore the return value are unaffected.

    Raises:
        KeyError: if a predicted index has no entry in ``ut.L2I``.
    """
    test_data = ut.read_data('test')
    # Invert the label map once rather than scanning it for every example.
    index_to_label = {i: l for l, i in ut.L2I.items()}
    return [
        index_to_label[mlpn.predict(feats_to_vec(feature), parameters)]
        for _, feature in test_data
    ]
예제 #21
0
def accuracy_on_dataset(dataset, params):
    """Compute the accuracy of ``ll.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Features go
            through ``feats_to_vec``; labels are mapped with ``L2I``.
        params: model parameters forwarded to ``ll.predict``.

    Returns:
        correct_predictions / all_predictions as a float, or 0.0 when the
        dataset is empty (this used to raise ZeroDivisionError).
    """
    good = bad = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)
        y = L2I[label]
        y_hat = ll.predict(x, params)
        # Plain equality instead of testing the difference against zero.
        if y == y_hat:
            good += 1
        else:
            bad += 1
    total = good + bad
    return good / total if total else 0.0
예제 #22
0
def create_test_pred_file(test_data, params):
    """
    creates a 'test.pred' file with one predicted label per line
    :param test_data: iterable of (label, features) pairs; the label is ignored
    :param params: trained params, forwarded to mlpn.predict
    :return: None
    :raises KeyError: if a predicted index has no entry in utils.L2I (the old
        code silently wrote the example's own dataset label in that case)
    """
    # Build the index -> label map once; setdefault keeps the first label
    # for an index, matching the old per-example scan that stopped at the
    # first hit.
    index_to_label = {}
    for l, i in utils.L2I.items():
        index_to_label.setdefault(i, l)

    # The context manager closes the file even if prediction raises midway.
    with open("test.pred", 'w') as f:
        for _, features in test_data:
            x = feats_to_vec(features)
            y_hat = mlpn.predict(x, params)
            f.write(index_to_label[y_hat] + "\n")
예제 #23
0
def accuracy_on_dataset(dataset, params):
    """Compute the accuracy of ``mlpn.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Features go
            through ``feats_to_vec``; labels are mapped with ``ut.L2I``.
        params: model parameters forwarded to ``mlpn.predict``.

    Returns:
        correct_predictions / all_predictions as a float, or 0.0 when the
        dataset is empty (this used to raise ZeroDivisionError).
    """
    good = total = 0.0
    for label, features in dataset:
        feat_vec = feats_to_vec(features)
        y_hat = mlpn.predict(feat_vec, params)
        total += 1
        if ut.L2I[label] == y_hat:
            good += 1

    return good / total if total else 0.0
def accuracy_on_dataset(dataset, params):
    """Compute the accuracy of ``mlp.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Features go
            through ``feats_to_vec``; the label is used as given.
        params: model parameters forwarded to ``mlp.predict``.

    Returns:
        correct_predictions / all_predictions as a float, or 0.0 when the
        dataset is empty (this used to raise ZeroDivisionError).
    """
    good = bad = 0.0

    for label, features in dataset:
        x = feats_to_vec(features)  # convert features to a vector.
        if mlp.predict(x, params) == label:
            good += 1
        else:
            bad += 1

    total = good + bad
    if total == 0:
        return 0.0
    return good / total
def accuracy_on_dataset(dataset, params):
    """Compute the accuracy of ``mlpn.predict`` on a labelled dataset.

    Args:
        dataset: iterable of ``(label, features)`` pairs.  Features go
            through ``feats_to_vec``; labels are mapped with ``L2I``.
        params: model parameters forwarded to ``mlpn.predict``.

    Returns:
        correct_predictions / all_predictions as a float, or 0.0 when the
        dataset is empty (this used to raise ZeroDivisionError).
    """
    good = bad = 0.0
    for label, features in dataset:
        # Separate names instead of rebinding the loop variables.
        expected = L2I[label]
        vector = feats_to_vec(features)

        if mlpn.predict(vector, params) == expected:
            good += 1
        else:
            bad += 1

    total = good + bad
    accuracy = good / total if total else 0.0

    return accuracy
    def fileData(fData):
        """Convert (lang, bigrams) records into [language, count-vector] pairs.

        Every record gets a zero vector of length ``len(all_bigrams)``; each
        bigram found in ``all_bigrams`` increments the slot stored for it.
        Languages missing from ``all_langs`` are encoded as -1.
        """
        rows = []
        for lang, bigrams in fData:
            counts = np.zeros(len(all_bigrams))
            for known in (bg for bg in bigrams if bg in all_bigrams):
                counts[all_bigrams[known]] += 1
            if lang in all_langs:
                lang_id = all_langs[lang]
            else:
                lang_id = -1
            rows.append([lang_id, counts])
        return rows

# process the training and dev data and print accuracy
#params = mlp.create_classifier(len(all_bigrams), int(math.log(len(all_bigrams) * len(all_langs))), len(all_langs))

    # Dimensions handed to the classifier: the first entry is the number of
    # known bigrams, the last the number of languages.
    layer_dims = [len(all_bigrams), 20, 30, 40, 10, len(all_langs)]
    params = mlp.create_classifier(layer_dims)
    train_set = fileData(utils.TRAIN)
    dev_set = fileData(utils.DEV)
    trained_params = train_classifier(train_set, dev_set, num_iterations,
                                      learning_rate, params)

    # run prediction on the test data
    predict = [lang_to_id[mlp.predict(data, trained_params)]
               for _label, data in fileData(utils.TEST)]
""" Commented out so that the existing test.pred file is not overwritten
# write the prediction to a file
    predict_file = open('test.pred', 'w')
    predict_file.writelines(["%s\n" % item  for item in predict])
    predict_file.close()
"""
"""
Written by Ari Bornstein
"""

import pickle
import utils
import mlpn as mlp

# Load the trained parameters, predict a label for every test example and
# write one label per line to the output file.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load a model.p that this project produced itself.
with open("model.p", "rb") as model_file:
    params = pickle.load(model_file)
test = utils.TEST_BIGRAMS
# Context managers close both files even if prediction fails midway.
with open(r'..\data\test.pred.', 'w') as out:
    for x in test:
        pred = utils.I2L[mlp.predict(x, params)]
        out.write("{}\n".format(pred))
예제 #28
0
        for b in bs:
            if b not in bigrams:
                bigrams[b] = i
                i += 1

    def dataFromFile(fileData):
        """Convert (lang, bigrams) records into [language, count-vector] pairs.

        Every record gets a zero vector of length ``len(bigrams)``; each
        bigram found in ``bigrams`` increments the slot stored for it.
        Languages missing from ``languages`` are encoded as -1.
        """
        rows = []
        for lang, record_bigrams in fileData:
            counts = np.zeros(len(bigrams))
            for known in (b for b in record_bigrams if b in bigrams):
                counts[bigrams[known]] += 1
            if lang in languages:
                lang_id = languages[lang]
            else:
                lang_id = -1
            rows.append([lang_id, counts])
        return rows

    # Vectorise the three splits, train the classifier, then predict a
    # language for every test example.
    trainData = dataFromFile(utils.TRAIN)
    devData = dataFromFile(utils.DEV)
    testData = dataFromFile(utils.TEST)

    params = mlpn.create_classifier([len(bigrams), 13, 13, len(languages)])
    trainedParams = train_classifier(trainData, devData, 20, 0.01, params)

    predictions = [
        languagesBack[mlpn.predict(data, trainedParams)]
        for _label, data in testData
    ]

    # The context manager closes the file even if the write fails.
    # Predictions are newline-separated with no trailing newline.
    with open('test.pred', 'w') as outF:
        outF.write("\n".join(predictions))
예제 #29
0
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params

if __name__ == '__main__':
    # Load the train/dev/test splits and the vocabularies from utils, train
    # the classifier, then predict on the test split.
    from utils import TRAIN as train_data
    from utils import DEV as dev_data
    from utils import TEST as test_data
    from utils import L2I, I2L, F2I

    num_iterations = 10
    learning_rate = 1e-2

    # Dimensions handed to the classifier: feature vocabulary size first,
    # number of labels last.
    dims = [len(F2I), 40, len(L2I)]

    params = mlpn.create_classifier(dims)
    trained_params = train_classifier(
        train_data, dev_data, num_iterations, learning_rate, params)

    preds = [mlpn.predict(feats_to_vec(features), trained_params)
             for features in test_data]

    # Writing the predictions to disk is currently disabled:
    # with open('test.pred', 'w') as f:
    #     for y_hat in preds:
    #         f.write(f'{I2L[y_hat]}\n')

예제 #30
0
def accuracy_on_dataset(dataset, params):
    """Return the share of ``(x, y)`` pairs where ``pn.predict(x, params) == y``.

    When ``config.debug`` is set, the distribution of predicted values is
    printed first.
    """
    gold_and_predicted = []
    for x, y in dataset:
        gold_and_predicted.append((y, pn.predict(x, params)))
    if config.debug:
        predicted_counts = Counter([pair[1] for pair in gold_and_predicted])
        print("yhat counter: {}".format(predicted_counts))
    matches = [gold == predicted for gold, predicted in gold_and_predicted]
    return sum(matches) / len(matches)