Esempio n. 1
0
def tune(train_fn, param_vals, train_feats, train_labels, val_feats,
         val_labels):
    """Train once per candidate parameter value and record accuracies.

    For each entry of ``param_vals``, ``train_fn(train_feats, train_labels,
    value)`` is called and its result unpacked as ``(theta, theta_0)``. The
    resulting classifier is then scored on the training data and on the
    validation data via ``p1.classify`` / ``p1.accuracy``.

    Args:
        train_fn: Callable taking (features, labels, parameter value) and
            returning a ``(theta, theta_0)`` pair.
        param_vals: Sized iterable of candidate parameter values.
        train_feats: Training features passed to ``train_fn`` and
            ``p1.classify``.
        train_labels: Training labels passed to ``train_fn`` and
            ``p1.accuracy``.
        val_feats: Validation features passed to ``p1.classify``.
        val_labels: Validation labels passed to ``p1.accuracy``.

    Returns:
        A ``(train_accs, val_accs)`` pair of 1-D NumPy arrays, each with one
        entry per element of ``param_vals``, in the same order.
    """
    n_candidates = len(param_vals)
    # Uninitialised buffers; every slot is overwritten in the loop below.
    train_accs = np.empty(n_candidates)
    val_accs = np.empty(n_candidates)

    def score(feats, labels, theta, theta_0):
        # Accuracy of the (theta, theta_0) classifier on one data split.
        predictions = p1.classify(feats, theta, theta_0)
        return p1.accuracy(predictions, labels)

    for idx, candidate in enumerate(param_vals):
        theta, theta_0 = train_fn(train_feats, train_labels, candidate)
        train_accs[idx] = score(train_feats, train_labels, theta, theta_0)
        val_accs[idx] = score(val_feats, val_labels, theta, theta_0)

    return train_accs, val_accs
Esempio n. 2
0
def check_extract_bow_feature_vectors():
    """Sanity-check ``p1.extract_bow_feature_vectors`` and log the outcome.

    The function under test is called with two short texts and a four-word
    dictionary. The check proceeds in stages, logging through ``log``:

    1. FAIL if the call raises ``NotImplementedError``.
    2. FAIL if the result is not a NumPy array.
    3. FAIL if the result's shape differs from the expected ``(2, 4)``
       matrix. The full shape is compared (not just the row count) so the
       element-wise comparisons below are always well-defined.
    4. PASS once the structural checks succeed, followed by a second line:
       WARN if the matrix matches binary indicator features, PASS if it
       matches word-count features, FAIL otherwise.

    Returns:
        None. All results are reported via ``log``.
    """
    ex_name = "Extract bow feature vectors"
    texts = ["He loves her ", "He really really loves her"]
    keys = ["he", "loves", "her", "really"]
    dictionary = {k: i for i, k in enumerate(keys)}
    # Expected matrix when features are 0/1 presence indicators.
    exp_res = np.array([[1, 1, 1, 0], [1, 1, 1, 1]])
    # Expected matrix when features are word counts ("really" occurs twice).
    non_bin_res = np.array([[1, 1, 1, 0], [1, 1, 1, 2]])

    try:
        res = p1.extract_bow_feature_vectors(texts, dictionary)
    except NotImplementedError:
        log(red("FAIL"), ex_name, ": not implemented")
        return

    if not isinstance(res, np.ndarray):
        log(red("FAIL"), ex_name, ": does not return a numpy array, type: ",
            type(res))
        return
    # Compare complete shapes: a matching row count alone would let a
    # wrongly-sized matrix reach the element-wise comparisons below.
    if res.shape != exp_res.shape:
        log(red("FAIL"), ex_name, ": expected an array of shape ",
            exp_res.shape, " but got array of shape", res.shape)
        return

    log(green("PASS"), ex_name)

    if (res == exp_res).all():
        log(yellow("WARN"), ex_name, ": uses binary indicators as features")
    elif (res == non_bin_res).all():
        log(green("PASS"), ex_name, ": correct non binary features")
    else:
        log(red("FAIL"), ex_name, ": unexpected feature matrix")
Esempio n. 3
0
def check_bag_of_words():
    """Sanity-check ``p1.bag_of_words`` and log the outcome.

    The function under test is called with two short sentences. The check
    proceeds in stages, logging through ``log``:

    1. FAIL if the call raises ``NotImplementedError``.
    2. FAIL if the result is not a ``dict``.
    3. FAIL if the sorted dictionary values are not exactly
       ``0 .. len(result) - 1``.
    4. PASS once the structural checks succeed, followed by a second line:
       WARN if the keys are the full vocabulary (stopwords kept), PASS if
       the keys are the vocabulary with stopwords removed, FAIL otherwise
       (listing missing and unexpected keys).

    Returns:
        None. All results are reported via ``log``.
    """
    ex_name = "Bag of words"

    texts = ["He loves to walk on the beach", "There is nothing better"]

    try:
        res = p1.bag_of_words(texts)
    except NotImplementedError:
        log(red("FAIL"), ex_name, ": not implemented")
        return
    if not isinstance(res, dict):
        # The message names the type that is actually being checked for.
        log(red("FAIL"), ex_name, ": does not return a dict, type: ",
            type(res))
        return

    vals = sorted(res.values())
    exp_vals = list(range(len(res)))
    if vals != exp_vals:
        log(red("FAIL"), ex_name, ": wrong set of indices. Expected: ",
            exp_vals, " got ", vals)
        return

    log(green("PASS"), ex_name, "")

    keys = sorted(res)
    # Full vocabulary of the two sentences, lower-cased and sorted.
    exp_keys = [
        'beach', 'better', 'he', 'is', 'loves', 'nothing', 'on', 'the',
        'there', 'to', 'walk'
    ]
    # Vocabulary that should remain once stopwords have been removed.
    stop_keys = ['beach', 'better', 'loves', 'nothing', 'walk']

    if keys == exp_keys:
        log(yellow("WARN"), ex_name, ": does not remove stopwords:",
            [k for k in keys if k not in stop_keys])
    elif keys == stop_keys:
        log(green("PASS"), ex_name, " stopwords removed")
    else:
        log(red("FAIL"), ex_name, ": keys are missing:",
            [k for k in stop_keys if k not in keys], " or are unexpected:",
            [k for k in keys if k not in stop_keys])
Esempio n. 4
0
#-------------------------------------------------------------------------------
# Data loading
#-------------------------------------------------------------------------------


def _texts_and_labels(samples):
    """Transpose samples into their 'text' values and 'sentiment' values.

    Returns an iterable that yields the tuple of texts followed by the tuple
    of sentiments, so it can be unpacked directly into two names.
    """
    return zip(*((entry['text'], entry['sentiment']) for entry in samples))


train_data = utils.load_data('reviews_train.tsv')
val_data = utils.load_data('reviews_val.tsv')
test_data = utils.load_data('reviews_test.tsv')

train_texts, train_labels = _texts_and_labels(train_data)
val_texts, val_labels = _texts_and_labels(val_data)
test_texts, test_labels = _texts_and_labels(test_data)

# The dictionary is built from the training texts only and then reused to
# extract features for all three splits.
dictionary = p1.bag_of_words(train_texts)

train_bow_features, val_bow_features, test_bow_features = [
    p1.extract_bow_feature_vectors(split_texts, dictionary)
    for split_texts in (train_texts, val_texts, test_texts)
]

#-------------------------------------------------------------------------------
# Calculate theta & theta_0 using each algorithm
#-------------------------------------------------------------------------------

toy_data = utils.load_toy_data('toy_data.tsv')
toy_features, toy_labels = toy_data

# T is passed to the perceptron below; L is not used in the visible part of
# this script (presumably consumed by later code -- verify).
T = 10
L = 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
Esempio n. 5
0
 def train_fn(features, labels, L):
     """Run ``p1.pegasos`` with a variable ``L`` and a fixed ``best_T``.

     ``best_T`` is taken from the enclosing scope and passed as the third
     positional argument; ``L`` is forwarded as the fourth. The result of
     ``p1.pegasos`` is returned unchanged.
     """
     trained = p1.pegasos(features, labels, best_T, L)
     return trained
Esempio n. 6
0
 def train_fn(features, labels, T):
     """Run ``p1.pegasos`` with a variable ``T`` and a fixed ``best_L``.

     ``T`` is forwarded as the third positional argument; ``best_L`` is
     taken from the enclosing scope and passed as the fourth. The result of
     ``p1.pegasos`` is returned unchanged.
     """
     trained = p1.pegasos(features, labels, T, best_L)
     return trained