Example #1
def test_nb_likelihood_ratio():
    expected = 8.7916
    instance = {'good': 1, 'worst': 4, OFFSET: 1}
    pos_likelihood = predict(instance, weights_nb, ALL_LABELS)[1]["POS"]
    neg_likelihood = predict(instance, weights_nb, ALL_LABELS)[1]["NEG"]
    actual = neg_likelihood - pos_likelihood
    assert_almost_equals(expected,
                         actual,
                         places=3,
                         msg="UNEQUAL Expected:%f, Actual:%f" %
                         (expected, actual))
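
The quantity under test is the log-likelihood ratio between the two labels, log P(x, NEG) - log P(x, POS). As a self-contained illustration of the same idea, here is a toy Naive Bayes with made-up probabilities (these numbers are invented and do not correspond to weights_nb):

import math

# invented log-probabilities for a two-word vocabulary
log_p_word = {
    ('POS', 'good'): math.log(0.7), ('POS', 'worst'): math.log(0.1),
    ('NEG', 'good'): math.log(0.2), ('NEG', 'worst'): math.log(0.6),
}
log_prior = {'POS': math.log(0.5), 'NEG': math.log(0.5)}
instance = {'good': 1, 'worst': 4}

def log_joint(label):
    # log P(x, y) = log P(y) + sum over words of count * log P(word | y)
    return log_prior[label] + sum(
        count * log_p_word[(label, word)] for word, count in instance.items())

print(log_joint('NEG') - log_joint('POS'))  # positive, so NEG wins here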
Example #2
def test_d3_3a_nb():
    global x_tr_pruned, y_tr

    theta_nb = naive_bayes.estimate_nb(x_tr_pruned, y_tr, 0.1)

    y_hat, scores = clf_base.predict(x_tr_pruned[55], theta_nb, labels)
    assert_almost_equals(scores['2000s'], -1840.5064690929203, places=3)
    eq_(y_hat, '1980s')

    y_hat, scores = clf_base.predict(x_tr_pruned[155], theta_nb, labels)
    assert_almost_equals(scores['1980s'], -2153.0199277981355, places=3)
    eq_(y_hat, '2000s')
Example #3
def perceptron_update(x, y, weights, labels):
    """compute the perceptron update for a single instance

    :param x: instance, a counter of base features and weights
    :param y: label, a string
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict

    """
    y_hat, scores = predict(x, weights, labels)
    f_x_y = make_feature_vector(x, y)
    f_x_y_hat = make_feature_vector(x, y_hat)

    update = defaultdict(float)

    # features of the true label missing from the prediction: add them
    for key in set(f_x_y) - set(f_x_y_hat):
        update[key] = f_x_y[key]

    # features of the predicted label missing from the truth: subtract them
    for key in set(f_x_y_hat) - set(f_x_y):
        update[key] = -f_x_y_hat[key]

    return update
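
For context, a minimal sketch of how an update like this is applied in a training loop. The predict and make_feature_vector below are simplified stand-ins for the clf_base versions (assumed behavior, not the course's actual code):

from collections import defaultdict

OFFSET = '**OFFSET**'  # assumed name of the offset feature

def make_feature_vector(x, y):
    # stand-in: pair each base feature, plus the offset, with the label
    fv = {(y, OFFSET): 1}
    for word, count in x.items():
        fv[(y, word)] = count
    return fv

def predict(x, weights, labels):
    # stand-in: argmax over labels of the linear score
    scores = {y: sum(weights.get(f, 0.0) * v
                     for f, v in make_feature_vector(x, y).items())
              for y in labels}
    y_hat = max(scores, key=scores.get)
    return y_hat, scores

weights = defaultdict(float, {('NEG', 'worst'): 1.0})
x, y = {'good': 1, 'worst': 4}, 'POS'
update = perceptron_update(x, y, weights, {'POS', 'NEG'})
for feature, value in update.items():
    weights[feature] += value  # theta <- theta + f(x, y) - f(x, y_hat)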
Example #4
def test_nb_d3_3():
    global x_tr, y_tr, x_dv, y_dv, x_te

    # public
    theta_nb = naive_bayes.estimate_nb(x_tr, y_tr, 0.1)
    y_hat, scores = clf_base.predict(x_tr[55], theta_nb, labels)
    assert_almost_equals(scores['science'], -949.406, places=2)
Example #5
def perceptron_update(x, y, weights, labels):
    """
    compute the perceptron update for a single instance

    :param x: instance, a counter of base features
    :param y: label, a string
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict

    """

    # update = f(x, y) - f(x, y_real)
    y_predicted, y_score = predict(x, weights, labels)

    update = defaultdict(float)

    f_predicted = make_feature_vector(x, y_predicted)
    f_real = make_feature_vector(x, y)

    features = set(f_predicted.keys()).union(f_real.keys())

    # use a distinct loop variable (the original shadowed `features`) and
    # .get(..., 0) so features present in only one vector do not KeyError
    for feature in features:
        value = f_real.get(feature, 0) - f_predicted.get(feature, 0)
        if value != 0:
            update[feature] = value

    return update
Example #6
def perceptron_update(x,y,weights,labels):
    '''
    compute the perceptron update for a single instance

    :param x: instance, a counter of base features and weights
    :param y: label, a string
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict

    '''
    updated_weights = defaultdict(float)
    y_pred, _ = predict(x, weights, labels)
    fxy = make_feature_vector(x, y)
    fxy_pred = make_feature_vector(x, y_pred)

    # feature keys where the true and predicted labels disagree
    changed_features = set(fxy.keys()).symmetric_difference(set(fxy_pred.keys()))
    for feature in changed_features:
        if feature in fxy:
            updated_weights[feature] = fxy[feature]
        else:
            updated_weights[feature] = -fxy_pred[feature]

    return updated_weights
Example #7
    def classify(words, all_tags):
        """This nested function should return a list of tags, computed using a classifier with the weights passed as arguments to make_classifier_tagger

        :param words: list of words
        :param all_tags: all possible tags
        :returns: list of tags
        :rtype: list

        """

        returnList = []
        counter = Counter(words)

        for x in words:
            predict_v = clf_base.predict({x: counter[x]}, weights, all_tags)
            returnList.append(predict_v[0])

        return returnList
Example #8
def test_perceptron_prediction_actual_label():
    global wp
    actual = predict({'good': 1, 'worst': 4, OFFSET: 1}, wp, ALL_LABELS)[0]
    expected = "NEG"
    eq_(expected,
        actual,
        msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))
Example #9
def test_clf_base_d2_2():
    global x_tr, x_dv

    # public
    y_hat, scores = clf_base.predict(x_tr[5], hand_weights.theta_hand_original,
                                     labels)
    eq_(scores['iama'], 0.1)
    eq_(scores['science'], 5.0)
    eq_(y_hat, 'science')
    eq_(scores['askreddit'], 0.0)
Example #10
def test_nb_prediction_actual_label():
    actual = predict({
        'good': 1,
        'worst': 4,
        OFFSET: 1
    }, weights_nb, ALL_LABELS)[0]
    expected = "NEG"
    eq_(expected,
        actual,
        msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))
Example #11
def makeClassifierTagger(weights):
    # tag each word independently, using a one-hot base feature plus the offset
    predict_tag = lambda word, alltags: clf_base.predict({
        word: 1,
        OFFSET: 1
    }, weights, alltags)[0]
    tagger = lambda words, alltags: [
        predict_tag(word, alltags) for word in words
    ]
    return tagger
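
A quick usage sketch for the returned tagger. The toy (tag, word) weights below are hypothetical; a defaultdict is used so that unseen (tag, word) pairs score zero:

from collections import defaultdict

toy_weights = defaultdict(float, {
    ('NOUN', 'dog'): 2.0,   # hypothetical weights, for illustration only
    ('VERB', 'barks'): 2.0,
})
tagger = makeClassifierTagger(toy_weights)
print(tagger(['dog', 'barks'], ['NOUN', 'VERB']))  # expected: ['NOUN', 'VERB']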
Example #12
def perceptron_update(x, y, weights, labels):
    """compute the perceptron update for a single instance
    """
    update = defaultdict(float)
    predicted_label, scores = predict(x, weights, labels)
    if predicted_label != y:
        update.update(make_feature_vector(x, y))
        # negate the predicted label's features; use .items() (the Python 2
        # .iteritems() is gone) and loop names that don't shadow x and y
        negated = make_feature_vector(x, predicted_label)
        for feature, value in negated.items():
            negated[feature] = -value
        update.update(negated)
    return update
Example #13
def test_d2_2_predict():
    global x_tr_pruned, x_dv_pruned, y_dv

    y_hat, scores = clf_base.predict(x_tr_pruned[0], hand_weights.theta_hand,
                                     labels)
    eq_(scores['pre-1980'], 0.1)
    assert_almost_equals(scores['2000s'], 1.3, places=5)
    eq_(y_hat, '2000s')
    eq_(scores['1980s'], 0.0)

    y_hat = clf_base.predict_all(x_dv_pruned, hand_weights.theta_hand, labels)
    assert_almost_equals(evaluation.acc(y_hat, y_dv), .3422222, places=5)
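
clf_base.predict_all and evaluation.acc are presumably just a map of predict over all instances and the fraction of agreements; a minimal sketch under that assumption, not the course's actual code:

import numpy as np

def predict_all(x, weights, labels):
    # assumed behavior: label every instance with predict's argmax label
    return np.array([predict(x_i, weights, labels)[0] for x_i in x])

def acc(y_hat, y):
    # assumed behavior: fraction of positions where the labels agree
    return np.mean([p == t for p, t in zip(y_hat, y)])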
Example #14
def test_nb_prediction_scores_for_positive_label():
    actual = predict({
        'good': 1,
        'worst': 4,
        OFFSET: 1
    }, weights_nb, ALL_LABELS)[1]["POS"]
    expected = -39.8792
    assert_almost_equals(expected,
                         actual,
                         places=3,
                         msg="UNEQUAL Expected:%f, Actual:%f" %
                         (expected, actual))
Example #15
    def classify(words, all_tags):
        """This nested function should return a list of tags, computed using a classifier with the weights passed as arguments to make_classifier_tagger

        :param words: list of words
        :param all_tags: all possible tags
        :returns: list of tags
        :rtype: list

        """
        ret = []
        for word in words:
            ret.append(clf_base.predict({word: 1}, weights, all_tags)[0])
        return ret
Example #16
def test_perceptron_prediction_scores_for_positive_label():
    global wp
    actual = predict({
        'good': 1,
        'worst': 4,
        OFFSET: 1
    }, wp, ALL_LABELS)[1]["POS"]
    expected = -190.0
    assert_almost_equals(expected,
                         actual,
                         places=3,
                         msg="UNEQUAL Expected:%f, Actual:%f" %
                         (expected, actual))
Example #17
def test_avg_perceptron_prediction_scores_for_negative_label():
    global wap
    actual = predict({
        'good': 1,
        'worst': 4,
        OFFSET: 1
    }, wap, ALL_LABELS)[1]["NEG"]
    expected = 188.3729
    assert_almost_equals(expected,
                         actual,
                         places=1,
                         msg="UNEQUAL Expected:%f, Actual:%f" %
                         (expected, actual))
Example #18
def compute_py(x,weights,labels):
    """compute probability P(y | x)

    :param x: base features
    :param weights: current weights 
    :param labels: list of all possible labels
    :returns: probability distribution p(y | x), represented as dict {label:p(label|x)}
    :rtype: dict

    """
    # hint: you should use clf_base.predict and logsumexp
    prob_dist = predict(x, weights, labels)[1]
    denom = logsumexp(list(prob_dist.values()))
    for label in prob_dist:
        prob_dist[label] = np.exp(prob_dist[label] - denom)
    return prob_dist
Example #19
def compute_py(x, weights, labels):
    """compute probability P(y | x)

    :param x: base features
    :param weights: current weights 
    :param labels: list of all possible labels
    :returns: probability distribution p(y | x), represented as dict {label:p(label|x)}
    :rtype: dict

    """
    prob_dist = {}
    _, scores = predict(x, weights, labels)
    log_denom = logsumexp(list(scores.values()))
    for label in labels:
        # exponentiate the difference of logs: computing exp(score) and
        # exp(logsumexp) separately underflows for very negative scores
        prob_dist[label] = np.exp(scores[label] - log_denom)
    return prob_dist
Example #20
    def classify(words, all_tags):
        """This nested function should return a list of tags, computed using a classifier with the weights passed as arguments to make_classifier_tagger and using basefeatures for each token (just the token and the offset)

        :param words: list of words
        :param all_tags: all possible tags
        :returns: list of tags
        :rtype: list

        """
        counter = Counter(words)
        tags = []
        for word in words:
            base_features = {word: counter[word]}
            label, score = clf_base.predict(base_features, weights, all_tags)
            tags.append(label)
        return tags
Example #21
def compute_py(x, weights, labels):
    """compute probability P(y | x)

    :param x: base features
    :param weights: current weights 
    :param labels: list of all possible labels
    :returns: probability distribution p(y | x), represented as dict {label:p(label|x)}
    :rtype: dict

    """
    # hint: you should use clf_base.predict and logsumexp
    _, scores = predict(x, weights, labels)
    denom_sum = logsumexp(list(scores.values()))

    final_dict = {}
    for label in labels:
        final_dict[label] = np.exp(scores[label] - denom_sum)
    return final_dict
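
Both compute_py variants implement the softmax p(y|x) = exp(score_y) / sum_y' exp(score_y'); subtracting the logsumexp inside the exponent, as done above, is what keeps the ratio from underflowing when the log-scores are large and negative. A standalone sketch on toy scores:

import numpy as np
from scipy.special import logsumexp

scores = {'POS': -1002.0, 'NEG': -1005.0}  # toy log-scores

# stable: exponentiate only after subtracting the log normalizer
denom = logsumexp(list(scores.values()))
print({y: np.exp(s - denom) for y, s in scores.items()})
# -> roughly {'POS': 0.953, 'NEG': 0.047}

# the naive form np.exp(s) / np.exp(denom) would compute 0.0 / 0.0 here,
# because exp(-1002) underflows in float64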
Example #22
def perceptron_update(x, y, weights, labels):
    """compute the perceptron update for a single instance

    :param x: instance, a counter of base features and weights
    :param y: label, a string
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict

    """
    updates = defaultdict(float)
    prediction = predict(x, weights, labels)
    if prediction[0] != y:
        feature_vector = make_feature_vector(x, y)
        y_hat_feature_vector = make_feature_vector(x, prediction[0])
        for feature in feature_vector:
            updates[feature] = feature_vector[feature]
        for feature in y_hat_feature_vector:
            updates[feature] = -y_hat_feature_vector[feature]
    return updates
Example #23
    def classify(words, all_tags):
        """This nested function should return a list of tags, computed using a classifier with the weights passed as arguments to make_classifier_tagger and using basefeatures for each token (just the token and the offset)

        :param words: list of words
        :param all_tags: all possible tags
        :returns: list of tags
        :rtype: list

        """
        # words already covered by some (tag, word) weight key
        known_words = set(word for _, word in weights.keys())
        # give unseen words weight only under NOUN, so they default to NOUN
        for word in words:
            if word not in known_words:
                weights[('NOUN', word)] = 1.0
                for tag in all_tags:
                    if tag != 'NOUN':
                        weights[(tag, word)] = 0.0
        seq_tag = []
        for word in words:
            label, _ = clf_base.predict(defaultdict(int, {word: 1}),
                                        weights, all_tags)
            seq_tag.append(label)
        return seq_tag
Example #24
def perceptron_update(x, y, weights, labels):
    """compute the perceptron update for a single instance

    :param x: instance, a counter of base features and weights
    :param y: label, a string 
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels 
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict

    """
    y_max_label, _ = predict(x, weights, labels)
    fv = make_feature_vector(x, y)
    fv_new = make_feature_vector(x, y_max_label)
    new_theta = defaultdict(float)
    if y_max_label != y:
        for f in fv:
            new_theta[f] += fv[f]
        for f in fv_new:
            new_theta[f] -= fv_new[f]
    return new_theta
Example #25
def makeClassifierTagger(weights):
    return lambda words, alltags: [
        clf_base.predict({word: 1, OFFSET: 1}, weights, alltags)[0]
        for word in words
    ]
Example #26
    def classify(words, all_tags):
        """Returns a list of tags, computed using a classifier with the weights passed as arguments to make_classifier_tagger
        """
        return [
            clf_base.predict({word: 1}, weights, all_tags)[0] for word in words
        ]