def test_nb_likelihood_ratio():
    expected = 8.7916
    instance = {'good': 1, 'worst': 4, OFFSET: 1}
    pos_likelihood = predict(instance, weights_nb, ALL_LABELS)[1]["POS"]
    neg_likelihood = predict(instance, weights_nb, ALL_LABELS)[1]["NEG"]
    actual = neg_likelihood - pos_likelihood
    assert_almost_equals(expected, actual, places=3,
                         msg="UNEQUAL Expected:%f, Actual:%f" % (expected, actual))
def test_d3_3a_nb():
    global x_tr_pruned, y_tr
    theta_nb = naive_bayes.estimate_nb(x_tr_pruned, y_tr, 0.1)
    y_hat, scores = clf_base.predict(x_tr_pruned[55], theta_nb, labels)
    assert_almost_equals(scores['2000s'], -1840.5064690929203, places=3)
    eq_(y_hat, '1980s')
    y_hat, scores = clf_base.predict(x_tr_pruned[155], theta_nb, labels)
    assert_almost_equals(scores['1980s'], -2153.0199277981355, places=3)
    eq_(y_hat, '2000s')
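# All of these snippets call predict / clf_base.predict with the signature
# predict(x, weights, labels) -> (best_label, scores). The actual course
# implementation is not shown here; the following is a minimal sketch of that
# contract, assuming weights keyed by (label, feature) tuples and a simple
# linear score. predict_sketch is a hypothetical name, not part of clf_base.
def predict_sketch(x, weights, labels):
    # score each label as the dot product of its weights with the base features
    scores = {label: sum(weights.get((label, feat), 0.0) * count
                         for feat, count in x.items())
              for label in labels}
    # return the argmax label together with the full score dict
    return max(scores, key=scores.get), scores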
def perceptron_update(x, y, weights, labels): """compute the perceptron update for a single instance :param x: instance, a counter of base features and weights :param y: label, a string :param weights: a weight vector, represented as a dict :param labels: set of possible labels :returns: updates to weights, which should be added to weights :rtype: defaultdict """ y_hat, scores = predict(x, weights, labels) f_x_y = make_feature_vector(x, y) f_x_y_hat = make_feature_vector(x, y_hat) update = defaultdict(float) diffKeys = set(f_x_y.keys()) - set(f_x_y_hat.keys()) for key in diffKeys: update[key] = f_x_y.get(key) diffKeys = set(f_x_y_hat.keys()) - set(f_x_y.keys()) for key in diffKeys: update[key] = 0.0 - f_x_y_hat.get(key) return update
def test_nb_d3_3():
    global x_tr, y_tr, x_dv, y_dv, x_te  # public
    theta_nb = naive_bayes.estimate_nb(x_tr, y_tr, 0.1)
    y_hat, scores = clf_base.predict(x_tr[55], theta_nb, labels)
    assert_almost_equals(scores['science'], -949.406, places=2)
def perceptron_update(x, y, weights, labels): """ compute the perceptron update for a single instance :param x: instance, a counter of base features :param y: label, strings :param weights: a weight vector, represented as a dict :param labels: set of possible labels :returns: updates to weights, which should be added to weights :rtype: defaultdict """ # update = f(x, y) - f(x, y_real) y_predicted, y_score = predict(x, weights, labels) update = defaultdict(float) f_predicted = make_feature_vector(x, y_predicted) f_real = make_feature_vector(x, y) features = set(f_predicted.keys()) features = features.union(f_real.keys()) for features in list(features): value = f_real[features] - f_predicted[features] if value != 0: update[features] = value return update
def perceptron_update(x, y, weights, labels):
    '''compute the perceptron update for a single instance

    :param x: instance, a counter of base features and weights
    :param y: label, a string
    :param weights: a weight vector, represented as a dict
    :param labels: set of possible labels
    :returns: updates to weights, which should be added to weights
    :rtype: defaultdict
    '''
    updated_weights = defaultdict(float)
    y_pred, _ = predict(x, weights, labels)
    fxy = make_feature_vector(x, y)
    fxy_pred = make_feature_vector(x, y_pred)
    # keys appearing in exactly one of the two feature vectors need an update
    wrong_predictions = set(fxy.keys()).symmetric_difference(set(fxy_pred.keys()))
    for prediction in wrong_predictions:
        if prediction in fxy:
            updated_weights[prediction] = fxy.get(prediction)
        else:
            updated_weights[prediction] = -fxy_pred.get(prediction)
    return updated_weights
def classify(words, all_tags): """This nested function should return a list of tags, computed using a classifier with the weights passed as arguments to make_classifier_tagger :param words: list of words :param all_tags: all possible tags :returns: list of tags :rtype: list """ # all_counters = {} # returnList = [] # for x in words: # all_counters[x]=1 # predict_v = clf_base.predict(all_counters,weights,all_tags) # returnList.append(predict_v[0]) # return returnList returnList = [] counter = Counter(words) for x in words: predict_v = clf_base.predict({x: counter[x]}, weights, all_tags) returnList.append(predict_v[0]) return returnList
def test_perceptron_prediction_actual_label():
    global wp
    actual = predict({'good': 1, 'worst': 4, OFFSET: 1}, wp, ALL_LABELS)[0]
    expected = "NEG"
    eq_(expected, actual, msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))
def test_clf_base_d2_2():
    global x_tr, x_dv  # public
    y_hat, scores = clf_base.predict(x_tr[5], hand_weights.theta_hand_original, labels)
    eq_(scores['iama'], 0.1)
    eq_(scores['science'], 5.0)
    eq_(y_hat, 'science')
    eq_(scores['askreddit'], 0.0)
def test_nb_prediction_actual_label():
    actual = predict({'good': 1, 'worst': 4, OFFSET: 1}, weights_nb, ALL_LABELS)[0]
    expected = "NEG"
    eq_(expected, actual, msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))
def makeClassifierTagger(weights):
    predict_tag = lambda word, alltags: clf_base.predict(
        {word: 1, OFFSET: 1}, weights, alltags)[0]
    tagger = lambda words, alltags: [predict_tag(word, alltags) for word in words]
    return tagger
def perceptron_update(x, y, weights, labels): """compute the perceptron update for a single instance """ update = defaultdict(float, {}) predictedLabel, scores = predict(x, weights, labels) if predictedLabel != y: update.update(make_feature_vector(x, y)) temp = make_feature_vector(x, predictedLabel) for x, y in temp.iteritems(): temp[x] = -y update.update(temp) return update
def test_d2_2_predict():
    global x_tr_pruned, x_dv_pruned, y_dv
    y_hat, scores = clf_base.predict(x_tr_pruned[0], hand_weights.theta_hand, labels)
    eq_(scores['pre-1980'], 0.1)
    assert_almost_equals(scores['2000s'], 1.3, places=5)
    eq_(y_hat, '2000s')
    eq_(scores['1980s'], 0.0)
    y_hat = clf_base.predict_all(x_dv_pruned, hand_weights.theta_hand, labels)
    assert_almost_equals(evaluation.acc(y_hat, y_dv), .3422222, places=5)
def test_nb_prediction_scores_for_positive_label():
    actual = predict({'good': 1, 'worst': 4, OFFSET: 1}, weights_nb, ALL_LABELS)[1]["POS"]
    expected = -39.8792
    assert_almost_equals(expected, actual, places=3,
                         msg="UNEQUAL Expected:%f, Actual:%f" % (expected, actual))
def classify(words, all_tags): """This nested function should return a list of tags, computed using a classifier with the weights passed as arguments to make_classifier_tagger :param words: list of words :param all_tags: all possible tags :returns: list of tags :rtype: list """ ret = [] for word in words: ret.append(clf_base.predict({word: 1}, weights, all_tags)[0]) return ret
def test_perceptron_prediction_scores_for_positive_label():
    global wp
    actual = predict({'good': 1, 'worst': 4, OFFSET: 1}, wp, ALL_LABELS)[1]["POS"]
    expected = -190.0
    assert_almost_equals(expected, actual, places=3,
                         msg="UNEQUAL Expected:%f, Actual:%f" % (expected, actual))
def test_avg_perceptron_prediction_scores_for_negative_label():
    global wap
    actual = predict({'good': 1, 'worst': 4, OFFSET: 1}, wap, ALL_LABELS)[1]["NEG"]
    expected = 188.3729
    assert_almost_equals(expected, actual, places=1,
                         msg="UNEQUAL Expected:%f, Actual:%f" % (expected, actual))
def compute_py(x,weights,labels): """compute probability P(y | x) :param x: base features :param weights: current weights :param labels: list of all possible labels :returns: probability distribution p(y | x), represented as dict {label:p(label|x)} :rtype: dict """ # hint: you should use clf_base.predict and logsumexp prob_dist = predict(x, weights, labels)[1] print prob_dist denom = logsumexp(prob_dist.values()) for label in prob_dist: prob_dist[label] = np.exp(prob_dist[label] - denom) return prob_dist
def compute_py(x, weights, labels): """compute probability P(y | x) :param x: base features :param weights: current weights :param labels: list of all possible labels :returns: probability distribution p(y | x), represented as dict {label:p(label|x)} :rtype: dict """ cc = {} key, value = predict(x, weights, labels) for label in labels: topPart = np.exp(value[label]) bottomPart = np.exp(logsumexp(value.values())) cc[label] = topPart / bottomPart return cc
def classify(words, all_tags): """This nested function should return a list of tags, computed using a classifier with the weights passed as arguments to make_classifier_tagger and using basefeatures for each token (just the token and the offset) :param words: list of words :param all_tags: all possible tags :returns: list of tags :rtype: list """ counter = Counter() counter.update(words) aList = list() for word in words: base_features = {word: counter[word]} label, score = clf_base.predict(base_features, weights, all_tags) aList.append(label) return aList
def compute_py(x, weights, labels): """compute probability P(y | x) :param x: base features :param weights: current weights :param labels: list of all possible labels :returns: probability distribution p(y | x), represented as dict {label:p(label|x)} :rtype: dict """ y_test, scores = predict(x, weights, labels) denom_sum = logsumexp(scores.values()) final_dict = {} for label in labels: final_dict[label] = np.exp(scores[label] - denom_sum) # hint: you should use clf_base.predict and logsumexp return final_dict
def perceptron_update(x,y,weights,labels): """compute the perceptron update for a single instance :param x: instance, a counter of base features and weights :param y: label, a string :param weights: a weight vector, represented as a dict :param labels: set of possible labels :returns: updates to weights, which should be added to weights :rtype: defaultdict """ updates = defaultdict(float) prediction = predict(x, weights, labels) if prediction[0] != y: feature_vector = make_feature_vector(x, y) y_hat_feature_vector = make_feature_vector(x, prediction[0]) for feature in feature_vector: updates[feature] = feature_vector[feature] for feature in y_hat_feature_vector: updates[feature] = -y_hat_feature_vector[feature] return updates
def classify(words, all_tags): """This nested function should return a list of tags, computed using a classifier with the weights passed as arguments to make_classifier_tagger and using basefeatures for each token (just the token and the offset) :param words: list of words :param all_tags: all possible tags :returns: list of tags :rtype: list """ #print(words) #print(weights) weights_words=[word[1] for word in weights.keys()] for word in words: if word not in weights_words: weights[('NOUN',word)]=1. for tag in all_tags: if tag!='NOUN': weights[(tag,word)]=0.0 seq_tag=[] for cnt,word in enumerate(words): seq_tag.append("") seq_tag[cnt],_= clf_base.predict(defaultdict(int,{word:1}),weights,all_tags) return seq_tag
def perceptron_update(x, y, weights, labels): """compute the perceptron update for a single instance :param x: instance, a counter of base features and weights :param y: label, a string :param weights: a weight vector, represented as a dict :param labels: set of possible labels :returns: updates to weights, which should be added to weights :rtype: defaultdict """ y_max = predict(x, weights, labels) y_max_label = y_max[0] fv = make_feature_vector(x, y) fv_new = make_feature_vector(x, y_max_label) new_thetha = defaultdict(float) if (y_max_label != y): for f in fv: #new_thetha[f] = weights[f] +fv[f]; new_thetha[f] += fv[f] for f in fv_new: #new_thetha[f] = weights[f] - fv_new[f]; new_thetha[f] -= fv_new[f] return new_thetha
def test_avg_perceptron_prediction_actual_label():
    global wap
    actual = predict({'good': 1, 'worst': 4, OFFSET: 1}, wap, ALL_LABELS)[0]
    expected = "NEG"
    eq_(expected, actual, msg="UNEQUAL Expected:%s, Actual:%s" % (expected, actual))
def makeClassifierTagger(weights):
    return lambda words, alltags: [
        clf_base.predict({word: 1, OFFSET: 1}, weights, alltags)[0]
        for word in words
    ]
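# Usage sketch for the classifier taggers above, assuming clf_base and OFFSET
# are in scope just as in the snippets themselves; the toy weight values,
# keyed by (tag, word) and (tag, OFFSET), are invented for illustration:
def _tagger_usage_sketch():
    toy_weights = {('NOUN', 'dog'): 1.0, ('DET', 'the'): 1.0,
                   ('NOUN', OFFSET): 0.1, ('DET', OFFSET): 0.0}
    tagger = makeClassifierTagger(toy_weights)
    # expected result with these weights: ['DET', 'NOUN']
    return tagger(['the', 'dog'], {'NOUN', 'DET'})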
def classify(words, all_tags): """Returns a list of tags, computed using a classifier with the weights passed as arguments to make_classifier_tagger """ return [ clf_base.predict({word: 1}, weights, all_tags)[0] for word in words ]