def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    '''
    Find the smoothing value that gives the best accuracy on the dev data.

    For every candidate in ``smoothers`` a model is estimated with
    ``estimate_nb`` on the training data, used to predict the dev instances
    via ``clf_base.predict_all``, and scored with ``evaluation.acc``.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values
    :returns: tuple ``(best_smoother, scores)`` where ``scores`` maps each
        smoothing value to its dev accuracy; on ties the earliest candidate
        wins. ``best_smoother`` is ``0`` only when ``smoothers`` is empty.
    :rtype: tuple(float, dict)
    '''
    # Candidate labels are the distinct labels seen in training.
    labels = list(set(y_tr))
    scores = {}

    # ``None`` marks "nothing evaluated yet" so that the first candidate is
    # always selected, even if its accuracy is 0.0. Starting the running best
    # at 0 would leave the placeholder below in place whenever every
    # candidate scores zero, returning a value that was never a candidate.
    best_acc = None
    best_sm = 0  # fallback for an empty ``smoothers`` list

    for smoother in smoothers:
        weights = estimate_nb(x_tr, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv, weights, labels)
        acc = evaluation.acc(y_hat, y_dv)
        scores[smoother] = acc
        # Strict comparison keeps the earliest candidate on ties.
        if best_acc is None or acc > best_acc:
            best_acc = acc
            best_sm = smoother

    return best_sm, scores
def test_d2_2_predict():
    """Check ``clf_base.predict`` / ``clf_base.predict_all`` with the hand-set weights.

    Relies on module-level fixtures (``x_tr_pruned``, ``x_dv_pruned``,
    ``y_dv``, ``labels``) being populated before the test runs.
    """
    global x_tr_pruned, x_dv_pruned, y_dv

    # Single-instance prediction: verify both the per-label scores and the
    # label that is finally chosen.
    predicted_label, label_scores = clf_base.predict(
        x_tr_pruned[0],
        hand_weights.theta_hand,
        labels,
    )
    eq_(label_scores['pre-1980'], 0.1)
    assert_almost_equals(label_scores['2000s'], 1.3, places=5)
    eq_(predicted_label, '2000s')
    eq_(label_scores['1980s'], 0.0)

    # Whole dev set: accuracy of the hand-set weights.
    dev_predictions = clf_base.predict_all(
        x_dv_pruned,
        hand_weights.theta_hand,
        labels,
    )
    dev_accuracy = evaluation.acc(dev_predictions, y_dv)
    assert_almost_equals(dev_accuracy, .3422222, places=5)
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    '''
    Find the smoothing value that gives the best accuracy on the dev data.

    For every candidate in ``smoothers`` a model is estimated with
    ``estimate_nb`` on the training data, used to predict the dev instances
    via ``clf_base.predict_all``, and scored with ``evaluation.acc``.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values
    :returns: tuple ``(best_smoother, scores)`` where ``scores`` maps each
        smoothing value to its dev accuracy; on ties the earliest candidate
        wins. ``best_smoother`` is ``None`` only when ``smoothers`` is empty.
    :rtype: tuple(float, dict)
    '''
    # Predict over the distinct training labels rather than the raw dev label
    # sequence: the dev sequence repeats every label once per instance and
    # uses the gold answers as prediction input.
    # NOTE(review): assumes predict_all expects a collection of candidate
    # labels -- confirm against clf_base.
    labels = list(set(y_tr))

    scores = {}
    best_acc = None       # None == nothing evaluated yet
    best_smoother = None

    for smoother in smoothers:
        weights = estimate_nb(x_tr, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv, weights, labels)
        acc = evaluation.acc(y_hat, y_dv)
        scores[smoother] = acc
        # Strict comparison keeps the earliest candidate on ties; the None
        # check guarantees a candidate is chosen even if every accuracy is 0.
        if best_acc is None or acc > best_acc:
            best_acc = acc
            best_smoother = smoother

    # Return the winning smoothing value (not its accuracy), as documented.
    return best_smoother, scores