def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    '''
    find the smoothing value that gives the best accuracy on the dev data

    For every candidate, weights are fit with ``estimate_nb`` on the training
    data, the dev instances are labelled with ``clf_base.predict_all`` and the
    predictions are scored against ``y_dv`` with ``evaluation.acc``.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values (each is used as a dict key)
    :returns: the best smoothing value, and a dict mapping every smoothing
        value to its dev accuracy. Ties go to the earliest candidate. When
        ``smoothers`` is empty the result is ``(0, {})``.
    :rtype: tuple

    '''

    # Candidate labels are the distinct labels seen in training.
    labels = list(set(y_tr))
    scores = {}

    # ``None`` means "no candidate evaluated yet". Starting from an accuracy
    # of 0 would let a run where every candidate scores 0.0 fall through and
    # return the placeholder 0 instead of a real candidate.
    best_acc = None
    best_sm = 0  # only returned when ``smoothers`` is empty

    for smoother in smoothers:
        weights = estimate_nb(x_tr, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv, weights, labels)
        acc = evaluation.acc(y_hat, y_dv)
        scores[smoother] = acc
        # Strict comparison keeps the first candidate on ties.
        if best_acc is None or acc > best_acc:
            best_acc = acc
            best_sm = smoother

    return best_sm, scores
Ejemplo n.º 2
0
def test_d2_2_predict():
    # Checks clf_base.predict and clf_base.predict_all using the weights in
    # hand_weights.theta_hand. The fixtures (x_tr_pruned, x_dv_pruned, y_dv,
    # labels) are module-level names that are only read here, so no ``global``
    # declaration is needed.

    # Single instance: inspect the per-label scores and the chosen label.
    predicted_label, label_scores = clf_base.predict(
        x_tr_pruned[0], hand_weights.theta_hand, labels)
    eq_(label_scores['pre-1980'], 0.1)
    assert_almost_equals(label_scores['2000s'], 1.3, places=5)
    eq_(predicted_label, '2000s')
    eq_(label_scores['1980s'], 0.0)

    # Whole pruned dev set: only the aggregate accuracy is compared.
    dev_predictions = clf_base.predict_all(
        x_dv_pruned, hand_weights.theta_hand, labels)
    dev_accuracy = evaluation.acc(dev_predictions, y_dv)
    assert_almost_equals(dev_accuracy, .3422222, places=5)
Ejemplo n.º 3
0
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    '''
    find the smoothing value that gives the best accuracy on the dev data

    For every candidate, weights are fit with ``estimate_nb`` on the training
    data, the dev instances are labelled with ``clf_base.predict_all`` and the
    predictions are scored against ``y_dv`` with ``evaluation.acc``.

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values (each is used as a dict key)
    :returns: the best smoothing value, and a dict mapping every smoothing
        value to its dev accuracy. Ties go to the earliest candidate. When
        ``smoothers`` is empty the result is ``(0.0, {})``.
    :rtype: tuple

    '''
    # Candidate labels are the distinct training labels; the dev labels are
    # only used for scoring, not as the set of classes to choose from.
    labels = list(set(y_tr))

    accuracy_by_smoother = {}
    # ``None`` means "no candidate evaluated yet", so a candidate is still
    # selected when every accuracy is 0.0.
    best_acc = None
    best_smoother = 0.0  # only returned when ``smoothers`` is empty

    for smoother in smoothers:
        weights = estimate_nb(x_tr, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv, weights, labels)
        acc = evaluation.acc(y_hat, y_dv)
        accuracy_by_smoother[smoother] = acc
        # Strict comparison keeps the first candidate on ties.
        if best_acc is None or acc > best_acc:
            best_acc = acc
            best_smoother = smoother

    # Return the winning smoothing value itself, as the name and docstring
    # promise, rather than the accuracy it achieved (which is available via
    # the returned dict).
    return best_smoother, accuracy_by_smoother