def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    '''
    find the smoothing value that gives the best accuracy on the dev data

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values
    :returns: best smoothing value, and a dict mapping every smoothing
        value that was tried to the dev accuracy it obtained
    :rtype: tuple (float, dict)

    '''

    # the label set is taken from the training labels
    labels = list(set(y_tr))
    scores = {}
    # start below any attainable accuracy so the first candidate is always
    # accepted, even when every candidate scores 0.0 on the dev data
    best = float('-inf')
    # placeholder that is only returned when `smoothers` is empty
    best_sm = 0

    for smoother in smoothers:
        weights = estimate_nb(x_tr, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv, weights, labels)
        acc = evaluation.acc(y_hat, y_dv)
        scores[smoother] = acc
        # strict comparison: on ties the earliest candidate is kept
        if acc > best:
            best = acc
            best_sm = smoother

    return best_sm, scores
Esempio n. 2
0
def train_model(loss,
                model,
                X_tr_var,
                Y_tr_var,
                num_its=200,
                X_dv_var=None,
                Y_dv_var=None,
                status_frequency=10,
                optim_args=None,
                param_file='best.params'):
    '''
    train a model with SGD, taking one gradient step per iteration on the
    whole training batch, and optionally checkpoint the parameters that
    score best on the dev data

    :param loss: loss object; loss.forward(predictions, targets) must return
        a scalar tensor that supports backward()
    :param model: model exposing forward, parameters, state_dict and
        load_state_dict
    :param X_tr_var: training inputs, passed to model.forward in one call
    :param Y_tr_var: training targets, passed to loss.forward
    :param num_its: number of gradient steps to take
    :param X_dv_var: dev inputs, or None to skip dev evaluation
    :param Y_dv_var: dev targets, or None to skip dev evaluation
    :param status_frequency: print a status line every this many iterations;
        values <= 0 disable printing
    :param optim_args: keyword arguments forwarded to optim.SGD; when None,
        {'lr': 0.002, 'momentum': 0} is used
    :param param_file: path the best checkpoint is written to
    :returns: the model (with the best dev parameters restored when dev data
        was given), the list of training losses, the list of dev accuracies
    :rtype: tuple

    '''
    # build the default here rather than in the signature so that no dict
    # object is shared between calls
    if optim_args is None:
        optim_args = {'lr': 0.002, 'momentum': 0}

    # dev evaluation needs both the inputs and the targets
    has_dev = X_dv_var is not None and Y_dv_var is not None

    # initialize optimizer
    optimizer = optim.SGD(model.parameters(), **optim_args)

    losses = []
    accuracies = []
    best_acc = None

    for epoch in range(num_its):
        # set gradient to zero
        optimizer.zero_grad()
        # run model forward to produce loss
        output = loss.forward(model.forward(X_tr_var), Y_tr_var)
        # backpropagate and train
        output.backward()
        optimizer.step()

        losses.append(output.item())

        # stays None when there is no dev data, so the status message below
        # never reads an unbound name
        acc = None
        if has_dev:
            # run forward on dev data; no gradients are needed for scoring
            with torch.no_grad():
                _, Y_hat = model.forward(X_dv_var).max(dim=1)
            # compute dev accuracy
            acc = evaluation.acc(Y_hat.data.numpy(), Y_dv_var.data.numpy())
            # write parameters if this is the best epoch yet
            if best_acc is None or acc > best_acc:
                best_acc = acc
                state = {
                    'state_dict': model.state_dict(),
                    'epoch': len(accuracies) + 1,
                    'accuracy': acc
                }
                torch.save(state, param_file)
            accuracies.append(acc)

        # print status message if desired
        if status_frequency > 0 and epoch % status_frequency == 0:
            if acc is None:
                print("Epoch " + str(epoch + 1) + ": Training Loss: " +
                      str(losses[-1]))
            else:
                print("Epoch " + str(epoch + 1) + ": Dev Accuracy: " +
                      str(acc))

    # load parameters of best model; a checkpoint only exists if at least
    # one dev evaluation was performed during this call
    if accuracies:
        checkpoint = torch.load(param_file)
        model.load_state_dict(checkpoint['state_dict'])

    return model, losses, accuracies
Esempio n. 3
0
def test_d2_2_predict():
    '''
    check clf_base.predict on the first instance of x_tr_pruned using the
    hand-set weights, then check the accuracy of clf_base.predict_all on
    x_dv_pruned against y_dv
    '''
    # single-instance prediction: inspect both the label and its scores
    first_instance = x_tr_pruned[0]
    predicted_label, label_scores = clf_base.predict(
        first_instance, hand_weights.theta_hand, labels)

    eq_(label_scores['pre-1980'], 0.1)
    assert_almost_equals(label_scores['2000s'], 1.3, places=5)
    eq_(predicted_label, '2000s')
    eq_(label_scores['1980s'], 0.0)

    # whole dev set: compare the accuracy with the expected value
    dev_predictions = clf_base.predict_all(
        x_dv_pruned, hand_weights.theta_hand, labels)
    dev_accuracy = evaluation.acc(dev_predictions, y_dv)
    assert_almost_equals(dev_accuracy, .3422222, places=5)
Esempio n. 4
0
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    '''
    find the smoothing value that gives the best accuracy on the dev data

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values
    :returns: best smoothing value, and a dict mapping every smoothing
        value that was tried to the dev accuracy it obtained
    :rtype: tuple (float, dict)

    '''
    # candidate labels come from the training labels, not from the dev
    # labels that the predictions are scored against
    labels = list(set(y_tr))
    scores = {}
    # start below any attainable accuracy so the first candidate is always
    # accepted, even when every candidate scores 0.0 on the dev data
    best_acc = float('-inf')
    # placeholder that is only returned when `smoothers` is empty
    best_smoother = 0.0

    for smoother in smoothers:
        weights = estimate_nb(x_tr, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv, weights, labels)
        acc = evaluation.acc(y_hat, y_dv)
        scores[smoother] = acc
        # strict comparison: on ties the earliest candidate is kept
        if acc > best_acc:
            best_acc = acc
            best_smoother = smoother

    # return the winning smoothing value itself, as documented, rather
    # than the accuracy it achieved
    return best_smoother, scores
Esempio n. 5
0
def test_d4_2b_perc_accuracy():
    '''
    the predictions read from perc-dev.preds must reach an accuracy of at
    least .43 against y_dv
    '''
    # the original author reports 43% accuracy for this check
    dev_predictions = evaluation.read_predictions('perc-dev.preds')
    dev_accuracy = evaluation.acc(dev_predictions, y_dv)
    assert_greater_equal(dev_accuracy, .43)
Esempio n. 6
0
def test_d3_3b_nb():
    '''
    the predictions read from nb-dev.preds must reach an accuracy of at
    least .46 against y_dv
    '''
    dev_predictions = evaluation.read_predictions('nb-dev.preds')
    dev_accuracy = evaluation.acc(dev_predictions, y_dv)
    assert_greater_equal(dev_accuracy, .46)
Esempio n. 7
0
def train_model(loss, model, X_tr, Y_tr, word_to_ix, tag_to_ix, X_dv=None, Y_dv=None, num_its=50, status_frequency=10,
                optim_args=None,
                param_file='best.params'):
    '''
    train a model with SGD, taking one gradient step per training sequence,
    and optionally checkpoint the parameters that score best on dev data

    :param loss: callable; loss(features, targets) must return a scalar
        tensor that supports backward()
    :param model: model exposing forward, predict, parameters and state_dict
    :param X_tr: iterable of training input sequences
    :param Y_tr: iterable of training tag sequences, parallel to X_tr
    :param word_to_ix: mapping handed to prepare_sequence for the inputs
    :param tag_to_ix: mapping from tag to index, used for the targets and to
        index the predicted and gold dev tags
    :param X_dv: iterable of dev input sequences, or None to skip dev
        evaluation
    :param Y_dv: iterable of dev tag sequences, or None to skip dev
        evaluation
    :param num_its: number of passes over the training data
    :param status_frequency: print a status line every this many passes;
        values <= 0 disable printing
    :param optim_args: keyword arguments forwarded to optim.SGD; when None,
        {'lr': 0.1, 'momentum': 0} is used
    :param param_file: path the best checkpoint is written to
    :returns: the model as it stands after the last pass (the best
        checkpoint is only written to param_file, it is not reloaded), the
        list of mean training losses, the list of dev accuracies
    :rtype: tuple

    '''
    # build the default here rather than in the signature so that no dict
    # object is shared between calls
    if optim_args is None:
        optim_args = {'lr': 0.1, 'momentum': 0}

    # initialize optimizer
    optimizer = optim.SGD(model.parameters(), **optim_args)

    losses = []
    accuracies = []
    best_acc = None

    for epoch in range(num_its):

        total_loss = 0
        n_train = 0

        for X, Y in zip(X_tr, Y_tr):
            X_tr_var = prepare_sequence(X, word_to_ix)
            Y_tr_var = prepare_sequence(Y, tag_to_ix)

            # set gradient to zero
            optimizer.zero_grad()

            lstm_feats = model.forward(X_tr_var)
            output = loss(lstm_feats, Y_tr_var)

            output.backward()
            optimizer.step()
            total_loss += output.item()
            n_train += 1

        # mean loss over this pass; 0.0 is recorded when there was nothing
        # to train on, instead of dividing by zero
        losses.append(total_loss / n_train if n_train else 0.0)

        # stays 0 when no dev data is given, so the status line below can
        # always be printed
        acc = 0
        if X_dv is not None and Y_dv is not None:
            weighted_acc = 0
            n_dev = 0
            for Xdv, Ydv in zip(X_dv, Y_dv):

                X_dv_var = prepare_sequence(Xdv, word_to_ix)
                # run forward on dev data; no gradients are needed here
                with torch.no_grad():
                    Y_hat = model.predict(X_dv_var)

                pred_ix = np.array([tag_to_ix[tag] for tag in Y_hat])
                gold_ix = np.array([tag_to_ix[tag] for tag in Ydv])

                # weight each sequence's accuracy by its length so that the
                # final figure is an average over all items
                weighted_acc += evaluation.acc(pred_ix, gold_ix) * len(Xdv)
                n_dev += len(Xdv)

            # leave acc at 0 when the dev data is empty, instead of
            # dividing by zero
            if n_dev:
                acc = weighted_acc / n_dev

            # write parameters if this is the best epoch yet
            if best_acc is None or acc > best_acc:
                best_acc = acc
                state = {'state_dict': model.state_dict(),
                         'epoch': len(accuracies) + 1,
                         'accuracy': acc}
                torch.save(state, param_file)
            accuracies.append(acc)

        # print status message if desired
        if status_frequency > 0 and epoch % status_frequency == 0:
            print("Epoch " + str(epoch + 1) + ": Dev Accuracy: " + str(acc))

    return model, losses, accuracies
def test_d7_3_bakeoff_dev1():
    '''
    the predictions stored in bakeoff-dev.preds.npy must reach an accuracy
    of at least 0.51 against Y_dv_var
    '''
    predictions = np.load('bakeoff-dev.preds.npy')
    gold = Y_dv_var.data.numpy()
    assert_greater_equal(evaluation.acc(predictions, gold), 0.51)
def test_d5_5_accuracy():
    '''
    the predictions stored in logreg-es-dev.preds.npy must reach an
    accuracy of at least 0.5 against Y_dv_var
    '''
    predictions = np.load('logreg-es-dev.preds.npy')
    gold = Y_dv_var.data.numpy()
    assert_greater_equal(evaluation.acc(predictions, gold), 0.5)