def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    '''find the smoothing value that gives the best accuracy on the dev data

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values
    :returns: (best smoothing value, dict mapping each smoother to its dev accuracy)
    :rtype: tuple
    '''
    # the classifier needs the set of possible labels, taken from training data
    label_set = list(set(y_tr))
    dev_scores = {}
    for smoother in smoothers:
        theta = estimate_nb(x_tr, y_tr, smoother)
        predictions = clf_base.predict_all(x_dv, theta, label_set)
        dev_scores[smoother] = evaluation.acc(predictions, y_dv)
    # keep the first smoother whose accuracy strictly beats all earlier ones
    winner, top_score = 0, 0
    for smoother, score in dev_scores.items():
        if score > top_score:
            top_score, winner = score, smoother
    return winner, dev_scores
def train_model(loss, model, X_tr_var, Y_tr_var, num_its=200,
                X_dv_var=None, Y_dv_var=None, status_frequency=10,
                optim_args=None, param_file='best.params'):
    '''Train a model with full-batch SGD, checkpointing the best dev epoch.

    :param loss: loss module, called as loss(model output, gold labels)
    :param model: torch module to train (modified in place)
    :param X_tr_var: training instances
    :param Y_tr_var: training labels
    :param num_its: number of epochs
    :param X_dv_var: dev instances; if None, no dev evaluation is done
    :param Y_dv_var: dev labels
    :param status_frequency: print a status line every this many epochs (0 disables)
    :param optim_args: kwargs for optim.SGD; defaults to {'lr': 0.002, 'momentum': 0}
    :param param_file: path where the best model parameters are checkpointed
    :returns: (model, per-epoch losses, per-epoch dev accuracies)
    :rtype: tuple
    '''
    # avoid a mutable default argument shared across calls
    if optim_args is None:
        optim_args = {'lr': 0.002, 'momentum': 0}
    optimizer = optim.SGD(model.parameters(), **optim_args)
    losses = []
    accuracies = []
    acc = None  # defined up front so the status print can't hit a NameError when no dev data is given
    for epoch in range(num_its):
        # set gradient to zero
        optimizer.zero_grad()
        # run model forward to produce loss
        output = loss(model(X_tr_var), Y_tr_var)
        # backpropagate and train
        output.backward()
        optimizer.step()
        losses.append(output.item())
        # write parameters if this is the best epoch yet
        if X_dv_var is not None:
            # run forward on dev data
            _, Y_hat = model(X_dv_var).max(dim=1)
            # compute dev accuracy
            acc = evaluation.acc(Y_hat.data.numpy(), Y_dv_var.data.numpy())
            # save when this epoch beats every previous dev accuracy
            if len(accuracies) == 0 or acc > max(accuracies):
                state = {
                    'state_dict': model.state_dict(),
                    'epoch': len(accuracies) + 1,
                    'accuracy': acc
                }
                torch.save(state, param_file)
            accuracies.append(acc)
        # print status message if desired
        if status_frequency > 0 and epoch % status_frequency == 0:
            print("Epoch " + str(epoch + 1) + ": Dev Accuracy: " + str(acc))
    # load parameters of best model — only if a checkpoint was actually written
    # (with no dev data, param_file was never saved and torch.load would fail)
    if accuracies:
        checkpoint = torch.load(param_file)
        model.load_state_dict(checkpoint['state_dict'])
    return model, losses, accuracies
def test_d2_2_predict():
    '''Check single-instance and full-dev prediction with the hand-built weights.'''
    global x_tr_pruned, x_dv_pruned, y_dv
    # predict on one training instance and inspect the per-label scores
    y_hat, scores = clf_base.predict(x_tr_pruned[0], hand_weights.theta_hand, labels)
    eq_(scores['pre-1980'], 0.1)
    assert_almost_equals(scores['2000s'], 1.3, places=5)
    eq_(y_hat, '2000s')
    eq_(scores['1980s'], 0.0)
    # predict the whole dev set and check overall accuracy
    all_preds = clf_base.predict_all(x_dv_pruned, hand_weights.theta_hand, labels)
    assert_almost_equals(evaluation.acc(all_preds, y_dv), .3422222, places=5)
def find_best_smoother(x_tr, y_tr, x_dv, y_dv, smoothers):
    '''find the smoothing value that gives the best accuracy on the dev data

    :param x_tr: training instances
    :param y_tr: training labels
    :param x_dv: dev instances
    :param y_dv: dev labels
    :param smoothers: list of smoothing values
    :returns: (best smoothing value, dict mapping each smoother to its dev accuracy)
    :rtype: tuple
    '''
    # predict_all needs the set of candidate labels, not the gold dev labels
    # (the original passed y_dv here by mistake)
    labels = list(set(y_tr))
    my_acc_dict = {}
    max_score = 0.0
    best_smoother = 0.0
    for smoother in smoothers:
        weights = estimate_nb(x_tr, y_tr, smoother)
        y_hat = clf_base.predict_all(x_dv, weights, labels)
        acc = evaluation.acc(y_hat, y_dv)
        my_acc_dict[smoother] = acc
        if acc > max_score:
            max_score = acc
            # track the smoother itself: the docstring promises the best
            # smoothing value, not the best accuracy
            best_smoother = smoother
    return best_smoother, my_acc_dict
def test_d4_2b_perc_accuracy():
    '''Perceptron dev predictions should reach at least 43% accuracy.'''
    global y_dv
    predictions = evaluation.read_predictions('perc-dev.preds')
    dev_acc = evaluation.acc(predictions, y_dv)
    assert_greater_equal(dev_acc, .43)
def test_d3_3b_nb():
    '''Naive Bayes dev predictions should reach at least 46% accuracy.'''
    global y_dv
    predictions = evaluation.read_predictions('nb-dev.preds')
    dev_acc = evaluation.acc(predictions, y_dv)
    assert_greater_equal(dev_acc, .46)
def train_model(loss, model, X_tr, Y_tr, word_to_ix, tag_to_ix,
                X_dv=None, Y_dv=None, num_its=50, status_frequency=10,
                optim_args=None, param_file='best.params'):
    '''Train a sequence tagger with per-instance SGD, checkpointing the best dev epoch.

    :param loss: loss module, called as loss(model features, gold tag sequence)
    :param model: torch module to train (modified in place); must expose forward() and predict()
    :param X_tr: training token sequences
    :param Y_tr: training tag sequences
    :param word_to_ix: vocabulary mapping for tokens
    :param tag_to_ix: vocabulary mapping for tags
    :param X_dv: dev token sequences; if None, no dev evaluation is done
    :param Y_dv: dev tag sequences
    :param num_its: number of epochs
    :param status_frequency: print a status line every this many epochs (0 disables)
    :param optim_args: kwargs for optim.SGD; defaults to {'lr': 0.1, 'momentum': 0}
    :param param_file: path where the best model parameters are checkpointed
    :returns: (model, per-epoch mean losses, per-epoch dev accuracies)
    :rtype: tuple
    '''
    # avoid a mutable default argument shared across calls
    if optim_args is None:
        optim_args = {'lr': 0.1, 'momentum': 0}
    # initialize optimizer
    optimizer = optim.SGD(model.parameters(), **optim_args)
    losses = []
    accuracies = []
    for epoch in range(num_its):
        loss_value = 0
        count1 = 0
        for X, Y in zip(X_tr, Y_tr):
            X_tr_var = prepare_sequence(X, word_to_ix)
            Y_tr_var = prepare_sequence(Y, tag_to_ix)
            # set gradient to zero
            optimizer.zero_grad()
            lstm_feats = model.forward(X_tr_var)
            output = loss(lstm_feats, Y_tr_var)
            output.backward()
            optimizer.step()
            loss_value += output.item()
            count1 += 1
        # mean per-instance loss for this epoch
        losses.append(loss_value / count1)
        # write parameters if this is the best epoch yet
        acc = 0
        if X_dv is not None and Y_dv is not None:
            acc = 0
            count2 = 0
            for Xdv, Ydv in zip(X_dv, Y_dv):
                X_dv_var = prepare_sequence(Xdv, word_to_ix)
                Y_dv_var = prepare_sequence(Ydv, tag_to_ix)
                # run forward on dev data
                Y_hat = model.predict(X_dv_var)
                Yhat = np.array([tag_to_ix[yhat] for yhat in Y_hat])
                Ydv = np.array([tag_to_ix[ydv] for ydv in Ydv])
                # token-weighted accuracy: weight each sequence by its length
                acc += (evaluation.acc(Yhat, Ydv)) * len(Xdv)
                count2 += len(Xdv)
            # save when this epoch beats every previous dev accuracy
            acc /= count2
            if len(accuracies) == 0 or acc > max(accuracies):
                state = {'state_dict': model.state_dict(),
                         'epoch': len(accuracies) + 1,
                         'accuracy': acc}
                torch.save(state, param_file)
            accuracies.append(acc)
        # print status message if desired
        if status_frequency > 0 and epoch % status_frequency == 0:
            print("Epoch " + str(epoch + 1) + ": Dev Accuracy: " + str(acc))
    return model, losses, accuracies
def test_d7_3_bakeoff_dev1():
    '''Bakeoff dev predictions should reach at least 51% accuracy.'''
    global Y_dv_var
    predicted = np.load('bakeoff-dev.preds.npy')
    gold = Y_dv_var.data.numpy()
    assert_greater_equal(evaluation.acc(predicted, gold), 0.51)
def test_d5_5_accuracy():
    '''Logistic-regression dev predictions should reach at least 50% accuracy.'''
    global Y_dv_var
    predicted = np.load('logreg-es-dev.preds.npy')
    gold = Y_dv_var.data.numpy()
    assert_greater_equal(evaluation.acc(predicted, gold), 0.5)