import random

import torch

import dataLoaderRegresser  # assumed to be available alongside this script; may already be imported at the top of the full file


def evaluate(model, data, loss_function, word_to_ix, all_losses_dev, name='dev'):
    model.eval()
    avg_loss = 0.0
    truth_res = []
    pred_res = []
    with torch.no_grad():  # no gradients are needed while evaluating
        for sentwords, dgr in data:
            dgr = dataLoaderRegresser.prepare_degree(dgr)
            truth_res.append(dgr)
            # Re-initialise the hidden state so it is detached from the previous instance's history.
            model.hidden = model.init_hidden()
            sent = dataLoaderRegresser.prepare_sequence_pretrainedVec(sentwords)
            pred = model(sent)
            pred_res.append(pred)
            loss = loss_function(pred, dgr)
            avg_loss += loss.item()
    avg_loss /= len(data)
    all_losses_dev.append(avg_loss)
    print(name + ' avg_loss: %g' % avg_loss)
    return avg_loss, pred_res, all_losses_dev
def train_epoch(model, train_data, loss_function, optimizer, word_to_ix, i, all_losses):
    # model.train() puts the model in training mode, so layers such as dropout and batchnorm,
    # which behave differently during training and evaluation, act accordingly; call
    # model.eval() or model.train(mode=False) before testing.
    # https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch
    model.train()
    avg_loss = 0.0
    count = 0
    truth_res = []
    pred_res = []
    loss_plot = 0.0
    # Train on a fresh random 70% subset of the training data each epoch.
    random.shuffle(train_data)
    train_data = train_data[:int(0.7 * len(train_data))]
    for sentwords, dgr in train_data:
        dgr = dataLoaderRegresser.prepare_degree(dgr)
        truth_res.append(dgr)
        # Re-initialise the hidden state so it is detached from the previous instance's history.
        model.hidden = model.init_hidden()
        sent = dataLoaderRegresser.prepare_sequence_pretrainedVec(sentwords)
        pred = model(sent)
        pred_res.append(pred)
        # model.zero_grad() would be equivalent here since the optimizer is built from
        # model.parameters(), whatever the optimizer (SGD, Adam, RMSProp, ...):
        # https://discuss.pytorch.org/t/model-zero-grad-or-optimizer-zero-grad/28426/2
        optimizer.zero_grad()
        loss = loss_function(pred, dgr)
        avg_loss += loss.item()  # https://github.com/pytorch/pytorch/issues/6061
        loss_plot += loss.item()
        count += 1
        printEvery = 50.0
        if count % printEvery == 0:  # report every 50 sentences
            print('epoch: %d iterations: %d loss: %g' % (i, count, loss_plot / printEvery))
            loss_plot = 0.0
        # .backward() accumulates gradients (by addition) into each parameter's .grad,
        # which is why optimizer.zero_grad() is called before every backward/step pair;
        # after the first .backward() call, another call requires another forward pass.
        loss.backward()
        # Update the parameters from the current gradients according to the optimizer's rule.
        optimizer.step()
    avg_loss /= len(train_data)
    all_losses.append(avg_loss)
    # %g formatting: https://stackoverflow.com/questions/30580481/why-does-e-behave-different-than-g-in-format-strings
    print('epoch: %d done!\ntrain avg_loss: %g' % (i, avg_loss))
    return all_losses, model
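

# Sketch of how train_epoch() and evaluate() are expected to be driven once per epoch.
# This driver is an illustration only, not part of the original script: the model class,
# hyperparameters, and epoch count below are hypothetical placeholders; only train_epoch,
# evaluate, and dataLoaderRegresser come from this module.
#
#   model = LSTMRegressor(embedding_dim=300, hidden_dim=150)   # hypothetical regressor class
#   loss_function = nn.MSELoss()                                # regression loss on the degree
#   optimizer = optim.Adam(model.parameters(), lr=1e-3)
#   all_losses, all_losses_dev = [], []
#   for epoch in range(num_epochs):
#       all_losses, model = train_epoch(model, train_data, loss_function, optimizer,
#                                       word_to_ix, epoch, all_losses)
#       dev_loss, dev_preds, all_losses_dev = evaluate(model, dev_data, loss_function,
#                                                      word_to_ix, all_losses_dev, name='dev')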