def evaluate(model, data, loss_function, word_to_ix, name='dev'):
    """Run `model` over `data` in eval mode and report the average loss.

    Args:
        model: regressor with a recurrent `hidden` state and `init_hidden()`
            (presumably an LSTM — TODO confirm); put into eval mode so layers
            like dropout/batchnorm behave deterministically.
        data: sized iterable of (sentence_words, degree) pairs.
        loss_function: callable mapping (prediction, target) to a scalar loss.
        word_to_ix: vocabulary mapping used by prepare_sequence.
        name: label printed with the summary line (default 'dev').

    Returns:
        (avg_loss, pred_res): mean per-instance loss over `data`, and the
        list of raw model predictions (one tensor per instance).
    """
    model.eval()
    avg_loss = 0.0
    truth_res = []
    pred_res = []
    for sentwords, dgr in data:
        dgr = dataLoaderRegresser.prepare_degree(dgr)
        truth_res.append(dgr)
        # Re-initialise the hidden state so this instance is detached
        # from the history of the previous one.
        model.hidden = model.init_hidden()
        sent = dataLoaderRegresser.prepare_sequence(sentwords, word_to_ix)
        pred = model(sent)
        print('->gold-degree %.4f, predicted-degree %.4f %s' % (dgr.item(), pred.item(), sentwords))
        pred_res.append(pred)
        # No zero_grad()/backward() here: gradients are not needed when
        # evaluating the model.
        loss = loss_function(pred, dgr)
        avg_loss += loss.item()
    # Guard against an empty dataset (the original divided unconditionally
    # and would raise ZeroDivisionError).
    if len(data) > 0:
        avg_loss /= len(data)
    print(name + ' avg_loss: %g' % avg_loss)
    return avg_loss, pred_res
def train_epoch(model, train_data, loss_function, optimizer, word_to_ix, i):
    """Train `model` for one pass over `train_data`, printing progress.

    Args:
        model: regressor with a recurrent `hidden` state and `init_hidden()`;
            put into training mode so dropout/batchnorm layers behave
            accordingly (see torch.nn.Module.train).
        train_data: sized iterable of (sentence_words, degree) pairs.
        loss_function: callable mapping (prediction, target) to a scalar loss.
        optimizer: torch optimizer over the model's parameters.
        word_to_ix: vocabulary mapping used by prepare_sequence.
        i: current epoch number, used only in progress print-outs.
    """
    model.train()
    avg_loss = 0.0
    count = 0
    truth_res = []
    pred_res = []
    for sentwords, dgr in train_data:
        dgr = dataLoaderRegresser.prepare_degree(dgr)
        truth_res.append(dgr)
        # Re-initialise the hidden state so this instance is detached
        # from the history of the previous one.
        model.hidden = model.init_hidden()
        sent = dataLoaderRegresser.prepare_sequence(sentwords, word_to_ix)
        pred = model(sent)
        print('->gold-degree %.4f, predicted-degree %.4f %s' % (dgr.item(), pred.item(), sentwords))
        pred_res.append(pred)
        # Clear gradients left over from the previous backward(); when the
        # optimizer holds all of the model's parameters this is equivalent
        # to optimizer.zero_grad().
        model.zero_grad()
        loss = loss_function(pred, dgr)
        avg_loss += loss.item()
        count += 1
        if count % 50 == 0:  # print out every 50 sentences
            print('epoch: %d iterations: %d loss: %g' % (i, count, loss.item()))
        # backward() accumulates gradients by addition, which is why
        # zero_grad() is called once per instance above.
        loss.backward()
        # Parameter update from the gradients stored in each param's .grad.
        optimizer.step()
    # Guard against an empty dataset (the original divided unconditionally
    # and would raise ZeroDivisionError).
    if len(train_data) > 0:
        avg_loss /= len(train_data)
    print('epoch: %d done!\ntrain avg_loss: %g' % (i, avg_loss))