        batch_loss = loss(train_pred, data[1].to(device))  # compute the loss (prediction and label must both be on the CPU or both on the GPU)
        batch_loss.backward()  # back propagation computes every parameter's gradient
        optimizer.step()       # the optimizer updates the parameters with those gradients
        train_acc += np.sum(
            np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].to(device))
            batch_loss = loss(val_pred, data[1].to(device))
            val_acc += np.sum(
                np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

    if val_acc > best_acc:
        torch.save(model.state_dict(), 'ckpt.model')
        print('saving model with acc {:.3f}'.format(val_acc / len(val_set) * 100))
        best_acc = val_acc

    # print the results
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' %
          (epoch + 1, num_epoch, time.time() - epoch_start_time,
           train_acc / len(train_set), train_loss / len(train_set),
           val_acc / len(val_set), val_loss / len(val_set)))
    scheduler.step(val_acc / len(val_set))
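This listing begins mid-way through a training step, so the forward pass and gradient reset are cut off, and passing a metric to scheduler.step implies a metric-driven scheduler such as torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max'). Below is a minimal, self-contained sketch of the surrounding epoch loop; the toy dataset, model, and hyper-parameters are placeholders (assumptions), not the ones from the source.

# Sketch only: the dataset/model below are stand-ins so the loop runs end to end.
import time
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

device = 'cuda' if torch.cuda.is_available() else 'cpu'
train_set = TensorDataset(torch.randn(64, 3, 32, 32), torch.randint(0, 11, (64,)))
train_loader = DataLoader(train_set, batch_size=8, shuffle=True)
model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 11)).to(device)
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
num_epoch = 2

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc, train_loss = 0.0, 0.0
    model.train()
    for i, data in enumerate(train_loader):
        optimizer.zero_grad()                   # clear gradients left over from the previous step
        train_pred = model(data[0].to(device))  # forward pass; this is the statement truncated above
        batch_loss = loss(train_pred, data[1].to(device))
        batch_loss.backward()
        optimizer.step()
        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' %
          (epoch + 1, num_epoch, time.time() - epoch_start_time,
           train_acc / len(train_set), train_loss / len(train_set)))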
        train_loss += batch_loss.item()  # type(batch_loss) == torch.Tensor

    model.eval()  # same as model.train(False)
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())
            val_acc += np.sum(
                np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' %
          (epoch + 1, num_epoch, time.time() - epoch_start_time,
           train_acc / len(train_set), train_loss / len(train_set),
           val_acc / len(val_set), val_loss / len(val_set)))
    savemodel(model, "./", 1)

'''
# torch.save({'state_dict': model.state_dict()}, 'hw3_parm_b.pkl')
# disabled pass that collects predicted labels and raw scores on the validation set
model.eval()
prediction = []
all_pred = []
with torch.no_grad():
    for i, data in enumerate(val_loader):
        test_pred = model(data[0].cuda())
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        for y in test_label:
            prediction.append(y)
        for y in test_pred.cpu().data.numpy():
            all_pred.append(y)
'''
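savemodel is a helper defined elsewhere and its body is not shown in this listing, so the definition below is only a guess that fits the call savemodel(model, "./", 1): save the parameters under the given directory with the given index.

import os
import torch

def savemodel(model, save_dir, index):
    # Hypothetical implementation (assumption): the real helper is not in this listing.
    path = os.path.join(save_dir, 'model_{}.pkl'.format(index))
    torch.save(model.state_dict(), path)  # persist only the parameters, not the full module
    print('saved model to', path)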
        train_loss += batch_loss.item()

    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())
            val_acc += np.sum(
                np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' %
          (epoch + 1, num_epoch, time.time() - epoch_start_time,
           train_acc / len(train_set), train_loss / len(train_set),
           val_acc / len(val_set), val_loss / len(val_set)))

    # record per-epoch averages for the learning curves
    train_loss_list.append(train_loss / len(train_set))
    dev_loss_list.append(val_loss / len(val_set))
    train_acc_list.append(train_acc / len(train_set))
    dev_acc_list.append(val_acc / len(val_set))

    # checkpoint only when both training and validation accuracy set a new best
    if (val_acc / len(val_set) > val_acc_max) and (train_acc / len(train_set) > train_acc_max):
        val_acc_max = val_acc / len(val_set)
        train_acc_max = train_acc / len(train_set)
        print("save")
        torch.save(model.state_dict(), "./model_" + str(val_acc / len(val_set)))

torch.save(model.state_dict(), "./model_last")  # always keep the weights from the final epoch
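The four lists accumulate per-epoch averages, presumably to plot learning curves after training; no plotting code appears in this listing, so the matplotlib snippet below is an assumption about how they might be used.

import matplotlib.pyplot as plt

# Assumes the four per-epoch lists from the loop above are already filled.
epochs = range(1, len(train_loss_list) + 1)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(epochs, train_loss_list, label='train')
ax1.plot(epochs, dev_loss_list, label='dev')
ax1.set_xlabel('epoch')
ax1.set_ylabel('average loss')
ax1.legend()
ax2.plot(epochs, train_acc_list, label='train')
ax2.plot(epochs, dev_acc_list, label='dev')
ax2.set_xlabel('epoch')
ax2.set_ylabel('accuracy')
ax2.legend()
fig.savefig('learning_curves.png')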