from collections import OrderedDict

import torch
import torch.optim as optim
from torchvision import models

# YOLO, train_epoch, test_epoch and save_checkpoint are defined elsewhere
# in this project.


def train(args):
    print('Batch size: {}'.format(args.batch_size))
    # Build the detector on top of a pretrained VGG-16 feature extractor.
    vgg = models.vgg16(pretrained=True)
    model = YOLO(vgg.features)
    if args.use_cuda:
        model = torch.nn.DataParallel(model)
        model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    best = 1e+30
    for epoch in range(1, args.epochs + 1):
        loss = train_epoch(epoch, model, optimizer, args)
        # Run a qualitative detection on a fixed sample image each epoch.
        upperleft, bottomright, classes, confs = test_epoch(
            model, jpg='../data/1.jpg')
        is_best = loss < best
        best = min(loss, best)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, is_best)

    # Reload the best checkpoint and export a CPU-only copy of the weights.
    checkpoint = torch.load('./model_best.pth.tar')
    state_dict = checkpoint['state_dict']
    # DataParallel prefixes every key with 'module.'; strip it (only when
    # present) so the weights load into a plain, unwrapped model.
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:] if k.startswith('module.') else k
        new_state_dict[name] = v
    if isinstance(model, torch.nn.DataParallel):
        model = model.module
    model.load_state_dict(new_state_dict)
    model.cpu()
    torch.save(model.state_dict(), 'model_cpu.pth.tar')
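
# save_checkpoint is project code not shown here. A minimal sketch of an
# implementation consistent with its use above (it has to produce the
# './model_best.pth.tar' file that the tail of train() reads back); this
# mirrors a common PyTorch checkpointing pattern and is not necessarily
# the project's exact helper:
import shutil


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    # Always persist the latest state; keep a copy of the best one aside.
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')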
import numpy as np
import torch

# model, optimizer, loader, logger and epochs come from the surrounding
# setup code, which is not shown here.
prevBestLoss = float('inf')  # best epoch-average loss seen so far
batches = len(loader)

for epoch in range(epochs):
    losses = []
    for batch, (x, y, z) in enumerate(loader):
        # The model's forward pass computes and returns the loss directly.
        loss = model(x, y, z)
        losses.append(loss.item())
        metrics = model.metrics
        logger.step(metrics, epoch, batch)
        logger.step({'Loss': losses[-1]}, epoch, batch)

        # Progress line, overwritten in place via the trailing '\r'.
        log = 'Epoch {} | {} / {}'.format(epoch, batch, batches)
        for key in metrics:
            log += ' | {}: {:.4f}'.format(key, metrics[key])
        log += ' | loss: {:.4f}\r'.format(losses[-1])
        print(log, end='')

        optimizer.zero_grad()
        loss.backward()
        # Clip gradients to unit norm to stabilise training.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()

    logger.epochEnd(epoch)
    avgLoss = np.mean(losses)
    print('\nEpoch {}, loss: {:.8f}'.format(epoch, avgLoss))
    if avgLoss < prevBestLoss:
        print('[+] Loss improved from {:.8f} to {:.8f}, saving model...'.format(
            prevBestLoss, avgLoss))
        torch.save(model.state_dict(), 'model.pt')
        prevBestLoss = avgLoss
        logger.addScalar('Model', avgLoss, epoch)
    logger.flush()

logger.close()
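
# The `logger` used above is project code that is not shown. A hypothetical
# stand-in with the same interface (step, epochEnd, addScalar, flush,
# close), sketched as a thin wrapper around TensorBoard's SummaryWriter;
# the log_dir and batches_per_epoch parameters are assumptions:
from torch.utils.tensorboard import SummaryWriter


class Logger:
    def __init__(self, log_dir='runs', batches_per_epoch=1):
        self.writer = SummaryWriter(log_dir)
        self.batches_per_epoch = batches_per_epoch

    def step(self, metrics, epoch, batch):
        # Log every metric in the dict against a global step counter.
        step = epoch * self.batches_per_epoch + batch
        for key, value in metrics.items():
            self.writer.add_scalar(key, value, step)

    def epochEnd(self, epoch):
        pass  # hook for per-epoch bookkeeping; unused in this sketch

    def addScalar(self, tag, value, epoch):
        self.writer.add_scalar(tag, value, epoch)

    def flush(self):
        self.writer.flush()

    def close(self):
        self.writer.close()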
# Validation epoch: no gradients needed, so run under torch.no_grad().
yolo.eval()
valid_loss = 0.
with torch.no_grad():
    for sample_batched in valid_dataloader:
        output = yolo(sample_batched['image'].float())
        # .item() detaches the loss so the graph is not kept alive.
        valid_loss += loss_fn(output, sample_batched['boxes']).item()
valid_loss /= len(valid_data) // 8  # 8 = batch size, so this averages per batch
print("valid_loss: ", valid_loss)

# Train set epoch.
yolo.train()
train_loss = 0.
for i_batch, sample_batched in enumerate(train_dataloader):
    output = yolo(sample_batched['image'].float())
    loss = loss_fn(output, sample_batched['boxes'])
    train_loss += loss.item()
    print(f"{epoch}: {i_batch} loss: {loss.item()}")
    optim.zero_grad()
    loss.backward()
    optim.step()
train_loss /= len(train_data) // 8  # 8 = batch size
print("train_loss: ", train_loss)

if epoch % checkpoint_interval == 0:
    print("Saving checkpoint")
    # The checkpoints/ directory must exist before saving.
    torch.save(yolo.state_dict(), f"checkpoints/yolo_ckpt_{epoch}.pth")
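
# Resuming from one of the checkpoints written above: a minimal sketch.
# `resume_epoch` is a hypothetical name; note that this script saves only
# the model weights, so the optimizer state starts fresh on resume.
resume_epoch = 10  # hypothetical: whichever checkpoint to resume from
yolo.load_state_dict(
    torch.load(f"checkpoints/yolo_ckpt_{resume_epoch}.pth"))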