# One optimisation step: forward pass, backprop, console/TensorBoard logging,
# and a checkpoint every 10th epoch.
step_start = time.time()
optimizer.zero_grad()

# Forward returns per-component losses; the last entry is the total used for backprop
# (printed below as cls/cnt/reg — assumes that ordering, matching the format string).
losses = model([batch_imgs, batch_boxes, batch_classes])
total_loss = losses[-1]
total_loss.backward()
optimizer.step()

elapsed_ms = int((time.time() - step_start) * 1000)
print("global_steps:%d epoch:%d steps:%d/%d cls_loss:%.4f cnt_loss:%.4f reg_loss:%.4f cost_time:%dms lr=%.4e" %
      (GLOBAL_STEPS, epoch + 1, epoch_step + 1, steps_per_epoch,
       losses[0], losses[1], losses[2], elapsed_ms, lr))

# Scalar curves for each loss component plus the current learning rate.
writer.add_scalar("loss/cls_loss", losses[0], global_step=GLOBAL_STEPS)
writer.add_scalar("loss/cnt_loss", losses[1], global_step=GLOBAL_STEPS)
writer.add_scalar("loss/reg_loss", losses[2], global_step=GLOBAL_STEPS)
writer.add_scalar("lr", lr, global_step=GLOBAL_STEPS)
GLOBAL_STEPS += 1

# Persist weights every 10 epochs, tagging the file with epoch and loss value.
if (epoch + 1) % 10 == 0:
    torch.save(model.state_dict(),
               "./FCOSMASK_epoch%d_loss%.4f.pth" % (epoch + 1, total_loss.item()))
# Single optimisation step with timing, console + TensorBoard logging,
# followed by an unconditional checkpoint save.
step_start = time.time()
optimizer.zero_grad()

# Forward returns per-component losses; the final element is the total loss
# (printed below as cls/cnt/reg — assumes that ordering, matching the format string).
losses = model([batch_imgs, batch_boxes, batch_classes])
total_loss = losses[-1]
total_loss.backward()
optimizer.step()

elapsed_ms = int((time.time() - step_start) * 1000)
print("global_steps:%d epoch:%d steps:%d/%d cls_loss:%.4f cnt_loss:%.4f reg_loss:%.4f cost_time:%dms lr=%.4e" %
      (GLOBAL_STEPS, epoch + 1, epoch_step + 1, steps_per_epoch,
       losses[0], losses[1], losses[2], elapsed_ms, lr))

# Log each loss component and the learning rate against the global step counter.
writer.add_scalar("loss/cls_loss", losses[0], global_step=GLOBAL_STEPS)
writer.add_scalar("loss/cnt_loss", losses[1], global_step=GLOBAL_STEPS)
writer.add_scalar("loss/reg_loss", losses[2], global_step=GLOBAL_STEPS)
writer.add_scalar("lr", lr, global_step=GLOBAL_STEPS)
GLOBAL_STEPS += 1

# NOTE(review): this saves a checkpoint every time the block runs (no epoch
# condition) — presumably it sits at the end of an epoch loop; confirm placement.
torch.save(model.state_dict(),
           "./voc2012_512x800_epoch%d_loss%.4f.pth" % (epoch + 1, total_loss.item()))
# Clip gradients (max total norm 3) before stepping, then log, checkpoint,
# and evaluate the fresh checkpoint on the COCO-style validation set.
#
# FIX: `torch.nn.utils.clip_grad_norm` is deprecated (and removed in recent
# PyTorch) — use the in-place variant `clip_grad_norm_` instead.
torch.nn.utils.clip_grad_norm_(model.parameters(), 3)
optimizer.step()

end_time = time.time()
cost_time = int((end_time - start_time) * 1000)
print("global_steps:%d epoch:%d steps:%d/%d cls_loss:%.4f cnt_loss:%.4f reg_loss:%.4f cost_time:%dms lr=%.4e total_loss:%.4f" %
      (GLOBAL_STEPS, epoch + 1, epoch_step + 1, steps_per_epoch,
       losses[0].mean(), losses[1].mean(), losses[2].mean(), cost_time, lr, loss.mean()))
GLOBAL_STEPS += 1

# One scalar per loss component; tag names come from the external `loss_list`.
for idx in range(4):
    summary.add_scalar(loss_list[idx], losses[idx][0], epoch)

# Checkpoint, then reload into a separate inference-mode copy for evaluation
# (keeps the training model/optimizer state untouched).
torch.save(model.state_dict(), "./checkpoint2/model_{}.pth".format(epoch + 1))
model2 = FCOSDetector(mode="inference")
model2 = torch.nn.DataParallel(model2)
model2 = model2.cuda().eval()
model2.load_state_dict(
    torch.load("./checkpoint2/model_{}.pth".format(epoch + 1),
               map_location=torch.device('cuda:1')),
    strict=False)

# tt[4] presumably holds the mAP-style summary metric — TODO confirm against
# coco_eval.evaluate_coco's return layout. Track the best epoch seen so far.
tt = coco_eval.evaluate_coco(val_dataset, model2)
m_acc = tt[4].astype(float)
if m_acc > best_acc:
    best_acc = m_acc
    best_ep = epoch + 1
# TensorBoard logging (gated by the `draw` flag), running-loss bookkeeping,
# and an epoch checkpoint tagged with the average loss.
if draw:
    writer.add_scalar("loss/loss", loss, global_step=GLOBAL_STEPS)
    writer.add_scalar("loss/cls_loss", loss_fcos[0], global_step=GLOBAL_STEPS)
    writer.add_scalar("loss/reg_loss", loss_fcos[1], global_step=GLOBAL_STEPS)
    writer.add_scalar("loss/loc_anchor", loss_anchor[0], global_step=GLOBAL_STEPS)
    writer.add_scalar("loss/cls_anchor", loss_anchor[1], global_step=GLOBAL_STEPS)
    writer.add_scalar("lr", lr, global_step=GLOBAL_STEPS)
GLOBAL_STEPS += 1

# FIX: detach before accumulating — appending the live `loss` tensor keeps its
# entire autograd graph alive for every step stored in `loss_`, leaking memory.
# The averaged value is unchanged; only the graph reference is dropped.
loss_.append(loss.detach())
loss_avg = torch.mean(torch.stack(loss_))
torch.save(
    model.state_dict(),
    "/mnt/hdd1/benkebishe01/dianwang/insulator/voc_epoch%d_loss%.4f.pth"
    % (epoch + 1, loss_avg.item()))