# NOTE(review): this fragment was recovered from whitespace-flattened source.
# The enclosing loop(s)/conditionals start outside the visible region, and the
# nesting below is reconstructed from statement order -- confirm it against
# the original file before relying on it.

# write tensorboard logs of the average loss (x-axis is the counter `j`)
for tag, value in (
        ('loss_train', train_loss),
        ('loss_bbox', bbox_loss),
        ('loss_iou', iou_loss),
        ('loss_cls', cls_loss),
        ('learning_rate', lr),
):
    summary_writer.add_scalar(tag, value, j)

# start a fresh accumulation window
train_loss = 0
bbox_loss, iou_loss, cls_loss = 0., 0., 0.
cnt = 0
t.clear()

# learning rate decay every said epochs
if step in cfg.lr_decay_epochs:
    lr *= cfg.lr_decay
    # Update the live optimizer in place instead of constructing a new
    # torch.optim.SGD: a new instance starts with empty per-parameter state,
    # so rebuilding would throw away the accumulated momentum buffers at
    # every decay step. The same lr / momentum / weight_decay values the
    # rebuild passed are written into every parameter group.
    # NOTE(review): assumes `optimizer` already exists here and was built
    # over the parameters returned by optimizable() -- confirm.
    for group in optimizer.param_groups:
        group['lr'] = lr
        group['momentum'] = cfg.momentum
        group['weight_decay'] = cfg.weight_decay

# write parameters and hyper-parameters to checkpoints
train_output_dir = os.path.join(cfg.TRAIN_DIR, 'checkpoints', exp_name)
cfg.mkdir(train_output_dir, max_depth=3)
save_name = os.path.join(train_output_dir, '{}.h5'.format(step))
net_utils.save_net(j, exp_name, step + 1, lr, save_name, net)
print('save model: {}'.format(save_name))

# clean old checkpoints
if step % 10 == 1:
    cfg.clean_ckpts(train_output_dir)

# counter reset
step_cnt = 0
image.shape, cfg, thresh=0.3, size_index=size_index) im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg) summary_writer.add_image('predict', im2show, step) train_loss = 0 bbox_loss, iou_loss, cls_loss = 0., 0., 0. cnt = 0 t.clear() size_index = randint(0, len(cfg.multi_scale_inp_size) - 1) print("image_size {}".format(cfg.multi_scale_inp_size[size_index])) if step > 0 and (step % imdb.batch_per_epoch == 0): if imdb.epoch in cfg.lr_decay_epochs: lr *= cfg.lr_decay optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=cfg.momentum, weight_decay=cfg.weight_decay) save_name = os.path.join(cfg.train_output_dir, '{}_{}.h5'.format(cfg.exp_name, imdb.epoch)) net_utils.save_net(save_name, net) print(('save model: {}'.format(save_name))) step_cnt = 0 imdb.close()
# NOTE(review): this fragment was recovered from whitespace-flattened source.
# The enclosing loop(s)/conditionals start outside the visible region, and the
# nesting below is reconstructed from statement order -- confirm it against
# the original file before relying on it.

# turn the accumulated sums into per-window averages
iou_loss /= cnt
cls_loss /= cnt

# remaining-time estimate: batches still to go times seconds per batch
remaining = datetime.timedelta(
    seconds=int((batch_size - step_cnt) * duration))
print('epoch %d[%d/%d], loss: %.3f, bbox_loss: %.3f, iou_loss: %.3f, '
      'cls_loss: %.3f (%.2f s/batch, rest:%s)' %
      (i_batch, step_cnt, batch_size, train_loss, bbox_loss, iou_loss,
       cls_loss, duration, str(remaining)))

# start a fresh accumulation window
train_loss = 0
bbox_loss, iou_loss, cls_loss = 0., 0., 0.
cnt = 0
t.clear()

# draw a new index into cfg.multi_scale_inp_size and report the chosen size
size_index = randint(0, len(cfg.multi_scale_inp_size) - 1)
print("image_size {}".format(cfg.multi_scale_inp_size[size_index]))

# Work done whenever `i_batch` is a positive multiple of `batch_size`.
# NOTE(review): `i_batch` is printed as the epoch above and is tested against
# cfg.lr_decay_epochs below, yet it is also compared with `batch_size` here;
# this looks like a batch-index/epoch mix-up -- confirm the intended units.
if i_batch > 0 and (i_batch % batch_size == 0):
    if i_batch in cfg.lr_decay_epochs:
        lr *= cfg.lr_decay
        # Update the live optimizer in place instead of constructing a new
        # torch.optim.SGD: a new instance starts with empty per-parameter
        # state, so rebuilding would throw away the accumulated momentum
        # buffers at every decay step. The same lr / momentum / weight_decay
        # values the rebuild passed are written into every parameter group.
        # NOTE(review): assumes `optimizer` already exists here and was
        # built over model.parameters() -- confirm.
        for group in optimizer.param_groups:
            group['lr'] = lr
            group['momentum'] = cfg.momentum
            group['weight_decay'] = cfg.weight_decay

    # save a checkpoint named after the experiment and i_batch
    save_name = os.path.join(cfg.train_output_dir,
                             '{}_{}.h5'.format(cfg.exp_name, i_batch))
    net_utils.save_net(save_name, model)
    print('save model: {}'.format(save_name))
    step_cnt = 0