data = mx.nd.array(data, ctx=ctx)
_c, h, w = data.shape
data = data.reshape(1, _c, h, w)
# label = label.as_in_context(ctx)
rpn_cls, rpn_reg, f = net.rpn(data)
f_height = f.shape[2]
f_width = f.shape[3]
rpn_bbox_pred = proposal_test(rpn_cls, rpn_reg, f.shape, data.shape, ctx)

# RCNN part
# Prepend a batch-index column (all zeros, batch size is 1) as ROIPooling expects.
rpn_bbox_pred_attach_batchid = mx.nd.concatenate(
    [mx.nd.zeros((rpn_bbox_pred.shape[0], 1), ctx), rpn_bbox_pred], axis=1)
f = mx.nd.ROIPooling(f, rpn_bbox_pred_attach_batchid, (7, 7),
                     1.0 / 16)  # VGG16 based, spatial stride = 16
rcnn_cls, rcnn_reg = net.rcnn(f)
# Allocate on the same context as the predictions; the original omitted ctx
# here, which would place the buffer on the CPU.
rcnn_bbox_pred = mx.nd.zeros(rcnn_reg.shape, ctx)
for i in range(len(test_dataset.voc_class_name)):
    rcnn_bbox_pred[:, i * 4:(i + 1) * 4] = bbox_clip(
        bbox_inverse_transform(rpn_bbox_pred,
                               rcnn_reg[:, i * 4:(i + 1) * 4]), h, w)
rcnn_cls = mx.nd.softmax(rcnn_cls)

# NMS by class (class 0 is background, so start from 1)
keep_boxes = []
for cls_id in range(1, len(test_dataset.voc_class_name)):
    cur_scores = rcnn_cls[:, cls_id].asnumpy()
    bboxes_pick = rcnn_bbox_pred[:, cls_id * 4:(cls_id + 1) * 4].asnumpy()
    cur_scores, bboxes_pick = nms(cur_scores, bboxes_pick,
                                  cfg.rcnn_nms_thresh)
    for i in range(len(cur_scores)):
        # The original snippet is truncated here; presumably each surviving
        # detection is collected as (class id, score, box).
        keep_boxes.append((cls_id, cur_scores[i], bboxes_pick[i]))
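
# For reference, the `nms` helper called above is defined elsewhere in the
# repo. Below is a minimal sketch of the standard greedy non-maximum
# suppression it is assumed to implement: boxes are (x1, y1, x2, y2) rows,
# and any box whose IoU with an already-kept, higher-scoring box exceeds
# `thresh` is dropped. `nms_sketch` is a hypothetical name, not the repo's.
import numpy as np

def nms_sketch(scores, boxes, thresh):
    """Greedy NMS; returns surviving scores and boxes, best score first."""
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # candidate indices, descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of box i with every remaining candidate.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = (np.maximum(0.0, xx2 - xx1 + 1) *
                 np.maximum(0.0, yy2 - yy1 + 1))
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]  # drop high-overlap candidates
    keep = np.array(keep, dtype=np.int64)
    return scores[keep], boxes[keep]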
def main():
    global args, logger
    train_ds = train_dataset()
    # only supports batch_size = 1 so far
    train_datait = mx.gluon.data.DataLoader(train_ds, batch_size=1,
                                            shuffle=True)
    ctx = [mx.gpu(i) for i in range(len(args.gpus.split(",")))]
    ctx = ctx[0]
    net = FasterRCNN(len(cfg.anchor_ratios) * len(cfg.anchor_scales),
                     cfg.num_classes, feature_name=args.feature_name)
    net.init_params(ctx)
    if args.pretrained_model != "":
        net.collect_params().load(args.pretrained_model, ctx)
        logger.info("loading {}".format(args.pretrained_model))
    lr_schdl = mx.lr_scheduler.FactorScheduler(step=20000, factor=0.9)
    trainer = mx.gluon.trainer.Trainer(net.collect_params(), 'sgd',
                                       optimizer_params={
                                           'learning_rate': args.learning_rate,
                                           'wd': args.weight_decay,
                                           'lr_scheduler': lr_schdl,
                                           'momentum': 0.9
                                       })
    anchors_count = len(cfg.anchor_ratios) * len(cfg.anchor_scales)
    for epoch in range(0, args.epochs):
        last_iter_end_timestamp = time.time()
        for it, (data, label) in enumerate(train_datait):
            data_loaded_time = time.time()
            data = data.as_in_context(ctx)
            _n, _c, h, w = data.shape
            label = label.as_in_context(ctx).reshape((1, -1, 5))
            with mx.autograd.record():
                rpn_cls, rpn_reg, f = net.rpn(data)
                f_height = f.shape[2]
                f_width = f.shape[3]
                rpn_cls_gt, rpn_reg_gt = rpn_gt_opr(rpn_reg.shape, label,
                                                    ctx, h, w)
                rpn_bbox_sampled, rcnn_reg_target, rcnn_cls_target = proposal_train(
                    rpn_cls, rpn_reg, label, f.shape, data.shape, ctx)

                # RPN loss: reshape and transpose predictions to the layout
                # of the ground truth.
                rpn_cls = rpn_cls.reshape((1, -1, 2, f_height, f_width))
                rpn_cls = mx.nd.transpose(rpn_cls, (0, 1, 3, 4, 2))
                rpn_reg = mx.nd.transpose(
                    rpn_reg.reshape((1, -1, 4, f_height, f_width)),
                    (0, 1, 3, 4, 2))
                mask = (rpn_cls_gt == 1).reshape(
                    (1, anchors_count, f_height, f_width, 1)).broadcast_to(
                        (1, anchors_count, f_height, f_width, 4))
                rpn_loss_reg = mx.nd.sum(
                    mx.nd.smooth_l1((rpn_reg - rpn_reg_gt) * mask,
                                    3.0)) / mx.nd.sum(mask)
                rpn_loss_cls = softmax_celoss_with_ignore(
                    rpn_cls.reshape((-1, 2)), rpn_cls_gt.reshape((-1, )), -1)

                # RCNN part: prepend the batch-index column required by
                # ROIPooling (batch size is 1, so all zeros).
                rpn_bbox_sampled = mx.nd.concatenate([
                    mx.nd.zeros((rpn_bbox_sampled.shape[0], 1), ctx),
                    rpn_bbox_sampled
                ], axis=1)
                f = mx.nd.ROIPooling(f, rpn_bbox_sampled, (7, 7),
                                     1.0 / 16)  # VGG16 based, spatial stride = 16
                rcnn_cls, rcnn_reg = net.rcnn(f)
                mask = (rcnn_cls_target > 0).reshape(
                    (rcnn_cls_target.shape[0], 1)).broadcast_to(
                        (rcnn_cls_target.shape[0], 4 * cfg.num_classes))
                rcnn_loss_reg = mx.nd.sum(
                    mx.nd.smooth_l1((rcnn_reg - rcnn_reg_target) * mask,
                                    1.0)) / mx.nd.sum(mask)
                rcnn_loss_cls = mx.nd.softmax_cross_entropy(
                    rcnn_cls, rcnn_cls_target) / rcnn_cls.shape[0]
                loss = rpn_loss_cls + rpn_loss_reg + rcnn_loss_cls + rcnn_loss_reg
            loss.backward()
            trainer.step(data.shape[0])
            logger.info(
                "Epoch {} Iter {:>6d}: loss={:>6.5f}, rpn_loss_cls={:>6.5f}, "
                "rpn_loss_reg={:>6.5f}, rcnn_loss_cls={:>6.5f}, "
                "rcnn_loss_reg={:>6.5f}, lr={:>6.5f}".format(
                    epoch, it, loss.asscalar(), rpn_loss_cls.asscalar(),
                    rpn_loss_reg.asscalar(), rcnn_loss_cls.asscalar(),
                    rcnn_loss_reg.asscalar(), trainer.learning_rate))
        net.collect_params().save(
            os.path.join(args.save_path, "latest.gluonmodel"))
        if epoch % args.save_interval == 0:
            save_schema = os.path.split(args.save_path)[1] + "-{}"
            net.collect_params().save(
                os.path.join(args.save_path,
                             save_schema.format(epoch) + ".gluonmodel"))
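
# `softmax_celoss_with_ignore` is defined elsewhere in this repo. A minimal
# sketch of its assumed semantics: softmax cross-entropy over the class axis,
# where entries whose label equals `ignore_label` (here -1, the "don't care"
# anchors) contribute nothing, and the loss is averaged over the remaining
# entries. `softmax_celoss_with_ignore_sketch` is a hypothetical name.
import mxnet as mx

def softmax_celoss_with_ignore_sketch(pred, label, ignore_label):
    """pred: (N, C) logits; label: (N,) labels, some equal to ignore_label."""
    valid = (label != ignore_label)                # (N,) 0/1 mask
    log_prob = mx.nd.log_softmax(pred, axis=-1)    # (N, C)
    # Pick the log-probability of the labeled class; clip ignore_label to a
    # valid index first, since the mask zeroes those entries out afterwards.
    safe_label = mx.nd.clip(label, 0, pred.shape[1] - 1)
    nll = -mx.nd.pick(log_prob, safe_label, axis=-1)  # (N,)
    return mx.nd.sum(nll * valid) / mx.nd.maximum(mx.nd.sum(valid), 1)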
def main():
    global args, logger
    train_ds = train_dataset()
    CTX = [mx.gpu(int(i)) for i in args.gpus.split(",")]
    n_gpus = len(CTX)
    assert args.batch_size == n_gpus
    logger.info(CTX)
    logger.info("batch_size = {}, 1 image per GPU".format(n_gpus))
    train_datait = mx.gluon.data.DataLoader(train_ds,
                                            batch_size=args.batch_size,
                                            shuffle=True,
                                            num_workers=16,
                                            last_batch="discard")
    net = FasterRCNN(len(cfg.anchor_ratios) * len(cfg.anchor_scales),
                     cfg.num_classes,
                     pretrained_model=args.model,
                     feature_name=args.feature_name,
                     ctx=CTX)
    net.init_params(CTX)
    if args.pretrained_model != "":
        net.collect_params().load(args.pretrained_model, CTX)
        logger.info("loading {}".format(args.pretrained_model))
    lr_schdl = mx.lr_scheduler.FactorScheduler(step=80000, factor=0.5)
    trainer = mx.gluon.trainer.Trainer(net.collect_params(), 'sgd',
                                       optimizer_params={
                                           'learning_rate': args.learning_rate,
                                           'wd': args.weight_decay,
                                           'lr_scheduler': lr_schdl,
                                           'momentum': 0.9
                                       })
    anchors_count = len(cfg.anchor_ratios) * len(cfg.anchor_scales)
    # Clear stale tensorboard logs from previous runs.
    for log_file in glob.glob(os.path.join(args.save_path, "logs", "*")):
        os.remove(log_file)
    for epoch in range(0, args.epochs):
        loss_interval = 0.0
        rpn_loss_cls_interval = 0.0
        rpn_loss_reg_interval = 0.0
        rcnn_loss_cls_interval = 0.0
        rcnn_loss_reg_interval = 0.0
        for it, (datas, labels) in enumerate(train_datait):
            datas_lst = mx.gluon.utils.split_and_load(datas, CTX)
            labels_lst = mx.gluon.utils.split_and_load(labels, CTX)
            with mx.autograd.record():
                losses = []
                for data, label in zip(datas_lst, labels_lst):
                    _n, _c, h, w = data.shape
                    rpn_cls, rpn_reg, f = net.rpn(data)
                    f_height = f.shape[2]
                    f_width = f.shape[3]
                    rpn_cls_gt, rpn_reg_gt = rpn_gt_opr(
                        rpn_reg.shape, label, data.context, h, w)
                    rpn_bbox_sampled, rcnn_reg_target, rcnn_cls_target = proposal_train(
                        rpn_cls, rpn_reg, label, f.shape, data.shape,
                        data.context)

                    # RPN loss: reshape and transpose predictions to the
                    # layout of the ground truth.
                    rpn_cls = rpn_cls.reshape((1, -1, 2, f_height, f_width))
                    rpn_cls = mx.nd.transpose(rpn_cls, (0, 1, 3, 4, 2))
                    rpn_reg = mx.nd.transpose(
                        rpn_reg.reshape((1, -1, 4, f_height, f_width)),
                        (0, 1, 3, 4, 2))
                    mask = (rpn_cls_gt == 1).reshape(
                        (1, anchors_count, f_height, f_width,
                         1)).broadcast_to(
                             (1, anchors_count, f_height, f_width, 4))
                    rpn_loss_reg = mx.nd.sum(
                        mx.nd.smooth_l1((rpn_reg - rpn_reg_gt) * mask,
                                        3.0)) / mx.nd.sum(mask)
                    rpn_loss_cls = softmax_celoss_with_ignore(
                        rpn_cls.reshape((-1, 2)),
                        rpn_cls_gt.reshape((-1, )), -1)

                    # RCNN part: prepend the batch-index column required by
                    # ROIPooling (one image per GPU, so all zeros).
                    rpn_bbox_sampled = mx.nd.concatenate([
                        mx.nd.zeros((rpn_bbox_sampled.shape[0], 1),
                                    data.context), rpn_bbox_sampled
                    ], axis=1)
                    f = mx.nd.ROIPooling(f, rpn_bbox_sampled, (7, 7),
                                         1.0 / 16)  # VGG16 based, spatial stride = 16
                    rcnn_cls, rcnn_reg = net.rcnn(f)
                    mask = (rcnn_cls_target > 0).reshape(
                        (rcnn_cls_target.shape[0], 1)).broadcast_to(
                            (rcnn_cls_target.shape[0], 4 * cfg.num_classes))
                    rcnn_loss_reg = mx.nd.sum(
                        mx.nd.smooth_l1((rcnn_reg - rcnn_reg_target) * mask,
                                        1.0)) / mx.nd.sum(mask)
                    rcnn_loss_cls = mx.nd.softmax_cross_entropy(
                        rcnn_cls, rcnn_cls_target) / rcnn_cls.shape[0]
                    loss = (rpn_loss_cls + rpn_loss_reg + rcnn_loss_cls +
                            rcnn_loss_reg)
                    losses.append(loss)
                    loss_interval += loss.asscalar()
                    rpn_loss_cls_interval += rpn_loss_cls.asscalar()
                    rpn_loss_reg_interval += rpn_loss_reg.asscalar()
                    rcnn_loss_cls_interval += rcnn_loss_cls.asscalar()
                    rcnn_loss_reg_interval += rcnn_loss_reg.asscalar()
            for loss in losses:
                loss.backward()
            trainer.step(datas.shape[0])
            mx.nd.waitall()
            if it % args.log_train_freq == 0:
                logger.info(
                    "Epoch {:=2d}, train [{:>6d}/{:<6d}], loss={:>6.5f}, "
                    "rpn_loss_cls={:>6.5f}, rpn_loss_reg={:>6.5f}, "
                    "rcnn_loss_cls={:>6.5f}, rcnn_loss_reg={:>6.5f}, "
                    "lr={:>6.5f}".format(
                        epoch, it, len(train_ds) // args.batch_size,
                        loss_interval / args.log_train_freq / args.batch_size,
                        rpn_loss_cls_interval / args.log_train_freq / args.batch_size,
                        rpn_loss_reg_interval / args.log_train_freq / args.batch_size,
                        rcnn_loss_cls_interval / args.log_train_freq / args.batch_size,
                        rcnn_loss_reg_interval / args.log_train_freq / args.batch_size,
                        trainer.learning_rate))
                # tensorboard info
                with SummaryWriter(
                        logdir=os.path.join(args.save_path, "logs")) as sw:
                    sw.add_scalar(tag='Loss',
                                  value=(loss_interval /
                                         args.log_train_freq /
                                         args.batch_size),
                                  global_step=it)
                    sw.add_scalar(tag='LR',
                                  value=trainer.learning_rate,
                                  global_step=it)
                loss_interval = 0.0
                rpn_loss_cls_interval = 0.0
                rpn_loss_reg_interval = 0.0
                rcnn_loss_cls_interval = 0.0
                rcnn_loss_reg_interval = 0.0
            if it % args.save_interval == 0:
                save_schema = os.path.split(args.save_path)[1] + "-{}"
                net.collect_params().save(
                    os.path.join(args.save_path,
                                 save_schema.format(it) + ".gluonmodel"))
                benchmark(
                    net, CTX[0],
                    os.path.join(args.save_path,
                                 save_schema.format(it) + ".benchmark"))
                pr_curve = evals(cfg.test_dataset_json_lst[0],
                                 os.path.join(
                                     args.save_path,
                                     save_schema.format(it) + ".benchmark"),
                                 topk=1000,
                                 reset=True)
                ap = np.trapz(pr_curve[:, 0], pr_curve[:, 1])
                # tensorboard info
                with SummaryWriter(
                        logdir=os.path.join(args.save_path, "logs")) as sw:
                    sw.add_scalar(tag='AP', value=ap, global_step=it)
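
# From the np.trapz call above, `evals` is assumed to return the PR curve as
# an (N, 2) array with precision in column 0 and recall in column 1, so AP is
# the trapezoidal area under precision as a function of recall. A tiny
# self-contained example of that integral (toy numbers, not real results):
import numpy as np

recall = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
precision = np.array([1.0, 0.9, 0.8, 0.6, 0.4])
ap = np.trapz(precision, recall)  # trapezoidal area under the PR curve
print("AP = {:.3f}".format(ap))   # AP = 0.750 for this toy curve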