##############################################################################
# To speed up training, we let the CPU pre-compute some training targets.
# This is especially helpful when your CPU is powerful and you can use
# ``-j num_workers`` to utilize a multi-core CPU.

##############################################################################
# If we provide anchors to the training transform, it will compute training targets
train_transform = presets.ssd.SSDDefaultTrainTransform(width, height, anchors)
train_loader = DetectionDataLoader(train_dataset.transform(train_transform),
                                   batch_size, shuffle=True,
                                   last_batch='rollover', num_workers=num_workers)

from gluoncv.loss import SSDMultiBoxLoss
mbox_loss = SSDMultiBoxLoss()

for ib, batch in enumerate(train_loader):
    if ib > 0:
        break
    print('data:', batch[0].shape)
    print('class targets:', batch[1].shape)
    print('box targets:', batch[2].shape)
    with autograd.record():
        cls_pred, box_pred, anchors = net(batch[0])
        sum_loss, cls_loss, box_loss = mbox_loss(
            cls_pred, box_pred, batch[1], batch[2])
        # some standard gluon training steps:
        # autograd.backward(sum_loss)
        # trainer.step(1)
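##############################################################################
# The two commented-out lines above only hint at the parameter update. The
# sketch below shows one way to wire up that standard Gluon step; the optimizer
# and its hyper-parameters are illustrative assumptions, not taken from the
# snippet above, and ``net`` is assumed to be a GluonCV SSD network as before.
from mxnet import autograd, gluon

trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': 0.001, 'wd': 5e-4, 'momentum': 0.9})

for ib, batch in enumerate(train_loader):
    with autograd.record():
        cls_pred, box_pred, _ = net(batch[0])
        sum_loss, cls_loss, box_loss = mbox_loss(
            cls_pred, box_pred, batch[1], batch[2])
    autograd.backward(sum_loss)
    # SSDMultiBoxLoss already normalizes over the batch, so step with 1.
    trainer.step(1)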
import time

import mxnet as mx
from mxnet import autograd
from mxnet.gluon import Trainer, utils
from gluoncv.loss import SSDMultiBoxLoss


def train_ssd300_coco(net, train_data_loader, val_data_loader, eval_metric, ctx,
                      consts, logger):
    net.collect_params().reset_ctx(ctx)
    net_optimizer = Trainer(net.collect_params(),
                            optimizer='sgd',
                            optimizer_params={
                                'learning_rate': consts.LR,
                                'wd': consts.WD,
                                'momentum': consts.MOMENTUM
                            })
    lr_decay = float(consts.LR_DECAY)
    lr_steps = sorted([float(ls) for ls in consts.LR_DECAY_EPOCH if ls.strip()])

    mbox_loss = SSDMultiBoxLoss()
    ce_metric = mx.metric.Loss('CrossEntropy')
    smoothl1_metric = mx.metric.Loss('SmoothL1')
    best_mean_avg_prec = [0]

    logger.info(consts)
    logger.info(f'Starting from [Epoch {consts.START_EPOCH}]')

    for epoch in range(consts.START_EPOCH, consts.EPOCHS):
        # Decay the learning rate once the epoch reaches each scheduled step.
        while lr_steps and epoch >= lr_steps[0]:
            new_lr = net_optimizer.learning_rate * lr_decay
            lr_steps.pop(0)
            net_optimizer.set_learning_rate(new_lr)
            logger.info(f'[Epoch {epoch}] learning rate = {new_lr}')

        ce_metric.reset()
        smoothl1_metric.reset()
        epoch_tic = time.time()
        batch_tic = time.time()
        net.hybridize(static_alloc=True, static_shape=True)

        for i, batch in enumerate(train_data_loader):
            # Split data and targets across the available devices.
            data = utils.split_and_load(batch[0], ctx_list=ctx)
            cls_targets = utils.split_and_load(batch[1], ctx_list=ctx)
            box_targets = utils.split_and_load(batch[2], ctx_list=ctx)

            with autograd.record():
                cls_predictions = []
                box_predictions = []
                for x in data:
                    cls_prediction, box_prediction, _ = net(x)
                    cls_predictions.append(cls_prediction)
                    box_predictions.append(box_prediction)
                sum_loss, cls_loss, box_loss = mbox_loss(
                    cls_predictions, box_predictions, cls_targets, box_targets)
                autograd.backward(sum_loss)

            net_optimizer.step(1)
            ce_metric.update(0, [l * consts.BATCH_SIZE for l in cls_loss])
            smoothl1_metric.update(0, [l * consts.BATCH_SIZE for l in box_loss])

            if not (i + 1) % consts.LOG_INTERVAL:
                ce_name, ce_loss = ce_metric.get()
                sl1_name, sl1_loss = smoothl1_metric.get()
                t_now = time.time()
                speed = consts.BATCH_SIZE / (t_now - batch_tic)
                logger.info(
                    f'[Epoch {epoch}][Batch {i}], Speed: {speed:.3f} samples/sec, '
                    f'{ce_name}={ce_loss:.3f}, {sl1_name}={sl1_loss:.3f}')
            batch_tic = time.time()

        ce_name, ce_loss = ce_metric.get()
        sl1_name, sl1_loss = smoothl1_metric.get()
        epoch_time = time.time() - epoch_tic
        logger.info(f'[Epoch {epoch}], epoch time: {epoch_time:.3f}, '
                    f'{ce_name}={ce_loss:.3f}, {sl1_name}={sl1_loss:.3f}')

        # Run validation (and report the resulting mAP) on the configured intervals.
        if not epoch % consts.VAL_INTERVAL or not epoch % consts.SAVE_INTERVAL:
            mean_avg_prec_name, mean_avg_prec = validate_ssd300_coco(
                net, val_data_loader, ctx, eval_metric)
            val_msg = '\n'.join([
                f'{k}={v}' for k, v in zip(mean_avg_prec_name, mean_avg_prec)
            ])
            logger.info(f'[Epoch {epoch}] validation: \n{val_msg}')
            curr_mean_avg_prec = float(mean_avg_prec[-1])
        else:
            curr_mean_avg_prec = 0
        save_params(net, best_mean_avg_prec, curr_mean_avg_prec, epoch,
                    consts.SAVE_INTERVAL, consts.SAVE_PREFIX)
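# ``validate_ssd300_coco`` and ``save_params`` are referenced above but not defined
# in this snippet, and their actual implementations are not shown here. The helper
# below is only a minimal sketch of what ``save_params`` might look like, assuming
# it follows the common GluonCV convention of keeping the best-mAP weights and
# checkpointing every ``save_interval`` epochs.
def save_params(net, best_mean_avg_prec, curr_mean_avg_prec, epoch, save_interval,
                prefix):
    # Keep the best validation mAP seen so far (stored in a one-element list so the
    # caller sees the update).
    if curr_mean_avg_prec > best_mean_avg_prec[0]:
        best_mean_avg_prec[0] = curr_mean_avg_prec
        net.save_parameters(f'{prefix}_best.params')
    # Periodic checkpoint regardless of mAP.
    if save_interval and not epoch % save_interval:
        net.save_parameters(f'{prefix}_{epoch:04d}_{curr_mean_avg_prec:.4f}.params')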
from time import time

import mxnet as mx
from mxnet import autograd, gluon, image
from gluoncv.loss import SSDMultiBoxLoss

# JanetRes (the network definition) and TargetGenV1 (the target generator) are
# project-local modules; their import paths are not shown in this snippet.


class Trainer:
    BATCH_SIZE = 16
    NUM_EPOCHS = 13
    classes = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
    ctx = mx.gpu()
    mboxLoss = SSDMultiBoxLoss()

    def __init__(self):
        self.net = JanetRes(classes=self.classes, use_bn=True)
        self.net.initialize(ctx=self.ctx)
        self.trainIter = image.ImageDetIter(
            batch_size=self.BATCH_SIZE,
            data_shape=(3, 300, 300),
            path_imgrec='../DataX/annoTrainX.rec',
            path_imgidx='../DataX/annoTrainX.idx',
            path_imglist='../DataX/annoTrainX.lst',
            path_root='../DataX/',
            shuffle=True,
            mean=True,
            brightness=0.3,
            contrast=0.3,
            saturation=0.3,
            pca_noise=0.3,
            hue=0.3)
        # Run one dummy forward pass to obtain the anchors, then build the
        # CPU-side target generator from them.
        with autograd.train_mode():
            _, _, anchors = self.net(
                mx.ndarray.zeros(shape=(self.BATCH_SIZE, 3, 300, 300),
                                 ctx=self.ctx))
        self.T = TargetGenV1(anchors=anchors.as_in_context(mx.cpu()),
                             height=300, width=300)
        self.net.collect_params().reset_ctx(self.ctx)
        self.trainer = gluon.Trainer(self.net.collect_params(), 'sgd', {
            'learning_rate': 0.1,
            'wd': 5e-4
        })

    def train(self):
        ce_metric = mx.metric.Loss('CrossEntropy')
        smoothl1_metric = mx.metric.Loss('SmoothL1')
        for epoch in range(self.NUM_EPOCHS):
            print('Commencing epoch', epoch)
            tic = time()
            self.trainIter.reset()
            for i, batch in enumerate(self.trainIter):
                X = batch.data[0].as_in_context(self.ctx)
                Y = batch.label[0].as_in_context(self.ctx)

                with autograd.record():
                    # Make predictions and generate the matching training targets.
                    clsPreds, bboxPreds, _ = self.net(X)
                    clsTargets, bboxTargets = self.T.generateTargets(Y, clsPreds)
                    # Compute the losses.
                    sumLoss, clsLoss, bboxLoss = self.mboxLoss(
                        clsPreds, bboxPreds,
                        clsTargets.as_in_context(self.ctx),
                        bboxTargets.as_in_context(self.ctx))

                if (i + 1) % 200 == 0:
                    print('B:{}, Loss:{:.3f}, \nClsLoss:{}, \nBboxLoss:{}\n\n'
                          .format(i, mx.nd.mean(sumLoss[0]).asscalar(),
                                  clsLoss[0].asnumpy(), bboxLoss[0].asnumpy()))

                autograd.backward(sumLoss)
                self.trainer.step(self.BATCH_SIZE)
                ce_metric.update(0, [l * self.BATCH_SIZE for l in clsLoss])
                smoothl1_metric.update(0, [l * self.BATCH_SIZE for l in bboxLoss])

            name1, loss1 = ce_metric.get()
            name2, loss2 = smoothl1_metric.get()
            # Report throughput over the whole epoch, not a single batch.
            samples = (i + 1) * self.BATCH_SIZE
            print('[Epoch {}], Speed: {:.3f} samples/sec, {}={:.3f}, {}={:.3f}'
                  .format(epoch, samples / (time() - tic), name1, loss1,
                          name2, loss2))
            self.net.save_parameters('../params/2X7' + 'Xnet' + str(epoch) +
                                     '.params')
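# ``TargetGenV1`` is not shown in this snippet. As an illustration only (not the
# project's actual implementation), a target generator with the same interface can
# be built around ``mx.contrib.ndarray.MultiBoxTarget``, much like the
# ``training_targets`` helper in the next snippet. All names below are hypothetical.
import mxnet as mx


class SimpleTargetGen:
    """Hypothetical stand-in for TargetGenV1: matches anchors to labels on the CPU."""

    def __init__(self, anchors, height, width):
        # anchors: (1, num_anchors, 4); height/width are unused in this sketch.
        self.anchors = anchors

    def generateTargets(self, labels, cls_preds):
        # MultiBoxTarget expects class predictions as (batch, num_cls, num_anchors),
        # and all inputs on the same context as the anchors (CPU here).
        cls_preds = cls_preds.transpose(axes=(0, 2, 1))
        box_targets, _, cls_targets = mx.contrib.ndarray.MultiBoxTarget(
            self.anchors,
            labels.as_in_context(mx.cpu()),
            cls_preds.as_in_context(mx.cpu()),
            overlap_threshold=0.5,
            ignore_label=-1,
            negative_mining_ratio=3)
        return cls_targets, box_targets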
from time import time

import mxnet as mx
from mxnet import autograd, gluon, image
from gluoncv.loss import SSDMultiBoxLoss

# Brunette (the network definition) is a project-local module; its import path is
# not shown in this snippet.


class Trainer:
    NUM_EPOCHS = 20
    classes = ['1', '2']
    ctx = mx.gpu()
    lossFunction = SSDMultiBoxLoss()

    def __init__(self, batchSize):
        self.net = Brunette(classes=self.classes)
        self.net.initialize(mx.init.Xavier(magnitude=2), ctx=self.ctx)
        self.batchSize = batchSize
        self.trainIter = image.ImageDetIter(batch_size=self.batchSize,
                                            data_shape=(3, 300, 300),
                                            path_imgrec='utils/TrainY.rec',
                                            path_imgidx='utils/TrainY.idx',
                                            shuffle=True,
                                            mean=True,
                                            brightness=0.3,
                                            contrast=0.3,
                                            saturation=0.3,
                                            pca_noise=0.3,
                                            hue=0.3)
        # with autograd.train_mode():
        #     _, _, anchors = self.net(mx.ndarray.zeros(
        #         shape=(self.batchSize, 3, 300, 300), ctx=self.ctx))
        #     self.T = TargetGenV2(anchors=anchors.as_in_context(mx.cpu()),
        #                          height=300, width=300)
        self.net.collect_params().reset_ctx(self.ctx)
        self.trainer = gluon.Trainer(self.net.collect_params(), 'sgd', {
            'learning_rate': 0.1,
            'wd': 3e-4
        })

    def training_targets(self, anchors, class_preds, labels):
        # MultiBoxTarget expects class predictions as (batch, num_cls, num_anchors).
        class_preds = class_preds.transpose(axes=(0, 2, 1))
        box_target, box_mask, cls_target = mx.contrib.ndarray.MultiBoxTarget(
            anchors,
            labels,
            class_preds,
            overlap_threshold=.5,
            ignore_label=-1,
            negative_mining_ratio=3,
            minimum_negative_samples=0,
            negative_mining_thresh=.5,
            variances=(0.1, 0.1, 0.2, 0.2),
            name="multibox_target")
        return box_target, box_mask, cls_target

    def train(self):
        print('Preparing to train')
        metricClass = mx.metric.Loss('CrossEntropy')
        metricBox = mx.metric.Loss('SmoothL1')
        for epoch in range(self.NUM_EPOCHS):
            print('Commencing epoch', epoch)
            tic = time()
            self.trainIter.reset()
            for i, batch in enumerate(self.trainIter):
                X = batch.data[0].as_in_context(self.ctx)
                Y = batch.label[0].as_in_context(self.ctx)

                with autograd.record():
                    # Make predictions and generate the matching training targets.
                    clsPreds, bboxPreds, anchors = self.net(X)
                    # clsTargets, bboxTargets = self.T.generateTargets(Y, clsPreds)
                    # training_targets returns (box_target, box_mask, cls_target).
                    bboxTargets, _, clsTargets = self.training_targets(
                        anchors, clsPreds, Y)
                    # Debug: inspect the generated targets for the first batch.
                    # print(clsTargets, bboxTargets)
                    # Compute the losses.
                    sumLoss, clsLoss, bboxLoss = self.lossFunction(
                        clsPreds, bboxPreds,
                        clsTargets.as_in_context(self.ctx),
                        bboxTargets.as_in_context(self.ctx))

                if (i + 1) % 200 == 0:
                    print('B:{}, Loss:{:.3f}, \nClsLoss:{}, \nBboxLoss:{}\n\n'
                          .format(i, mx.nd.mean(sumLoss[0]).asscalar(),
                                  clsLoss[0].asnumpy(), bboxLoss[0].asnumpy()))

                # Back-propagate the summed loss and update the parameters.
                # TODO: 1st vs 2nd order derivative??
                autograd.backward(sumLoss)
                self.trainer.step(self.batchSize)
                metricClass.update(0, [l * self.batchSize for l in clsLoss])
                metricBox.update(0, [l * self.batchSize for l in bboxLoss])

            name1, loss1 = metricClass.get()
            name2, loss2 = metricBox.get()
            # Report throughput over the whole epoch, not a single batch.
            samples = (i + 1) * self.batchSize
            print('[Epoch {}], Speed: {:.3f} samples/sec, {}={:.3f}, {}={:.3f}'
                  .format(epoch, samples / (time() - tic), name1, loss1,
                          name2, loss2))
            if epoch % 2 != 0:
                self.net.save_parameters('models/' + 'netV6-0--' + str(epoch) +
                                         '.params')
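# Usage sketch: the batch size of 16 below is illustrative, not taken from the
# snippet above, and training assumes the record files referenced in __init__ exist.
if __name__ == '__main__':
    trainer = Trainer(batchSize=16)
    trainer.train()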