def get_dataset(dataset, args):
    if dataset.lower() == 'voc':
        # train_dataset = VOCLike(root='/opt/ml/input/data/training', splits=((2019, 'train'),))
        # val_dataset = VOCLike(root='/opt/ml/input/data/training', splits=((2018, 'val'),))
        train_dataset = VOCLike(
            root='~/code/gluoncv-yolo-playing_cards/VOCTemplate',
            splits=((2019, 'train'),))
        val_dataset = VOCLike(
            root='~/code/gluoncv-yolo-playing_cards/VOCTemplate',
            splits=((2018, 'val'),))
        val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
    elif dataset.lower() == 'coco':
        train_dataset = gdata.COCODetection(splits='instances_train2017', use_crowd=False)
        val_dataset = gdata.COCODetection(splits='instances_val2017', skip_empty=False)
        val_metric = COCODetectionMetric(val_dataset, args.save_prefix + '_eval', cleanup=True,
                                         data_shape=(args.data_shape, args.data_shape))
    else:
        raise NotImplementedError('Dataset: {} not implemented.'.format(dataset))
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric
def get_dataset(dataset, args):
    train_dataset = gdata.RecordFileDetection(
        os.path.join(os.environ['SM_CHANNEL_TRAIN'], 'train.rec'))
    val_dataset = gdata.RecordFileDetection(
        os.path.join(os.environ['SM_CHANNEL_VAL'], 'val.rec'))
    object_categories = [
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
        'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
    ]
    val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=object_categories)
    args.no_random_shape = True
    # if dataset.lower() == 'voc':
    #     train_dataset = gdata.VOCDetection(
    #         splits=[(2007, 'trainval'), (2012, 'trainval')])
    #     val_dataset = gdata.VOCDetection(
    #         splits=[(2007, 'test')])
    #     val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
    # elif dataset.lower() == 'coco':
    #     train_dataset = gdata.COCODetection(splits='instances_train2017', use_crowd=False)
    #     val_dataset = gdata.COCODetection(splits='instances_val2017', skip_empty=False)
    #     val_metric = COCODetectionMetric(
    #         val_dataset, args.save_prefix + '_eval', cleanup=True,
    #         data_shape=(args.data_shape, args.data_shape))
    # else:
    #     raise NotImplementedError('Dataset: {} not implemented.'.format(dataset))
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric
def get_dataset(dataset, args):
    if dataset.lower() == 'voc':
        train_dataset = gdata.VOCDetection(
            splits=[(2007, 'trainval'), (2012, 'trainval')])
        val_dataset = gdata.VOCDetection(splits=[(2007, 'test')])
        val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
    elif dataset.lower() == 'coco':
        train_dataset = gdata.COCODetection(splits='instances_train2017', use_crowd=False)
        val_dataset = gdata.COCODetection(splits='instances_val2017', skip_empty=False)
        val_metric = COCODetectionMetric(val_dataset, args.save_prefix + '_eval', cleanup=True,
                                         data_shape=(args.data_shape, args.data_shape))
    else:
        raise NotImplementedError('Dataset: {} not implemented.'.format(dataset))
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric
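# A minimal sketch of how the (train_dataset, val_dataset, val_metric) triple returned by the
# variants above is typically consumed. `net` and `args` are assumed to come from the surrounding
# training script, and `make_train_loader` is a hypothetical helper added here for illustration;
# the transform and batchify calls mirror the training loop further below.
from mxnet import gluon
from gluoncv.data.batchify import Tuple, Stack, Pad
from gluoncv.data.transforms.presets.yolo import YOLO3DefaultTrainTransform

def make_train_loader(train_dataset, net, args):
    # Stack the image and the 5 fixed YOLOv3 training targets; pad the variable-length gt boxes.
    batchify_fn = Tuple(*([Stack() for _ in range(6)] + [Pad(axis=0, pad_val=-1)]))
    transform = YOLO3DefaultTrainTransform(args.data_shape, args.data_shape,
                                           net=net, mixup=args.mixup)
    return gluon.data.DataLoader(
        train_dataset.transform(transform),
        batch_size=args.batch_size, shuffle=True,
        batchify_fn=batchify_fn, last_batch='discard',
        num_workers=args.num_workers)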
def get_dataset(dataset, args):
    if dataset.lower() == 'coco':
        train_dataset = gdata.COCODetection(root='./data/coco',
                                            splits='instances_train2017',
                                            use_crowd=False)
    else:
        raise NotImplementedError('Dataset: {} not implemented.'.format(dataset))
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:  # is broken now. do not try
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset
def get_dataset(args):
    train_dataset = gcv.data.RecordFileDetection(args.train_dataset)
    val_dataset = gcv.data.RecordFileDetection(args.validate_dataset)
    classes = read_classes(args)
    val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=classes)
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric
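# read_classes() is referenced above but not defined in this snippet. A minimal sketch of what
# it might look like, assuming class names are stored one per line in a text file passed via a
# hypothetical args.classes_file argument (both the helper body and that argument name are
# illustrative, not part of the original script):
def read_classes(args):
    with open(args.classes_file) as f:
        return [line.strip() for line in f if line.strip()]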
def get_dataset(dataset, args):
    if dataset.lower() == 'voc':
        if args.val_2012:
            train_dataset = gdata.VOCDetection(
                splits=[('sbdche', 'train_voc2012_bboxwh')])
            val_dataset = gdata.VOC_Val_Detection(
                splits=[('sbdche', 'val_2012_bboxwh')])
        else:
            train_dataset = gdata.VOCDetection(
                splits=[('sbdche', 'train' + '_' + '8' + '_bboxwh')])
            val_dataset = gdata.VOC_Val_Detection(
                splits=[('sbdche', 'val' + '_' + '8' + '_bboxwh')])
        val_metric = VOC07MApMetric(iou_thresh=0.7, class_names=val_dataset.classes)
        val_polygon_metric = VOC07PolygonMApMetric(
            iou_thresh=0.7, class_names=val_dataset.classes)
    elif dataset.lower() == 'coco_pretrain':
        train_dataset = gdata.coco_pretrain_Detection(
            splits=[('_coco_20', 'train' + '_' + '8' + '_bboxwh')])
        if args.val_2012:
            val_dataset = gdata.VOC_Val_Detection(
                splits=[('sbdche', 'val_2012_bboxwh')])
        else:
            val_dataset = gdata.VOC_Val_Detection(
                splits=[('sbdche', 'val' + '_' + '8' + '_bboxwh')])
        val_metric = VOC07MApMetric(iou_thresh=0.7, class_names=val_dataset.classes)
        val_polygon_metric = VOC07PolygonMApMetric(
            iou_thresh=0.7, class_names=val_dataset.classes)
    elif dataset.lower() == 'coco':
        train_dataset = gdata.cocoDetection(
            root='/home/tutian/dataset/coco_to_voc/train',
            subfolder='./bases_50_xml_each_' + 'var')
        val_dataset = gdata.cocoDetection(
            root='/home/tutian/dataset/coco_to_voc/val',
            subfolder='./bases_50_xml_' + 'raw_coef')
        val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
        # val_polygon_metric = New07PolygonMApMetric(iou_thresh=0.5, class_names=val_dataset.classes,
        #                                            root='/home/tutian/dataset/coco_to_voc/val/')
        val_polygon_metric = None
    else:
        raise NotImplementedError('Dataset: {} not implemented.'.format(dataset))
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric, val_polygon_metric
def get_dataset(dataset, args):
    train_dataset = gdata.VOCDetection(
        splits=[(2007, 'trainval'), (2012, 'trainval')])
    val_dataset = gdata.VOCDetection(
        splits=[(2007, 'test')])
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset
def get_dataset(dataset, args):
    train_dataset = CustomDetection(root='', splits=((2018, 'train'),))
    val_dataset = CustomDetection(root='', splits=((2018, 'valid'),))
    val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
    # print("len---------------=" + type(val_metric))
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric
def get_dataset(args):
    train_dataset = gcv.data.RecordFileDetection(
        os.path.join(args.train, 'birds_ssd_sample_train.rec'))
    val_dataset = gcv.data.RecordFileDetection(
        os.path.join(args.test, 'birds_ssd_sample_val.rec'))
    val_metric = VOC07MApMetric(iou_thresh=0.5)
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric
def get_dataset(args): print(os.listdir('/opt/ml/input/data/')) print(os.listdir('/opt/ml/input/data/train/')) train_dataset = GroundTruthDataset(args.train,'train',args.images,"labels") val_dataset = GroundTruthDataset(args.test,'validation',args.images,"labels") val_metric = VOC07MApMetric(iou_thresh=0.5) if args.num_samples < 0: args.num_samples = len(train_dataset) if args.mixup: from gluoncv.data import MixupDetection train_dataset = MixupDetection(train_dataset) return train_dataset, val_dataset, val_metric
def get_dataset(train_dir, test_dir, args):
    train_dataset = RecordFileDetection(
        os.path.join(train_dir, 'birds_ssd_sample_train.rec'), coord_normalized=True)
    val_dataset = RecordFileDetection(
        os.path.join(test_dir, 'birds_ssd_sample_val.rec'), coord_normalized=True)
    val_metric = VOC07MApMetric(iou_thresh=0.5)
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric
def get_dataset(dataset, args):
    if dataset.lower() == 'voc':
        train_dataset = LstDetection('train.txt', root=os.path.expanduser('.'))
        val_dataset = LstDetection('val.txt', root=os.path.expanduser('.'))
        train_dataset.classes = MY_CLASSES
        val_dataset.classes = MY_CLASSES
        val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=MY_CLASSES)
    else:
        raise NotImplementedError('Dataset: {} not implemented.'.format(dataset))
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric
def loadDataset(self):
    train_dataset = self.train_dataset.getDatasetForTraining()
    val_dataset = None
    if self.args.validate_dataset:
        val_dataset = self.val_dataset.getDatasetForTraining()
        self.eval_metric = self.getValidationMetric()
    if self.args.num_samples < 0:
        self.args.num_samples = len(train_dataset)
    if self.args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    self.train_data, self.val_data = self.getDataloader(train_dataset, val_dataset)
def load_data():
    train_dataset = gcv.data.RecordFileDetection(
        os.path.join(args.train, 'train.rec'))
    val_dataset = gcv.data.RecordFileDetection(
        os.path.join(args.test, 'val.rec'))
    classes_df = pd.read_csv(CLASSES_FILE, header=None)
    classes = classes_df[1].tolist()
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, classes
def get_dataset(dataset, args):
    if dataset.lower() == 'voc':
        train_dataset = VOCLike(root=r'D:\QMDownload\VOCdevkit', splits=[(2028, 'trainval')])
        val_dataset = VOCLike(root=r'D:\QMDownload\VOCdevkit', splits=[(2028, 'test')])
        val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
    else:
        raise NotImplementedError('Dataset: {} not implemented.'.format(dataset))
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric
def get_dataset(dataset, args):
    train_dataset = gdata.RecordFileDetection(
        os.path.join(os.environ['SM_CHANNEL_TRAIN'], 'train.rec'))
    val_dataset = gdata.RecordFileDetection(
        os.path.join(os.environ['SM_CHANNEL_VAL'], 'val.rec'))
    object_categories = [
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
        'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
    ]
    val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=object_categories)
    args.no_random_shape = True
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric
def get_dataset(args):
    print(os.listdir('/opt/ml/input/data/'))
    print(os.listdir('/opt/ml/input/data/train/'))
    # print(os.listdir('/opt/ml/input/data/train/'))
    # print(os.listdir('/opt/ml/input/data/train-manifest/'))
    # train_dataset = gcv.data.RecordFileDetection(os.path.join('/opt/ml/input/data', 'train_0'))
    # val_dataset = gcv.data.RecordFileDetection(os.path.join('/opt/ml/input/test', 'test_0'))
    train_dataset = GroundTruthDataset(args.train, 'train', args.images, "label-job-test")
    val_dataset = GroundTruthDataset(args.test, 'validation', args.images, "label-job-test")
    val_metric = VOC07MApMetric(iou_thresh=0.5)
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric
def get_dataset(args):
    train_dataset = CycleDataset(root='data/filtered/', split_id='001', split="train",
                                 cache_frames=True, percent=0.1)
    val_dataset = CycleDataset(root='data/filtered/', split_id='001', split="val",
                               cache_frames=True, percent=0.1)
    val_metric = VOCMApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric
def get_dataset(dataset, args):
    if dataset.lower() == 'voc':
        root = '/data02/wangqiushi/datasets/DR/'
        image_root = os.path.join(root, 'Images')
        train_label_file = os.path.join(root, 'mxnet_rec', 't90', 'train.txt')
        val_label_file = os.path.join(root, 'mxnet_rec', 't90', 'valid.txt')
        classes = ('30', '40', '50')
        map_file = os.path.join(root, 'mxnet_rec', 'DR_map.txt')
        train_dataset = DetectionDataset(image_root, train_label_file, classes, map_file=map_file)
        val_dataset = DetectionDataset(image_root, val_label_file, classes,
                                       map_file=map_file, shuffle=False)
        val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
    elif dataset.lower() == 'coco':
        train_dataset = gdata.COCODetection(splits='instances_train2017', use_crowd=False)
        val_dataset = gdata.COCODetection(splits='instances_val2017', skip_empty=False)
        val_metric = COCODetectionMetric(val_dataset, args.save_prefix + '_eval', cleanup=True,
                                         data_shape=(args.data_shape, args.data_shape))
    else:
        raise NotImplementedError('Dataset: {} not implemented.'.format(dataset))
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric
def get_dataset(dataset, args):
    if dataset.lower() == 'voc':
        train_dataset = gdata.VOCDetection(
            splits=[('sbdche', 'train' + '_' + '8' + '_bboxwh')])
        if args.val_2012:
            val_dataset = gdata.VOC_Val_Detection(
                splits=[('sbdche', 'val_2012_bboxwh')])
        else:
            val_dataset = gdata.VOC_Val_Detection(
                splits=[('sbdche', 'val' + '_' + '8' + '_bboxwh')])
        val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
        val_polygon_metric = VOC07PolygonMApMetric(
            iou_thresh=0.5, class_names=val_dataset.classes)
    elif dataset.lower() == 'coco_pretrain':
        train_dataset = gdata.coco_pretrain_Detection(
            splits=[('_coco_20', 'train' + '_' + '8' + '_bboxwh')])
        if args.val_2012:
            val_dataset = gdata.VOC_Val_Detection(
                splits=[('sbdche', 'val_2012_bboxwh')])
        else:
            val_dataset = gdata.VOC_Val_Detection(
                splits=[('sbdche', 'val' + '_' + '8' + '_bboxwh')])
        val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
        val_polygon_metric = VOC07PolygonMApMetric(
            iou_thresh=0.5, class_names=val_dataset.classes)
    else:
        raise NotImplementedError('Dataset: {} not implemented.'.format(dataset))
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric, val_polygon_metric
def get_dataset(dataset, args): train_dataset = gdata.RecordFileDetection( os.path.join(os.environ["SM_CHANNEL_TRAIN"], "train.rec") ) val_dataset = gdata.RecordFileDetection(os.path.join(os.environ["SM_CHANNEL_VAL"], "val.rec")) object_categories = [ "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor", ] val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=object_categories) args.no_random_shape = True if args.num_samples < 0: args.num_samples = len(train_dataset) if args.mixup: from gluoncv.data import MixupDetection train_dataset = MixupDetection(train_dataset) return train_dataset, val_dataset, val_metric
def get_dataset():
    class VOCLike(VOCDetection):
        CLASSES = my_class

        def __init__(self, root, splits, transform=None, index_map=None, preload_label=True):
            super(VOCLike, self).__init__(root, splits, transform, index_map, preload_label)

    train_dataset = VOCLike(root='mxnet-bdd', splits=((2018, 'train'),))
    val_dataset = VOCLike(root='mxnet-bdd', splits=((2018, 'test'),))
    val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    if args.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)
    return train_dataset, val_dataset, val_metric
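# Nearly every variant above wraps train_dataset in MixupDetection when args.mixup is set, but
# mixup only takes effect once a mixing distribution is registered on the wrapper. A minimal
# sketch of that pattern (toggle_mixup is a hypothetical helper added here for illustration;
# the same set_mixup calls appear in the training loop below):
import numpy as np

def toggle_mixup(train_dataset, enable=True):
    # MixupDetection.set_mixup(distribution, *args) draws a mixing ratio per sample;
    # passing None switches mixup off (typically done for the last few epochs).
    if enable:
        train_dataset.set_mixup(np.random.beta, 1.5, 1.5)
    else:
        train_dataset.set_mixup(None)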
def train(net, async_net, ctx, args):
    """Training pipeline"""
    net.collect_params().reset_ctx(ctx)
    if args.no_wd:
        for k, v in net.collect_params(".*beta|.*gamma|.*bias").items():
            v.wd_mult = 0.0
    if args.label_smooth:
        net._target_generator._label_smooth = True

    if args.lr_decay_period > 0:
        lr_decay_epoch = list(
            range(args.lr_decay_period, args.epochs, args.lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in args.lr_decay_epoch.split(',')]
    lr_scheduler = LRSequential([
        LRScheduler("linear", base_lr=0, target_lr=args.lr,
                    nepochs=args.warmup_epochs, iters_per_epoch=args.batch_size),
        LRScheduler(args.lr_mode, base_lr=args.lr,
                    nepochs=args.epochs - args.warmup_epochs,
                    iters_per_epoch=args.batch_size,
                    step_epoch=lr_decay_epoch,
                    step_factor=args.lr_decay, power=2),
    ])

    if args.optimizer == "sgd":
        trainer = gluon.Trainer(net.collect_params(), args.optimizer, {
            "wd": args.wd,
            "momentum": args.momentum,
            "lr_scheduler": lr_scheduler
        }, kvstore="local")
    elif args.optimizer == "adam":
        trainer = gluon.Trainer(net.collect_params(), args.optimizer,
                                {"lr_scheduler": lr_scheduler}, kvstore="local")
    else:
        trainer = gluon.Trainer(net.collect_params(), args.optimizer, kvstore="local")

    # targets
    # sigmoid_ce = gluon.loss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=False)
    # l1_loss = gluon.loss.L1Loss()

    # Intermediate Metrics:
    train_metrics = (
        mx.metric.Loss("ObjLoss"),
        mx.metric.Loss("BoxCenterLoss"),
        mx.metric.Loss("BoxScaleLoss"),
        mx.metric.Loss("ClassLoss"),
        mx.metric.Loss("TotalLoss"),
    )
    train_metric_ixs = range(len(train_metrics))
    target_metric_ix = -1  # Train towards TotalLoss (the last one)

    # Evaluation Metrics:
    val_metric = VOC07MApMetric(iou_thresh=0.5)

    # Data transformations:
    train_batchify_fn = Tuple(*([Stack() for _ in range(6)] +
                                [Pad(axis=0, pad_val=-1) for _ in range(1)]))
    train_transforms = (YOLO3DefaultTrainTransform(
        args.data_shape, args.data_shape, net=async_net, mixup=args.mixup)
        if args.no_random_shape else [
            YOLO3DefaultTrainTransform(x * 32, x * 32, net=async_net, mixup=args.mixup)
            for x in range(10, 20)
        ])
    validation_batchify_fn = None
    validation_transforms = None
    if args.validation:
        validation_batchify_fn = Tuple(Stack(), Pad(pad_val=-1))
        validation_transforms = YOLO3DefaultValTransform(args.data_shape, args.data_shape)

    logger.info(args)
    logger.info(f"Start training from [Epoch {args.start_epoch}]")
    prev_best_score = float("-inf")
    best_epoch = args.start_epoch

    logger.info("Sleeping for 3s in case training data file not yet ready")
    time.sleep(3)

    for epoch in range(args.start_epoch, args.epochs):
        # if args.mixup:
        #     # TODO(zhreshold): more elegant way to control mixup during runtime
        #     try:
        #         train_data._dataset.set_mixup(np.random.beta, 1.5, 1.5)
        #     except AttributeError:
        #         train_data._dataset._data.set_mixup(np.random.beta, 1.5, 1.5)
        #     if epoch >= args.epochs - args.no_mixup_epochs:
        #         try:
        #             train_data._dataset.set_mixup(None)
        #         except AttributeError:
        #             train_data._dataset._data.set_mixup(None)
        tic = time.time()
        btic = time.time()
        mx.nd.waitall()
        net.hybridize()

        logger.debug(f'Input data dir contents: {os.listdir("/opt/ml/input/data/")}')
        train_data_gen = pipe_detection_minibatch(
            epoch, channel=args.train, batch_size=args.stream_batch_size)
        for ix_streambatch, train_dataset in enumerate(train_data_gen):
            # TODO: Mixup is kinda rubbish if it's only within a (potentially small) batch
            if args.mixup:
                train_dataset = MixupDetection(train_dataset)

            # Create dataloader for the stream-batch:
            if args.no_random_shape:
                logger.debug("Creating train DataLoader without random transform")
                train_dataloader = gluon.data.DataLoader(
                    train_dataset.transform(train_transforms),
                    batch_size=args.batch_size,
                    batchify_fn=train_batchify_fn,
                    last_batch="discard",
                    num_workers=args.num_workers,
                    shuffle=True,
                )
            else:
                logger.debug("Creating train DataLoader with random transform")
                train_dataloader = RandomTransformDataLoader(
                    train_transforms,
                    train_dataset,
                    interval=10,
                    batch_size=args.batch_size,
                    batchify_fn=train_batchify_fn,
                    last_batch="discard",
                    num_workers=args.num_workers,
                    shuffle=True,
                )

            if args.mixup:
                logger.debug("Shuffling stream-batch")
                # TODO(zhreshold): more elegant way to control mixup during runtime
                try:
                    train_dataloader._dataset.set_mixup(np.random.beta, 1.5, 1.5)
                except AttributeError:
                    train_dataloader._dataset._data.set_mixup(np.random.beta, 1.5, 1.5)
                if epoch >= args.epochs - args.no_mixup_epochs:
                    try:
                        train_dataloader._dataset.set_mixup(None)
                    except AttributeError:
                        train_dataloader._dataset._data.set_mixup(None)

            logger.debug(
                f"Training on stream-batch {ix_streambatch} ({len(train_dataset)} records)")

            # TODO: Improve stream-batching robustness to drop loop guard clauses
            # While it would be nice to simply `for i, batch in enumerate(train_dataloader):`,
            # corrupted image buffers are somehow sneaking through the stream-batch at the moment.
            #
            # For now, we catch and tolerate these errors - trying to resume stream-batch process
            # where possible and otherwise discarding the remainder of the stream-batch :-(
            done = False
            i = -1
            dataiter = iter(train_dataloader)
            while not done:
                i += 1
                batch = None
                while not batch:
                    try:
                        batch = next(dataiter)
                    except StopIteration:
                        done = True
                        break
                    except ValueError:
                        # Some problem with the minibatch prevented loading - try the next
                        logger.warn(
                            f"[Epoch {epoch}][Streambatch {ix_streambatch}] "
                            f"Failed to load minibatch {i}, trying next...")
                        i += 1
                    except:
                        logger.error(
                            f"[Epoch {epoch}][Streambatch {ix_streambatch}] "
                            f"Failed to iterate minibatch {i}: Discarding remainder")
                        break
                if not batch:
                    logger.debug(
                        f"[Epoch {epoch}][Streambatch {ix_streambatch}] "
                        f"Done after {i} minibatches")
                    break

                logger.debug(f"Epoch {epoch}, stream batch {ix_streambatch}, minibatch {i}")
                batch_size = batch[0].shape[0]
                data = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                                  batch_axis=0, even_split=False)
                # objectness, center_targets, scale_targets, weights, class_targets
                fixed_targets = [
                    gluon.utils.split_and_load(batch[it], ctx_list=ctx,
                                               batch_axis=0, even_split=False)
                    for it in range(1, 6)
                ]
                gt_boxes = gluon.utils.split_and_load(batch[6], ctx_list=ctx,
                                                      batch_axis=0, even_split=False)
                loss_trackers = tuple([] for metric in train_metrics)
                with autograd.record():
                    for ix, x in enumerate(data):
                        losses_raw = net(x, gt_boxes[ix], *[ft[ix] for ft in fixed_targets])
                        # net outputs: [obj_loss, center_loss, scale_loss, cls_loss]
                        # Each a mx.ndarray 1xbatch_size. This is the same order as our
                        # train_metrics, so we just need to add a total vector:
                        total_loss = sum(losses_raw)
                        losses = losses_raw + [total_loss]

                        # If any sample's total loss is non-finite, sum will be:
                        if not isfinite(sum(total_loss)):
                            logger.error(
                                f"[Epoch {epoch}][Streambatch {ix_streambatch}][Minibatch {i}] "
                                f"got non-finite losses: {losses_raw}")
                            # TODO: Terminate training if losses or gradient go infinite?

                        for ix in train_metric_ixs:
                            loss_trackers[ix].append(losses[ix])

                autograd.backward(loss_trackers[target_metric_ix])
                trainer.step(batch_size)

                for ix in train_metric_ixs:
                    train_metrics[ix].update(0, loss_trackers[ix])

                if args.log_interval and not (i + 1) % args.log_interval:
                    train_metrics_current = map(lambda metric: metric.get(), train_metrics)
                    metrics_msg = "; ".join(
                        [f"{name}={val:.3f}" for name, val in train_metrics_current])
                    logger.info(
                        f"[Epoch {epoch}][Streambatch {ix_streambatch}][Minibatch {i}] "
                        f"LR={trainer.learning_rate:.2E}; "
                        f"Speed={batch_size/(time.time()-btic):.3f} samples/sec; {metrics_msg};")
                btic = time.time()

        train_metrics_current = map(lambda metric: metric.get(), train_metrics)
        metrics_msg = "; ".join(
            [f"{name}={val:.3f}" for name, val in train_metrics_current])
        logger.info(f"[Epoch {epoch}] TrainingCost={time.time()-tic:.3f}; {metrics_msg};")

        if not (epoch + 1) % args.val_interval:
            logger.info(f"Validating [Epoch {epoch}]")
            metric_names, metric_values = validate(
                net, args.validation, epoch, ctx,
                VOC07MApMetric(iou_thresh=0.5),
                validation_transforms, validation_batchify_fn, args)
            if isinstance(metric_names, list):
                val_msg = "; ".join(
                    [f"{k}={v}" for k, v in zip(metric_names, metric_values)])
                current_score = float(metric_values[-1])
            else:
                val_msg = f"{metric_names}={metric_values}"
                current_score = metric_values
            logger.info(f"[Epoch {epoch}] Validation: {val_msg};")
        else:
            current_score = float("-inf")

        save_progress(net, current_score, prev_best_score, args.model_dir,
                      epoch, args.checkpoint_interval, args.checkpoint_dir)
        if current_score > prev_best_score:
            prev_best_score = current_score
            best_epoch = epoch

        if (args.early_stopping and epoch >= args.early_stopping_min_epochs
                and (epoch - best_epoch) >= args.early_stopping_patience):
            logger.info(
                f"[Epoch {epoch}] No improvement since epoch {best_epoch}: Stopping early")
            break
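# pipe_detection_minibatch(), validate() and save_progress() are helpers defined elsewhere in
# this script's repository and are not shown here. As an illustration only, a plausible minimal
# save_progress() under the assumption that it exports the best-so-far model and writes periodic
# checkpoints (the real helper may differ):
import os

def save_progress(net, current_score, prev_best_score, model_dir, epoch,
                  checkpoint_interval, checkpoint_dir):
    if current_score > prev_best_score:
        net.export(os.path.join(model_dir, 'model'), epoch)  # best model so far (symbol + params)
    if checkpoint_interval and not (epoch + 1) % checkpoint_interval:
        net.save_parameters(os.path.join(checkpoint_dir, f'epoch-{epoch:04d}.params'))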
def get_dataset(dataset_name, dataset_val_name, save_prefix=''):
    train_datasets = list()
    val_datasets = list()
    if len(dataset_val_name) == 0:
        dataset_val_name = dataset_name

    # if dataset_name.lower() == 'voc':
    if 'voc' in dataset_name:
        train_datasets.append(VOCDetection(splits=[(2007, 'trainval'), (2012, 'trainval')],
                                           features_dir=FLAGS.features_dir))
    if 'voc' in dataset_val_name:
        val_datasets.append(VOCDetection(splits=[(2007, 'test')],
                                         features_dir=FLAGS.features_dir))
        val_metric = VOCMApMetric(iou_thresh=0.5, class_names=val_datasets[-1].classes)
    if 'coco' in dataset_name:
        train_datasets.append(COCODetection(splits=['instances_train2017'], use_crowd=False))
    if 'coco' in dataset_val_name:
        val_datasets.append(COCODetection(splits=['instances_val2017'], allow_empty=True))
        val_metric = COCODetectionMetric(val_datasets[-1], save_prefix + '_eval', cleanup=True,
                                         data_shape=(FLAGS.data_shape, FLAGS.data_shape))
    if 'det' in dataset_name:
        train_datasets.append(ImageNetDetection(splits=['train'], allow_empty=FLAGS.allow_empty))
    if 'det' in dataset_val_name:
        val_datasets.append(ImageNetDetection(splits=['val'], allow_empty=FLAGS.allow_empty))
        val_metric = VOCMApMetric(iou_thresh=0.5, class_names=val_datasets[-1].classes)
    if 'vid' in dataset_name:
        train_datasets.append(ImageNetVidDetection(
            splits=[(2017, 'train')], allow_empty=FLAGS.allow_empty, every=FLAGS.every,
            window=FLAGS.window, features_dir=FLAGS.features_dir, mult_out=FLAGS.mult_out))
    if 'vid' in dataset_val_name:
        val_datasets.append(ImageNetVidDetection(
            splits=[(2017, 'val')], allow_empty=FLAGS.allow_empty, every=FLAGS.every,
            window=FLAGS.window, features_dir=FLAGS.features_dir, mult_out=FLAGS.mult_out))
        if FLAGS.mult_out:
            val_metric = VOCMApMetricTemporal(t=int(FLAGS.window[0]), iou_thresh=0.5,
                                              class_names=val_datasets[-1].classes)
        else:
            val_metric = VOCMApMetric(iou_thresh=0.5, class_names=val_datasets[-1].classes)

    if len(train_datasets) == 0:
        raise NotImplementedError('Dataset: {} not implemented.'.format(dataset_name))
    elif len(train_datasets) == 1:
        train_dataset = train_datasets[0]
    else:
        train_dataset = CombinedDetection(train_datasets, class_tree=True)

    if len(val_datasets) == 0:
        raise NotImplementedError('Dataset: {} not implemented.'.format(dataset_name))
    elif len(val_datasets) == 1 and len(train_datasets) == 1:
        val_dataset = val_datasets[0]
    else:
        val_dataset = CombinedDetection(val_datasets, class_tree=True, validation=True)
        val_metric = VOCMApMetric(iou_thresh=0.5, class_names=val_dataset.classes)

    if FLAGS.mixup:
        from gluoncv.data import MixupDetection
        train_dataset = MixupDetection(train_dataset)

    return train_dataset, val_dataset, val_metric