def get_config(model, fake=False):
    """Build a tensorpack TrainConfig for ImageNet ResNet training.

    Args:
        model: the ModelDesc to train.
        fake: if True, train on fixed fake data for benchmarking
            (no validation, 100 steps per epoch).

    Returns:
        TrainConfig wiring the dataflow, callbacks, LR schedule and epoch count.
    """
    nr_tower = max(get_num_gpu(), 1)
    assert args.batch % nr_tower == 0
    batch = args.batch // nr_tower

    if fake:
        logger.info("For benchmark, batch size is fixed to 64 per tower.")
        dataset_train = FakeData(
            [[64, 224, 224, 3], [64]], 1000, random=False, dtype='uint8')
        callbacks = []
        steps_per_epoch = 100
    else:
        logger.info("Running on {} towers. Batch size per tower: {}".format(
            nr_tower, batch))
        dataset_train = get_imagenet_dataflow(args.data, 'train', batch)
        dataset_val = get_imagenet_dataflow(args.data, 'val', min(64, batch))
        steps_per_epoch = 1281167 // args.batch

        # Linear scaling rule: lr = 0.1 * total_batch / 256.
        BASE_LR = 0.1 * args.batch / 256.0
        logger.info("BASELR: {}".format(BASE_LR))
        callbacks = [
            ModelSaver(),
            EstimatedTimeLeft(),
            ScheduledHyperParamSetter(
                'learning_rate',
                [(0, BASE_LR), (30, BASE_LR * 1e-1),
                 (60, BASE_LR * 1e-2), (90, BASE_LR * 1e-3)]),
        ]
        # FIX: only install the GPU utilization tracker when a GPU is
        # actually visible; it fails on CPU-only machines. This matches
        # the newer get_config variant in this file.
        if get_num_gpu() > 0:
            callbacks.append(GPUUtilizationTracker())
        if BASE_LR > 0.1:
            # Gradual warmup over the first 5 epochs for large total batch.
            callbacks.append(
                ScheduledHyperParamSetter(
                    'learning_rate',
                    [(0, 0.1), (5 * steps_per_epoch, BASE_LR)],
                    interp='linear', step_based=True))

        infs = [ClassificationError('wrong-top1', 'val-error-top1'),
                ClassificationError('wrong-top5', 'val-error-top5')]
        if nr_tower == 1:
            # single-GPU inference with queue prefetch
            callbacks.append(InferenceRunner(QueueInput(dataset_val), infs))
        else:
            # multi-GPU inference (with mandatory queue prefetch)
            callbacks.append(DataParallelInferenceRunner(
                dataset_val, infs, list(range(nr_tower))))

    return TrainConfig(
        model=model,
        dataflow=dataset_train,
        callbacks=callbacks,
        steps_per_epoch=steps_per_epoch,
        max_epoch=100,
    )
def get_config(model):
    """Assemble the TrainConfig: input pipeline, callbacks and LR schedule.

    Reads the global `args` (batch, data, fake, symbolic) and scales the
    learning rate linearly with the total batch size, with a 5-epoch
    linear warmup when the scaled LR exceeds the starting LR.
    """
    num_towers = max(get_num_gpu(), 1)
    assert args.batch % num_towers == 0
    tower_batch = args.batch // num_towers
    logger.info(
        "Running on {} towers. Batch size per tower: {}".format(num_towers, tower_batch))
    if tower_batch < 32 or tower_batch > 64:
        logger.warn(
            "Batch size per tower not in [32, 64]. This probably will lead to worse accuracy than reported.")

    if args.fake:
        # Deterministic fake data for benchmarking; no validation callbacks.
        data = QueueInput(FakeData(
            [[tower_batch, 224, 224, 3], [tower_batch]], 1000,
            random=False, dtype='uint8'))
        callbacks = []
    else:
        if args.symbolic:
            data = TFDatasetInput(
                get_imagenet_tfdata(args.data, 'train', tower_batch))
        else:
            data = QueueInput(
                get_imagenet_dataflow(args.data, 'train', tower_batch))

        START_LR = 0.1
        BASE_LR = START_LR * (args.batch / 256.0)
        callbacks = [
            ModelSaver(),
            EstimatedTimeLeft(),
            ScheduledHyperParamSetter(
                'learning_rate',
                [(0, min(START_LR, BASE_LR)),
                 (30, BASE_LR * 1e-1),
                 (60, BASE_LR * 1e-2),
                 (90, BASE_LR * 1e-3),
                 (100, BASE_LR * 1e-4)]),
        ]
        if BASE_LR > START_LR:
            # Warmup: ramp linearly from START_LR to the scaled BASE_LR
            # over the first 5 epochs.
            callbacks.append(
                ScheduledHyperParamSetter(
                    'learning_rate',
                    [(0, START_LR), (5, BASE_LR)], interp='linear'))

        evaluators = [ClassificationError('wrong-top1', 'val-error-top1'),
                      ClassificationError('wrong-top5', 'val-error-top5')]
        val_flow = get_imagenet_dataflow(args.data, 'val', tower_batch)
        if num_towers == 1:
            # single-GPU inference with queue prefetch
            callbacks.append(InferenceRunner(QueueInput(val_flow), evaluators))
        else:
            # multi-GPU inference (with mandatory queue prefetch)
            callbacks.append(DataParallelInferenceRunner(
                val_flow, evaluators, list(range(num_towers))))
        if get_num_gpu() > 0:
            callbacks.append(GPUUtilizationTracker())

    return TrainConfig(
        model=model,
        data=data,
        callbacks=callbacks,
        steps_per_epoch=100 if args.fake else 1281167 // args.batch,
        max_epoch=105,
    )
def get_data(name, batch):
    """Return the ImageNet dataflow for split *name* ('train' or 'val').

    Args:
        name: dataset split name.
        batch: batch size for the dataflow.
    """
    isTrain = name == 'train'
    augmentors = fbresnet_augmentor(isTrain)
    if isTrain:
        print("Training batch:", batch)
    # FIX: both branches built the identical dataflow (the else-branch only
    # routed it through a temporary variable), and the `global args`
    # declaration was a no-op since `args` is only read here.
    return get_imagenet_dataflow(args.data, name, batch, augmentors)
def get_data(name, batch):
    """ImageNet dataflow with fb.resnet.torch-style training augmentation.

    Training uses GoogleNet-style random crop (lighter crop when the model
    is small, per args.ratio), color jitter, lighting noise and horizontal
    flip; evaluation uses resize-shortest-edge-256 plus center 224 crop.
    """
    if name != 'train':
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    else:
        # use lighter augs if model is too small
        crop_fraction = 0.49 if args.ratio < 1 else 0.08
        # rgb-bgr conversion for the constants copied from fb.resnet.torch
        lighting = imgaug.Lighting(
            0.1,
            eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
            eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                             [-0.5808, -0.0045, -0.8140],
                             [-0.5836, -0.6948, 0.4203]],
                            dtype='float32')[::-1, ::-1])
        augmentors = [
            GoogleNetResize(crop_area_fraction=crop_fraction),
            imgaug.RandomOrderAug(
                [imgaug.BrightnessScale((0.6, 1.4), clip=False),
                 imgaug.Contrast((0.6, 1.4), clip=False),
                 imgaug.Saturation(0.4, rgb=False),
                 lighting]),
            imgaug.Flip(horiz=True),
        ]
    return get_imagenet_dataflow(args.data, name, batch, augmentors)
def get_data(name, batch):
    """ImageNet dataflow; GoogleNet crop (area fraction 0.49) for training.

    Evaluation uses resize-shortest-edge-256 followed by a center 224 crop.
    """
    if name != 'train':
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    else:
        # rgb-bgr conversion for the constants copied from fb.resnet.torch
        eigval = np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0
        eigvec = np.array([[-0.5675, 0.7192, 0.4009],
                           [-0.5808, -0.0045, -0.8140],
                           [-0.5836, -0.6948, 0.4203]],
                          dtype='float32')[::-1, ::-1]
        augmentors = [
            GoogleNetResize(crop_area_fraction=0.49),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
                imgaug.Lighting(0.1, eigval=eigval, eigvec=eigvec),
            ]),
            imgaug.Flip(horiz=True),
        ]
    return get_imagenet_dataflow(args.data, name, batch, augmentors)
def get_data(name, batch):
    """ImageNet dataflow at a 224x224 target shape with color-jitter training augs.

    The evaluation resize edge is scaled proportionally to the target shape
    (256 when the shape is 224).
    """
    image_shape = 224
    if name == 'train':
        # use lighter augs if model is too small
        crop_fraction = 0.49 if args.width_ratio < 1 else 0.08
        augmentors = [
            GoogleNetResize(crop_area_fraction=crop_fraction,
                            target_shape=image_shape),
            imgaug.RandomOrderAug([
                imgaug.BrightnessScale((0.6, 1.4), clip=False),
                imgaug.Contrast((0.6, 1.4), clip=False),
                imgaug.Saturation(0.4, rgb=False),
            ]),
            imgaug.Flip(horiz=True),
        ]
    else:
        resize_edge = int(image_shape * 256 / 224)
        augmentors = [
            imgaug.ResizeShortestEdge(resize_edge, cv2.INTER_CUBIC),
            imgaug.CenterCrop((image_shape, image_shape)),
        ]
    return get_imagenet_dataflow(args.data_dir, name, batch, augmentors,
                                 meta_dir=args.meta_dir)
def get_data(name, batch, parallel=None):
    """Dataflow for split *name* with the standard fb.resnet augmentors.

    Args:
        name: 'train' or 'val'.
        batch: batch size.
        parallel: number of parallel workers for the dataflow, or None
            for the helper's default.
    """
    augs = fbresnet_augmentor(name == 'train')
    return get_imagenet_dataflow(args.data, name, batch, augs,
                                 parallel=parallel)
def get_data(train_or_test):
    """Dataflow for *train_or_test* with the per-pixel dataset mean subtracted.

    The mean image is cropped by 16 pixels on each border before subtraction
    — presumably to match the 224x224 crops coming out of the augmentors
    (a 256x256 mean image); verify against ILSVRCMeta.
    """
    is_train = train_or_test == 'train'
    augs = fbresnet_augmentor(is_train)
    pp_mean = dataset.ILSVRCMeta().get_per_pixel_mean()
    augs.append(imgaug.MapImage(lambda x: x - pp_mean[16:-16, 16:-16]))
    return get_imagenet_dataflow(args.data, train_or_test, BATCH_SIZE, augs)
def get_data(train_or_test):
    """Dataflow reading from the train/valid list path, with mean subtraction.

    Args:
        train_or_test: 'train' or 'val'/'test'; selects TRAIN_LIST_PATH or
            VALID_LIST_PATH as the data source.
    """
    isTrain = train_or_test == 'train'
    augs = fbresnet_augmentor(isTrain)
    meta = dataset.ILSVRCMeta()
    pp_mean = meta.get_per_pixel_mean()
    # Subtract the center crop of the per-pixel mean (borders of 16 trimmed
    # to match the augmented crop size).
    augs.append(imgaug.MapImage(lambda x: x - pp_mean[16:-16, 16:-16]))
    # FIX: reuse the already-computed `isTrain` instead of re-evaluating
    # `train_or_test == 'train'` a second time.
    data_path = TRAIN_LIST_PATH if isTrain else VALID_LIST_PATH
    ds = get_imagenet_dataflow(data_path, train_or_test, BATCH_SIZE, augs)
    return ds
def get_data(name, batch):
    """ImageNet dataflow using the resize-256 / 224-crop pipeline.

    Training: random 224 crop, lighting noise, horizontal flip.
    Evaluation: center 224 crop. Both resize the shortest edge to 256 first.
    """
    if name == 'train':
        # rgb-bgr reversal of the lighting constants copied from fb.resnet.torch
        lighting = imgaug.Lighting(
            0.1,
            eigval=np.asarray([0.2175, 0.0188, 0.0045][::-1]) * 255.0,
            eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                             [-0.5808, -0.0045, -0.8140],
                             [-0.5836, -0.6948, 0.4203]],
                            dtype='float32')[::-1, ::-1])
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.RandomCrop(224),
            lighting,
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
            imgaug.CenterCrop((224, 224)),
        ]
    return get_imagenet_dataflow(args.data, name, batch, augmentors)
def get_data(name, batch, data_aug=True):
    """Dataflow for split *name*.

    Args:
        name: 'train' or 'val'.
        batch: batch size.
        data_aug: if True use the full fb.resnet augmentation, otherwise a
            plain (non-augmenting) resize/crop pipeline.
    """
    is_train = name == 'train'
    if data_aug:
        augmentors = fbresnet_augmentor(is_train)
    else:
        augmentors = normal_augmentor(is_train)
    return get_imagenet_dataflow(args.data, name, batch, augmentors)
def get_data(name, batch):
    """Standard ImageNet dataflow for split *name* at the given batch size."""
    # `args` is only read here, so the original `global` declaration was a
    # no-op and is omitted.
    augmentors = fbresnet_augmentor(name == 'train')
    return get_imagenet_dataflow(args.data, name, batch, augmentors)
parser = argparse.ArgumentParser()
parser.add_argument('--data', help='ILSVRC dataset dir')
parser.add_argument('--fake', help='use fakedata to test or benchmark this model',
                    action='store_true')
args = parser.parse_args()

logger.set_logger_dir("train_log/imagenet-resnet-keras")
tf.keras.backend.set_image_data_format('channels_first')

# FIX: get_nr_gpu() returns 0 on CPU-only machines, which would make
# TOTAL_BATCH_SIZE // nr_gpu raise ZeroDivisionError and hand the trainer
# zero towers; clamp to at least one tower.
nr_gpu = max(get_nr_gpu(), 1)

if args.fake:
    # Fixed fake data: enough to exercise the pipeline without the dataset.
    df_train = FakeData([[64, 224, 224, 3], [64, 1000]], 5000,
                        random=False, dtype='uint8')
    df_val = FakeData([[64, 224, 224, 3], [64, 1000]], 5000, random=False)
else:
    batch_size = TOTAL_BATCH_SIZE // nr_gpu
    assert args.data is not None
    df_train = get_imagenet_dataflow(
        args.data, 'train', batch_size, fbresnet_augmentor(True))
    df_val = get_imagenet_dataflow(
        args.data, 'val', batch_size, fbresnet_augmentor(False))

    def one_hot(label):
        # Integer class id -> 1000-way one-hot vector for the keras loss.
        return np.eye(1000)[label]

    df_train = MapDataComponent(df_train, one_hot, 1)
    df_val = MapDataComponent(df_val, one_hot, 1)

M = KerasModel(
    resnet50,
    inputs_desc=[InputDesc(tf.uint8, [None, 224, 224, 3], 'images')],
    targets_desc=[InputDesc(tf.float32, [None, 1000], 'labels')],
    input=df_train,
    trainer=SyncMultiGPUTrainerReplicated(nr_gpu))
"Pretrained models listed in README were trained with batch=32x8.") parser.add_argument('--mode', choices=['resnet', 'preact', 'se'], help='variants of resnet to use', default='resnet') args = parser.parse_args() if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu model = Model(args.depth, args.mode) model.data_format = args.data_format if args.weight_decay_norm: model.weight_decay_pattern = ".*/W|.*/gamma|.*/beta" if args.eval: batch = 128 # something that can run on one gpu ds = get_imagenet_dataflow(args.data, 'val', batch) eval_on_ILSVRC12(model, get_model_loader(args.load), ds) else: if args.fake: logger.set_logger_dir(os.path.join('train_log', 'tmp'), 'd') else: logger.set_logger_dir( os.path.join('train_log', 'imagenet-{}-d{}-batch{}'.format( args.mode, args.depth, args.batch))) config = get_config(model) if args.load: config.session_init = get_model_loader(args.load) trainer = SyncMultiGPUTrainerReplicated(max(get_num_gpu(), 1)) launch_train_with_config(config, trainer)
def get_data(name, batch, target_shape):
    """Dataflow for split *name*, augmented to produce *target_shape* images."""
    augs = fbresnet_augmentor(name == 'train', target_shape)
    return get_imagenet_dataflow(args.data, name, batch, augs)
help='resnet depth', required=True, type=int, choices=[50, 101, 152]) parser.add_argument('--input', help='an input image') parser.add_argument('--convert', help='npz output file to save the converted model') parser.add_argument('--eval', help='ILSVRC dir to run validation on') parser.add_argument('--image-size', choices=[224, 75], help='image size to feed to resnet') parser.add_argument('--dataset', choices=['val', 'train']) args = parser.parse_args() DEPTH = args.depth param = np.load(args.load, encoding='latin1').item() param = convert_param_name(param) target_shape = args.image_size if args.convert: assert args.convert.endswith('.npz') np.savez_compressed(args.convert, **param) if args.eval: ds = get_imagenet_dataflow(args.eval, args.dataset, 128, get_inference_augmentor(target_shape)) eval_on_ILSVRC12(Model(target_shape), DictRestore(param), ds) elif args.input: run_test(param, args.input, target_shape)
raise logger.info("Name Transform: " + k + ' --> ' + newname) resnet_param[newname] = v return resnet_param if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--load', required=True, help='.npy model file generated by tensorpack.utils.loadcaffe') parser.add_argument('-d', '--depth', help='resnet depth', required=True, type=int, choices=[50, 101, 152]) parser.add_argument('--input', help='an input image') parser.add_argument('--convert', help='npz output file to save the converted model') parser.add_argument('--eval', help='ILSVRC dir to run validation on') args = parser.parse_args() DEPTH = args.depth param = np.load(args.load, encoding='latin1').item() param = convert_param_name(param) if args.convert: assert args.convert.endswith('.npz') np.savez_compressed(args.convert, **param) if args.eval: ds = get_imagenet_dataflow(args.eval, 'val', 128, get_inference_augmentor()) eval_on_ILSVRC12(Model(), DictRestore(param), ds) elif args.input: run_test(param, args.input)
def get_data(dataset_name):
    """Dataflow for *dataset_name* at the module-level BATCH_SIZE."""
    augs = fbresnet_augmentor(dataset_name == 'train')
    return get_imagenet_dataflow(args.data, dataset_name, BATCH_SIZE, augs)
args = parser.parse_args()

logger.set_logger_dir(os.path.join("train_log", "imagenet-resnet-keras"))
tf.keras.backend.set_image_data_format('channels_first')

# FIX: get_num_gpu() returns 0 on CPU-only machines, which would make
# TOTAL_BATCH_SIZE // num_gpu raise ZeroDivisionError and hand the trainer
# zero towers; clamp to at least one tower.
num_gpu = max(get_num_gpu(), 1)

if args.fake:
    # Fixed fake data: exercises the pipeline without the dataset on disk.
    df_train = FakeData([[64, 224, 224, 3], [64, 1000]], 5000,
                        random=False, dtype='uint8')
    df_val = FakeData([[64, 224, 224, 3], [64, 1000]], 5000, random=False)
else:
    batch_size = TOTAL_BATCH_SIZE // num_gpu
    assert args.data is not None
    df_train = get_imagenet_dataflow(args.data, 'train', batch_size,
                                     fbresnet_augmentor(True))
    df_val = get_imagenet_dataflow(args.data, 'val', batch_size,
                                   fbresnet_augmentor(False))

    def one_hot(label):
        # Integer class id -> 1000-way one-hot vector for the keras loss.
        return np.eye(1000)[label]

    df_train = MapDataComponent(df_train, one_hot, 1)
    df_val = MapDataComponent(df_val, one_hot, 1)

M = KerasModel(
    resnet50,
    inputs_desc=[InputDesc(tf.uint8, [None, 224, 224, 3], 'images')],
    targets_desc=[InputDesc(tf.float32, [None, 1000], 'labels')],
    input=df_train,
    trainer=SyncMultiGPUTrainerReplicated(num_gpu))
def get_data(name, batch, data_aug=True):
    """Dataflow for split *name*.

    Args:
        name: 'train' or 'val'.
        batch: batch size.
        data_aug: True for the full fb.resnet augmentation, False for the
            plain normal_augmentor pipeline.
    """
    is_train = name == 'train'
    augmentors = fbresnet_augmentor(is_train) if data_aug else normal_augmentor(is_train)
    return get_imagenet_dataflow(args.data, name, batch, augmentors)
def main():
    # Entry point: parse CLI args, build a big-little network
    # (bLResNet / bLResNeXt / bLSEResNeXt), then either evaluate once
    # (--evaluate) or run the full train/validate/checkpoint loop.
    global args
    args = parser.parse_args()

    cudnn.benchmark = True
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # Select the backbone constructor, a human-readable architecture name,
    # and the constructor's positional arguments from the CLI flags.
    strong_augmentor = False
    if args.backbone_net == 'blresnext':
        backbone = blresnext_model
        arch_name = "ImageNet-bLResNeXt-{}-{}x{}d-a{}-b{}".format(
            args.depth, args.cardinality, args.basewidth, args.alpha, args.beta)
        backbone_setting = [args.depth, args.basewidth, args.cardinality,
                            args.alpha, args.beta]
    elif args.backbone_net == 'blresnet':
        backbone = blresnet_model
        arch_name = "ImageNet-bLResNet-{}-a{}-b{}".format(
            args.depth, args.alpha, args.beta)
        backbone_setting = [args.depth, args.alpha, args.beta]
    elif args.backbone_net == 'blseresnext':
        backbone = blseresnext_model
        arch_name = "ImageNet-bLSEResNeXt-{}-{}x{}d-a{}-b{}".format(
            args.depth, args.cardinality, args.basewidth, args.alpha, args.beta)
        backbone_setting = [args.depth, args.basewidth, args.cardinality,
                            args.alpha, args.beta]
        # The SE variant uses the stronger augmentation pipeline below.
        strong_augmentor = True
    else:
        raise ValueError("Unsupported backbone.")

    # add class number and whether or not load pretrained model
    backbone_setting += [1000, args.pretrained]
    # create model
    model = backbone(*backbone_setting)
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(arch_name))
    else:
        print("=> creating model '{}'".format(arch_name))
    model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    train_criterion = nn.CrossEntropyLoss().cuda()
    val_criterion = nn.CrossEntropyLoss().cuda()

    # Data loading code
    valdir = os.path.join(args.data, 'val')
    val_loader = get_imagenet_dataflow(
        False, valdir, args.batch_size,
        get_augmentor(False, args.input_shape, strong_augmentor),
        workers=args.workers)

    log_folder = os.path.join(args.logdir, arch_name)
    if not os.path.exists(log_folder):
        os.makedirs(log_folder)

    # --evaluate: run validation once, print metrics, and exit.
    if args.evaluate:
        val_top1, val_top5, val_losses, val_speed = validate(
            val_loader, model, val_criterion)
        print('Val@{}: \tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\t'
              'Speed: {:.2f} ms/batch\t'.format(
                  args.input_shape, val_losses, val_top1, val_top5,
                  val_speed * 1000.0), flush=True)
        return

    traindir = os.path.join(args.data, 'train')
    train_loader = get_imagenet_dataflow(
        True, traindir, args.batch_size,
        get_augmentor(True, args.input_shape, strong_augmentor),
        workers=args.workers)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)
    if args.lr_scheduler == 'step':
        # step decay: x0.1 every 30 epochs
        scheduler = lr_scheduler.StepLR(optimizer, 30, gamma=0.1)
    elif args.lr_scheduler == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, args.epochs,
                                                   eta_min=0)
    else:
        raise ValueError("Unsupported scheduler.")

    tensorboard_logger.configure(os.path.join(log_folder))

    # optionally resume from a checkpoint; the log file is appended to when
    # resuming and truncated otherwise.
    best_top1 = 0.0
    if args.resume:
        logfile = open(os.path.join(log_folder, 'log.log'), 'a')
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_top1 = checkpoint['best_top1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logfile = open(os.path.join(log_folder, 'log.log'), 'w')

    # Record the full configuration and model topology on stdout and in the log.
    print(args, flush=True)
    print(model, flush=True)
    print(args, file=logfile, flush=True)
    print(model, file=logfile, flush=True)

    for epoch in range(args.start_epoch, args.epochs):
        # NOTE(review): scheduler.step(epoch) before the epoch is the legacy
        # (pre-PyTorch-1.1) ordering — confirm against the torch version pinned
        # by this project before changing it.
        scheduler.step(epoch)
        try:
            # get_lr get all lrs for every layer of current epoch, assume the lr for all layers are identical
            lr = scheduler.get_lr()[0]
        except Exception as e:
            lr = None

        # train for one epoch
        train_top1, train_top5, train_losses, train_speed, speed_data_loader, train_steps = \
            train(train_loader, model, train_criterion, optimizer, epoch + 1)
        # evaluate on validation set
        val_top1, val_top5, val_losses, val_speed = \
            validate(val_loader, model, val_criterion)

        # Per-epoch metrics: first to the log file, then to stdout.
        print('Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\t'
              'Data loading: {:.2f} ms/batch'.format(
                  epoch + 1, args.epochs, train_losses, train_top1,
                  train_top5, train_speed * 1000.0,
                  speed_data_loader * 1000.0), file=logfile, flush=True)
        print('Val  : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'.format(
            epoch + 1, args.epochs, val_losses, val_top1, val_top5,
            val_speed * 1000.0), file=logfile, flush=True)
        print('Train: [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch\t'
              'Data loading: {:.2f} ms/batch'.format(
                  epoch + 1, args.epochs, train_losses, train_top1,
                  train_top5, train_speed * 1000.0,
                  speed_data_loader * 1000.0), flush=True)
        print('Val  : [{:03d}/{:03d}]\tLoss: {:4.4f}\tTop@1: {:.4f}\tTop@5: {:.4f}\tSpeed: {:.2f} ms/batch'.format(
            epoch + 1, args.epochs, val_losses, val_top1, val_top5,
            val_speed * 1000.0), flush=True)

        # remember best prec@1 and save checkpoint
        is_best = val_top1 > best_top1
        best_top1 = max(val_top1, best_top1)
        save_dict = {'epoch': epoch + 1,
                     'arch': arch_name,
                     'state_dict': model.state_dict(),
                     'best_top1': best_top1,
                     'optimizer': optimizer.state_dict(),
                     }
        save_checkpoint(save_dict, is_best, filepath=log_folder)
        if lr is not None:
            # NOTE(review): 'learnnig-rate' is a typo but it is the persisted
            # tensorboard tag name — renaming it would orphan existing runs.
            tensorboard_logger.log_value('learnnig-rate', lr, epoch + 1)
        tensorboard_logger.log_value('val-top1', val_top1, epoch + 1)
        tensorboard_logger.log_value('val-loss', val_losses, epoch + 1)
        tensorboard_logger.log_value('train-top1', train_top1, epoch + 1)
        tensorboard_logger.log_value('train-loss', train_losses, epoch + 1)
        tensorboard_logger.log_value('best-val-top1', best_top1, epoch + 1)

    logfile.close()
def get_data(dataset_name):
    """Return the ImageNet dataflow for *dataset_name* at BATCH_SIZE."""
    is_train = dataset_name == 'train'
    augmentors = fbresnet_augmentor(is_train)
    return get_imagenet_dataflow(args.data, dataset_name, BATCH_SIZE,
                                 augmentors)
def get_data(name, batch):
    """Return the ImageNet dataflow for the given split and batch size."""
    # Pick train- or test-time augmentors depending on the split.
    augmentors = fbresnet_augmentor(name == 'train')
    return get_imagenet_dataflow(args.data, name, batch, augmentors)
'--load', required=True, help='.npy model file generated by tensorpack.utils.loadcaffe') parser.add_argument('-d', '--depth', help='resnet depth', required=True, type=int, choices=[50, 101, 152]) parser.add_argument('--input', help='an input image') parser.add_argument('--convert', help='npz output file to save the converted model') parser.add_argument('--eval', help='ILSVRC dir to run validation on') args = parser.parse_args() DEPTH = args.depth param = np.load(args.load, encoding='latin1').item() param = convert_param_name(param) if args.convert: assert args.convert.endswith('.npz') np.savez_compressed(args.convert, **param) if args.eval: ds = get_imagenet_dataflow(args.eval, 'val', 128, get_inference_augmentor()) eval_on_ILSVRC12(Model(), DictRestore(param), ds) elif args.input: run_test(param, args.input)
def get_data(name, batch):
    """ImageNet dataflow with the standard fb.resnet augmentation pipeline."""
    is_train = name == 'train'
    return get_imagenet_dataflow(
        args.data, name, batch, fbresnet_augmentor(is_train))