def get_config():
    """Build the TrainConfig for data-parallel training on every visible GPU.

    ``args.batch`` is the per-tower batch size; the total batch is that value
    times the number of towers (at least one, so a machine without GPUs still
    gets a config). The base learning rate is 0.01 scaled linearly by
    ``total_batch / 128`` and is multiplied by 0.1 at epochs 30, 60 and 80.

    Returns:
        TrainConfig: model, staged queue input, callbacks and epoch settings.
    """
    num_towers = max(get_num_gpu(), 1)
    per_tower_batch = args.batch
    total_batch = per_tower_batch * num_towers
    if total_batch != 128:
        logger.warn("AlexNet needs to be trained with a total batch size of 128.")
    BASE_LR = 0.01 * (total_batch / 128.)

    logger.info("Running on {} towers. Batch size per tower: {}".format(
        num_towers, per_tower_batch))
    train_flow = get_data('train', per_tower_batch)
    val_flow = get_data('val', per_tower_batch)

    # Top-1 / top-5 validation error, handed to the inference runner below.
    inferencers = [
        ClassificationError('wrong-top1', 'val-error-top1'),
        ClassificationError('wrong-top5', 'val-error-top5'),
    ]
    # (epoch, learning rate) pairs: step decay by a factor of ten.
    lr_schedule = [
        (epoch, BASE_LR * scale)
        for epoch, scale in ((0, 1), (30, 1e-1), (60, 1e-2), (80, 1e-3))
    ]

    callbacks = [
        ModelSaver(),
        GPUUtilizationTracker(),
        EstimatedTimeLeft(),
        ScheduledHyperParamSetter('learning_rate', lr_schedule),
        DataParallelInferenceRunner(
            val_flow, inferencers, list(range(num_towers))),
    ]

    return TrainConfig(
        model=Model(),
        data=StagingInput(QueueInput(train_flow)),
        callbacks=callbacks,
        # 1281167 is presumably the number of training images -- confirm.
        steps_per_epoch=1281167 // total_batch,
        max_epoch=100,
    )
def train():
    """Launch Batch-A3C training, splitting the visible GPUs between training and inference.

    With more than one GPU, the last ``ceil(num_gpu / 2)`` GPU indices are
    given to the predictors and the remaining ones train. With exactly one GPU
    both roles share GPU 0. Without a GPU a warning is logged and tower 0 is
    used for both (debugging only).

    Side effects: sets the logger directory, rebinds the module-level
    ``PREDICTOR_THREAD``, starts ``SIMULATOR_PROC`` simulator processes and
    blocks in ``launch_train_with_config``.
    """
    global PREDICTOR_THREAD

    dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    num_gpu = get_num_gpu()
    if num_gpu > 0:
        if num_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(num_gpu))[-num_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        # The slice is empty when there is a single GPU; fall back to GPU 0.
        train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = TrainConfig(
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            HumanHyperParamSetter('learning_rate'),
            HumanHyperParamSetter('entropy_beta'),
            master,
            StartProcOrThread(master),
            PeriodicTrigger(Evaluator(
                EVAL_EPISODE, ['state'], ['policy'], get_player),
                every_k_epochs=3),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    # Decide from the GPU count computed above: the config object is built
    # without any tower information, so it cannot tell how many GPUs train.
    # Zero or one GPU keeps the simple single-tower trainer.
    if num_gpu <= 1:
        trainer = SimpleTrainer()
    else:
        trainer = AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
def init_config():
    """Validate the available GPU count, fill in ``config.NUM_GPUS`` and print the config.

    The GPU count comes from ``hvd.size()`` when ``config.TRAINER`` is
    ``'horovod'`` and from ``get_num_gpu()`` otherwise. It must divide 8 or be
    a multiple of 8. A preset ``config.NUM_GPUS`` must equal that count under
    horovod and must not exceed it otherwise.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    using_horovod = config.TRAINER == 'horovod'
    available = hvd.size() if using_horovod else get_num_gpu()
    assert available % 8 == 0 or 8 % available == 0, available

    if config.NUM_GPUS is None:
        config.NUM_GPUS = available
    elif using_horovod:
        assert config.NUM_GPUS == available
    else:
        assert config.NUM_GPUS <= available

    print_config()
def get_config(model, fake=False):
    """Build the TrainConfig for multi-GPU training of ``model``.

    ``args.batch`` is the total batch size and must be divisible by the number
    of towers. The learning rate starts from 0.1 scaled linearly by
    ``args.batch / 256`` and is decayed tenfold at epochs 30, 60, 90 and 100.
    When the scaled rate exceeds 0.1, a linear warm-up from 0.1 over the first
    5 epochs is added.

    Args:
        model: the model description to train.
        fake: when true, train on ``FakeData`` with no callbacks and only 100
            steps per epoch (for testing or benchmarking).

    Returns:
        TrainConfig: the assembled training configuration.

    Raises:
        AssertionError: if ``args.batch`` is not divisible by the tower count.
    """
    nr_tower = max(get_num_gpu(), 1)
    assert args.batch % nr_tower == 0
    batch = args.batch // nr_tower

    logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))
    if batch < 32 or batch > 64:
        logger.warn("Batch size per tower not in [32, 64]. This probably will lead to worse accuracy than reported.")

    if fake:
        data = QueueInput(FakeData(
            [[batch, 224, 224, 3], [batch]], 1000, random=False, dtype='uint8'))
        callbacks = []
    else:
        data = QueueInput(get_data('train', batch))

        START_LR = 0.1
        BASE_LR = START_LR * (args.batch / 256.0)
        callbacks = [
            ModelSaver(),
            EstimatedTimeLeft(),
            ScheduledHyperParamSetter(
                'learning_rate', [
                    (0, min(START_LR, BASE_LR)), (30, BASE_LR * 1e-1), (60, BASE_LR * 1e-2),
                    (90, BASE_LR * 1e-3), (100, BASE_LR * 1e-4)]),
        ]
        if BASE_LR > START_LR:
            # Large total batch: ramp up linearly instead of starting at BASE_LR.
            callbacks.append(
                ScheduledHyperParamSetter(
                    'learning_rate', [(0, START_LR), (5, BASE_LR)], interp='linear'))

        infs = [ClassificationError('wrong-top1', 'val-error-top1'),
                ClassificationError('wrong-top5', 'val-error-top5')]
        dataset_val = get_data('val', batch)
        if nr_tower == 1:
            # single-GPU inference with queue prefetch
            callbacks.append(InferenceRunner(QueueInput(dataset_val), infs))
        else:
            # multi-GPU inference (with mandatory queue prefetch)
            callbacks.append(DataParallelInferenceRunner(
                dataset_val, infs, list(range(nr_tower))))

    return TrainConfig(
        model=model,
        data=data,
        callbacks=callbacks,
        # Follow the ``fake`` parameter (not the global ``args.fake``) so the
        # epoch length always matches the data source chosen above.
        steps_per_epoch=100 if fake else 1281167 // args.batch,
        max_epoch=105,
    )
def finalize_configs(is_training):
    """
    Sanity-check the global config ``_C`` and derive dependent entries.

    Derived entries: ``DATA.NUM_CLASS``, ``RPN.NUM_ANCHOR``,
    ``FPN.RESOLUTION_REQUIREMENT`` and, in FPN mode, ``PREPROC.MAX_SIZE``
    rounded up to a multiple of the resolution requirement. For training the
    GPU count is validated and ``TRAIN.NUM_GPUS`` is filled in when unset;
    for inference cuDNN autotune is switched off through the environment.

    Args:
        is_training: whether the config is being finalized for training.
    """
    _C.DATA.NUM_CLASS = _C.DATA.NUM_CATEGORY + 1  # +1 background
    _C.RPN.NUM_ANCHOR = len(_C.RPN.ANCHOR_SIZES) * len(_C.RPN.ANCHOR_RATIOS)
    assert len(_C.FPN.ANCHOR_STRIDES) == len(_C.RPN.ANCHOR_SIZES)
    # image size into the backbone has to be multiple of this number
    # [3] because we build FPN with features r2,r3,r4,r5
    _C.FPN.RESOLUTION_REQUIREMENT = _C.FPN.ANCHOR_STRIDES[3]

    if _C.MODE_FPN:
        multiple = _C.FPN.RESOLUTION_REQUIREMENT * 1.
        _C.PREPROC.MAX_SIZE = np.ceil(_C.PREPROC.MAX_SIZE / multiple) * multiple

    if not is_training:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
    else:
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
        assert _C.TRAINER in ['horovod', 'replicated'], _C.TRAINER

        # setup NUM_GPUS
        if _C.TRAINER == 'horovod':
            import horovod.tensorflow as hvd
            num_gpus = hvd.size()
        else:
            assert 'OMPI_COMM_WORLD_SIZE' not in os.environ
            num_gpus = get_num_gpu()
        assert num_gpus % 8 == 0 or 8 % num_gpus == 0, num_gpus

        if _C.TRAIN.NUM_GPUS is None:
            _C.TRAIN.NUM_GPUS = num_gpus
        elif _C.TRAINER == 'horovod':
            assert _C.TRAIN.NUM_GPUS == num_gpus
        else:
            assert _C.TRAIN.NUM_GPUS <= num_gpus

    logger.info("Config: ------------------------------------------\n" + str(_C))
def get_config():
    """Build the TrainConfig for data-parallel training with learning-rate warm-up.

    ``args.batch`` is the per-tower batch size. The total batch must be at
    least 256. The learning rate ramps linearly from 0.01 to
    ``0.01 * total_batch / 256`` over the first 3 epochs and is then dropped
    tenfold at epochs 30, 60 and 80.

    Returns:
        TrainConfig: model, staged queue input, callbacks and epoch settings.

    Raises:
        AssertionError: if the total batch size is below 256.
    """
    num_towers = max(get_num_gpu(), 1)
    per_tower_batch = args.batch
    total_batch = per_tower_batch * num_towers
    assert total_batch >= 256   # otherwise the learning rate warmup is wrong.
    BASE_LR = 0.01 * (total_batch / 256.)

    logger.info("Running on {} towers. Batch size per tower: {}".format(
        num_towers, per_tower_batch))
    train_flow = get_data('train', per_tower_batch)
    val_flow = get_data('val', per_tower_batch)

    inferencers = [
        ClassificationError('wrong-top1', 'val-error-top1'),
        ClassificationError('wrong-top5', 'val-error-top5'),
    ]
    warmup = [(0, 0.01), (3, max(BASE_LR, 0.01))]
    decay = [(epoch, BASE_LR * scale)
             for epoch, scale in ((30, 1e-1), (60, 1e-2), (80, 1e-3))]

    callbacks = [
        ModelSaver(),
        GPUUtilizationTracker(),
        EstimatedTimeLeft(),
        ScheduledHyperParamSetter('learning_rate', warmup, interp='linear'),
        ScheduledHyperParamSetter('learning_rate', decay),
        DataParallelInferenceRunner(
            val_flow, inferencers, list(range(num_towers))),
    ]

    # Named ``staged_input`` rather than ``input`` to avoid shadowing the builtin.
    staged_input = StagingInput(QueueInput(train_flow), nr_stage=1)
    return TrainConfig(
        model=Model(),
        data=staged_input,
        callbacks=callbacks,
        steps_per_epoch=1281167 // total_batch,
        max_epoch=100,
    )
InferenceRunner(dataset_val, [ ClassificationError('wrong-top1', 'val-top1-error'), ClassificationError('wrong-top5', 'val-top5-error')]), ScheduledHyperParamSetter('learning_rate', [(8, 0.03), (14, 0.02), (17, 5e-3), (19, 3e-3), (24, 1e-3), (26, 2e-4), (30, 5e-5)]) ], model=Model(), steps_per_epoch=5000, max_epoch=80, ) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') parser.add_argument('--load', help='load model') parser.add_argument('--data', help='ImageNet data root directory', required=True) args = parser.parse_args() if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu config = get_config() if args.load: config.session_init = SaverRestore(args.load) nr_tower = get_num_gpu() assert nr_tower == NUM_GPU launch_train_with_config(config, SyncMultiGPUTrainer(NUM_GPU))
M = tf.keras.models.Model(input, x, name='resnet50') return M if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--data', help='ILSVRC dataset dir') parser.add_argument('--fake', help='use fakedata to test or benchmark this model', action='store_true') args = parser.parse_args() logger.set_logger_dir(os.path.join("train_log", "imagenet-resnet-keras")) tf.keras.backend.set_image_data_format('channels_first') num_gpu = get_num_gpu() if args.fake: df_train = FakeData([[64, 224, 224, 3], [64, 1000]], 5000, random=False, dtype='uint8') df_val = FakeData([[64, 224, 224, 3], [64, 1000]], 5000, random=False) else: batch_size = TOTAL_BATCH_SIZE // num_gpu assert args.data is not None df_train = get_imagenet_dataflow(args.data, 'train', batch_size, fbresnet_augmentor(True)) df_val = get_imagenet_dataflow(args.data, 'val', batch_size, fbresnet_augmentor(False)) def one_hot(label):
def get_data():
    """Return the batched MNIST dataflows as a ``(train, test)`` pair.

    Training data is batched by 128. Test data is batched by 256 with
    ``remainder=True``.
    """
    train_flow = BatchData(dataset.Mnist('train'), 128)
    test_flow = BatchData(dataset.Mnist('test'), 256, remainder=True)
    return train_flow, test_flow


if __name__ == '__main__':
    logger.auto_set_dir()
    dataset_train, dataset_test = get_data()

    cfg = TrainConfig(
        model=Model(),
        dataflow=dataset_train,
        callbacks=[
            KerasPhaseCallback(True),   # for Keras training
            ModelSaver(),
            InferenceRunner(
                dataset_test,
                ScalarStats(['cross_entropy_loss', 'accuracy'])),
        ],
        max_epoch=100,
    )

    if get_num_gpu() > 1:
        # multi GPU:
        # "Replicated" multi-gpu trainer is not supported for Keras model
        # since Keras does not respect variable scopes.
        launch_train_with_config(cfg, SyncMultiGPUTrainerParameterServer(2))
    else:
        # single GPU:
        launch_train_with_config(cfg, QueueInputTrainer())
]), ScheduledHyperParamSetter('learning_rate', [(8, 0.03), (14, 0.02), (17, 5e-3), (19, 3e-3), (24, 1e-3), (26, 2e-4), (30, 5e-5)]) ], model=Model(), steps_per_epoch=5000, max_epoch=80, ) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') parser.add_argument('--load', help='load model') parser.add_argument('--data', help='ImageNet data root directory', required=True) args = parser.parse_args() if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu config = get_config() if args.load: config.session_init = SaverRestore(args.load) nr_tower = get_num_gpu() assert nr_tower == NUM_GPU launch_train_with_config(config, SyncMultiGPUTrainer(NUM_GPU))
if cnt == 500: return if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') parser.add_argument('--data', help='ILSVRC dataset dir') parser.add_argument('--depth', type=int, default=18) parser.add_argument('--load', help='load model') parser.add_argument('--cam', action='store_true') args = parser.parse_args() DEPTH = args.depth if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu num_gpu = get_num_gpu() BATCH_SIZE = TOTAL_BATCH_SIZE // num_gpu if args.cam: BATCH_SIZE = 128 # something that can run on one gpu viz_cam(args.load, args.data) sys.exit() logger.auto_set_dir() config = get_config() if args.load: config.session_init = get_model_loader(args.load) launch_train_with_config(config, SyncMultiGPUTrainerParameterServer(num_gpu))
parser.add_argument('--eval', action='store_true', help='run offline evaluation instead of training') parser.add_argument('--batch', default=256, type=int, help="total batch size. " "Note that it's best to keep per-GPU batch size in [32, 64] to obtain the best accuracy." "Pretrained models listed in README were trained with batch=32x8.") parser.add_argument('--mode', choices=['resnet', 'preact', 'se'], help='variants of resnet to use', default='resnet') args = parser.parse_args() if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu model = Model(args.depth, args.mode) model.data_format = args.data_format if args.eval: batch = 128 # something that can run on one gpu ds = get_data('val', batch) eval_on_ILSVRC12(model, get_model_loader(args.load), ds) else: if args.fake: logger.set_logger_dir(os.path.join('train_log', 'tmp'), 'd') else: logger.set_logger_dir( os.path.join('train_log', 'imagenet-{}-d{}-batch{}'.format(args.mode, args.depth, args.batch))) config = get_config(model, fake=args.fake) if args.load: config.session_init = get_model_loader(args.load) trainer = SyncMultiGPUTrainerReplicated(max(get_num_gpu(), 1)) launch_train_with_config(config, trainer)
def finalize_configs(is_training):
    """
    Sanity-check the global config ``_C`` and derive dependent entries.

    The config is unfrozen while derived keys are written and frozen again at
    the end. Dataset names given as a single string are wrapped into
    one-element tuples, class names are read from ``DatasetRegistry`` for the
    first listed dataset, and anchor / FPN / cascade settings are checked for
    consistency. For training, a string ``TRAIN.LR_SCHEDULE`` is expanded into
    a list and the GPU count is validated; for inference cuDNN autotune is
    switched off. ``TRAIN.NUM_GPUS`` is filled in when unset.

    Args:
        is_training: whether the config is being finalized for training.
    """
    _C.freeze(False)  # populate new keys now

    # support single string (the typical case) as well
    if isinstance(_C.DATA.VAL, six.string_types):
        _C.DATA.VAL = (_C.DATA.VAL, )
    if isinstance(_C.DATA.TRAIN, six.string_types):
        _C.DATA.TRAIN = (_C.DATA.TRAIN, )

    # finalize dataset definitions ...
    from mot.object_detection.dataset import DatasetRegistry
    all_datasets = list(_C.DATA.TRAIN) + list(_C.DATA.VAL)
    _C.DATA.CLASS_NAMES = DatasetRegistry.get_metadata(all_datasets[0], "class_names")
    _C.DATA.NUM_CATEGORY = len(_C.DATA.CLASS_NAMES) - 1

    assert _C.BACKBONE.NORM in ['FreezeBN', 'SyncBN', 'GN', 'None'], _C.BACKBONE.NORM
    if _C.BACKBONE.NORM != 'FreezeBN':
        assert not _C.BACKBONE.FREEZE_AFFINE
    assert _C.BACKBONE.FREEZE_AT in [0, 1, 2]

    _C.RPN.NUM_ANCHOR = len(_C.RPN.ANCHOR_SIZES) * len(_C.RPN.ANCHOR_RATIOS)
    assert len(_C.FPN.ANCHOR_STRIDES) == len(_C.RPN.ANCHOR_SIZES)
    # image size into the backbone has to be multiple of this number
    # [3] because we build FPN with features r2,r3,r4,r5
    _C.FPN.RESOLUTION_REQUIREMENT = _C.FPN.ANCHOR_STRIDES[3]

    if _C.MODE_FPN:
        multiple = _C.FPN.RESOLUTION_REQUIREMENT * 1.
        _C.PREPROC.MAX_SIZE = np.ceil(_C.PREPROC.MAX_SIZE / multiple) * multiple
        assert _C.FPN.PROPOSAL_MODE in ['Level', 'Joint']
        assert _C.FPN.FRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.MRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.NORM in ['None', 'GN']

        if _C.FPN.CASCADE:
            # the first threshold is the proposal sampling threshold
            assert _C.CASCADE.IOUS[0] == _C.FRCNN.FG_THRESH
            assert len(_C.CASCADE.BBOX_REG_WEIGHTS) == len(_C.CASCADE.IOUS)

    if not is_training:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        num_gpus = get_num_gpu()
    else:
        short_edge = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE
        if isinstance(short_edge, (list, tuple)) and short_edge[1] - short_edge[0] > 100:
            # don't autotune if augmentation is on
            os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
        assert _C.TRAINER in ['horovod', 'replicated'], _C.TRAINER

        schedule = _C.TRAIN.LR_SCHEDULE
        if isinstance(schedule, six.string_types):
            if schedule.endswith("x"):
                # "1x" .. "9x" shorthands, expressed in thousands of iterations.
                LR_SCHEDULE_KITER = {"1x": [120, 160, 180]}
                for k in range(2, 10):
                    LR_SCHEDULE_KITER["{}x".format(k)] = [
                        180 * k - 120, 180 * k - 40, 180 * k]
                _C.TRAIN.LR_SCHEDULE = [
                    kiter * 1000 for kiter in LR_SCHEDULE_KITER[schedule]]
            else:
                # NOTE(review): eval() executes arbitrary code from the config
                # string -- only safe while configs come from trusted sources.
                _C.TRAIN.LR_SCHEDULE = eval(schedule)

        # setup NUM_GPUS
        if _C.TRAINER == 'horovod':
            import horovod.tensorflow as hvd
            num_gpus = hvd.size()
            logger.info("Horovod Rank={}, Size={}, LocalRank={}".format(
                hvd.rank(), hvd.size(), hvd.local_rank()))
        else:
            assert 'OMPI_COMM_WORLD_SIZE' not in os.environ
            num_gpus = get_num_gpu()
        assert num_gpus > 0, "Has to train with GPU!"
        assert num_gpus % 8 == 0 or 8 % num_gpus == 0, \
            "Can only train with 1,2,4 or >=8 GPUs, but found {} GPUs".format(num_gpus)

    if _C.TRAIN.NUM_GPUS is None:
        _C.TRAIN.NUM_GPUS = num_gpus
    elif _C.TRAINER == 'horovod':
        assert _C.TRAIN.NUM_GPUS == num_gpus
    else:
        assert _C.TRAIN.NUM_GPUS <= num_gpus

    _C.freeze()
    logger.info("Config: ------------------------------------------\n" + str(_C))
def finalize_configs(is_training):
    """
    Sanity-check the global config ``_C`` and derive dependent entries.

    The config is unfrozen while derived keys are written and frozen again at
    the end. Dataset names given as a single string are wrapped into
    one-element tuples, class names are read from ``DatasetRegistry`` for the
    first listed dataset, and anchor / FPN / cascade settings are checked for
    consistency. For training, a string ``TRAIN.LR_SCHEDULE`` is expanded into
    a list and the GPU count is validated; for inference cuDNN autotune is
    switched off. ``TRAIN.NUM_GPUS`` is filled in when unset.

    Args:
        is_training: whether the config is being finalized for training.
    """
    _C.freeze(False)  # populate new keys now

    # support single string (the typical case) as well
    if isinstance(_C.DATA.VAL, six.string_types):
        _C.DATA.VAL = (_C.DATA.VAL, )
    if isinstance(_C.DATA.TRAIN, six.string_types):
        _C.DATA.TRAIN = (_C.DATA.TRAIN, )

    # finalize dataset definitions ...
    from dataset import DatasetRegistry
    all_datasets = list(_C.DATA.TRAIN) + list(_C.DATA.VAL)
    # Class names come from the registry entry of the first listed dataset.
    _C.DATA.CLASS_NAMES = DatasetRegistry.get_metadata(all_datasets[0], "class_names")
    _C.DATA.NUM_CATEGORY = len(_C.DATA.CLASS_NAMES) - 1

    assert _C.BACKBONE.NORM in ['FreezeBN', 'SyncBN', 'GN', 'None'], _C.BACKBONE.NORM
    if _C.BACKBONE.NORM != 'FreezeBN':
        assert not _C.BACKBONE.FREEZE_AFFINE
    assert _C.BACKBONE.FREEZE_AT in [0, 1, 2]

    _C.RPN.NUM_ANCHOR = len(_C.RPN.ANCHOR_SIZES) * len(_C.RPN.ANCHOR_RATIOS)
    assert len(_C.FPN.ANCHOR_STRIDES) == len(_C.RPN.ANCHOR_SIZES)
    # image size into the backbone has to be multiple of this number
    # [3] because we build FPN with features r2,r3,r4,r5
    _C.FPN.RESOLUTION_REQUIREMENT = _C.FPN.ANCHOR_STRIDES[3]

    if _C.MODE_FPN:
        multiple = _C.FPN.RESOLUTION_REQUIREMENT * 1.
        _C.PREPROC.MAX_SIZE = np.ceil(_C.PREPROC.MAX_SIZE / multiple) * multiple
        assert _C.FPN.PROPOSAL_MODE in ['Level', 'Joint']
        assert _C.FPN.FRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.MRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.NORM in ['None', 'GN']

        if _C.FPN.CASCADE:
            # the first threshold is the proposal sampling threshold
            assert _C.CASCADE.IOUS[0] == _C.FRCNN.FG_THRESH
            assert len(_C.CASCADE.BBOX_REG_WEIGHTS) == len(_C.CASCADE.IOUS)

    if not is_training:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        num_gpus = get_num_gpu()
    else:
        short_edge = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE
        if isinstance(short_edge, (list, tuple)) and short_edge[1] - short_edge[0] > 100:
            # don't autotune if augmentation is on
            os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
        assert _C.TRAINER in ['horovod', 'replicated'], _C.TRAINER

        schedule = _C.TRAIN.LR_SCHEDULE
        if isinstance(schedule, six.string_types):
            if schedule.endswith("x"):
                # "1x" .. "9x" shorthands, expressed in thousands of iterations.
                LR_SCHEDULE_KITER = {"1x": [120, 160, 180]}
                for k in range(2, 10):
                    LR_SCHEDULE_KITER["{}x".format(k)] = [
                        180 * k - 120, 180 * k - 40, 180 * k]
                _C.TRAIN.LR_SCHEDULE = [
                    kiter * 1000 for kiter in LR_SCHEDULE_KITER[schedule]]
            else:
                # NOTE(review): eval() executes arbitrary code from the config
                # string -- only safe while configs come from trusted sources.
                _C.TRAIN.LR_SCHEDULE = eval(schedule)

        # setup NUM_GPUS
        if _C.TRAINER == 'horovod':
            import horovod.tensorflow as hvd
            num_gpus = hvd.size()
            logger.info("Horovod Rank={}, Size={}, LocalRank={}".format(
                hvd.rank(), hvd.size(), hvd.local_rank()))
        else:
            assert 'OMPI_COMM_WORLD_SIZE' not in os.environ
            num_gpus = get_num_gpu()
        assert num_gpus > 0, "Has to train with GPU!"
        assert num_gpus % 8 == 0 or 8 % num_gpus == 0, \
            "Can only train with 1,2,4 or >=8 GPUs, but found {} GPUs".format(num_gpus)

    if _C.TRAIN.NUM_GPUS is None:
        _C.TRAIN.NUM_GPUS = num_gpus
    elif _C.TRAINER == 'horovod':
        assert _C.TRAIN.NUM_GPUS == num_gpus
    else:
        assert _C.TRAIN.NUM_GPUS <= num_gpus

    _C.freeze()
    logger.info("Config: ------------------------------------------\n" + str(_C))
def tp_evaluation(args, cfg, sess, model):
    """Run batched inference over the evaluation dataflow and accumulate per-patient pixel statistics.

    Slices are collected into batches of ``cfg.batch_size`` (the final batch
    may be smaller), pushed through ``model.ops["seg_map"]``, thresholded at a
    sigmoid probability of 0.5 and post-processed. For every slice the
    intersection, ground-truth area and predicted area are added to the
    owning patient's ``pixel_info``. Optionally, directories of slices with a
    Dice score below 0.3 are written next to ``args.pkl_dir``.

    Args:
        args: parsed command-line options; reads ``mode``, ``train_dir``,
            ``testset_dir``, ``min_num_workers``, ``batch_size``,
            ``model_file``, ``pkl_dir``, ``viz``, ``bad_slice_output`` and
            ``eval_debug``.
        cfg: run configuration; reads ``batch_size`` and
            ``preprocess["normalize"]``.
        sess: TensorFlow session the checkpoint is restored into and run with.
        model: object exposing ``ops["seg_map"]`` and the ``in_im`` input.

    Raises:
        AssertionError: if a patient id parsed from a data directory is not
            32 characters long, or the collected batch lists disagree in length.
    """
    num_gpu = get_num_gpu()
    df = PneuSegDF(args.mode, None, args.train_dir, args.testset_dir,
                   args.min_num_workers, cfg)
    ds = df.eval_prepared(num_gpu, args.batch_size)
    tf.train.Saver().restore(sess, args.model_file)
    if os.path.exists(args.pkl_dir):
        input(
            "Result file already exists. Press enter to continue and overwrite it when inference is done..."
        )

    num_samples = len(ds)
    pbar = tqdm(total=num_samples)
    ds.reset_state()
    pneu_eval = Evaluation()
    in_ims, in_gts, p_infos, in_og_ims = [], [], [], []
    bad_slice_dirs = []

    for idx, in_data in enumerate(ds):
        if not df.ex_process.og_shape:
            df.ex_process.og_shape = in_data[1].shape[:-1]
        in_ims.append(in_data[0])
        in_gts.append(in_data[1])
        if args.viz:
            in_og_ims.append(in_data[4])
        df.ex_process.tl_list.append(in_data[2])
        data_dir = in_data[3]
        pneu_eval.add_to_p_map(data_dir)
        # presumably the patient id is the third path component from the end
        # and a 32-character hash -- verify against the dataset layout.
        p_id = data_dir.split('/')[-3]
        assert len(p_id) == 32
        p_infos.append((p_id, data_dir))

        is_last = idx == num_samples - 1
        if len(in_ims) == cfg.batch_size or is_last:
            assert len(in_ims) == len(in_gts) and len(in_ims) == len(p_infos)
            im_batch, gt_batch = np.array(in_ims), np.array(in_gts)
            pred = sess.run(model.ops["seg_map"],
                            feed_dict={model.in_im: im_batch})
            pred = (1 / (1 + np.exp(-pred))) > .5
            pred = df.ex_process.batch_postprocess(pred)
            if args.viz:
                og_im_batch = np.array(in_og_ims)
                og_im_batch = im_normalize(
                    og_im_batch,
                    cfg.preprocess["normalize"]["ct_interval"],
                    cfg.preprocess["normalize"]["norm_by_interval"])
                viz_patient(og_im_batch, pred, gt_batch, True)

            intersection = pred * gt_batch
            for i in range(pred.shape[0]):
                itsect = np.sum(intersection[i, :, :, :])
                ga = np.sum(gt_batch[i, :, :, :])
                pa = np.sum(pred[i, :, :, :])
                pixel_info = pneu_eval.person_map[p_infos[i][0]].pixel_info
                pixel_info["intersection"] += itsect
                pixel_info["gt_area"] += ga
                pixel_info["pred_area"] += pa
                if args.bad_slice_output:
                    # Skip slices empty in both masks; flag Dice < 0.3.
                    if (ga != 0 or pa != 0) and (2 * itsect + 1e-8) / (ga + pa + 1e-8) < .3:
                        bad_slice_dirs.append(p_infos[i][1])
                        # NOTE(review): nesting of this debug print was
                        # reconstructed from whitespace-collapsed source --
                        # confirm it should fire only for flagged slices.
                        if args.eval_debug:
                            print(p_infos[i][1])

            # Advance by the slices actually processed so the final, smaller
            # batch does not push the bar past its total.
            pbar.update(len(in_ims))
            in_ims, in_gts, p_infos, in_og_ims = [], [], [], []

        if is_last:
            break
        if args.eval_debug and idx == 2000:
            break
    pbar.close()

    if args.bad_slice_output:
        try:
            with open(args.pkl_dir.replace(".pkl", "_bad_slice.json"), "w") as f:
                json.dump(bad_slice_dirs, f)
        except Exception:
            # Best-effort fallback; unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit propagate.
            print("Failed saving as json. Save as pickle instead...")
            with open(args.pkl_dir.replace(".pkl", "_bad_slice.pkl"), "wb") as f:
                pickle.dump(bad_slice_dirs, f)
    pneu_eval.pixel_wise_result(args.pkl_dir, True)
def train(args, cfg):
    """Set up output directories, data, callbacks and the LR schedule, then start training.

    Args:
        args: parsed command-line options; reads ``mode``, ``train_dir``,
            ``testset_dir``, ``min_num_workers``, ``config``, ``train_debug``,
            ``resume`` and ``resume_epoch``.
        cfg: run configuration; reads ``batch_size``, ``network``,
            ``lr_schedule`` and ``max_epoch``.

    Side effects: copies the config file into the output directory (unless a
    file of that name is already there, in which case the user is prompted),
    sets the logger directory and blocks in ``launch_train_with_config``.
    """
    out_dirs = gen_outdirs(args, "tp")
    output_dir = out_dirs["output_dir"]
    out_res_dir = out_dirs["out_res_dir"]
    df = PneuSegDF(args.mode, out_res_dir, args.train_dir, args.testset_dir,
                   args.min_num_workers, cfg)
    num_gpu = max(get_num_gpu(), 1)
    ds = df.prepared(num_gpu, cfg.batch_size)

    # Avoid overwriting config file
    if not os.path.exists(pj(output_dir, os.path.basename(args.config))):
        shutil.copy(args.config, output_dir)
    else:
        input(
            "Config file will NOT be overwritten. Press Enter to continue...")
    logger.set_logger_dir(pj(output_dir, "log"))

    callback_list = [
        # PeriodicCallback overrides the frequency of what it wraps
        PeriodicCallback(ModelSaver(50, checkpoint_dir=output_dir),
                         every_k_epochs=1),
        GPUUtilizationTracker(),
        MergeAllSummaries(1 if args.train_debug else 0),
    ]
    if cfg.network["norm_layer"] == "BN_layers":
        callback_list.append(BN_layers_update())

    schedule_type = cfg.lr_schedule["type"]
    if schedule_type == "epoch_wise_constant":
        drop_epochs = [0] + cfg.lr_schedule["epoch_to_drop_lr"]
        schedule = [(ep, lr / num_gpu)
                    for ep, lr in zip(drop_epochs, cfg.lr_schedule["lr"])]
        callback_list.append(
            ScheduledHyperParamSetter("learning_rate", schedule))
    elif schedule_type == "halved":
        first_drop = cfg.lr_schedule["first_epoch2drop"]
        period = cfg.lr_schedule["period"]
        schedule = [(0, cfg.lr_schedule["init_lr"])]
        # NOTE(review): every drop divides the previous rate by
        # decay_rate * num_gpu, so the GPU factor compounds per drop while
        # init_lr itself is not scaled -- confirm this is intended.
        for epoch in range(first_drop, cfg.max_epoch, period):
            previous_lr = schedule[int((epoch - first_drop) / period)][1]
            schedule.append(
                (epoch,
                 previous_lr / (cfg.lr_schedule["decay_rate"] * num_gpu)))
        print(schedule)
        callback_list.append(
            ScheduledHyperParamSetter("learning_rate", schedule))

    steps_per_epoch = len(ds) // num_gpu + 1
    train_cfg = TrainConfig(
        model=Tensorpack_model(cfg, steps_per_epoch),
        data=QueueInput(ds),
        steps_per_epoch=steps_per_epoch,
        callbacks=callback_list,
        monitors=[
            ScalarPrinter(True),
            TFEventWriter(),
        ],
        max_epoch=cfg.max_epoch,
        session_init=SmartInit(args.resume),
        starting_epoch=args.resume_epoch)

    if num_gpu > 1:
        trainer = SyncMultiGPUTrainerReplicated(num_gpu)
    else:
        trainer = SimpleTrainer()
    launch_train_with_config(train_cfg, trainer)
def get_config(model, fake=False, start_epoch=1):
    """Build the TrainConfig for multi-GPU training of ``model``.

    ``args.batch`` is the total batch size and must be divisible by the number
    of towers. The learning rate is adjusted by a ``StatMonitorParamSetter``
    that watches ``val-error-top1`` and multiplies the rate by 0.1. When the
    batch-scaled rate exceeds 0.1, a linear warm-up from 0.1 over the first
    5 epochs is added.

    Args:
        model: the model description to train.
        fake: when true, train on ``FakeData`` with no callbacks and only 100
            steps per epoch (for testing or benchmarking).
        start_epoch: epoch number training starts from.

    Returns:
        TrainConfig: the assembled training configuration.

    Raises:
        AssertionError: if ``args.batch`` is not divisible by the tower count.
    """
    nr_tower = max(get_num_gpu(), 1)
    assert args.batch % nr_tower == 0
    batch = args.batch // nr_tower

    logger.info("Running on {} towers. Batch size per tower: {}".format(
        nr_tower, batch))
    if batch < 32 or batch > 64:
        logger.warn(
            "Batch size per tower not in [32, 64]. This probably will lead to worse accuracy than reported."
        )

    if fake:
        data = QueueInput(
            FakeData([[batch, 224, 224, 3], [batch]],
                     1000,
                     random=False,
                     dtype='uint8'))
        callbacks = []
    else:
        data = QueueInput(get_data('train', batch))

        START_LR = 0.1
        BASE_LR = START_LR * (args.batch / 256.0)
        callbacks = [
            ModelSaver(),
            EstimatedTimeLeft(),
            # Validation-driven decay replaces a fixed epoch schedule here.
            StatMonitorParamSetter('learning_rate', 'val-error-top1',
                                   lambda x: x * 0.1, 1e-4, 5),
        ]
        if BASE_LR > START_LR:
            # Large total batch: ramp up linearly instead of starting at BASE_LR.
            callbacks.append(
                ScheduledHyperParamSetter('learning_rate', [(0, START_LR),
                                                            (5, BASE_LR)],
                                          interp='linear'))

        infs = [
            ClassificationError('wrong-top1', 'val-error-top1'),
            ClassificationError('wrong-top5', 'val-error-top5')
        ]
        dataset_val = get_data('val', batch)
        if nr_tower == 1:
            # single-GPU inference with queue prefetch
            callbacks.append(InferenceRunner(QueueInput(dataset_val), infs))
        else:
            # multi-GPU inference (with mandatory queue prefetch)
            callbacks.append(
                DataParallelInferenceRunner(dataset_val, infs,
                                            list(range(nr_tower))))

    return TrainConfig(
        model=model,
        data=data,
        callbacks=callbacks,
        # Follow the ``fake`` parameter (not the global ``args.fake``) so the
        # epoch length always matches the data source chosen above.
        # 419654 is presumably the training-set size -- confirm.
        steps_per_epoch=100 if fake else (419654) // args.batch,
        max_epoch=105,
        starting_epoch=start_epoch,
    )
parser.add_argument('-n', '--num_units', help='number of units in each stage', type=int, default=18) parser.add_argument('--load', help='load model for training') args = parser.parse_args() NUM_UNITS = args.num_units if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu logger.auto_set_dir() dataset_train = get_data('train') dataset_test = get_data('test') config = TrainConfig( model=Model(n=NUM_UNITS), dataflow=dataset_train, callbacks=[ ModelSaver(), InferenceRunner(dataset_test, [ScalarStats('cost'), ClassificationError('wrong_vector')]), ScheduledHyperParamSetter('learning_rate', [(1, 0.1), (82, 0.01), (123, 0.001), (300, 0.0002)]) ], max_epoch=400, session_init=SaverRestore(args.load) if args.load else None ) num_gpu = max(get_num_gpu(), 1) launch_train_with_config(config, SyncMultiGPUTrainerParameterServer(num_gpu))
cv2.imwrite("out{}.png".format('-fused' if k == 5 else str(k + 1)), pred * 255) logger.info("Results saved to out*.png") else: pred = outputs[5][0] cv2.imwrite(output, pred * 255) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') parser.add_argument('--load', help='load model') parser.add_argument('--view', help='view dataset', action='store_true') parser.add_argument('--run', help='run model on images') parser.add_argument('--output', help='fused output filename. default to out-fused.png') args = parser.parse_args() if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu if args.view: view_data() elif args.run: run(args.load, args.run, args.output) else: config = get_config() if args.load: config.session_init = get_model_loader(args.load) launch_train_with_config(config, SyncMultiGPUTrainer(max(get_num_gpu(), 1)))
def finalize_configs(is_training):
    """
    Sanity-check the global config ``_C`` and derive dependent entries.

    The config is unfrozen while derived keys are written and frozen again at
    the end. ``DATA.NUM_CLASS`` is derived from ``DATA.NUM_CATEGORY``,
    ``DATA.BASEDIR`` has ``~`` expanded, a single-string ``DATA.VAL`` is
    wrapped into a tuple, and anchor / FPN / cascade settings are checked for
    consistency. For training the trainer type and GPU count are validated;
    for inference cuDNN autotune is switched off. ``TRAIN.NUM_GPUS`` is
    filled in when unset.

    Args:
        is_training: whether the config is being finalized for training.
    """
    _C.freeze(False)  # populate new keys now
    _C.DATA.NUM_CLASS = _C.DATA.NUM_CATEGORY + 1  # +1 background
    _C.DATA.BASEDIR = os.path.expanduser(_C.DATA.BASEDIR)
    # support single string (the typical case) as well
    if isinstance(_C.DATA.VAL, six.string_types):
        _C.DATA.VAL = (_C.DATA.VAL, )

    assert _C.BACKBONE.NORM in ['FreezeBN', 'SyncBN', 'GN', 'None'], _C.BACKBONE.NORM
    if _C.BACKBONE.NORM != 'FreezeBN':
        assert not _C.BACKBONE.FREEZE_AFFINE
    assert _C.BACKBONE.FREEZE_AT in [0, 1, 2]

    _C.RPN.NUM_ANCHOR = len(_C.RPN.ANCHOR_SIZES) * len(_C.RPN.ANCHOR_RATIOS)
    assert len(_C.FPN.ANCHOR_STRIDES) == len(_C.RPN.ANCHOR_SIZES)
    # image size into the backbone has to be multiple of this number
    # [3] because we build FPN with features r2,r3,r4,r5
    _C.FPN.RESOLUTION_REQUIREMENT = _C.FPN.ANCHOR_STRIDES[3]

    if _C.MODE_FPN:
        multiple = _C.FPN.RESOLUTION_REQUIREMENT * 1.
        _C.PREPROC.MAX_SIZE = np.ceil(_C.PREPROC.MAX_SIZE / multiple) * multiple
        assert _C.FPN.PROPOSAL_MODE in ['Level', 'Joint']
        assert _C.FPN.FRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.MRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.NORM in ['None', 'GN']

        if _C.FPN.CASCADE:
            # the first threshold is the proposal sampling threshold
            assert _C.CASCADE.IOUS[0] == _C.FRCNN.FG_THRESH
            assert len(_C.CASCADE.BBOX_REG_WEIGHTS) == len(_C.CASCADE.IOUS)

    if not is_training:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        num_gpus = get_num_gpu()
    else:
        short_edge = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE
        if isinstance(short_edge, (list, tuple)) and short_edge[1] - short_edge[0] > 100:
            # don't warmup if augmentation is on
            os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
        assert _C.TRAINER in ['horovod', 'replicated'], _C.TRAINER

        # setup NUM_GPUS
        if _C.TRAINER == 'horovod':
            import horovod.tensorflow as hvd
            num_gpus = hvd.size()
            if num_gpus == hvd.local_size():
                logger.warn(
                    "It's not recommended to use horovod for single-machine training. "
                    "Replicated trainer is more stable and has the same efficiency."
                )
        else:
            assert 'OMPI_COMM_WORLD_SIZE' not in os.environ
            num_gpus = get_num_gpu()
        assert num_gpus % 8 == 0 or 8 % num_gpus == 0, \
            "Can only train with 1,2,4 or >=8 GPUs, but found {} GPUs".format(num_gpus)

    # NOTE(review): placement reconstructed from whitespace-collapsed source;
    # this check is taken to apply to both training and inference -- confirm.
    assert num_gpus > 0, "Has to run with GPU!"
    if _C.TRAIN.NUM_GPUS is None:
        _C.TRAIN.NUM_GPUS = num_gpus
    elif _C.TRAINER == 'horovod':
        assert _C.TRAIN.NUM_GPUS == num_gpus
    else:
        assert _C.TRAIN.NUM_GPUS <= num_gpus

    _C.freeze()
    logger.info("Config: ------------------------------------------\n" + str(_C))
pred = outputs[k][0] cv2.imwrite("out{}.png".format( '-fused' if k == 5 else str(k + 1)), pred * 255) else: pred = outputs[5][0] cv2.imwrite(output, pred * 255) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') parser.add_argument('--load', help='load model') parser.add_argument('--view', help='view dataset', action='store_true') parser.add_argument('--run', help='run model on images') parser.add_argument('--output', help='fused output filename. default to out-fused.png') args = parser.parse_args() if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu if args.view: view_data() elif args.run: run(args.load, args.run, args.output) else: config = get_config() if args.load: config.session_init = get_model_loader(args.load) launch_train_with_config( config, SyncMultiGPUTrainer(max(get_num_gpu(), 1)))
] input = QueueInput(dataset_train) input = StagingInput(input, nr_stage=1) return TrainConfig( model=Model(), data=input, callbacks=callbacks, steps_per_epoch=1281167 // total_batch, max_epoch=100, ) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') parser.add_argument('--data', help='ILSVRC dataset dir') parser.add_argument('--batch', type=int, default=32, help='batch per GPU') parser.add_argument('--norm', choices=['none', 'bn', 'gn'], default='none') args = parser.parse_args() if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu logger.set_logger_dir(os.path.join('train_log', 'vgg16-norm={}'.format(args.norm))) config = get_config() nr_tower = max(get_num_gpu(), 1) trainer = SyncMultiGPUTrainerReplicated(nr_tower) launch_train_with_config(config, trainer)
def train():
    """
    Set up and launch Batch-A3C training.

    The function points the logger at ``train_log/train-atari-<ENV_NAME>``,
    divides the visible GPUs into training towers and prediction towers,
    starts the simulator worker processes, assembles the ``TrainConfig`` and
    finally hands it to the trainer.

    Side effects: rebinds the module-level ``PREDICTOR_THREAD`` and spawns
    ``SIMULATOR_PROC`` simulator processes.
    """
    assert tf.test.is_gpu_available(), "Training requires GPUs!"
    logger.set_logger_dir(
        os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME)))

    # assign GPUs for training & inference
    num_gpu = get_num_gpu()
    global PREDICTOR_THREAD
    if num_gpu <= 0:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]
    else:
        gpu_ids = list(range(num_gpu))
        # Note: unary minus binds tighter than //, so this is (-num_gpu) // 2.
        split = -num_gpu // 2
        # With more than one GPU the tail slice is used for inference;
        # a single GPU serves both roles.
        predict_tower = gpu_ids[split:] if num_gpu > 1 else [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        # The head slice is empty when only one GPU exists, hence the fallback.
        train_tower = gpu_ids[:split] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(str(g) for g in train_tower),
            ','.join(str(g) for g in predict_tower)))

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    if sys.platform.startswith('linux'):
        prefix = '@'
    else:
        prefix = ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = []
    for worker_idx in range(SIMULATOR_PROC):
        procs.append(MySimulatorWorker(worker_idx, namec2s, names2c))
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

    # The pieces of the config are built in the same order in which the
    # keyword arguments of TrainConfig were originally evaluated.
    model = Model()
    callbacks = [
        ModelSaver(),
        ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
        ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
        HumanHyperParamSetter('learning_rate'),
        HumanHyperParamSetter('entropy_beta'),
        master,
        StartProcOrThread(master),
        PeriodicTrigger(
            Evaluator(EVAL_EPISODE, ['state'], ['policy'], get_player),
            every_k_epochs=3),
    ]
    session_creator = sesscreate.NewSessionCreator(
        config=get_default_sess_config(0.5))
    if args.load:
        session_init = get_model_loader(args.load)
    else:
        session_init = None
    config = TrainConfig(
        model=model,
        dataflow=dataflow,
        callbacks=callbacks,
        session_creator=session_creator,
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=session_init,
        max_epoch=1000,
    )

    if num_gpu == 1:
        trainer = SimpleTrainer()
    else:
        trainer = AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
# Tail of the command-line entry point: register the last option, parse the
# arguments, build the model and either evaluate it or start training.
parser.add_argument(
    '--mode',
    choices=['resnet', 'preact', 'se', 'resnext32x4d'],
    default='resnet',
    help='variants of resnet to use')
args = parser.parse_args()

# Forward --gpu to CUDA_VISIBLE_DEVICES when it was given.
if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

model = Model(args.depth, args.mode)
model.data_format = args.data_format
if args.weight_decay_norm:
    model.weight_decay_pattern = ".*/W|.*/gamma|.*/beta"

if not args.eval:
    # Training: choose the log directory, build the config, then launch.
    if args.fake:
        logger.set_logger_dir(os.path.join('train_log', 'tmp'), 'd')
    else:
        run_name = 'imagenet-{}-d{}-batch{}'.format(
            args.mode, args.depth, args.batch)
        logger.set_logger_dir(os.path.join('train_log', run_name))

    config = get_config(model)
    if args.load:
        config.session_init = get_model_loader(args.load)
    num_towers = max(get_num_gpu(), 1)
    trainer = SyncMultiGPUTrainerReplicated(num_towers)
    launch_train_with_config(config, trainer)
else:
    # Evaluation on the 'val' split.
    batch = 128  # something that can run on one gpu
    ds = get_imagenet_dataflow(args.data, 'val', batch)
    eval_classification(model, get_model_loader(args.load), ds)
imgaug.ToFloat32(), ] ds_test2.reset_state() ds_test2 = AugmentImageComponent(ds_test2, ag_test2, 0) ds_test2 = BatchData(ds_test2, args.batch) ds_test2 = PrintData(ds_test2) # Setup the config config = TrainConfig( model=model, dataflow=ds_train, callbacks=[ ModelSaver(), MinSaver('cost'), ScheduledHyperParamSetter('learning_rate', [(0, 1e-2), (50, 1e-3), (100, 1e-4), (150, 1e-5), (200, 1e-6)]), InferenceRunner(ds_valid, [CustomBinaryClassificationStats('estim', 'label', args, prefix='valid'), ScalarStats(['loss_xent', 'cost'], prefix='valid'), ], tower_name='ValidTower'), InferenceRunner(ds_test2, [CustomBinaryClassificationStats('estim', 'label', args, prefix='test2'), ScalarStats(['loss_xent', 'cost'], prefix='test2'), ], tower_name='Test2Tower'), ], max_epoch=250, session_init=SmartInit(args.load), ) trainer = SyncMultiGPUTrainerParameterServer(max(get_num_gpu(), 1)) launch_train_with_config(config, trainer)
def finalize_configs(is_training):
    """
    Sanity-check the global config ``_C``, fill in the fields that are derived
    from other fields, then freeze the config and log it.

    Args:
        is_training: when true, the trainer type and GPU count are validated and
            ``_C.TRAIN.NUM_GPUS`` is populated if it was left as None; when
            false, only cuDNN autotune is switched off.

    Raises:
        AssertionError: if any of the consistency checks fails.
    """
    _C.DATA.NUM_CLASS = _C.DATA.NUM_CATEGORY + 1  # +1 background
    _C.DATA.BASEDIR = os.path.expanduser(_C.DATA.BASEDIR)

    assert _C.BACKBONE.NORM in ('FreezeBN', 'SyncBN', 'GN'), _C.BACKBONE.NORM
    if _C.BACKBONE.NORM != 'FreezeBN':
        assert not _C.BACKBONE.FREEZE_AFFINE
    assert _C.BACKBONE.FREEZE_AT in (0, 1, 2)

    _C.RPN.NUM_ANCHOR = len(_C.RPN.ANCHOR_SIZES) * len(_C.RPN.ANCHOR_RATIOS)
    assert len(_C.FPN.ANCHOR_STRIDES) == len(_C.RPN.ANCHOR_SIZES)
    # image size into the backbone has to be multiple of this number
    # [3] because we build FPN with features r2,r3,r4,r5
    _C.FPN.RESOLUTION_REQUIREMENT = _C.FPN.ANCHOR_STRIDES[3]

    if _C.MODE_FPN:
        # Round MAX_SIZE up to the next multiple of the resolution requirement.
        multiple = _C.FPN.RESOLUTION_REQUIREMENT * 1.
        _C.PREPROC.MAX_SIZE = np.ceil(_C.PREPROC.MAX_SIZE / multiple) * multiple
        assert _C.FPN.PROPOSAL_MODE in ('Level', 'Joint')
        assert _C.FPN.FRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.MRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.NORM in ('None', 'GN')

        if _C.FPN.CASCADE:
            stage_count = _C.CASCADE.NUM_STAGES
            # the first threshold is the proposal sampling threshold
            assert len(_C.CASCADE.IOUS) == stage_count
            assert _C.CASCADE.IOUS[0] == _C.FRCNN.FG_THRESH
            assert len(_C.CASCADE.BBOX_REG_WEIGHTS) == stage_count

    if not is_training:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
    else:
        short_edge = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE
        if short_edge[1] - short_edge[0] > 100:
            # don't warmup if augmentation is on
            os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
        assert _C.TRAINER in ('horovod', 'replicated'), _C.TRAINER

        # setup NUM_GPUS
        if _C.TRAINER == 'horovod':
            import horovod.tensorflow as hvd
            ngpu = hvd.size()
        else:
            assert 'OMPI_COMM_WORLD_SIZE' not in os.environ
            ngpu = get_num_gpu()
            assert ngpu > 0, "Has to run with GPU!"
        # Accept only GPU counts that divide 8 or are a multiple of 8.
        assert ngpu % 8 == 0 or 8 % ngpu == 0, ngpu

        if _C.TRAIN.NUM_GPUS is None:
            _C.TRAIN.NUM_GPUS = ngpu
        elif _C.TRAINER == 'horovod':
            assert _C.TRAIN.NUM_GPUS == ngpu
        else:
            assert _C.TRAIN.NUM_GPUS <= ngpu

    _C.freeze()
    logger.info("Config: ------------------------------------------\n" + str(_C))
# Tail of the command-line entry point: parse the arguments, prepare the
# train/test data, assemble the training config and launch training.
parser.add_argument('--load', help='load model for training')
args = parser.parse_args()
NUM_UNITS = args.num_units

# Forward --gpu to CUDA_VISIBLE_DEVICES when it was given.
if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

logger.auto_set_dir()

dataset_train = get_data('train')
dataset_test = get_data('test')

# (epoch, value) pairs passed to ScheduledHyperParamSetter.
lr_schedule = [(1, 0.1), (82, 0.01), (123, 0.001), (300, 0.0002)]

# Objects are created in the order the TrainConfig keyword arguments were
# originally evaluated: model, callbacks, then the session initializer.
model = Model(n=NUM_UNITS)
callbacks = [
    ModelSaver(),
    InferenceRunner(
        dataset_test,
        [ScalarStats('cost'), ClassificationError('wrong_vector')]),
    ScheduledHyperParamSetter('learning_rate', lr_schedule),
]
if args.load:
    session_init = SaverRestore(args.load)
else:
    session_init = None

config = TrainConfig(
    model=model,
    dataflow=dataset_train,
    callbacks=callbacks,
    max_epoch=400,
    session_init=session_init,
)

# Use one tower even when get_num_gpu() reports zero.
num_gpu = max(get_num_gpu(), 1)
trainer = SyncMultiGPUTrainerParameterServer(num_gpu)
launch_train_with_config(config, trainer)
def finalize_configs(is_training):
    """
    Run the config sanity checks and populate the derived entries of ``_C``.

    After all checks pass the config is frozen and its string form is logged.

    Args:
        is_training: truthy to additionally validate ``_C.TRAINER`` and the
            number of GPUs (setting ``_C.TRAIN.NUM_GPUS`` when it is None);
            falsy to only disable cuDNN autotune for inference.

    Raises:
        AssertionError: when a config value violates one of the checks.
    """
    # Derived data settings.
    _C.DATA.NUM_CLASS = _C.DATA.NUM_CATEGORY + 1  # +1 background
    _C.DATA.BASEDIR = os.path.expanduser(_C.DATA.BASEDIR)

    # Backbone checks.
    assert _C.BACKBONE.NORM in ('FreezeBN', 'SyncBN', 'GN'), _C.BACKBONE.NORM
    if _C.BACKBONE.NORM != 'FreezeBN':
        assert not _C.BACKBONE.FREEZE_AFFINE
    assert _C.BACKBONE.FREEZE_AT in (0, 1, 2)

    # Anchor bookkeeping.
    num_sizes = len(_C.RPN.ANCHOR_SIZES)
    _C.RPN.NUM_ANCHOR = num_sizes * len(_C.RPN.ANCHOR_RATIOS)
    assert len(_C.FPN.ANCHOR_STRIDES) == len(_C.RPN.ANCHOR_SIZES)
    # image size into the backbone has to be multiple of this number
    # [3] because we build FPN with features r2,r3,r4,r5
    _C.FPN.RESOLUTION_REQUIREMENT = _C.FPN.ANCHOR_STRIDES[3]

    if _C.MODE_FPN:
        # MAX_SIZE is rounded up to a multiple of the resolution requirement.
        step = _C.FPN.RESOLUTION_REQUIREMENT * 1.
        _C.PREPROC.MAX_SIZE = np.ceil(_C.PREPROC.MAX_SIZE / step) * step
        assert _C.FPN.PROPOSAL_MODE in ('Level', 'Joint')
        assert _C.FPN.FRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.MRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.NORM in ('None', 'GN')

        if _C.FPN.CASCADE:
            n_stages = _C.CASCADE.NUM_STAGES
            # the first threshold is the proposal sampling threshold
            assert len(_C.CASCADE.IOUS) == n_stages
            assert _C.CASCADE.IOUS[0] == _C.FRCNN.FG_THRESH
            assert len(_C.CASCADE.BBOX_REG_WEIGHTS) == n_stages

    if is_training:
        scales = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE
        scale_range = scales[1] - scales[0]
        if scale_range > 100:
            # don't warmup if augmentation is on
            os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
        assert _C.TRAINER in ('horovod', 'replicated'), _C.TRAINER

        # setup NUM_GPUS
        if _C.TRAINER != 'horovod':
            assert 'OMPI_COMM_WORLD_SIZE' not in os.environ
            ngpu = get_num_gpu()
            assert ngpu > 0, "Has to run with GPU!"
        else:
            import horovod.tensorflow as hvd
            ngpu = hvd.size()
        # Only counts that are a multiple of 8, or that divide 8, pass.
        assert ngpu % 8 == 0 or 8 % ngpu == 0, ngpu

        if _C.TRAIN.NUM_GPUS is not None:
            if _C.TRAINER == 'horovod':
                assert _C.TRAIN.NUM_GPUS == ngpu
            else:
                assert _C.TRAIN.NUM_GPUS <= ngpu
        else:
            _C.TRAIN.NUM_GPUS = ngpu
    else:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'

    _C.freeze()
    logger.info("Config: ------------------------------------------\n" + str(_C))
def train(checkpoint_dir,
          model_name,
          dataset,
          num_epochs,
          quant_type,
          batch_size_per_gpu,
          lr=None,
          post_quantize_only=False):
    """
    Train ``model_name`` on ``dataset`` with quantisation enabled part-way through.

    Args:
        checkpoint_dir: directory handed to ``ModelSaver``; when
            ``post_quantize_only`` is set, ``<checkpoint_dir>/checkpoint`` is
            also used to restore the session.
        model_name: model identifier forwarded to ``get_model_func``.
        dataset: key into ``datasets.DATASETS``.
        num_epochs: number of epochs to run; None means "use the epoch of the
            last learning-rate schedule entry".
        quant_type: quantisation type forwarded to ``get_model_func``.
        batch_size_per_gpu: batch size used for both train and test data.
        lr: learning rate specification. May be a number (constant rate), a
            list of ``(epoch, value)`` pairs, or a string holding the Python
            literal of either. A falsy value selects the built-in schedule.
        post_quantize_only: if true, quantisation starts at epoch 0 and the
            session is restored from ``checkpoint_dir``.
    """
    train_data, test_data, (img_shape, label_shape) = datasets.DATASETS[dataset]()
    num_gpus = max(gpu.get_num_gpu(), 1)
    train_data = BatchData(train_data, batch_size_per_gpu)
    test_data = BatchData(test_data, batch_size_per_gpu, remainder=True)
    steps_per_epoch = len(train_data) // num_gpus

    if lr:
        if isinstance(lr, str):
            lr = ast.literal_eval(lr)
        # A bare number means a constant learning rate. Integers are accepted
        # too (e.g. lr="1" parses to an int); previously they were mistaken for
        # a schedule and crashed on subscripting.
        if isinstance(lr, (int, float)):
            lr_schedule = [(0, float(lr))]
        else:
            lr_schedule = lr
    else:
        lr_schedule = [(0, 0.005), (8, 0.1), (25, 0.005), (30, 0)]

    if num_epochs is None:
        num_epochs = lr_schedule[-1][0]

    if post_quantize_only:
        start_quantising_at_epoch = 0
    elif len(lr_schedule) > 1:
        # Start quantising at the second-to-last schedule boundary.
        start_quantising_at_epoch = lr_schedule[-2][0]
    else:
        # Single-entry schedule: quantise for the last five epochs.
        start_quantising_at_epoch = max(0, num_epochs - 5)

    logger.info(f"Training with LR schedule: {lr_schedule}")
    logger.info(f"Quantising at epoch {start_quantising_at_epoch}")

    model_func, input_spec, output_spec = get_model_func(
        "train",
        model_name,
        quant_type,
        img_shape,
        num_classes=label_shape[0],
        quant_delay=steps_per_epoch * start_quantising_at_epoch)
    # One target spec per model output, named "<last component of output name>_target".
    target_spec = [
        tf.TensorSpec(t.shape, t.dtype, name=t.name.split("/")[-1] + "_target")
        for t in output_spec
    ]
    model = KerasModel(get_model=model_func,
                       input_signature=input_spec,
                       target_signature=target_spec,
                       input=train_data,
                       trainer=SyncMultiGPUTrainerParameterServer(
                           num_gpus, ps_device='gpu'))

    # The variable is named 'learning_rate' so the hyper-parameter setters
    # below can address it by name.
    lr_var = tf.get_variable('learning_rate',
                             initializer=lr_schedule[0][1],
                             trainable=False)
    tf.summary.scalar('learning_rate-summary', lr_var)
    model.compile(optimizer=tf.train.MomentumOptimizer(learning_rate=lr_var,
                                                       momentum=0.9),
                  loss="categorical_crossentropy",
                  metrics=["categorical_accuracy"])

    model.fit(steps_per_epoch=steps_per_epoch,
              max_epoch=num_epochs,
              callbacks=[
                  ModelSaver(max_to_keep=1, checkpoint_dir=checkpoint_dir),
                  DataParallelInferenceRunner(
                      test_data, ScalarStats(model._stats_to_inference),
                      num_gpus),
                  ScheduledHyperParamSetter('learning_rate',
                                            lr_schedule,
                                            interp="linear"),
                  StatMonitorParamSetter('learning_rate',
                                         'validation_categorical_accuracy',
                                         lambda x: x / 2,
                                         threshold=0.001,
                                         last_k=10,
                                         reverse=True)
              ],
              session_init=SaverRestore(checkpoint_dir + "/checkpoint")
              if post_quantize_only else None)
# Tail of the command-line entry point: either apply a trained model or set
# up the session initializer and run GAN training.

# Forward --gpu to CUDA_VISIBLE_DEVICES when it was given.
if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

if args.apply:
    apply(args.load, args.lowres, args.output)
else:
    logger.auto_set_dir()

    if not args.load:
        # No checkpoint given: initialize from the VGG19 weight file, with
        # every parameter name prefixed by 'VGG19/'.
        assert os.path.isfile(args.vgg19)
        vgg_weights = dict(np.load(args.vgg19))
        param_dict = {
            'VGG19/' + name: value
            for name, value in six.iteritems(vgg_weights)
        }
        session_init = DictRestore(param_dict)
    else:
        session_init = SaverRestore(args.load)

    nr_tower = max(get_num_gpu(), 1)
    data = QueueInput(get_data(args.data))
    model = Model()

    trainer = SeparateGANTrainer(data, model, d_period=3)

    saver = ModelSaver(keep_checkpoint_every_n_hours=2)
    trainer.train_with_defaults(
        callbacks=[saver],
        session_init=session_init,
        steps_per_epoch=len(data) // 4,
        max_epoch=300,
    )
# Tail of the command-line entry point: register the options, parse them and
# either run sampling or start GAN training.
parser.add_argument(
    '--gpu', help='comma separated list of GPU(s) to use.')
parser.add_argument(
    '--load', help='load model')
parser.add_argument(
    '--sample', help='run sampling', action='store_true')
parser.add_argument(
    '--data', required=True, help='Image directory')
parser.add_argument(
    '--mode', default='AtoB', choices=['AtoB', 'BtoA'])
parser.add_argument(
    '-b', '--batch', default=1, type=int)
args = parser.parse_args()

# Forward --gpu to CUDA_VISIBLE_DEVICES when it was given.
if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

BATCH = args.batch

if not args.sample:
    logger.auto_set_dir()

    data = QueueInput(get_data())
    trainer = GANTrainer(data, Model(), get_num_gpu())

    # Built in the same order the keyword arguments were originally
    # evaluated: callbacks, epoch size, then the session initializer.
    callbacks = [
        PeriodicTrigger(ModelSaver(), every_k_epochs=3),
        ScheduledHyperParamSetter('learning_rate', [(200, 1e-4)]),
    ]
    steps_per_epoch = data.size()
    if args.load:
        session_init = SaverRestore(args.load)
    else:
        session_init = None

    trainer.train_with_defaults(
        callbacks=callbacks,
        steps_per_epoch=steps_per_epoch,
        max_epoch=300,
        session_init=session_init,
    )
else:
    # Sampling needs a trained model to load.
    assert args.load
    sample(args.data, args.load)