def get_model(
    *,
    model_dir: str,
    arch: str = "resnet50",
    precision: str = "fp32",
    use_xla: bool = True,
    use_tf_amp: bool = False,
    use_dali: bool = False,
    gpu_memory_fraction: float = 0.7,
):
    """Create a ``Runner`` and return its inference-mode estimator.

    Side effects: calls ``hvd.init()`` and writes one info line to ``LOGGER``.

    Args:
        model_dir: Forwarded to ``Runner`` as ``model_dir``.
        arch: Forwarded to ``Runner`` as ``architecture``.
        precision: ``"fp16"`` or ``"fp32"`` (case-insensitive). Mapped to
            ``tf.float16`` / ``tf.float32`` and passed to ``Runner`` as
            ``dtype``.
        use_xla: Forwarded to both ``Runner`` and ``_get_estimator``.
        use_tf_amp: Forwarded to ``Runner``.
        use_dali: Forwarded to both ``Runner`` and ``_get_estimator``.
        gpu_memory_fraction: Forwarded to both ``Runner`` and
            ``_get_estimator``.

    Returns:
        The object returned by ``Runner._get_estimator(mode="inference", ...)``.

    Raises:
        ValueError: If ``precision`` is neither ``fp16`` nor ``fp32``.
    """
    # Validate the argument before any project import or Horovod
    # initialisation so that a bad value fails immediately.
    precision_key = precision.lower()
    if precision_key not in ("fp16", "fp32"):
        raise ValueError(
            f"Unknown precision {precision}. Allowed values: fp16|fp32")

    from runtime import Runner
    from utils import hvd_wrapper as hvd

    hvd.init()

    dtype = {"fp16": tf.float16, "fp32": tf.float32}[precision_key]

    # The trailing space on the first fragment matters: adjacent f-strings are
    # concatenated verbatim, so without it the two fields run together.
    LOGGER.info(
        f"Creating model arch={arch} precision={precision} xla={use_xla} "
        f"tf_amp={use_tf_amp}, dali={use_dali}, gpu_memory_frac={gpu_memory_fraction}"
    )

    runner = Runner(
        n_classes=NCLASSES,
        architecture=arch,
        input_format=INPUT_FORMAT,
        compute_format=COMPUTE_FORMAT,
        dtype=dtype,
        n_channels=NCHANNELS,
        height=HEIGHT,
        width=WIDTH,
        use_xla=use_xla,
        use_tf_amp=use_tf_amp,
        use_dali=use_dali,
        gpu_memory_fraction=gpu_memory_fraction,
        gpu_id=0,  # hard-coded: this entry point always uses GPU 0
        model_dir=model_dir,
    )

    # removed params not used in inference
    estimator_params = {"use_final_conv": False}  # TODO: Why not moved to model constructor?
    estimator = runner._get_estimator(
        mode="inference",
        run_params=estimator_params,
        use_xla=use_xla,
        use_dali=use_dali,
        gpu_memory_fraction=gpu_memory_fraction,
    )
    return estimator
seed=FLAGS.seed, ) # =================================== runner = Runner( # ========= Model HParams ========= # n_classes=RUNNING_CONFIG.n_classes, input_format=RUNNING_CONFIG.input_format, compute_format=RUNNING_CONFIG.compute_format, dtype=RUNNING_CONFIG.dtype, n_channels=RUNNING_CONFIG.n_channels, height=RUNNING_CONFIG.height, width=RUNNING_CONFIG.width, distort_colors=RUNNING_CONFIG.distort_colors, log_dir=RUNNING_CONFIG.log_dir, model_dir=RUNNING_CONFIG.model_dir, data_dir=RUNNING_CONFIG.data_dir, data_idx_dir=RUNNING_CONFIG.data_idx_dir, # ======= Optimization HParams ======== # use_xla=RUNNING_CONFIG.use_xla, use_tf_amp=RUNNING_CONFIG.use_tf_amp, use_dali=RUNNING_CONFIG.use_dali, gpu_memory_fraction=RUNNING_CONFIG.gpu_memory_fraction, seed=RUNNING_CONFIG.seed ) if RUNNING_CONFIG.mode in ["train", "train_and_evaluate", "training_benchmark"]: runner.train( iter_unit=RUNNING_CONFIG.iter_unit,
else: dllogger.init(backends=[]) dllogger.log(data=vars(FLAGS), step='PARAMETER') runner = Runner( # ========= Model HParams ========= # n_classes=1001, architecture=FLAGS.arch, input_format='NHWC', compute_format=FLAGS.data_format, dtype=tf.float32, n_channels=3, height=224 if FLAGS.data_dir else FLAGS.synthetic_data_size, width=224 if FLAGS.data_dir else FLAGS.synthetic_data_size, distort_colors=False, log_dir=FLAGS.results_dir, model_dir=FLAGS.model_dir if FLAGS.model_dir is not None else FLAGS.results_dir, data_dir=FLAGS.data_dir, data_idx_dir=FLAGS.data_idx_dir, weight_init=FLAGS.weight_init, use_xla=FLAGS.xla, use_tf_amp=FLAGS.amp, use_dali=FLAGS.dali, use_cpu=FLAGS.cpu, gpu_memory_fraction=FLAGS.gpu_memory_fraction, gpu_id=FLAGS.gpu_id, seed=FLAGS.seed) if FLAGS.mode in ["train", "train_and_evaluate", "training_benchmark"]: runner.train(iter_unit=FLAGS.iter_unit, num_iter=FLAGS.num_iter,
_NUM_CLASSES = 3 if __name__ == "__main__": tf.logging.set_verbosity(tf.logging.ERROR) FLAGS = parse_cmdline() runner = Runner( # ========= Model HParams ========= # n_classes=_NUM_CLASSES, log_dir=FLAGS.results_dir, model_dir=FLAGS.results_dir, data_dir=FLAGS.data_dir, pre_trained_model_path=FLAGS.pre_trained_model_path, use_transpose_conv=FLAGS.use_transpose_conv, # ======= Optimization HParams ======== # use_xla=FLAGS.use_xla, use_tf_amp=FLAGS.use_tf_amp, seed=FLAGS.seed ) if FLAGS.mode == "train_and_evaluate" and FLAGS.eval_every > 0: for i in range(int(FLAGS.num_iter / FLAGS.eval_every)): runner.train( iter_unit=FLAGS.iter_unit, num_iter=FLAGS.eval_every,
# Collect every constructor argument in one mapping, grouped by purpose, then
# unpack it into the Runner call. Values are read from RUNNING_CONFIG in the
# same order the keywords are passed.
runner_kwargs = {
    "input_format": RUNNING_CONFIG.input_format,
    "compute_format": RUNNING_CONFIG.compute_format,
    "n_channels": RUNNING_CONFIG.n_channels,
    # The config attribute is `unet_variant`; Runner takes it as `model_variant`.
    "model_variant": RUNNING_CONFIG.unet_variant,
    "activation_fn": RUNNING_CONFIG.activation_fn,
    "input_shape": RUNNING_CONFIG.input_shape,
    "mask_shape": RUNNING_CONFIG.mask_shape,
    "input_normalization_method": RUNNING_CONFIG.input_normalization_method,

    # Training HParams
    "augment_data": RUNNING_CONFIG.augment_data,
    "loss_fn_name": RUNNING_CONFIG.loss_fn_name,
    "weight_init_method": RUNNING_CONFIG.weight_init_method,

    # Runtime HParams
    "use_tf_amp": RUNNING_CONFIG.use_tf_amp,
    "use_xla": RUNNING_CONFIG.use_xla,

    # Directory Params
    "log_dir": RUNNING_CONFIG.log_dir,
    "model_dir": RUNNING_CONFIG.model_dir,
    "sample_dir": RUNNING_CONFIG.sample_dir,
    "data_dir": RUNNING_CONFIG.data_dir,
    "dataset_name": RUNNING_CONFIG.dataset_name,
    "dataset_hparams": RUNNING_CONFIG.dataset_hparams,

    # Debug Params
    "debug_verbosity": RUNNING_CONFIG.debug_verbosity,
    "log_every_n_steps": RUNNING_CONFIG.log_every_n_steps,
    "seed": RUNNING_CONFIG.seed,
}
runner = Runner(**runner_kwargs)
set_flags(FLAGS) backends = [] if not hvd_utils.is_using_hvd() or hvd.rank() == 0: # Prepare Model Dir log_path = os.path.join(FLAGS.model_dir, FLAGS.log_filename) os.makedirs(FLAGS.model_dir, exist_ok=True) # Setup dlLogger backends+=[ JSONStreamBackend(verbosity=Verbosity.VERBOSE, filename=log_path), StdOutBackend(verbosity=Verbosity.DEFAULT) ] DLLogger.init(backends=backends) DLLogger.log(data=vars(FLAGS), step='PARAMETER') runner = Runner(FLAGS, DLLogger) if FLAGS.mode in ["train", "train_and_eval", "training_benchmark"]: runner.train() if FLAGS.mode in ['eval', 'evaluate', 'inference_benchmark']: if FLAGS.mode == 'inference_benchmark' and hvd_utils.is_using_hvd(): raise NotImplementedError("Only single GPU inference is implemented.") elif not hvd_utils.is_using_hvd() or hvd.rank() == 0: runner.evaluate() if FLAGS.mode == 'predict': if FLAGS.to_predict is None: raise ValueError("No data to predict on.") if not os.path.isdir(FLAGS.to_predict):
set_flags(FLAGS)

# Logging backends are attached only when Horovod is not in use or this
# process is rank 0; every other rank initialises DLLogger with no backends.
if hvd_utils.is_using_hvd() and hvd.rank() != 0:
    backends = []
else:
    # Prepare Model Dir
    os.makedirs(FLAGS.model_dir, exist_ok=True)
    log_path = os.path.join(FLAGS.model_dir, FLAGS.log_filename)

    # Setup dlLogger
    backends = [
        JSONStreamBackend(verbosity=Verbosity.VERBOSE, filename=log_path),
        StdOutBackend(verbosity=Verbosity.DEFAULT),
    ]

DLLogger.init(backends=backends)
DLLogger.log(data=vars(FLAGS), step='PARAMETER')

runner = Runner(FLAGS, DLLogger)

# Training modes.
if FLAGS.mode in ("train", "train_and_eval", "training_benchmark"):
    runner.train()

# Evaluation modes: the benchmark refuses to run under Horovod; otherwise
# evaluation runs when Horovod is not in use or on rank 0.
if FLAGS.mode in ('eval', 'evaluate', 'inference_benchmark'):
    if FLAGS.mode == 'inference_benchmark' and hvd_utils.is_using_hvd():
        raise NotImplementedError(
            "Only single GPU inference is implemented.")
    if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
        runner.evaluate()

# Prediction mode needs an input location.
if FLAGS.mode == 'predict':
    if FLAGS.to_predict is None:
        raise ValueError("No data to predict on.")
else: dllogger.init(backends=[]) dllogger.log(data=vars(FLAGS), step='PARAMETER') runner = Runner( # ========= Model HParams ========= # n_classes=1001, architecture=FLAGS.arch, input_format='NHWC', compute_format=FLAGS.data_format, dtype=tf.float32 if FLAGS.precision == 'fp32' else tf.float16, n_channels=3, height=224, width=224, distort_colors=False, log_dir=FLAGS.results_dir, model_dir=FLAGS.model_dir if FLAGS.model_dir is not None else FLAGS.results_dir, data_dir=FLAGS.data_dir, data_idx_dir=FLAGS.data_idx_dir, weight_init=FLAGS.weight_init, use_xla=FLAGS.use_xla, use_tf_amp=FLAGS.use_tf_amp, use_dali=FLAGS.use_dali, gpu_memory_fraction=FLAGS.gpu_memory_fraction, gpu_id=FLAGS.gpu_id, seed=FLAGS.seed) if FLAGS.mode in ["train", "train_and_evaluate", "training_benchmark"]: runner.train( iter_unit=FLAGS.iter_unit,
backends = [] if not hvd_utils.is_using_hvd() or hvd.rank() == 0: # Prepare Model Dir os.makedirs(config.model_dir, exist_ok=True) # Setup dlLogger backends += [ JSONStreamBackend(verbosity=Verbosity.VERBOSE, filename=config.log_filename), StdOutBackend(verbosity=Verbosity.DEFAULT) ] DLLogger.init(backends=backends) DLLogger.log(data=vars(config), step='PARAMETER') #========== initialize the runner runner = Runner(config, DLLogger) #========== determine the operation mode of the runner (tr,eval,predict) if config.mode in ["train", "train_and_eval", "training_benchmark"]: runner.train() if config.mode in ['eval', 'evaluate', 'inference_benchmark']: if config.mode == 'inference_benchmark' and hvd_utils.is_using_hvd(): raise NotImplementedError( "Only single GPU inference is implemented.") elif hvd_utils.is_using_hvd(): raise NotImplementedError( "Only single GPU evaluation is implemented.") else: runner.evaluate() if config.mode == 'predict': if config.predict_img_dir is None: