def main(_):
    log_info_devices = load_habana_module()
    print(f"Devices:\n {log_info_devices}")
    model_helpers.apply_clean(flags.FLAGS)
    with logger.benchmark_context(flags.FLAGS):
        stats = run(flags.FLAGS)
        logging.info('Run stats:\n%s', stats)
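# A minimal sketch, assuming the usual absl entry point (not shown in the snippet
# above): any flag definitions the script needs are registered elsewhere, and
# main(_) is then launched through absl's app.run().
from absl import app

if __name__ == "__main__":
    app.run(main)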
def _horovod_init(framework):
    size = comm_size()
    rank = comm_rank()
    hcl_config = get_hcl_config()
    hcl_type = get_hcl_type(hcl_config)
    if hcl_type != "HLS1-H":
        # All env variables should be set before loading Habana modules.
        if is_hierarchical():
            os.environ["HLS1_MODULE_ID"] = str(comm_local_rank())
            os.environ["ID"] = str(comm_local_rank())
        else:
            if size > 1:
                os.environ["HLS1_MODULE_ID"] = str(get_hw_module_id(rank))
                os.environ["ID"] = str(get_hw_module_id(rank))

    # Make sure every rank logs to a different file.
    # Only important on the same machine - so pretty much every scenario.
    if size > 1:
        rank_prefix = "rank_{}_".format(rank)
        HorovodHelpers._set_env_prefix("TF_RANK_PREFIX", rank_prefix, False)
        HorovodHelpers._set_env_prefix("HBN_TF_GRAPH_PREFIX", rank_prefix, False)
        HorovodHelpers._set_env_prefix("TF_DUMP_GRAPH_PREFIX", rank_prefix, True)
        HorovodHelpers._hvd_rank_prefix = rank_prefix

    # Init synapse logger (if required).
    synapse_logger_init()
    # Init TF module (for CPU allocator).
    load_habana_module()

    if framework == Framework.TENSORFLOW:
        import horovod.tensorflow as hvd
    elif framework == Framework.KERAS:
        import horovod.tensorflow.keras as hvd
    else:
        raise Exception(
            "Specified framework: {} is not supported by horovod_helpers".format(framework))

    hvd.init()
    assert rank == hvd.rank(), "There is a possible rank mismatch between MPI and Horovod"
    HorovodHelpers._hvd = hvd
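# A minimal sketch of how a Keras training script typically consumes the helper
# above. Only hvd_init and Framework come from this codebase (see their use in
# the Mask R-CNN and ResNet scripts below); the model/optimizer setup is
# illustrative, and the standard Horovod Keras API (DistributedOptimizer,
# BroadcastGlobalVariablesCallback) is assumed.
import tensorflow as tf
from TensorFlow.common.horovod_helpers import hvd_init, Framework

hvd = hvd_init(framework=Framework.KERAS)  # initializes Horovod and loads the Habana module

# Scale the learning rate with the worker count and let Horovod average gradients.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01 * hvd.size())
optimizer = hvd.DistributedOptimizer(optimizer)

callbacks = [
    # Broadcast initial variable states from rank 0 so all workers start in sync.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
]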
def run_imagenet(flags_obj):
    """Run ResNet ImageNet training and eval loop.

    Args:
      flags_obj: An object containing parsed flag values.

    Returns:
      Dict of results of the run. Contains the keys `eval_results` and
      `train_hooks`. `eval_results` contains accuracy (top_1) and accuracy_top_5.
      `train_hooks` is a list of the instances of hooks used during training.
    """
    # Use the synthetic input pipeline when requested, otherwise the real ImageNet input_fn.
    input_function = (flags_obj.use_synthetic_data and
                      get_synth_input_fn(flags_core.get_tf_dtype(flags_obj)) or
                      input_fn)

    if flags.FLAGS.is_mlperf_enabled:
        tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    else:
        tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

    if flags_obj.use_horovod:
        assert flags_obj.no_hpu == False, "Horovod without HPU is not supported in helpers."
        hvd_init()
    else:
        synapse_logger_init()

    if flags.FLAGS.is_mlperf_enabled:
        resnet_run_loop.init_mllog_mlloger()

    if not flags_obj.no_hpu:
        log_info_devices = load_habana_module()
        print(f"Devices:\n {log_info_devices}")

    result = resnet_run_loop.resnet_main(
        flags_obj, imagenet_model_fn, input_function, DATASET_NAME,
        shape=[DEFAULT_IMAGE_SIZE, DEFAULT_IMAGE_SIZE, NUM_CHANNELS])

    return result
import tensorflow as tf
from TensorFlow.common.library_loader import load_habana_module

tf.compat.v1.disable_eager_execution()
load_habana_module()

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(10),
])

loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, batch_size=128)
model.evaluate(x_test, y_test)
if FLAGS.export_dir:
    tf.gfile.MakeDirs(FLAGS.export_dir)
    squad_serving_input_fn = build_squad_serving_input_fn(FLAGS.max_seq_length)
    tf.logging.info("Starting to export model.")
    subfolder = estimator.export_saved_model(
        export_dir_base=os.path.join(FLAGS.export_dir, "saved_model"),
        serving_input_receiver_fn=squad_serving_input_fn)
    tf.logging.info("Starting to export TFLite.")
    converter = tf.lite.TFLiteConverter.from_saved_model(
        subfolder,
        input_arrays=["input_ids", "input_mask", "segment_ids"],
        output_arrays=["start_logits", "end_logits"])
    float_model = converter.convert()
    tflite_file = os.path.join(FLAGS.export_dir, "albert_model.tflite")
    with tf.gfile.GFile(tflite_file, "wb") as f:
        f.write(float_model)


if __name__ == "__main__":
    log_info_devices = load_habana_module()
    tf.logging.info("Devices:\n%s", log_info_devices)
    flags.mark_flag_as_required("spm_model_file")
    flags.mark_flag_as_required("albert_config_file")
    flags.mark_flag_as_required("output_dir")
    tf.app.run()
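# A minimal, self-contained sketch (not part of the export script above) of
# loading the exported albert_model.tflite with the standard tf.lite.Interpreter
# API and running a zero-filled smoke test; the ordering of the start/end logit
# outputs is an assumption.
import numpy as np
import tensorflow as tf

tflite_file = "albert_model.tflite"  # the file written by the export block above
interpreter = tf.lite.Interpreter(model_path=tflite_file)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Feed zero tensors with the shapes/dtypes recorded by the converter.
for detail in input_details:
    interpreter.set_tensor(detail["index"],
                           np.zeros(detail["shape"], dtype=detail["dtype"]))
interpreter.invoke()

start_logits = interpreter.get_tensor(output_details[0]["index"])
end_logits = interpreter.get_tensor(output_details[1]["index"])
print(start_logits.shape, end_logits.shape)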
def run_coco(args):
    print("Command: ", args.command)
    print("Model: ", args.model)
    print("Dataset: ", args.dataset)
    print("Year: ", args.year)
    print("Logs: ", args.logs)
    print("Auto Download: ", args.download)

    ############################################################
    # Configurations
    ############################################################

    if args.deterministic:
        tf.config.threading.set_inter_op_parallelism_threads(1)
        tf.config.threading.set_intra_op_parallelism_threads(1)
        tf.reset_default_graph()
        SEED = 0
        os.environ['PYTHONHASHSEED'] = str(SEED)
        os.environ['TF_DETERMINISTIC_OPS'] = '1'
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

    is_master = True
    hvd = None

    if args.gpus < 0:
        config = tf.ConfigProto(device_count={'GPU': 0})
        K.set_session(tf.Session(config=config))
        print('running on cpu')

    if args.using_horovod and args.command == "train":
        if args.device in ['HPU']:
            from TensorFlow.common.horovod_helpers import hvd_init, Framework
            hvd = hvd_init(framework=Framework.KERAS)
        else:
            import horovod.tensorflow.keras as hvd
            hvd.init()
            confighorovod = tf.ConfigProto()
            confighorovod.gpu_options.visible_device_list = str(hvd.local_rank())
            K.set_session(tf.Session(config=confighorovod))
        is_master = hvd.local_rank() == 0
        if not is_master:
            tf.get_logger().setLevel(tf.logging.FATAL)
    elif args.using_horovod and args.command == "evaluate":
        if args.device in ['HPU']:
            from TensorFlow.common.horovod_helpers import hvd_init, Framework
            hvd = hvd_init(framework=Framework.KERAS)
        else:
            confighorovod = tf.ConfigProto()
            confighorovod.gpu_options.visible_device_list = str(args.gpus)
            K.set_session(tf.Session(config=confighorovod))
        is_master = hvd.local_rank() == 0
        if not is_master:
            tf.get_logger().setLevel(tf.logging.FATAL)

    if args.device in ['HPU']:
        from TensorFlow.common.library_loader import load_habana_module
        load_habana_module()

    dev_str = f'/device:{args.device}:0'
    print(f'Selected device: {dev_str}')

    class CocoConfig(Config):
        """Configuration for training on MS COCO.

        Derives from the base Config class and overrides values specific
        to the COCO dataset.
        """
        # Give the configuration a recognizable name
        NAME = "coco"

        if hvd:
            _GPU_COUNT = hvd.size()
            GPU_COUNT = 1  # fix batch size as IMAGES_PER_GPU
        else:
            _GPU_COUNT = abs(args.gpus)
            GPU_COUNT = _GPU_COUNT

        if args.fchollet_fix:
            BGR = True
            ## mean pixel is in RGB format to match original settings
            MEAN_PIXEL = [123.68, 116.78, 103.94]
        elif args.BGR or 'kapp_' in args.backbone:
            ## BGR/caffe format
            BGR = True
            MEAN_PIXEL = [103.94, 116.78, 123.68]
        else:
            ## default RGB mode
            BGR = False
            MEAN_PIXEL = [123.68, 116.78, 103.94]

        GT_NOISE_STD = 0
        QUICK_TEST = args.quick_test

        ## these can be used to run with dynamic shapes
        BIN_PADDING = None  # 8
        IMAGE_RESIZE_MODE = "square"  # "pad64"
        DYNAMIC_ANCHORS = False  # True
        PRESET_LAYERS_TRAIN = args.train_layers

        if args.dynamic:
            IMAGE_RESIZE_MODE = "pad64"
            DYNAMIC_ANCHORS = True

        if BIN_PADDING or IMAGE_RESIZE_MODE in ['no_pad', 'pad64'] or QUICK_TEST:
            IMAGES_PER_GPU = 1
        else:
            IMAGES_PER_GPU = 4

        # Override if specified.
        if args.images_per_gpu is not None:
            IMAGES_PER_GPU = args.images_per_gpu

        # Always evaluate using the same number of samples regardless of the number of gpus.
        VAL_SAMPLES = 1600
        if QUICK_TEST:
            VAL_SAMPLES = 1

        _BATCH_SIZE = _GPU_COUNT * IMAGES_PER_GPU
        VALIDATION_STEPS = None  # VAL_SAMPLES//_BATCH_SIZE
        if args.validation_steps is not None:
            VALIDATION_STEPS = args.validation_steps

        # lr is scaled with respect to the actual number of gpus
        # (see the worked example after run_coco).
        LEARNING_RATE = 0.02 * (_BATCH_SIZE / 16)**0.5
        DETERMINISTIC = args.deterministic
        if args.deterministic:
            LEARNING_RATE = 0

        STEPS_PER_EPOCH = None  # 5000
        PYRAMID_ROI_CUSTOM_OP = int(args.custom_roi)
        LEARNING_MOMENTUM_CONST = True if args.momentum_const == '1' else False
        COMBINED_NMS_OP = True if args.combined_nms == '1' else False
        USE_VALID_BOXES = args.use_valid_boxes

        if args.xl_inputs:
            TRAIN_ROIS_PER_IMAGE = 512
            ROI_POSITIVE_RATIO = 0.25
            IMAGE_MIN_DIM_TRAIN = [640, 672, 704, 736, 768, 800, 832]
            IMAGE_MIN_DIM_VAL = 832
            IMAGE_MAX_DIM = 1344
        else:
            TRAIN_ROIS_PER_IMAGE = 256
            ROI_POSITIVE_RATIO = 0.33
            IMAGE_MIN_DIM_TRAIN = [640, 672, 704, 736, 768, 800]
            IMAGE_MIN_DIM_VAL = 800
            IMAGE_MAX_DIM = 1024

        if QUICK_TEST:
            TRAIN_ROIS_PER_IMAGE = 20
            IMAGE_MAX_DIM = 512

        if args.clip_norm > 0:
            GRADIENT_CLIP_NORM = args.clip_norm
        else:
            GRADIENT_CLIP_NORM = None

        # Number of classes (including background)
        NUM_CLASSES = 1 + 80  # COCO has 80 classes

        BACKBONE = args.backbone
        RPN_ONLY = args.rpn_only

        ## schedule settings
        WARMUP = 1000
        if args.warmup_steps is not None:
            WARMUP = args.warmup_steps
        if QUICK_TEST:
            WARMUP = 1

        if RPN_ONLY:
            DROPS = [40, 60]
            TOT_EPOCHS = 70
        else:
            if args.short:
                ## short regime
                DROPS = [77, 154]
                TOT_EPOCHS = 175
            else:
                ## long regime
                DROPS = [210, 280]
                TOT_EPOCHS = 300

        if args.epochs is not None:
            TOT_EPOCHS = args.epochs
        if args.steps_per_epoch is not None:
            STEPS_PER_EPOCH = args.steps_per_epoch

        if STEPS_PER_EPOCH is not None:
            _SCHEDUAL_RATIO = max(STEPS_PER_EPOCH // 1000, 1)
        else:
            _SCHEDUAL_RATIO = max((117280 // _BATCH_SIZE) // 1000, 1)
        for i, v in enumerate(DROPS):
            DROPS[i] = int(v / _SCHEDUAL_RATIO + 0.5)
        del i
        del v
        if args.epochs is None:
            TOT_EPOCHS = int(TOT_EPOCHS / _SCHEDUAL_RATIO + 0.5)

    class InferenceConfig(CocoConfig):
        # Set batch size to 1 since we'll be running inference on
        # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
        GPU_COUNT = 1
        IMAGES_PER_GPU = 1
        DETECTION_MIN_CONFIDENCE = 0.001

    if args.command == "train":
        config = CocoConfig()
        mode = "training"
    else:
        config = InferenceConfig()
        mode = "inference"

    with tf.device("/device:CPU:0"):
        model = modellib.MaskRCNN(dev_str, mode=mode, config=config,
                                  model_dir=args.logs, hvd=hvd)

    exclude = None
    # Select weights file to load
    if args.model.lower() == "coco":
        model_path = COCO_MODEL_PATH
    elif args.model.lower() == "last":
        # Find last trained weights
        model_path = model.find_last()
    elif args.model.lower() == "imagenet":
        # Start from ImageNet trained weights
        with tf.device(dev_str):
            model_path = model.get_imagenet_weights()
    else:
        model_path = args.model

    if 'r101_imagenet_init.h5' in args.model:
        exclude = r"(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)|(anchors.*)|(mask\_.*)|"

    # Load weights
    if is_master:
        config.display()
        model.keras_model.summary()
        print("Loading weights", model_path)
    if 'keras' not in args.model:
        # keras backbone weights are automatically loaded during build
        with tf.device(dev_str):
            model.load_weights(model_path, by_name=True, exclude=exclude,
                               resume=args.resume, verbose=is_master)

    # Train or evaluate
    if args.command == "train":
        # Training dataset. Use the training set and 35K from the
        # validation set, as in the Mask RCNN paper.
        num_shards = 1
        shard_id = 0
        if hvd:
            num_shards = hvd.local_size()
            shard_id = hvd.local_rank()
        dataset_train = CocoDataset()
        dataset_train.load_coco(args.dataset, "train", year=args.year,
                                auto_download=args.download,
                                num_shards=num_shards, shard_id=shard_id)
        if args.year in '2014':
            dataset_train.load_coco(args.dataset, "valminusminival",
                                    year=args.year,
                                    auto_download=args.download,
                                    num_shards=num_shards, shard_id=shard_id)
        dataset_train.prepare()

        # Validation dataset
        dataset_val = CocoDataset()
        val_type = "val" if args.year in '2017' else "minival"
        dataset_val.load_coco(args.dataset, val_type, year=args.year,
                              auto_download=args.download,
                              num_shards=num_shards, shard_id=shard_id,
                              limit=config.VAL_SAMPLES)
        dataset_val.prepare()

        augmentation = iaa.Fliplr(0.5)

        callbacks = []  ## add callbacks here
        schedule = COCOScheduler(config.LEARNING_RATE,
                                 warmup_steps=config.WARMUP,
                                 gamma=0.1,
                                 drops=config.DROPS,
                                 verbose=is_master)
        callbacks += [schedule]
        external_callbacks = getattr(args, 'external_callbacks', None)
        if external_callbacks is not None:
            callbacks.extend(external_callbacks)

        if is_master:
            print("Training Resnet stage 3+nobn")
        with tf.device("/device:CPU:0"):
            model.train(dev_str, dataset_train, dataset_val,
                        learning_rate=config.LEARNING_RATE,
                        epochs=config.TOT_EPOCHS,
                        layers=config.PRESET_LAYERS_TRAIN,
                        augmentation=augmentation,
                        custom_callbacks=callbacks,
                        dump_tf_timeline=args.dump_tf_timeline,
                        disable_validation=args.disable_validation)

    elif args.command == "evaluate":
        # Validation dataset
        dataset_val = CocoDataset()
        val_type = "val" if args.year in '2017' else "minival"
        coco = dataset_val.load_coco(args.dataset, val_type, year=args.year,
                                     return_coco=True,
                                     auto_download=args.download,
                                     limit=args.limit if args.limit > 0 else None)
        dataset_val.prepare()
        print("Running COCO evaluation on {} images.".format(
            len(dataset_val.image_info)))
        evaluate_coco(model, dataset_val, coco)
    else:
        print("'{}' is not recognized. "
              "Use 'train' or 'evaluate'".format(args.command))
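# A small worked example (not part of run_coco) of the learning-rate scaling
# rule used in CocoConfig above: LEARNING_RATE = 0.02 * sqrt(batch_size / 16).
def scaled_lr(gpu_count, images_per_gpu, base_lr=0.02, base_batch=16):
    """Square-root learning-rate scaling, mirroring CocoConfig.LEARNING_RATE."""
    batch_size = gpu_count * images_per_gpu
    return base_lr * (batch_size / base_batch) ** 0.5

print(scaled_lr(1, 4))  # batch 4  -> 0.01
print(scaled_lr(8, 4))  # batch 32 -> ~0.0283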
def main():
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.add_argument('--dataset_dir', type=str, default=config.DEFAULT_DATASET_DIR)
    parser.add_argument('--dropout_rate', type=float, default=0.0)
    parser.add_argument('--optimizer', type=str, default='sgd',
                        choices=['sgd', 'adam', 'rmsprop'])
    parser.add_argument('--epsilon', type=float, default=1e-1)
    parser.add_argument('--label_smoothing', action='store_true')
    parser.add_argument('--use_lookahead', action='store_true')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--iter_size', type=int, default=1)
    parser.add_argument('--lr_sched', type=str, default='steps',
                        choices=['linear', 'exp', 'steps'])
    parser.add_argument('--initial_lr', type=float, default=5e-2)
    parser.add_argument('--final_lr', type=float, default=1e-5)
    parser.add_argument('--weight_decay', type=float, default=1e-4)
    parser.add_argument('--epochs', type=int, default=90,
                        help='total number of epochs for training [1]')
    parser.add_argument('--model', type=str, default='densenet121')
    parser.add_argument('--run_on_hpu', type=str, default='True')
    parser.add_argument('--bfloat16', type=str, default='True')
    parser.add_argument('--log_device_placement', action='store_true')
    parser.add_argument('--skip_eval', action='store_true')
    parser.add_argument('--measure_perf', action='store_true')
    parser.add_argument('--extract_tensors',
                        help="--extract_tensors <Path to dump extracted tensors>.",
                        type=str)
    parser.add_argument('--only_eval',
                        help="--only_eval <Path to checkpoint>. Performs model evaluation only.",
                        type=str)
    parser.add_argument('--iterations',
                        help="Sets number of iterations per epoch",
                        type=int)
    parser.add_argument('--train_subset', type=str, default='train')
    parser.add_argument('--val_subset', type=str, default='validation')
    args = parser.parse_args()

    args.bfloat16 = eval(args.bfloat16)
    args.run_on_hpu = eval(args.run_on_hpu)

    if args.skip_eval or args.only_eval == None:
        tf.keras.backend.set_learning_phase(True)

    if args.run_on_hpu:
        log_info_devices = load_habana_module()
        print(f"Devices:\n {log_info_devices}")
    else:
        config_keras_backend_for_gpu()

    tf.debugging.set_log_device_placement(args.log_device_placement)

    if args.use_lookahead and args.iter_size > 1:
        raise ValueError('cannot set both use_lookahead and iter_size')

    os.makedirs(config.SAVE_DIR, exist_ok=True)
    os.makedirs(config.LOG_DIR, exist_ok=True)

    print("model: " + str(args.model))
    print("dropout_rate: " + str(args.dropout_rate))
    print("optimizer: " + str(args.optimizer))
    print("epsilon: " + str(args.epsilon))
    print("label_smoothing: " + str(args.label_smoothing))
    print("use_lookahead: " + str(args.use_lookahead))
    print("batch_size: " + str(args.batch_size))
    print("iter_size: " + str(args.iter_size))
    print("lr_sched: " + str(args.lr_sched))
    print("initial_lr: " + str(args.initial_lr))
    print("final_lr: " + str(args.final_lr))
    print("weight_decay: " + str(args.weight_decay))
    print("epochs: " + str(args.epochs))
    print("iterations: " + str(args.iterations))
    print("dataset_dir: " + str(args.dataset_dir))
    print("skip_eval: " + str(args.skip_eval))
    print("only_eval: " + str(args.only_eval))
    print("run_on_hpu: " + str(args.run_on_hpu))
    print("bfloat16: " + str(args.bfloat16))
    print("train subset: " + str(args.train_subset))
    print("val subset: " + str(args.val_subset))

    train(args.model, args.dropout_rate, args.optimizer, args.epsilon,
          args.label_smoothing, args.use_lookahead, args.batch_size,
          args.iter_size, args.lr_sched, args.initial_lr, args.final_lr,
          args.weight_decay, args.epochs, args.iterations, args.dataset_dir,
          args.skip_eval, args.only_eval, args.run_on_hpu, args.measure_perf,
          args.extract_tensors, args.bfloat16, args.train_subset,
          args.val_subset)
    clear_keras_session()
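# The script above converts the string-valued --run_on_hpu and --bfloat16 flags
# with eval(); a hedged alternative sketch (not used by the script) that accepts
# the same 'True'/'False' spellings without evaluating arbitrary input:
def str2bool(value: str) -> bool:
    """Map common true/false spellings to a bool; reject anything else."""
    v = value.strip().lower()
    if v in ("true", "1", "yes"):
        return True
    if v in ("false", "0", "no"):
        return False
    raise ValueError(f"Expected a boolean string, got {value!r}")

# Usage (hypothetical): parser.add_argument('--run_on_hpu', type=str2bool, default=True)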