Code example #1
def main(_):
    log_info_devices = load_habana_module()
    print(f"Devices:\n {log_info_devices}")

    model_helpers.apply_clean(flags.FLAGS)
    with logger.benchmark_context(flags.FLAGS):
        stats = run(flags.FLAGS)
    logging.info('Run stats:\n%s', stats)
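
Code example #1 defines an absl-style main(_); the launch stub is not part of the excerpt. A minimal, hypothetical launcher (assuming the script uses absl.app, as the flags.FLAGS and logging calls above suggest) could look like this:

# Hypothetical launcher sketch - not part of the original excerpt.
# Assumes absl.app / absl.flags, which the flags.FLAGS usage above suggests.
from absl import app

if __name__ == '__main__':
    app.run(main)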
Code example #2
    def _horovod_init(framework):
        size = comm_size()
        rank = comm_rank()

        hcl_config = get_hcl_config()
        hcl_type = get_hcl_type(hcl_config)

        if hcl_type != "HLS1-H":
            # All env variables should be set before loading Habana modules
            if is_hierarchical():
                os.environ["HLS1_MODULE_ID"] = str(comm_local_rank())
                os.environ["ID"] = str(comm_local_rank())
            else:
                if size > 1:
                    os.environ["HLS1_MODULE_ID"] = str(get_hw_module_id(rank))
                    os.environ["ID"] = str(get_hw_module_id(rank))

        # Make sure every rank logs to a different file.
        # Only important on the same machine - so in pretty much every scenario.
        if size > 1:
            rank_prefix = "rank_{}_".format(rank)
            HorovodHelpers._set_env_prefix("TF_RANK_PREFIX", rank_prefix,
                                           False)
            HorovodHelpers._set_env_prefix("HBN_TF_GRAPH_PREFIX", rank_prefix,
                                           False)
            HorovodHelpers._set_env_prefix("TF_DUMP_GRAPH_PREFIX", rank_prefix,
                                           True)
            HorovodHelpers._hvd_rank_prefix = rank_prefix

        # Init synapse logger (if required)
        synapse_logger_init()
        # Init TF Module (for CPU Allocator)
        load_habana_module()
        if framework == Framework.TENSORFLOW:
            import horovod.tensorflow as hvd
        elif framework == Framework.KERAS:
            import horovod.tensorflow.keras as hvd
        else:
            raise Exception(
                "Specified framework: {} is not supported by horovod_helpers".
                format(framework))

        hvd.init()
        assert rank == hvd.rank(), \
            "There is a possible rank mismatch between MPI and Horovod"
        HorovodHelpers._hvd = hvd
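
_horovod_init above is an internal helper. As a usage sketch of the public wrapper (the hvd_init and Framework names are taken from the import shown in code example #6; the rest is an assumption, not the library's documented API):

# Usage sketch, assuming the module layout shown in code example #6.
from TensorFlow.common.horovod_helpers import hvd_init, Framework

hvd = hvd_init(framework=Framework.KERAS)  # loads the Habana module and calls hvd.init()
print("rank {} of {}".format(hvd.rank(), hvd.size()))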
Code example #3
def run_imagenet(flags_obj):
    """Run ResNet ImageNet training and eval loop.

  Args:
    flags_obj: An object containing parsed flag values.

  Returns:
    Dict of results of the run.  Contains the keys `eval_results` and
      `train_hooks`. `eval_results` contains accuracy (top_1) and
      accuracy_top_5. `train_hooks` is a list the instances of hooks used during
      training.
  """
    input_function = (flags_obj.use_synthetic_data and get_synth_input_fn(
        flags_core.get_tf_dtype(flags_obj)) or input_fn)

    if flags.FLAGS.is_mlperf_enabled:
        tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    else:
        tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

    if flags_obj.use_horovod:
        assert flags_obj.no_hpu == False, "Horovod without HPU is not supported in helpers."
        hvd_init()
    else:
        synapse_logger_init()

    if flags.FLAGS.is_mlperf_enabled:
        resnet_run_loop.init_mllog_mlloger()

    if not flags_obj.no_hpu:
        log_info_devices = load_habana_module()
        print(f"Devices:\n {log_info_devices}")

    result = resnet_run_loop.resnet_main(
        flags_obj,
        imagenet_model_fn,
        input_function,
        DATASET_NAME,
        shape=[DEFAULT_IMAGE_SIZE, DEFAULT_IMAGE_SIZE, NUM_CHANNELS])

    return result
Code example #4
import tensorflow as tf

from TensorFlow.common.library_loader import load_habana_module

tf.compat.v1.disable_eager_execution()

load_habana_module()

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(10),
])

loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

model.fit(x_train, y_train, epochs=5, batch_size=128)

model.evaluate(x_test, y_test)
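
Apart from the load_habana_module() call, the script above is plain tf.keras. A small follow-up sketch (my addition, not part of the original example) that reads off predictions from the trained model:

# Follow-up sketch, not part of the original example: predict digits for a few
# test images with the standard Keras predict() call.
import numpy as np

logits = model.predict(x_test[:5])
print(np.argmax(logits, axis=1), y_test[:5])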
Code example #5
    if FLAGS.export_dir:
        tf.gfile.MakeDirs(FLAGS.export_dir)
        squad_serving_input_fn = (build_squad_serving_input_fn(
            FLAGS.max_seq_length))
        tf.logging.info("Starting to export model.")
        subfolder = estimator.export_saved_model(
            export_dir_base=os.path.join(FLAGS.export_dir, "saved_model"),
            serving_input_receiver_fn=squad_serving_input_fn)

        tf.logging.info("Starting to export TFLite.")
        converter = tf.lite.TFLiteConverter.from_saved_model(
            subfolder,
            input_arrays=["input_ids", "input_mask", "segment_ids"],
            output_arrays=["start_logits", "end_logits"])
        float_model = converter.convert()
        tflite_file = os.path.join(FLAGS.export_dir, "albert_model.tflite")
        with tf.gfile.GFile(tflite_file, "wb") as f:
            f.write(float_model)


if __name__ == "__main__":
    log_info_devices = load_habana_module()
    tf.logging.info("Devices:\n%s", log_info_devices)

    flags.mark_flag_as_required("spm_model_file")
    flags.mark_flag_as_required("albert_config_file")
    flags.mark_flag_as_required("output_dir")

    tf.app.run()
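
The export block above writes albert_model.tflite. A hypothetical follow-up sketch (my addition, using the standard tf.lite.Interpreter API) for sanity-checking the exported file:

# Hypothetical follow-up sketch - not part of the original script. Loads the
# exported TFLite file and prints its input tensor details.
interpreter = tf.lite.Interpreter(model_path=tflite_file)
interpreter.allocate_tensors()
tf.logging.info("TFLite inputs: %s", interpreter.get_input_details())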
Code example #6
File: coco.py  Project: rfdickerson/Model-References
def run_coco(args):
    print("Command: ", args.command)
    print("Model: ", args.model)
    print("Dataset: ", args.dataset)
    print("Year: ", args.year)
    print("Logs: ", args.logs)
    print("Auto Download: ", args.download)

    ############################################################
    #  Configurations
    ############################################################
    if args.deterministic:
        tf.config.threading.set_inter_op_parallelism_threads(1)
        tf.config.threading.set_intra_op_parallelism_threads(1)
        tf.reset_default_graph()
        SEED = 0
        os.environ['PYTHONHASHSEED'] = str(SEED)
        os.environ['TF_DETERMINISTIC_OPS'] = '1'
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

    is_master = True
    hvd = None

    if args.gpus < 0:
        config = tf.ConfigProto(device_count={'GPU': 0})
        K.set_session(tf.Session(config=config))
        print('running on cpu')

    if args.using_horovod and args.command == "train":
        if args.device in ['HPU']:
            from TensorFlow.common.horovod_helpers import hvd_init, Framework
            hvd = hvd_init(framework=Framework.KERAS)
        else:
            import horovod.tensorflow.keras as hvd
            hvd.init()
            confighorovod = tf.ConfigProto()
            confighorovod.gpu_options.visible_device_list = str(
                hvd.local_rank())
            K.set_session(tf.Session(config=confighorovod))
        is_master = hvd.local_rank() == 0
        if not is_master:
            tf.get_logger().setLevel(tf.logging.FATAL)

    elif args.using_horovod and args.command == "evaluate":
        if args.device in ['HPU']:
            from TensorFlow.common.horovod_helpers import hvd_init, Framework
            hvd = hvd_init(framework=Framework.KERAS)
        else:
            confighorovod = tf.ConfigProto()
            confighorovod.gpu_options.visible_device_list = str(args.gpus)
            K.set_session(tf.Session(config=confighorovod))
        is_master = hvd.local_rank() == 0
        if not is_master:
            tf.get_logger().setLevel(tf.logging.FATAL)

    if args.device in ['HPU']:
        from TensorFlow.common.library_loader import load_habana_module
        load_habana_module()

    dev_str = f'/device:{args.device}:0'
    print(f'Selected device: {dev_str}')

    class CocoConfig(Config):
        """Configuration for training on MS COCO.
        Derives from the base Config class and overrides values specific
        to the COCO dataset.
        """
        # Give the configuration a recognizable name
        NAME = "coco"
        if hvd:
            _GPU_COUNT = hvd.size()
            GPU_COUNT = 1  # fix batch size as IMAGES_PER_GPU
        else:
            _GPU_COUNT = abs(args.gpus)
            GPU_COUNT = _GPU_COUNT

        if args.fchollet_fix:
            BGR = True
            ## mean pixel is in RGB format to match original settings
            MEAN_PIXEL = [123.68, 116.78, 103.94]
        elif args.BGR or 'kapp_' in args.backbone:
            ## BGR/caffe format
            BGR = True
            MEAN_PIXEL = [103.94, 116.78, 123.68]
        else:
            ## default RGB mode
            BGR = False
            MEAN_PIXEL = [123.68, 116.78, 103.94]

        GT_NOISE_STD = 0

        QUICK_TEST = args.quick_test
        ## these can be used to run with dynamic shapes
        BIN_PADDING = None  # 8
        IMAGE_RESIZE_MODE = "square"  # "pad64"
        DYNAMIC_ANCHORS = False  # True
        PRESET_LAYERS_TRAIN = args.train_layers
        if args.dynamic:
            IMAGE_RESIZE_MODE = "pad64"
            DYNAMIC_ANCHORS = True

        if BIN_PADDING or IMAGE_RESIZE_MODE in ['no_pad', 'pad64'] or QUICK_TEST:
            IMAGES_PER_GPU = 1
        else:
            IMAGES_PER_GPU = 4
        # Override if specified.
        if args.images_per_gpu is not None:
            IMAGES_PER_GPU = args.images_per_gpu
        # always evaluate using same number of samples regardless of number of gpus
        VAL_SAMPLES = 1600
        if QUICK_TEST:
            VAL_SAMPLES = 1
        _BATCH_SIZE = _GPU_COUNT * IMAGES_PER_GPU
        VALIDATION_STEPS = None  # VAL_SAMPLES//_BATCH_SIZE
        if args.validation_steps is not None:
            VALIDATION_STEPS = args.validation_steps
        # lr is scaled with respect to the actual number of gpus
        LEARNING_RATE = 0.02 * (_BATCH_SIZE / 16)**0.5
        DETERMINISTIC = args.deterministic
        if args.deterministic:
            LEARNING_RATE = 0
        STEPS_PER_EPOCH = None  # 5000
        PYRAMID_ROI_CUSTOM_OP = int(args.custom_roi)
        LEARNING_MOMENTUM_CONST = True if args.momentum_const == '1' else False
        COMBINED_NMS_OP = True if args.combined_nms == '1' else False
        USE_VALID_BOXES = args.use_valid_boxes
        if args.xl_inputs:
            TRAIN_ROIS_PER_IMAGE = 512
            ROI_POSITIVE_RATIO = 0.25
            IMAGE_MIN_DIM_TRAIN = [640, 672, 704, 736, 768, 800, 832]
            IMAGE_MIN_DIM_VAL = 832
            IMAGE_MAX_DIM = 1344
        else:
            TRAIN_ROIS_PER_IMAGE = 256
            ROI_POSITIVE_RATIO = 0.33
            IMAGE_MIN_DIM_TRAIN = [640, 672, 704, 736, 768, 800]
            IMAGE_MIN_DIM_VAL = 800
            IMAGE_MAX_DIM = 1024
        if QUICK_TEST:
            TRAIN_ROIS_PER_IMAGE = 20
            IMAGE_MAX_DIM = 512
        if args.clip_norm > 0:
            GRADIENT_CLIP_NORM = args.clip_norm
        else:
            GRADIENT_CLIP_NORM = None
        # Number of classes (including background)
        NUM_CLASSES = 1 + 80  # COCO has 80 classes
        BACKBONE = args.backbone
        RPN_ONLY = args.rpn_only
        ### schedule settings
        WARMUP = 1000
        if args.warmup_steps is not None:
            WARMUP = args.warmup_steps
        if QUICK_TEST:
            WARMUP = 1
        if RPN_ONLY:
            DROPS = [40, 60]
            TOT_EPOCHS = 70
        else:
            if args.short:  ## short regime
                DROPS = [77, 154]
                TOT_EPOCHS = 175
            else:  ## long regime
                DROPS = [210, 280]
                TOT_EPOCHS = 300

        if args.epochs is not None:
            TOT_EPOCHS = args.epochs

        if args.steps_per_epoch is not None:
            STEPS_PER_EPOCH = args.steps_per_epoch

        if STEPS_PER_EPOCH is not None:
            _SCHEDUAL_RATIO = max(STEPS_PER_EPOCH // 1000, 1)
        else:
            _SCHEDUAL_RATIO = max((117280 // _BATCH_SIZE) // 1000, 1)
        for i, v in enumerate(DROPS):
            DROPS[i] = int(v / _SCHEDUAL_RATIO + 0.5)
        del i
        del v
        if args.epochs is None:
            TOT_EPOCHS = int(TOT_EPOCHS / _SCHEDUAL_RATIO + 0.5)

    class InferenceConfig(CocoConfig):
        # Set batch size to 1 since we'll be running inference on
        # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
        GPU_COUNT = 1
        IMAGES_PER_GPU = 1
        DETECTION_MIN_CONFIDENCE = 0.001

    if args.command == "train":
        config = CocoConfig()
        mode = "training"
    else:
        config = InferenceConfig()
        mode = "inference"

    with tf.device("/device:CPU:0"):
        model = modellib.MaskRCNN(dev_str,
                                  mode=mode,
                                  config=config,
                                  model_dir=args.logs,
                                  hvd=hvd)

    exclude = None
    # Select weights file to load
    if args.model.lower() == "coco":
        model_path = COCO_MODEL_PATH
    elif args.model.lower() == "last":
        # Find last trained weights
        model_path = model.find_last()
    elif args.model.lower() == "imagenet":
        # Start from ImageNet trained weights
        with tf.device(dev_str):
            model_path = model.get_imagenet_weights()
    else:
        model_path = args.model
        if 'r101_imagenet_init.h5' in args.model:
            exclude = r"(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)|(anchors.*)|(mask\_.*)|"

    # Load weights
    if is_master:
        config.display()
        model.keras_model.summary()
        print("Loading weights", model_path)
    if 'keras' not in args.model:
        # keras backbone weights are automatically loaded during build
        with tf.device(dev_str):
            model.load_weights(model_path,
                               by_name=True,
                               exclude=exclude,
                               resume=args.resume,
                               verbose=is_master)
    # Train or evaluate
    if args.command == "train":
        # Training dataset. Use the training set and 35K from the
        # validation set, as in the Mask RCNN paper.
        num_shards = 1
        shard_id = 0
        if hvd:
            num_shards = hvd.local_size()
            shard_id = hvd.local_rank()
        dataset_train = CocoDataset()
        dataset_train.load_coco(args.dataset,
                                "train",
                                year=args.year,
                                auto_download=args.download,
                                num_shards=num_shards,
                                shard_id=shard_id)

        if args.year in '2014':
            dataset_train.load_coco(args.dataset,
                                    "valminusminival",
                                    year=args.year,
                                    auto_download=args.download,
                                    num_shards=num_shards,
                                    shard_id=shard_id)

        dataset_train.prepare()
        # Validation dataset
        dataset_val = CocoDataset()
        val_type = "val" if args.year in '2017' else "minival"
        dataset_val.load_coco(args.dataset,
                              val_type,
                              year=args.year,
                              auto_download=args.download,
                              num_shards=num_shards,
                              shard_id=shard_id,
                              limit=config.VAL_SAMPLES)
        dataset_val.prepare()

        augmentation = iaa.Fliplr(0.5)
        callbacks = []

        ## add callbacks here
        schedule = COCOScheduler(config.LEARNING_RATE,
                                 warmup_steps=config.WARMUP,
                                 gamma=0.1,
                                 drops=config.DROPS,
                                 verbose=is_master)
        callbacks += [schedule]

        external_callbacks = getattr(args, 'external_callbacks', None)
        if external_callbacks is not None:
            callbacks.extend(external_callbacks)

        if is_master:
            print("Training Resnet stage 3+nobn")
        with tf.device("/device:CPU:0"):
            model.train(dev_str,
                        dataset_train,
                        dataset_val,
                        learning_rate=config.LEARNING_RATE,
                        epochs=config.TOT_EPOCHS,
                        layers=config.PRESET_LAYERS_TRAIN,
                        augmentation=augmentation,
                        custom_callbacks=callbacks,
                        dump_tf_timeline=args.dump_tf_timeline,
                        disable_validation=args.disable_validation)

    elif args.command == "evaluate":
        # Validation dataset
        dataset_val = CocoDataset()
        val_type = "val" if args.year in '2017' else "minival"
        coco = dataset_val.load_coco(
            args.dataset,
            val_type,
            year=args.year,
            return_coco=True,
            auto_download=args.download,
            limit=args.limit if args.limit > 0 else None)
        dataset_val.prepare()
        print("Running COCO evaluation on {} images.".format(
            len(dataset_val.image_info)))
        evaluate_coco(model, dataset_val, coco)
    else:
        print("'{}' is not recognized. "
              "Use 'train' or 'evaluate'".format(args.command))
Code example #7
def main():
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.add_argument('--dataset_dir',
                        type=str,
                        default=config.DEFAULT_DATASET_DIR)
    parser.add_argument('--dropout_rate', type=float, default=0.0)
    parser.add_argument('--optimizer',
                        type=str,
                        default='sgd',
                        choices=['sgd', 'adam', 'rmsprop'])
    parser.add_argument('--epsilon', type=float, default=1e-1)
    parser.add_argument('--label_smoothing', action='store_true')
    parser.add_argument('--use_lookahead', action='store_true')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--iter_size', type=int, default=1)
    parser.add_argument('--lr_sched',
                        type=str,
                        default='steps',
                        choices=['linear', 'exp', 'steps'])
    parser.add_argument('--initial_lr', type=float, default=5e-2)
    parser.add_argument('--final_lr', type=float, default=1e-5)
    parser.add_argument('--weight_decay', type=float, default=1e-4)
    parser.add_argument('--epochs',
                        type=int,
                        default=90,
                        help='total number of epochs for training [1]')
    parser.add_argument('--model', type=str, default='densenet121')
    parser.add_argument('--run_on_hpu', type=str, default='True')
    parser.add_argument('--bfloat16', type=str, default='True')
    parser.add_argument('--log_device_placement', action='store_true')
    parser.add_argument('--skip_eval', action='store_true')
    parser.add_argument('--measure_perf', action='store_true')
    parser.add_argument(
        '--extract_tensors',
        help="--extract_tensors <Path to dump extracted tensors>.",
        type=str)
    parser.add_argument(
        '--only_eval',
        help=
        "--only_eval <Path to checkpoint>. Performs model evaluation only.",
        type=str)
    parser.add_argument('--iterations',
                        help="Sets number of iterations per epoch",
                        type=int)
    parser.add_argument('--train_subset', type=str, default='train')
    parser.add_argument('--val_subset', type=str, default='validation')
    args = parser.parse_args()

    args.bfloat16 = eval(args.bfloat16)
    args.run_on_hpu = eval(args.run_on_hpu)

    if args.skip_eval or args.only_eval is None:
        tf.keras.backend.set_learning_phase(True)

    if args.run_on_hpu:
        log_info_devices = load_habana_module()
        print(f"Devices:\n {log_info_devices}")
    else:
        config_keras_backend_for_gpu()
    tf.debugging.set_log_device_placement(args.log_device_placement)

    if args.use_lookahead and args.iter_size > 1:
        raise ValueError('cannot set both use_lookahead and iter_size')

    os.makedirs(config.SAVE_DIR, exist_ok=True)
    os.makedirs(config.LOG_DIR, exist_ok=True)

    print("model:           " + str(args.model))
    print("dropout_rate:    " + str(args.dropout_rate))
    print("optimizer:       " + str(args.optimizer))
    print("epsilon:         " + str(args.epsilon))
    print("label_smoothing: " + str(args.label_smoothing))
    print("use_lookahead:   " + str(args.use_lookahead))
    print("batch_size:      " + str(args.batch_size))
    print("iter_size:       " + str(args.iter_size))
    print("lr_sched:        " + str(args.lr_sched))
    print("initial_lr:      " + str(args.initial_lr))
    print("final_lr:        " + str(args.final_lr))
    print("weight_decay:    " + str(args.weight_decay))
    print("epochs:          " + str(args.epochs))
    print("iterations:      " + str(args.iterations))
    print("dataset_dir:     " + str(args.dataset_dir))
    print("skip_eval:       " + str(args.skip_eval))
    print("only_eval:       " + str(args.only_eval))
    print("run_on_hpu:      " + str(args.run_on_hpu))
    print("bfloat16:        " + str(args.bfloat16))
    print("train subset:    " + str(args.train_subset))
    print("val subset:      " + str(args.val_subset))

    train(args.model, args.dropout_rate, args.optimizer, args.epsilon,
          args.label_smoothing, args.use_lookahead, args.batch_size,
          args.iter_size, args.lr_sched, args.initial_lr, args.final_lr,
          args.weight_decay, args.epochs, args.iterations, args.dataset_dir,
          args.skip_eval, args.only_eval, args.run_on_hpu, args.measure_perf,
          args.extract_tensors, args.bfloat16, args.train_subset,
          args.val_subset)
    clear_keras_session()
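
The example above converts the string flags --run_on_hpu / --bfloat16 to booleans with eval(). As a sketch (my addition, not part of the original script), an explicit parser could serve the same purpose:

# Sketch of an explicit boolean-flag parser, an alternative to the eval() calls
# above; not part of the original script.
def parse_bool(value):
    if value.lower() in ('true', '1', 'yes'):
        return True
    if value.lower() in ('false', '0', 'no'):
        return False
    raise ValueError("expected a boolean string, got {!r}".format(value))

# args.bfloat16 = parse_bool(args.bfloat16)
# args.run_on_hpu = parse_bool(args.run_on_hpu)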