Example #1
def apply(x):
    x = layers.Normalization(
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        variance=[
            (0.229 * 255) ** 2,
            (0.224 * 255) ** 2,
            (0.225 * 255) ** 2,
        ],
        name=name + "_prestem_normalization",
    )(x)
    return x
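This fragment is the kind of "prestem" normalization block used inside some Keras application builders: it standardizes inputs that are still in the [0, 255] range using the ImageNet channel means and standard deviations rescaled to pixel units (`layers` refers to `keras.layers`, and `name` is a prefix string supplied by the enclosing builder). A minimal standalone sketch of the same normalization on illustrative data:

import numpy as np
from tensorflow.keras import layers

# ImageNet mean/std expressed in [0, 255] pixel units, as in the fragment above.
norm = layers.Normalization(
    mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
    variance=[(0.229 * 255) ** 2, (0.224 * 255) ** 2, (0.225 * 255) ** 2],
)

images = np.random.randint(0, 256, size=(2, 224, 224, 3)).astype("float32")
standardized = norm(images)
print(standardized.shape)  # (2, 224, 224, 3); channels standardized with ImageNet statistics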
Example #2
def EfficientNetV2(
    width_coefficient,
    depth_coefficient,
    default_size,
    dropout_rate=0.2,
    drop_connect_rate=0.2,
    depth_divisor=8,
    min_depth=8,
    bn_momentum=0.9,
    activation="swish",
    blocks_args="default",
    model_name="efficientnetv2",
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    """Instantiates the EfficientNetV2 architecture using given scaling coefficients.

  Args:
    width_coefficient: float, scaling coefficient for network width.
    depth_coefficient: float, scaling coefficient for network depth.
    default_size: integer, default input image size.
    dropout_rate: float, dropout rate before final classifier layer.
    drop_connect_rate: float, dropout rate at skip connections.
    depth_divisor: integer, a unit of network width.
    min_depth: integer, minimum number of filters.
    bn_momentum: float, momentum parameter for Batch Normalization layers.
    activation: activation function.
    blocks_args: list of dicts, parameters to construct block modules.
    model_name: string, model name.
    include_top: whether to include the fully-connected layer at the top of the
      network.
    weights: one of `None` (random initialization), `"imagenet"` (pre-training
      on ImageNet), or the path to the weights file to be loaded.
    input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) or
      numpy array to use as image input for the model.
    input_shape: optional shape tuple, only to be specified if `include_top` is
      False. It should have exactly 3 input channels.
    pooling: optional pooling mode for feature extraction when `include_top` is
      `False`.
      - `None` means that the output of the model will be the 4D tensor
        output of the last convolutional layer.
      - `"avg"` means that global average pooling will be applied to the
        output of the last convolutional layer, and thus the output of the
        model will be a 2D tensor.
      - `"max"` means that global max pooling will be applied.
    classes: optional number of classes to classify images into, only to be
      specified if `include_top` is True, and if no `weights` argument is
      specified.
    classifier_activation: A string or callable. The activation function to use
      on the `"top"` layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the `"top"` layer.
    include_preprocessing: Boolean, whether to include the preprocessing layer
      (`Rescaling`) at the bottom of the network. Defaults to `True`.

  Returns:
    A `keras.Model` instance.

  Raises:
    ValueError: in case of invalid argument for `weights`,
      or invalid input shape.
    ValueError: if `classifier_activation` is not `"softmax"` or `None` when
      using a pretrained top layer.
  """

    if blocks_args == "default":
        blocks_args = DEFAULT_BLOCKS_ARGS[model_name]

    if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)):
        raise ValueError("The `weights` argument should be either "
                         "`None` (random initialization), `imagenet` "
                         "(pre-training on ImageNet), "
                         "or the path to the weights file to be loaded."
                         f"Received: weights={weights}")

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            "If using `weights` as `'imagenet'` with `include_top`"
            " as true, `classes` should be 1000"
            f"Received: classes={classes}")

    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    x = img_input

    if include_preprocessing:
        # Apply original V1 preprocessing for Bx variants
        # if number of channels allows it
        num_channels = input_shape[bn_axis - 1]
        if model_name.split("-")[-1].startswith("b") and num_channels == 3:
            x = layers.Rescaling(scale=1. / 255)(x)
            x = layers.Normalization(
                mean=[0.485, 0.456, 0.406],
                variance=[0.229**2, 0.224**2, 0.225**2],
                axis=bn_axis,
            )(x)
        else:
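            # Non-Bx variants (and non-RGB inputs) are instead rescaled to
            # roughly [-1, 1].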
            x = layers.Rescaling(scale=1. / 128.0, offset=-1)(x)

    # Build stem
    stem_filters = round_filters(
        filters=blocks_args[0]["input_filters"],
        width_coefficient=width_coefficient,
        min_depth=min_depth,
        depth_divisor=depth_divisor,
    )
    x = layers.Conv2D(
        filters=stem_filters,
        kernel_size=3,
        strides=2,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        use_bias=False,
        name="stem_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="stem_bn",
    )(x)
    x = layers.Activation(activation, name="stem_activation")(x)

    # Build blocks
    blocks_args = copy.deepcopy(blocks_args)
    b = 0
    blocks = float(sum(args["num_repeat"] for args in blocks_args))

    for (i, args) in enumerate(blocks_args):
        assert args["num_repeat"] > 0

        # Update block input and output filters based on depth multiplier.
        args["input_filters"] = round_filters(
            filters=args["input_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor)
        args["output_filters"] = round_filters(
            filters=args["output_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor)

        # Determine which conv type to use:
        block = {0: MBConvBlock, 1: FusedMBConvBlock}[args.pop("conv_type")]
        repeats = round_repeats(repeats=args.pop("num_repeat"),
                                depth_coefficient=depth_coefficient)
        for j in range(repeats):
            # The first block needs to take care of stride and filter size increase.
            if j > 0:
                args["strides"] = 1
                args["input_filters"] = args["output_filters"]

            # Stochastic depth: the rate handed to the block grows linearly with
            # the overall block index `b`, from 0 for the first block to just
            # under `drop_connect_rate` for the last.
            x = block(
                activation=activation,
                bn_momentum=bn_momentum,
                survival_probability=drop_connect_rate * b / blocks,
                name="block{}{}_".format(i + 1, chr(j + 97)),
                **args,
            )(x)
            b += 1

    # Build top
    top_filters = round_filters(filters=1280,
                                width_coefficient=width_coefficient,
                                min_depth=min_depth,
                                depth_divisor=depth_divisor)
    x = layers.Conv2D(
        filters=top_filters,
        kernel_size=1,
        strides=1,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        data_format="channels_last",
        use_bias=False,
        name="top_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="top_bn",
    )(x)
    x = layers.Activation(activation=activation, name="top_activation")(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name="top_dropout")(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(classes,
                         activation=classifier_activation,
                         kernel_initializer=DENSE_KERNEL_INITIALIZER,
                         bias_initializer=tf.constant_initializer(0),
                         name="predictions")(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=model_name)

    # Load weights.
    if weights == "imagenet":
        if include_top:
            file_suffix = ".h5"
            file_hash = WEIGHTS_HASHES[model_name[-2:]][0]
        else:
            file_suffix = "_notop.h5"
            file_hash = WEIGHTS_HASHES[model_name[-2:]][1]
        file_name = model_name + file_suffix
        weights_path = data_utils.get_file(file_name,
                                           BASE_WEIGHTS_PATH + file_name,
                                           cache_subdir="models",
                                           file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
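For reference, the per-variant wrappers in `tf.keras.applications` (`EfficientNetV2B0` through `EfficientNetV2L`) call this builder with fixed width/depth coefficients and a matching default image size. A short usage sketch of those public wrappers; the 1280-dimensional pooled feature size shown corresponds to the B0 width:

import tensorflow as tf

# Classifier with the ImageNet head (pass weights=None for random initialization).
model = tf.keras.applications.EfficientNetV2B0(
    include_top=True,
    weights="imagenet",
    classifier_activation="softmax",
)
print(model.output_shape)  # (None, 1000)

# Headless feature extractor with global average pooling.
backbone = tf.keras.applications.EfficientNetV2B0(
    include_top=False,
    weights="imagenet",
    pooling="avg",
)

# include_preprocessing defaults to True, so raw [0, 255] pixels are accepted.
images = tf.random.uniform((1, 224, 224, 3), maxval=255.0)
print(backbone(images).shape)  # (1, 1280)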
Example #3
def ResNetRS(
    depth: int,
    input_shape=None,
    bn_momentum=0.0,
    bn_epsilon=1e-5,
    activation: str = "relu",
    se_ratio=0.25,
    dropout_rate=0.25,
    drop_connect_rate=0.2,
    include_top=True,
    block_args: List[Dict[str, int]] = None,
    model_name="resnet-rs",
    pooling=None,
    weights="imagenet",
    input_tensor=None,
    classes=1000,
    classifier_activation: Union[str, Callable] = "softmax",
    include_preprocessing=True,
):
    """Build Resnet-RS model, given provided parameters.

    Args:
        depth: Depth of ResNet network.
        input_shape: optional shape tuple. It should have exactly 3 input
          channels, and width and height should be no smaller than 32. E.g.
          (200, 200, 3) would be one valid value.
        bn_momentum: Momentum parameter for Batch Normalization layers.
        bn_epsilon: Epsilon parameter for Batch Normalization layers.
        activation: activation function.
        se_ratio: Squeeze and Excitation layer ratio.
        dropout_rate: dropout rate before final classifier layer.
        drop_connect_rate: dropout rate at skip connections.
        include_top: whether to include the fully-connected layer at the top of
          the network.
        block_args: list of dicts, parameters to construct block modules.
        model_name: name of the model.
        pooling: optional pooling mode for feature extraction when `include_top`
          is `False`.
          - `None` means that the output of the model will be the 4D tensor
            output of the last convolutional layer.
          - `avg` means that global average pooling will be applied to the
            output of the last convolutional layer, and thus the output of the
            model will be a 2D tensor.
          - `max` means that global max pooling will be applied.
        weights: one of `None` (random initialization), `'imagenet'`
          (pre-training on ImageNet), or the path to the weights file to be
          loaded. Note: one model can have multiple ImageNet variants
          depending on the input shape it was trained with. For input_shape
          224x224, pass `imagenet-i224` as the argument. By default, the
          weights trained with the largest available input shape are downloaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to
          use as image input for the model.
        classes: optional number of classes to classify images into, only to be
          specified if `include_top` is True, and if no `weights` argument is
          specified.
        classifier_activation: A `str` or callable. The activation function to
          use on the "top" layer. Ignored unless `include_top=True`. Set
          `classifier_activation=None` to return the logits of the "top" layer.
        include_preprocessing: Boolean, whether to include the preprocessing
          layer (`Rescaling`) at the bottom of the network. Defaults to `True`.
          Note: the input image is normalized by the ImageNet mean and
          standard deviation.

    Returns:
        A `tf.keras.Model` instance.

    Raises:
        ValueError: in case of invalid argument for `weights`, or invalid input
            shape.
        ValueError: if `classifier_activation` is not `softmax` or `None` when
            using a pretrained top layer.
    """
    # Validate parameters
    available_weight_variants = DEPTH_TO_WEIGHT_VARIANTS[depth]
    if weights == "imagenet":
        max_input_shape = max(available_weight_variants)
        # `imagenet` argument without explicit weights input size.
        # Picking weights trained with biggest available shape
        weights = f"{weights}-i{max_input_shape}"

    weights_allow_list = [f"imagenet-i{x}" for x in available_weight_variants]
    if not (weights in {*weights_allow_list, None}
            or tf.io.gfile.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), `'imagenet'` "
            "(pre-training on ImageNet, with highest available input shape),"
            " or the path to the weights file to be loaded. "
            f"For ResNetRS{depth} the following weight variants are "
            f"available {weights_allow_list} (default=highest)."
            f" Received weights={weights}")

    if weights in weights_allow_list and include_top and classes != 1000:
        raise ValueError(
            f"If using `weights` as `'imagenet'` or any "
            f"of {weights_allow_list} "
            f"with `include_top` as true, `classes` should be 1000. "
            f"Received classes={classes}")

    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=224,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )
    # Define input tensor
    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    x = img_input

    if include_preprocessing:
        num_channels = input_shape[bn_axis - 1]
        x = layers.Rescaling(scale=1.0 / 255)(x)
        if num_channels == 3:
            x = layers.Normalization(
                mean=[0.485, 0.456, 0.406],
                variance=[0.229**2, 0.224**2, 0.225**2],
                axis=bn_axis,
            )(x)

    # Build stem
    x = STEM(bn_momentum=bn_momentum,
             bn_epsilon=bn_epsilon,
             activation=activation)(x)

    # Build blocks
    if block_args is None:
        block_args = BLOCK_ARGS[depth]

    for i, args in enumerate(block_args):
        survival_probability = get_survival_probability(
            init_rate=drop_connect_rate,
            block_num=i + 2,
            total_blocks=len(block_args) + 1,
        )

        x = BlockGroup(
            filters=args["input_filters"],
            activation=activation,
            strides=(1 if i == 0 else 2),
            num_repeats=args["num_repeats"],
            se_ratio=se_ratio,
            bn_momentum=bn_momentum,
            bn_epsilon=bn_epsilon,
            survival_probability=survival_probability,
            name=f"BlockGroup{i + 2}_",
        )(x)

    # Build head:
    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name="top_dropout")(x)

        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(classes,
                         activation=classifier_activation,
                         name="predictions")(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=model_name)

    # Download weights
    if weights in weights_allow_list:
        weights_input_shape = weights.split("-")[-1]  # e.g. "i160"
        weights_name = f"{model_name}-{weights_input_shape}"
        if not include_top:
            weights_name += "_notop"

        filename = f"{weights_name}.h5"
        download_url = BASE_WEIGHTS_URL + filename
        weights_path = data_utils.get_file(
            fname=filename,
            origin=download_url,
            cache_subdir="models",
            file_hash=WEIGHT_HASHES[filename],
        )
        model.load_weights(weights_path)

    elif weights is not None:
        model.load_weights(weights)

    return model
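As with the EfficientNetV2 builder above, this function is normally reached through the per-depth wrappers (`ResNetRS50`, `ResNetRS101`, ..., `ResNetRS420`), which supply the matching `depth` and block arguments and are available in `tf.keras.applications` in recent TensorFlow releases. A brief usage sketch:

import tensorflow as tf

# "imagenet" resolves to the largest input-size variant available for this depth
# (see DEPTH_TO_WEIGHT_VARIANTS); a specific variant can also be requested
# explicitly as "imagenet-i<input_size>".
model = tf.keras.applications.ResNetRS50(
    include_top=True,
    weights="imagenet",
)
print(model.output_shape)  # (None, 1000)

# Headless feature extractor; raw [0, 255] images are accepted because
# include_preprocessing defaults to True.
backbone = tf.keras.applications.ResNetRS50(
    include_top=False,
    weights="imagenet",
    pooling="avg",
)
images = tf.random.uniform((1, 224, 224, 3), maxval=255.0)
print(backbone(images).shape)  # (1, 2048)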