Code example #1

# Imports inferred from usage (an assumption; the original import block is not
# shown). The AnchorBoxes, L2Normalization, DecodeDetections and
# DecodeDetectionsFast layers are custom layers from the ssd_keras project
# this example comes from; the module paths below assume that project's layout.
import numpy as np
from keras.models import Model
from keras.layers import (Input, Lambda, Activation, Conv2D, MaxPooling2D,
                          ZeroPadding2D, Reshape, Concatenate)
from keras.regularizers import l2
import keras.backend as K
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_L2Normalization import L2Normalization
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
def ssd_512(image_size,
            n_classes,
            mode='training',
            l2_regularization=0.0005,
            min_scale=None,
            max_scale=None,
            scales=None,
            aspect_ratios_global=None,
            aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
            two_boxes_for_ar1=True,
            steps=[8, 16, 32, 64, 128, 256, 512],
            offsets=None,
            clip_boxes=False,
            variances=[0.1, 0.1, 0.2, 0.2],
            coords='centroids',
            normalize_coords=True,
            subtract_mean=[123, 117, 104],
            divide_by_stddev=None,
            swap_channels=[2, 1, 0],
            confidence_thresh=0.01,
            iou_threshold=0.45,
            top_k=200,
            nms_max_output_size=400,
            return_predictor_sizes=False):
    '''
    Build a Keras model with SSD512 architecture, see references.

    The base network is a reduced atrous VGG-16, extended by the SSD architecture,
    as described in the paper.

    Most of the arguments that this function takes are only needed for the anchor
    box layers. In case you're training the network, the parameters passed here must
    be the same as the ones used to set up `SSDBoxEncoder`. In case you're loading
    trained weights, the parameters passed here must be the same as the ones used
    to produce the trained weights.

    Some of these arguments are explained in more detail in the documentation of the
    `SSDBoxEncoder` class.

    Note: Requires Keras v2.0 or later. Currently works only with the
    TensorFlow backend (v1.0 or later).

    Arguments:
        image_size (tuple): The input image size in the format `(height, width, channels)`.
        n_classes (int): The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
        mode (str, optional): One of 'training', 'inference' and 'inference_fast'. In 'training' mode,
            the model outputs the raw prediction tensor, while in 'inference' and 'inference_fast' modes,
            the raw predictions are decoded into absolute coordinates and filtered via confidence thresholding,
            non-maximum suppression, and top-k filtering. The difference between the latter two modes is that
            'inference' follows the exact procedure of the original Caffe implementation, while
            'inference_fast' uses a faster prediction decoding procedure.
        l2_regularization (float, optional): The L2-regularization rate. Applies to all convolutional layers.
            Set to zero to deactivate L2-regularization.
        min_scale (float, optional): The smallest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images.
        max_scale (float, optional): The largest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images. All scaling factors between the smallest and the
            largest will be linearly interpolated. Note that the second to last of the linearly interpolated
            scaling factors will actually be the scaling factor for the last predictor layer, while the last
            scaling factor is used for the second box for aspect ratio 1 in the last predictor layer
            if `two_boxes_for_ar1` is `True`.
        scales (list, optional): A list of floats containing scaling factors per convolutional predictor layer.
            This list must be one element longer than the number of predictor layers. The first `k` elements are the
            scaling factors for the `k` predictor layers, while the last element is used for the second box
            for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
            last scaling factor must be passed either way, even if it is not being used.
            If a list is passed, this argument overrides `min_scale` and `max_scale`. All scaling factors
            must be greater than zero.
        aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be
            generated. This list is valid for all prediction layers.
        aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each prediction layer.
            This allows you to set the aspect ratios for each predictor layer individually, which is the case for the
            original SSD512 implementation. If a list is passed, it overrides `aspect_ratios_global`.
        two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise.
            If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
            using the scaling factor for the respective layer, the second one will be generated using the
            geometric mean of said scaling factor and the next bigger scaling factor.
        steps (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many
            pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over
            the image. If the list contains ints/floats, then that value will be used for both spatial dimensions.
            If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`.
            If no steps are provided, then they will be computed such that the anchor box center points will form an
            equidistant grid within the image dimensions.
        offsets (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either floats or tuples of two floats. These numbers represent for each predictor layer how many
            pixels from the top and left borders of the image the top-most and left-most anchor box center points should be
            as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions
            of the step size specified in the `steps` argument. If the list contains floats, then that value will
            be used for both spatial dimensions. If the list contains tuples of two floats, then they represent
            `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size.
        clip_boxes (bool, optional): If `True`, clips the anchor box coordinates to stay within image boundaries.
        variances (list, optional): A list of 4 floats >0. The anchor box offset for each coordinate will be divided by
            its respective variance value.
        coords (str, optional): The box coordinate format to be used internally by the model (i.e. this is not the input format
            of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width,
            and height), 'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model is supposed to use relative instead of absolute coordinates,
            i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates.
        subtract_mean (array-like, optional): `None` or an array-like object of integers or floating point values
            of any shape that is broadcast-compatible with the image shape. The elements of this array will be
            subtracted from the image pixel intensity values. For example, pass a list of three integers
            to perform per-channel mean normalization for color images.
        divide_by_stddev (array-like, optional): `None` or an array-like object of non-zero integers or
            floating point values of any shape that is broadcast-compatible with the image shape. The image pixel
            intensity values will be divided by the elements of this array. For example, pass a list
            of three integers to perform per-channel standard deviation normalization for color images.
        swap_channels (list, optional): Either `False` or a list of integers representing the desired order in which the input
            image channels should be swapped.
        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
            positive class in order to be considered for the non-maximum suppression stage for the respective class.
            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
            stage, while a larger value will result in a larger part of the selection process happening in the confidence
            thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes that have a Jaccard similarity of greater than `iou_threshold`
            with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
            to the box's confidence score.
        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
            non-maximum suppression stage.
        nms_max_output_size (int, optional): The maximal number of predictions that will be left over after the NMS stage.
        return_predictor_sizes (bool, optional): If `True`, this function not only returns the model, but also
            a list containing the spatial dimensions of the predictor layers. This isn't strictly necessary since
            you can always get their sizes easily via the Keras API, but it's convenient and less error-prone
            to get them this way. They are only relevant for training anyway (SSDBoxEncoder needs to know the
            spatial dimensions of the predictor layers), for inference you don't need them.

    Returns:
        model: The Keras SSD512 model.
        predictor_sizes (optional): A Numpy array containing the `(height, width)` portion
            of the output tensor shape for each convolutional predictor layer. During
            training, the generator function needs this in order to transform
            the ground truth labels into tensors of identical structure as the
            output tensors of the model, which is in turn needed for the cost
            function.

    References:
        https://arxiv.org/abs/1512.02325v5
    '''

    n_predictor_layers = 7  # The number of predictor conv layers in the network is 7 for the original SSD512
    n_classes += 1  # Account for the background class.
    l2_reg = l2_regularization  # Make the internal name shorter.
    img_height, img_width, img_channels = image_size

    ############################################################################
    # Get a few exceptions out of the way.
    ############################################################################

    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "Either `aspect_ratios_per_layer` must be `None` or it must contain {} aspect ratio lists (one per predictor layer), but it contains {}."
                .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "Either `scales` must be `None` or it must contain {} elements, but it contains {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:  # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)
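        # Example: min_scale=0.1 and max_scale=0.9 yield scales of roughly
        # [0.1, 0.214, 0.329, 0.443, 0.557, 0.671, 0.786, 0.9]: one scale per
        # predictor layer plus the extra last value used for the second
        # aspect-ratio-1 box.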

    if len(variances) != 4:
        raise ValueError(
            "4 variance values must be pased, but {} values were received.".
            format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError(
            "All variances must be >0, but the variances given are {}".format(
                variances))

    if (steps is not None) and (len(steps) != n_predictor_layers):
        raise ValueError(
            "You must provide exactly one step value per predictor layer.")

    if (offsets is not None) and (len(offsets) != n_predictor_layers):
        raise ValueError(
            "You must provide exactly one offset value per predictor layer.")

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    if aspect_ratios_per_layer:
        n_boxes = []
        for ar in aspect_ratios_per_layer:
            if (1 in ar) & two_boxes_for_ar1:
                n_boxes.append(len(ar) +
                               1)  # +1 for the second box for aspect ratio 1
            else:
                n_boxes.append(len(ar))
    else:  # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer
        if (1 in aspect_ratios_global) and two_boxes_for_ar1:
            n_boxes = len(aspect_ratios_global) + 1
        else:
            n_boxes = len(aspect_ratios_global)
        n_boxes = [n_boxes] * n_predictor_layers

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    ############################################################################
    # Define functions for the Lambda layers below.
    ############################################################################

    def identity_layer(tensor):
        return tensor

    def input_mean_normalization(tensor):
        return tensor - np.array(subtract_mean)

    def input_stddev_normalization(tensor):
        return tensor / np.array(divide_by_stddev)

    def input_channel_swap(tensor):
        if len(swap_channels) == 3:
            return K.stack([
                tensor[..., swap_channels[0]], tensor[..., swap_channels[1]],
                tensor[..., swap_channels[2]]
            ],
                           axis=-1)
        elif len(swap_channels) == 4:
            return K.stack([
                tensor[..., swap_channels[0]], tensor[..., swap_channels[1]],
                tensor[..., swap_channels[2]], tensor[..., swap_channels[3]]
            ],
                           axis=-1)

    ############################################################################
    # Build the network.
    ############################################################################

    x = Input(shape=(img_height, img_width, img_channels))

    # The following identity layer is only needed so that the subsequent lambda layers can be optional.
    x1 = Lambda(identity_layer,
                output_shape=(img_height, img_width, img_channels),
                name='identity_layer')(x)
    if subtract_mean is not None:
        x1 = Lambda(input_mean_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_mean_normalization')(x1)
    if divide_by_stddev is not None:
        x1 = Lambda(input_stddev_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_stddev_normalization')(x1)
    if swap_channels:
        x1 = Lambda(input_channel_swap,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_channel_swap')(x1)

    conv1_1 = Conv2D(64, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv1_1')(x1)
    conv1_2 = Conv2D(64, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv1_2')(conv1_1)
    pool1 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool1')(conv1_2)

    conv2_1 = Conv2D(128, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv2_1')(pool1)
    conv2_2 = Conv2D(128, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv2_2')(conv2_1)
    pool2 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool2')(conv2_2)

    conv3_1 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_1')(pool2)
    conv3_2 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_2')(conv3_1)
    conv3_3 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_3')(conv3_2)
    pool3 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool3')(conv3_3)

    conv4_1 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_1')(pool3)
    conv4_2 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_2')(conv4_1)
    conv4_3 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_3')(conv4_2)
    pool4 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool4')(conv4_3)

    conv5_1 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_1')(pool4)
    conv5_2 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_2')(conv5_1)
    conv5_3 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_3')(conv5_2)
    pool5 = MaxPooling2D(pool_size=(3, 3),
                         strides=(1, 1),
                         padding='same',
                         name='pool5')(conv5_3)

    fc6 = Conv2D(1024, (3, 3),
                 dilation_rate=(6, 6),
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal',
                 kernel_regularizer=l2(l2_reg),
                 name='fc6')(pool5)

    fc7 = Conv2D(1024, (1, 1),
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal',
                 kernel_regularizer=l2(l2_reg),
                 name='fc7')(fc6)

    conv6_1 = Conv2D(256, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv6_1')(fc7)
    conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv6_padding')(conv6_1)
    conv6_2 = Conv2D(512, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv6_2')(conv6_1)

    conv7_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv7_1')(conv6_2)
    conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv7_padding')(conv7_1)
    conv7_2 = Conv2D(256, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv7_2')(conv7_1)

    conv8_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv8_1')(conv7_2)
    conv8_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv8_padding')(conv8_1)
    conv8_2 = Conv2D(256, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv8_2')(conv8_1)

    conv9_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv9_1')(conv8_2)
    conv9_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv9_padding')(conv9_1)
    conv9_2 = Conv2D(256, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv9_2')(conv9_1)

    conv10_1 = Conv2D(128, (1, 1),
                      activation='relu',
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name='conv10_1')(conv9_2)
    conv10_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                             name='conv10_padding')(conv10_1)
    conv10_2 = Conv2D(256, (4, 4),
                      strides=(1, 1),
                      activation='relu',
                      padding='valid',
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name='conv10_2')(conv10_1)

    # Feed conv4_3 into the L2 normalization layer
    conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3)

    ### Build the convolutional predictor layers on top of the base network

    # We predict `n_classes` confidence values for each box, hence the confidence predictors have depth `n_boxes * n_classes`
    # Output shape of the confidence layers: `(batch, height, width, n_boxes * n_classes)`
    conv4_3_norm_mbox_conf = Conv2D(
        n_boxes[0] * n_classes, (3, 3),
        padding='same',
        kernel_initializer='he_normal',
        kernel_regularizer=l2(l2_reg),
        name='conv4_3_norm_mbox_conf')(conv4_3_norm)
    fc7_mbox_conf = Conv2D(n_boxes[1] * n_classes, (3, 3),
                           padding='same',
                           kernel_initializer='he_normal',
                           kernel_regularizer=l2(l2_reg),
                           name='fc7_mbox_conf')(fc7)
    conv6_2_mbox_conf = Conv2D(n_boxes[2] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv6_2_mbox_conf')(conv6_2)
    conv7_2_mbox_conf = Conv2D(n_boxes[3] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv7_2_mbox_conf')(conv7_2)
    conv8_2_mbox_conf = Conv2D(n_boxes[4] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv8_2_mbox_conf')(conv8_2)
    conv9_2_mbox_conf = Conv2D(n_boxes[5] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv9_2_mbox_conf')(conv9_2)
    conv10_2_mbox_conf = Conv2D(n_boxes[6] * n_classes, (3, 3),
                                padding='same',
                                kernel_initializer='he_normal',
                                kernel_regularizer=l2(l2_reg),
                                name='conv10_2_mbox_conf')(conv10_2)
    # We predict 4 box coordinates for each box, hence the localization predictors have depth `n_boxes * 4`
    # Output shape of the localization layers: `(batch, height, width, n_boxes * 4)`
    conv4_3_norm_mbox_loc = Conv2D(n_boxes[0] * 4, (3, 3),
                                   padding='same',
                                   kernel_initializer='he_normal',
                                   kernel_regularizer=l2(l2_reg),
                                   name='conv4_3_norm_mbox_loc')(conv4_3_norm)
    fc7_mbox_loc = Conv2D(n_boxes[1] * 4, (3, 3),
                          padding='same',
                          kernel_initializer='he_normal',
                          kernel_regularizer=l2(l2_reg),
                          name='fc7_mbox_loc')(fc7)
    conv6_2_mbox_loc = Conv2D(n_boxes[2] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv6_2_mbox_loc')(conv6_2)
    conv7_2_mbox_loc = Conv2D(n_boxes[3] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv7_2_mbox_loc')(conv7_2)
    conv8_2_mbox_loc = Conv2D(n_boxes[4] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv8_2_mbox_loc')(conv8_2)
    conv9_2_mbox_loc = Conv2D(n_boxes[5] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv9_2_mbox_loc')(conv9_2)
    conv10_2_mbox_loc = Conv2D(n_boxes[6] * 4, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv10_2_mbox_loc')(conv10_2)

    ### Generate the anchor boxes (called "priors" in the original Caffe/C++ implementation, so I'll keep their layer names)

    # Output shape of anchors: `(batch, height, width, n_boxes, 8)`
    conv4_3_norm_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[0],
        next_scale=scales[1],
        aspect_ratios=aspect_ratios[0],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[0],
        this_offsets=offsets[0],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv4_3_norm_mbox_priorbox')(conv4_3_norm_mbox_loc)
    fc7_mbox_priorbox = AnchorBoxes(img_height,
                                    img_width,
                                    this_scale=scales[1],
                                    next_scale=scales[2],
                                    aspect_ratios=aspect_ratios[1],
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    this_steps=steps[1],
                                    this_offsets=offsets[1],
                                    clip_boxes=clip_boxes,
                                    variances=variances,
                                    coords=coords,
                                    normalize_coords=normalize_coords,
                                    name='fc7_mbox_priorbox')(fc7_mbox_loc)
    conv6_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[2],
        next_scale=scales[3],
        aspect_ratios=aspect_ratios[2],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[2],
        this_offsets=offsets[2],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv6_2_mbox_priorbox')(conv6_2_mbox_loc)
    conv7_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[3],
        next_scale=scales[4],
        aspect_ratios=aspect_ratios[3],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[3],
        this_offsets=offsets[3],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv7_2_mbox_priorbox')(conv7_2_mbox_loc)
    conv8_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[4],
        next_scale=scales[5],
        aspect_ratios=aspect_ratios[4],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[4],
        this_offsets=offsets[4],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv8_2_mbox_priorbox')(conv8_2_mbox_loc)
    conv9_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[5],
        next_scale=scales[6],
        aspect_ratios=aspect_ratios[5],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[5],
        this_offsets=offsets[5],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv9_2_mbox_priorbox')(conv9_2_mbox_loc)
    conv10_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[6],
        next_scale=scales[7],
        aspect_ratios=aspect_ratios[6],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[6],
        this_offsets=offsets[6],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv10_2_mbox_priorbox')(conv10_2_mbox_loc)

    ### Reshape

    # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
    # We want the classes isolated in the last axis to perform softmax on them
    conv4_3_norm_mbox_conf_reshape = Reshape(
        (-1, n_classes),
        name='conv4_3_norm_mbox_conf_reshape')(conv4_3_norm_mbox_conf)
    fc7_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='fc7_mbox_conf_reshape')(fc7_mbox_conf)
    conv6_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv6_2_mbox_conf_reshape')(conv6_2_mbox_conf)
    conv7_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv7_2_mbox_conf_reshape')(conv7_2_mbox_conf)
    conv8_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv8_2_mbox_conf_reshape')(conv8_2_mbox_conf)
    conv9_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv9_2_mbox_conf_reshape')(conv9_2_mbox_conf)
    conv10_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv10_2_mbox_conf_reshape')(conv10_2_mbox_conf)
    # Reshape the box predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`
    # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss
    conv4_3_norm_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv4_3_norm_mbox_loc_reshape')(conv4_3_norm_mbox_loc)
    fc7_mbox_loc_reshape = Reshape((-1, 4),
                                   name='fc7_mbox_loc_reshape')(fc7_mbox_loc)
    conv6_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv6_2_mbox_loc_reshape')(conv6_2_mbox_loc)
    conv7_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv7_2_mbox_loc_reshape')(conv7_2_mbox_loc)
    conv8_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv8_2_mbox_loc_reshape')(conv8_2_mbox_loc)
    conv9_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv9_2_mbox_loc_reshape')(conv9_2_mbox_loc)
    conv10_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv10_2_mbox_loc_reshape')(conv10_2_mbox_loc)
    # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    conv4_3_norm_mbox_priorbox_reshape = Reshape(
        (-1, 8),
        name='conv4_3_norm_mbox_priorbox_reshape')(conv4_3_norm_mbox_priorbox)
    fc7_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='fc7_mbox_priorbox_reshape')(fc7_mbox_priorbox)
    conv6_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv6_2_mbox_priorbox_reshape')(conv6_2_mbox_priorbox)
    conv7_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv7_2_mbox_priorbox_reshape')(conv7_2_mbox_priorbox)
    conv8_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv8_2_mbox_priorbox_reshape')(conv8_2_mbox_priorbox)
    conv9_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv9_2_mbox_priorbox_reshape')(conv9_2_mbox_priorbox)
    conv10_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv10_2_mbox_priorbox_reshape')(conv10_2_mbox_priorbox)

    ### Concatenate the predictions from the different layers

    # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions,
    # so we want to concatenate along axis 1, the number of boxes per layer
    # Output shape of `mbox_conf`: (batch, n_boxes_total, n_classes)
    mbox_conf = Concatenate(axis=1, name='mbox_conf')([
        conv4_3_norm_mbox_conf_reshape, fc7_mbox_conf_reshape,
        conv6_2_mbox_conf_reshape, conv7_2_mbox_conf_reshape,
        conv8_2_mbox_conf_reshape, conv9_2_mbox_conf_reshape,
        conv10_2_mbox_conf_reshape
    ])

    # Output shape of `mbox_loc`: (batch, n_boxes_total, 4)
    mbox_loc = Concatenate(axis=1, name='mbox_loc')([
        conv4_3_norm_mbox_loc_reshape, fc7_mbox_loc_reshape,
        conv6_2_mbox_loc_reshape, conv7_2_mbox_loc_reshape,
        conv8_2_mbox_loc_reshape, conv9_2_mbox_loc_reshape,
        conv10_2_mbox_loc_reshape
    ])

    # Output shape of `mbox_priorbox`: (batch, n_boxes_total, 8)
    mbox_priorbox = Concatenate(axis=1, name='mbox_priorbox')([
        conv4_3_norm_mbox_priorbox_reshape, fc7_mbox_priorbox_reshape,
        conv6_2_mbox_priorbox_reshape, conv7_2_mbox_priorbox_reshape,
        conv8_2_mbox_priorbox_reshape, conv9_2_mbox_priorbox_reshape,
        conv10_2_mbox_priorbox_reshape
    ])

    # The box coordinate predictions will go into the loss function just the way they are,
    # but for the class predictions, we'll apply a softmax activation layer first
    mbox_conf_softmax = Activation('softmax',
                                   name='mbox_conf_softmax')(mbox_conf)

    # Concatenate the class and box predictions and the anchors to one large predictions vector
    # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
    predictions = Concatenate(axis=2, name='predictions')(
        [mbox_conf_softmax, mbox_loc, mbox_priorbox])

    if mode == 'training':
        model = Model(inputs=x, outputs=predictions)
    elif mode == 'inference':
        decoded_predictions = DecodeDetections(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    elif mode == 'inference_fast':
        decoded_predictions = DecodeDetectionsFast(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    else:
        raise ValueError(
            "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'."
            .format(mode))

    if return_predictor_sizes:
        predictor_sizes = np.array([
            conv4_3_norm_mbox_conf._keras_shape[1:3],
            fc7_mbox_conf._keras_shape[1:3],
            conv6_2_mbox_conf._keras_shape[1:3],
            conv7_2_mbox_conf._keras_shape[1:3],
            conv8_2_mbox_conf._keras_shape[1:3],
            conv9_2_mbox_conf._keras_shape[1:3],
            conv10_2_mbox_conf._keras_shape[1:3]
        ])
        return model, predictor_sizes
    else:
        return model
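
A short usage sketch (an assumption, not part of the original source): building
the model in training mode for the 20 Pascal VOC classes, letting the scales be
interpolated from `min_scale`/`max_scale`.

# Hypothetical call; all argument values below are illustrative.
model, predictor_sizes = ssd_512(image_size=(512, 512, 3),
                                 n_classes=20,
                                 mode='training',
                                 min_scale=0.1,
                                 max_scale=0.9,
                                 return_predictor_sizes=True)
model.summary()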
Code example #2

# Imports inferred from usage (an assumption; the original file is not shown
# in full). This is a method of a VAE class that also defines `input_dim`,
# `z_dim`, `dense_size` and `sampling` (see the sketch after the example).
from keras.models import Model
from keras.layers import Input, Conv2D, Conv2DTranspose, Dense, Flatten, Reshape, Lambda
import keras.backend as K
    def build_model(self):
        vae_input = Input(shape=self.input_dim)
        #print("vae_input shape " + str(vae_input.shape))

        vae_c1 = Conv2D(filters=32,
                        kernel_size=4,
                        strides=2,
                        activation='relu')(vae_input)
        vae_c2 = Conv2D(filters=64,
                        kernel_size=4,
                        strides=2,
                        activation='relu')(vae_c1)
        vae_c3 = Conv2D(filters=64,
                        kernel_size=4,
                        strides=2,
                        activation='relu')(vae_c2)
        vae_c4 = Conv2D(filters=128,
                        kernel_size=4,
                        strides=2,
                        activation='relu')(vae_c3)

        #print("vae_c1 shape " + str(vae_c1.shape))
        #print("vae_c2 shape " + str(vae_c2.shape))
        #print("vae_c3 shape " + str(vae_c3.shape))
        #print("vae_c4 shape " + str(vae_c4.shape))

        vae_z_in = Flatten()(vae_c4)
        #print("vae_z_in shape " + str(vae_z_in.shape))

        vae_z_mean = Dense(self.z_dim)(vae_z_in)
        vae_z_log_var = Dense(self.z_dim)(vae_z_in)
        #print("vae_z_mean shape " + str(vae_z_mean.shape))
        #print("vae_z_log_var shape " + str(vae_z_log_var.shape))

        vae_z = Lambda(self.sampling)([vae_z_mean, vae_z_log_var])
        vae_z_input = Input(shape=(self.z_dim, ))
        print("vae_z shape " + str(vae_z.shape))
        #print("vae_z_input shape " + str(vae_z_input.shape))

        vae_dense = Dense(self.dense_size)  # width must match the Reshape((1, 1, self.dense_size)) below
        vae_dense_model = vae_dense(vae_z)
        #print("vae_dense_model shape " + str(vae_dense_model.shape))

        vae_z_out = Reshape((1, 1, self.dense_size))
        vae_z_out_model = vae_z_out(vae_dense_model)
        #print("vae_z_out_model shape " + str(vae_z_out_model.shape))

        vae_d1 = Conv2DTranspose(filters=64,
                                 kernel_size=(3, 4),
                                 strides=2,
                                 activation='relu')
        vae_d2 = Conv2DTranspose(filters=64,
                                 kernel_size=(9, 11),
                                 strides=3,
                                 activation='relu')
        vae_d3 = Conv2DTranspose(filters=32,
                                 kernel_size=(4, 4),
                                 strides=4,
                                 activation='relu')
        vae_d4 = Conv2DTranspose(filters=1,
                                 kernel_size=(4, 4),
                                 strides=4,
                                 activation='sigmoid')

        vae_d1_model = vae_d1(vae_z_out_model)
        vae_d2_model = vae_d2(vae_d1_model)
        vae_d3_model = vae_d3(vae_d2_model)
        vae_d4_model = vae_d4(vae_d3_model)
        #print("vae_d1_model shape " + str(vae_d1_model.shape))
        #print("vae_d2_model shape " + str(vae_d2_model.shape))
        #print("vae_d3_model shape " + str(vae_d3_model.shape))
        #print("vae_d4_model shape " + str(vae_d4_model.shape))

        vae_dense_decoder = vae_dense(vae_z_input)
        vae_z_out_decoder = vae_z_out(vae_dense_decoder)
        #print("vae_z_out_decoder shape " + str(vae_z_out_decoder.shape))

        vae_d1_decoder = vae_d1(vae_z_out_decoder)
        vae_d2_decoder = vae_d2(vae_d1_decoder)
        vae_d3_decoder = vae_d3(vae_d2_decoder)
        vae_d4_decoder = vae_d4(vae_d3_decoder)
        #print("vae_d1_decoder shape " + str(vae_d1_decoder.shape))
        #print("vae_d2_decoder shape " + str(vae_d2_decoder.shape))
        #print("vae_d3_decoder shape " + str(vae_d3_decoder.shape))
        #print("vae_d4_decoder shape " + str(vae_d4_decoder.shape))

        # Models
        vae = Model(vae_input, vae_d4_model)
        vae_encoder = Model(vae_input, vae_z)
        vae_decoder = Model(vae_z_input, vae_d4_decoder)

        def vae_r_loss(y_true, y_pred):
            y_true_flat = K.flatten(y_true)
            y_pred_flat = K.flatten(y_pred)
            return 10 * K.mean(K.square(y_true_flat - y_pred_flat), axis=-1)

        def vae_kl_loss(y_true, y_pred):
            return -0.5 * K.mean(1 + vae_z_log_var - K.square(vae_z_mean) -
                                 K.exp(vae_z_log_var),
                                 axis=-1)

        def vae_loss(y_true, y_pred):
            return vae_r_loss(y_true, y_pred) + vae_kl_loss(y_true, y_pred)

        vae.compile(optimizer='rmsprop',
                    loss=vae_loss,
                    metrics=[vae_r_loss, vae_kl_loss])
        vae.summary()

        return (vae, vae_encoder, vae_decoder)
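
The method above calls `self.sampling`, which is not shown. A minimal sketch of
the standard VAE reparameterization trick it presumably implements (an
assumption based on how it is called with `[vae_z_mean, vae_z_log_var]`):

    def sampling(self, args):
        # z = mean + sigma * epsilon with epsilon ~ N(0, I); this keeps the
        # sampling step differentiable w.r.t. the mean and log-variance.
        z_mean, z_log_var = args
        epsilon = K.random_normal(shape=K.shape(z_mean), mean=0.0, stddev=1.0)
        return z_mean + K.exp(z_log_var / 2) * epsilon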
Code example #3

# Imports inferred from usage (Keras 2.x functional API); an assumption, since
# the original file's import block is not shown. `generate_input_data` and
# `generate_output_data` are helpers from the same test file (a sketch is
# given after the example).
import keras
from keras.models import Model, Sequential
from keras.layers import (Input, Conv1D, ZeroPadding1D, Cropping1D,
                          UpSampling1D, MaxPooling1D, AveragePooling1D,
                          GlobalMaxPooling1D, GlobalAveragePooling1D, Conv2D,
                          SeparableConv2D, MaxPooling2D, AveragePooling2D,
                          ZeroPadding2D, Cropping2D, UpSampling2D,
                          GlobalAveragePooling2D, GlobalMaxPooling2D,
                          BatchNormalization, Dense, Dropout, Flatten,
                          Activation, LeakyReLU, ELU)
def get_test_model_full():
    """Returns a maximally complex test model,
    using all supported layer types with different parameter combinations.
    """
    input_shapes = [
        (26, 28, 3),
        (4, 4, 3),
        (4, 4, 3),
        (4, ),
        (2, 3),
        (27, 29, 1),
        (17, 1),
        (17, 4),
    ]
    inputs = [Input(shape=s) for s in input_shapes]

    outputs = []

    for inp in inputs[6:8]:
        for padding in ['valid', 'same']:
            for s in range(1, 6):
                for out_channels in [1, 2]:
                    for d in range(1, 4):
                        outputs.append(
                            Conv1D(out_channels,
                                   s,
                                   padding=padding,
                                   dilation_rate=d)(inp))
        for padding_size in range(0, 5):
            outputs.append(ZeroPadding1D(padding_size)(inp))
        for crop_left in range(0, 2):
            for crop_right in range(0, 2):
                outputs.append(Cropping1D((crop_left, crop_right))(inp))
        for upsampling_factor in range(1, 5):
            outputs.append(UpSampling1D(upsampling_factor)(inp))
        for padding in ['valid', 'same']:
            for pool_factor in range(1, 6):
                for s in range(1, 4):
                    outputs.append(
                        MaxPooling1D(pool_factor, strides=s,
                                     padding=padding)(inp))
                    outputs.append(
                        AveragePooling1D(pool_factor,
                                         strides=s,
                                         padding=padding)(inp))
        outputs.append(GlobalMaxPooling1D()(inp))
        outputs.append(GlobalAveragePooling1D()(inp))

    for inp in [inputs[0], inputs[5]]:
        for padding in ['valid', 'same']:
            for h in range(1, 6):
                for out_channels in [1, 2]:
                    for d in range(1, 4):
                        outputs.append(
                            Conv2D(out_channels, (h, 1),
                                   padding=padding,
                                   dilation_rate=(d, 1))(inp))
                        outputs.append(
                            SeparableConv2D(out_channels, (h, 1),
                                            padding=padding,
                                            dilation_rate=(d, 1))(inp))
                    for sy in range(1, 4):
                        outputs.append(
                            Conv2D(out_channels, (h, 1),
                                   strides=(1, sy),
                                   padding=padding)(inp))
                        outputs.append(
                            SeparableConv2D(out_channels, (h, 1),
                                            strides=(sy, sy),
                                            padding=padding)(inp))
                for sy in range(1, 4):
                    outputs.append(
                        MaxPooling2D((h, 1), strides=(1, sy),
                                     padding=padding)(inp))
            for w in range(1, 6):
                for out_channels in [1, 2]:
                    # `sy` is whatever value the strides loop above left
                    # behind (it ends at 3), so dilation rates > 1 are
                    # effectively skipped here.
                    for d in (range(1, 4) if sy == 1 else [1]):
                        outputs.append(
                            Conv2D(out_channels, (1, w),
                                   padding=padding,
                                   dilation_rate=(1, d))(inp))
                        outputs.append(
                            SeparableConv2D(out_channels, (1, w),
                                            padding=padding,
                                            dilation_rate=(1, d))(inp))
                    for sx in range(1, 4):
                        outputs.append(
                            Conv2D(out_channels, (1, w),
                                   strides=(sx, 1),
                                   padding=padding)(inp))
                        outputs.append(
                            SeparableConv2D(out_channels, (1, w),
                                            strides=(sx, sx),
                                            padding=padding)(inp))
                for sx in range(1, 4):
                    outputs.append(
                        MaxPooling2D((1, w), strides=(1, sx),
                                     padding=padding)(inp))
    outputs.append(ZeroPadding2D(2)(inputs[0]))
    outputs.append(ZeroPadding2D((2, 3))(inputs[0]))
    outputs.append(ZeroPadding2D(((1, 2), (3, 4)))(inputs[0]))
    outputs.append(Cropping2D(2)(inputs[0]))
    outputs.append(Cropping2D((2, 3))(inputs[0]))
    outputs.append(Cropping2D(((1, 2), (3, 4)))(inputs[0]))
    for y in range(1, 3):
        for x in range(1, 3):
            outputs.append(UpSampling2D(size=(y, x))(inputs[0]))
    outputs.append(GlobalAveragePooling2D()(inputs[0]))
    outputs.append(GlobalMaxPooling2D()(inputs[0]))
    outputs.append(AveragePooling2D((2, 2))(inputs[0]))
    outputs.append(MaxPooling2D((2, 2))(inputs[0]))
    outputs.append(UpSampling2D((2, 2))(inputs[0]))
    outputs.append(keras.layers.concatenate([inputs[0], inputs[0]]))
    outputs.append(Dropout(0.5)(inputs[0]))

    outputs.append(BatchNormalization()(inputs[0]))
    outputs.append(BatchNormalization(center=False)(inputs[0]))
    outputs.append(BatchNormalization(scale=False)(inputs[0]))

    outputs.append(Conv2D(2, (3, 3), use_bias=True)(inputs[0]))
    outputs.append(Conv2D(2, (3, 3), use_bias=False)(inputs[0]))
    outputs.append(SeparableConv2D(2, (3, 3), use_bias=True)(inputs[0]))
    outputs.append(SeparableConv2D(2, (3, 3), use_bias=False)(inputs[0]))

    outputs.append(Dense(2, use_bias=True)(inputs[3]))
    outputs.append(Dense(2, use_bias=False)(inputs[3]))

    shared_conv = Conv2D(1, (1, 1),
                         padding='valid',
                         name='shared_conv',
                         activation='relu')

    up_scale_2 = UpSampling2D((2, 2))
    x1 = shared_conv(up_scale_2(inputs[1]))  # (8, 8, 1)
    x2 = shared_conv(up_scale_2(inputs[2]))  # (8, 8, 1)
    x3 = Conv2D(1, (1, 1), padding='valid')(up_scale_2(inputs[2]))  # (8, 8, 1)
    x = keras.layers.concatenate([x1, x2, x3])  # (8, 8, 3)
    outputs.append(x)

    x = Conv2D(3, (1, 1), padding='same', use_bias=False)(x)  # (8, 8, 3)
    outputs.append(x)
    x = Dropout(0.5)(x)
    outputs.append(x)
    x = keras.layers.concatenate(
        [MaxPooling2D((2, 2))(x),
         AveragePooling2D((2, 2))(x)])  # (4, 4, 6)
    outputs.append(x)

    x = Flatten()(x)  # (96,)
    x = Dense(4, use_bias=False)(x)
    outputs.append(x)
    x = Dense(3)(x)  # (3,)
    outputs.append(x)

    intermediate_input_shape = (3, )
    intermediate_in = Input(intermediate_input_shape)
    intermediate_x = intermediate_in
    intermediate_x = Dense(8)(intermediate_x)
    intermediate_x = Dense(5)(intermediate_x)
    intermediate_model = Model(inputs=[intermediate_in],
                               outputs=[intermediate_x],
                               name='intermediate_model')
    intermediate_model.compile(loss='mse', optimizer='nadam')

    x = intermediate_model(x)  # (5,)

    intermediate_model_2 = Sequential()
    intermediate_model_2.add(Dense(7, input_shape=(5, )))
    intermediate_model_2.add(Dense(5))
    intermediate_model_2.compile(optimizer='rmsprop',
                                 loss='categorical_crossentropy')

    x = intermediate_model_2(x)  # (5,)

    x = Dense(3)(x)  # (3,)

    shared_activation = Activation('tanh')

    outputs = outputs + [
        Activation('tanh')(inputs[3]),
        Activation('hard_sigmoid')(inputs[3]),
        Activation('selu')(inputs[3]),
        Activation('sigmoid')(inputs[3]),
        Activation('softplus')(inputs[3]),
        Activation('softmax')(inputs[3]),
        Activation('relu')(inputs[3]),
        LeakyReLU()(inputs[3]),
        ELU()(inputs[3]),
        shared_activation(inputs[3]),
        inputs[4],
        inputs[1],
        x,
        shared_activation(x),
    ]

    print('Model has {} outputs.'.format(len(outputs)))

    model = Model(inputs=inputs, outputs=outputs, name='test_model_full')
    model.compile(loss='mse', optimizer='nadam')

    # fit to dummy data
    training_data_size = 1
    batch_size = 1
    epochs = 10
    data_in = generate_input_data(training_data_size, input_shapes)
    data_out = generate_output_data(training_data_size, outputs)
    model.fit(data_in, data_out, epochs=epochs, batch_size=batch_size)
    return model
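
The helpers `generate_input_data` and `generate_output_data` are defined
elsewhere in the original test file and are not shown. A plausible minimal
sketch that matches how they are used above:

import numpy as np

def generate_input_data(n, input_shapes):
    # One random array per model input, with a leading batch dimension.
    return [np.random.normal(size=(n,) + shape) for shape in input_shapes]

def generate_output_data(n, outputs):
    # One random target per model output, matching its per-sample shape.
    return [np.random.normal(size=(n,) + tuple(int(d) for d in out.shape[1:]))
            for out in outputs]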
Code example #4
File: vgg16.py Project: wittmaan/a2d2

# Imports inferred from usage (an assumption; the original import block is not
# shown). NORM_H, NORM_W and BIN are module-level constants defined elsewhere
# in the original file, and `build_model` is a method of a VGG16 class (hence
# the reference to VGG16.l2_normalize below).
import tensorflow as tf
from tensorflow.keras.layers import (Input, Conv2D, Activation, MaxPooling2D,
                                     Flatten, Dense, Dropout, LeakyReLU,
                                     Reshape, Lambda)
from tensorflow.keras.regularizers import l2
 def build_model():
     inputs = Input(shape=(NORM_H, NORM_W, 3))
     # Block 1
     x = Conv2D(
         64,
         (3, 3),
         padding="same",
         kernel_initializer="he_normal",
         kernel_regularizer=l2(1e-4),
         name="block1_conv1",
     )(inputs)
     x = Activation("relu")(x)
     x = Conv2D(
         64,
         (3, 3),
         padding="same",
         kernel_initializer="he_normal",
         kernel_regularizer=l2(1e-4),
         name="block1_conv2",
     )(x)
     x = Activation("relu")(x)
     x = MaxPooling2D(strides=(2, 2), name="block1_pool")(x)
     # Block 2
     x = Conv2D(
         128,
         (3, 3),
         padding="same",
         kernel_initializer="he_normal",
         kernel_regularizer=l2(1e-4),
         name="block2_conv1",
     )(x)
     x = Activation("relu")(x)
     x = Conv2D(
         128,
         (3, 3),
         padding="same",
         kernel_initializer="he_normal",
         kernel_regularizer=l2(1e-4),
         name="block2_conv2",
     )(x)
     x = Activation("relu")(x)
     x = MaxPooling2D(strides=(2, 2), name="block2_pool")(x)
     # Block 3
     x = Conv2D(
         256,
         (3, 3),
         padding="same",
         kernel_initializer="he_normal",
         kernel_regularizer=l2(1e-4),
         name="block3_conv1",
     )(x)
     x = Activation("relu")(x)
     x = Conv2D(
         256,
         (3, 3),
         padding="same",
         kernel_initializer="he_normal",
         kernel_regularizer=l2(1e-4),
         name="block3_conv2",
     )(x)
     x = Activation("relu")(x)
     x = Conv2D(
         256,
         (3, 3),
         padding="same",
         kernel_initializer="he_normal",
         kernel_regularizer=l2(1e-4),
         name="block3_conv3",
     )(x)
     x = Activation("relu")(x)
     x = MaxPooling2D(strides=(2, 2), name="block3_pool")(x)
     # Block 4
     x = Conv2D(
         512,
         (3, 3),
         padding="same",
         kernel_initializer="he_normal",
         kernel_regularizer=l2(1e-4),
         name="block4_conv1",
     )(x)
     x = Activation("relu")(x)
     x = Conv2D(
         512,
         (3, 3),
         padding="same",
         kernel_initializer="he_normal",
         kernel_regularizer=l2(1e-4),
         name="block4_conv2",
     )(x)
     x = Activation("relu")(x)
     x = Conv2D(
         512,
         (3, 3),
         padding="same",
         kernel_initializer="he_normal",
         kernel_regularizer=l2(1e-4),
         name="block4_conv3",
     )(x)
     x = Activation("relu")(x)
     x = MaxPooling2D(strides=(2, 2), name="block4_pool")(x)
     # Block 5
     x = Conv2D(
         512,
         (3, 3),
         padding="same",
         kernel_initializer="he_normal",
         kernel_regularizer=l2(1e-4),
         name="block5_conv1",
     )(x)
     x = Activation("relu")(x)
     x = Conv2D(
         512,
         (3, 3),
         padding="same",
         kernel_initializer="he_normal",
         kernel_regularizer=l2(1e-4),
         name="block5_conv2",
     )(x)
     x = Activation("relu")(x)
     x = Conv2D(
         512,
         (3, 3),
         padding="same",
         kernel_initializer="he_normal",
         kernel_regularizer=l2(1e-4),
         name="block5_conv3",
     )(x)
     x = Activation("relu")(x)
     x = MaxPooling2D(strides=(2, 2), name="block5_pool")(x)
     # Flatten
     x = Flatten(name="Flatten")(x)
     # Dimensions branch
     dimensions = Dense(512, name="d_fc_1")(x)
     dimensions = LeakyReLU(alpha=0.1)(dimensions)
     dimensions = Dropout(0.5)(dimensions)
     dimensions = Dense(3, name="d_fc_2")(dimensions)
     dimensions = LeakyReLU(alpha=0.1, name="dimensions")(dimensions)
     # Orientation branch
     orientation = Dense(256, name="o_fc_1")(x)
     orientation = LeakyReLU(alpha=0.1)(orientation)
     orientation = Dropout(0.5)(orientation)
     orientation = Dense(BIN * 2, name="o_fc_2")(orientation)
     orientation = LeakyReLU(alpha=0.1)(orientation)
     orientation = Reshape((BIN, -1))(orientation)
     orientation = Lambda(VGG16.l2_normalize,
                          name="orientation")(orientation)
     # Confidence branch
     confidence = Dense(256, name="c_fc_1")(x)
     confidence = LeakyReLU(alpha=0.1)(confidence)
     confidence = Dropout(0.5)(confidence)
     confidence = Dense(BIN, activation="softmax",
                        name="confidence")(confidence)
     # Build model
     return tf.keras.Model(inputs, [dimensions, orientation, confidence])
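
# A minimal usage sketch (assumption, not from the original file): build_model
# appears to be a static method of a VGG16 class (it references
# VGG16.l2_normalize), and its three heads match a MultiBin-style 3D box setup,
# so a plausible compilation pairs regression losses on dimensions/orientation
# with cross-entropy on the BIN-way confidence output.
model = VGG16.build_model()
model.compile(optimizer="adam",
              loss={"dimensions": "mse",
                    "orientation": "mse",
                    "confidence": "categorical_crossentropy"})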
Code example #5
File: strong.py  Project: Adamantios/NN-Train
def cifar100_student_strong(n_classes: int,
                            input_shape=None,
                            input_tensor=None,
                            weights_path: Union[None, str] = None) -> Model:
    """
    Defines a cifar100 strong student network.

    :param n_classes: the number of classes.
    :param input_shape: the input shape of the network. Can be omitted if input_tensor is used.
    :param input_tensor: the input tensor of the network. Can be omitted if input_shape is used.
    :param weights_path: a path to a trained cifar100 strong student network's weights.
    :return: Keras functional Model.
    """
    inputs = create_inputs(input_shape, input_tensor)

    # Define a weight decay for the regularisation.
    weight_decay = 1e-4

    # Block1.
    x = Conv2D(32, (3, 3),
               padding='same',
               activation='elu',
               name='block1_conv1',
               kernel_regularizer=l2(weight_decay))(inputs)

    x = BatchNormalization(name='block1_batch-norm1')(x)
    x = Conv2D(64, (3, 3),
               padding='same',
               activation='elu',
               name='block1_conv2',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(name='block1_batch-norm2')(x)
    x = MaxPooling2D(pool_size=(2, 2), name='block1_pool')(x)
    x = Dropout(0.2, name='block1_dropout', seed=0)(x)

    # Block2.
    x = Conv2D(128, (3, 3),
               padding='same',
               activation='elu',
               name='block2_conv1',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(name='block2_batch-norm1')(x)
    x = Conv2D(128, (3, 3),
               padding='same',
               activation='elu',
               name='block2_conv2',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(name='block2_batch-norm2')(x)
    x = MaxPooling2D(pool_size=(2, 2), name='block2_pool')(x)
    x = Dropout(0.3, name='block2_dropout', seed=0)(x)

    # Block3.
    x = Conv2D(256, (3, 3),
               padding='same',
               activation='elu',
               name='block3_conv1',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(name='block3_batch-norm1')(x)
    x = Conv2D(256, (3, 3),
               padding='same',
               activation='elu',
               name='block3_conv2',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(name='block3_batch-norm2')(x)
    x = MaxPooling2D(pool_size=(2, 2), name='block3_pool')(x)
    x = Dropout(0.4, name='block3_dropout', seed=0)(x)

    # Add top layers.
    x = Flatten()(x)
    x = Dense(n_classes)(x)
    outputs = Activation('softmax', name='softmax')(x)

    # Create model.
    model = Model(inputs, outputs, name='cifar100_student_strong')

    # Load weights, if they exist.
    load_weights(weights_path, model)

    return model
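
# Hedged usage sketch (not from the original project): build the student for
# CIFAR-100 and compile it; the optimizer and loss choices are assumptions.
student = cifar100_student_strong(n_classes=100, input_shape=(32, 32, 3))
student.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])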
Code example #6
def bn_feature_net_2D(receptive_field=61,
                      input_shape=(256, 256, 1),
                      n_features=3,
                      n_channels=1,
                      reg=1e-5,
                      n_conv_filters=64,
                      n_dense_filters=200,
                      VGG_mode=False,
                      init='he_normal',
                      norm_method='std',
                      location=False,
                      dilated=False,
                      padding=False,
                      padding_mode='reflect',
                      multires=False,
                      include_top=True):
    # Create layers list (x) to store all of the layers.
    # We need to use the functional API to enable the multiresolution mode
    x = []

    win = (receptive_field - 1) // 2

    if dilated:
        padding = True

    if K.image_data_format() == 'channels_first':
        channel_axis = 1
        row_axis = 2
        col_axis = 3

        if not dilated:
            input_shape = (n_channels, receptive_field, receptive_field)

    else:
        row_axis = 1
        col_axis = 2
        channel_axis = -1
        if not dilated:
            input_shape = (receptive_field, receptive_field, n_channels)

    x.append(Input(shape=input_shape))
    x.append(ImageNormalization2D(norm_method=norm_method, filter_size=receptive_field)(x[-1]))

    if padding:
        if padding_mode == 'reflect':
            x.append(ReflectionPadding2D(padding=(win, win))(x[-1]))
        elif padding_mode == 'zero':
            x.append(ZeroPadding2D(padding=(win, win))(x[-1]))

    if location:
        x.append(Location2D(in_shape=tuple(x[-1].shape.as_list()[1:]))(x[-1]))
        x.append(Concatenate(axis=channel_axis)([x[-2], x[-1]]))

    if multires:
        layers_to_concat = []

    rf_counter = receptive_field
    block_counter = 0
    d = 1

    while rf_counter > 4:
        filter_size = 3 if rf_counter % 2 == 0 else 4
        x.append(Conv2D(n_conv_filters, (filter_size, filter_size), dilation_rate=d, kernel_initializer=init, padding='valid', kernel_regularizer=l2(reg))(x[-1]))
        x.append(BatchNormalization(axis=channel_axis)(x[-1]))
        x.append(Activation('relu')(x[-1]))

        block_counter += 1
        rf_counter -= filter_size - 1

        if block_counter % 2 == 0:
            if dilated:
                x.append(DilatedMaxPool2D(dilation_rate=d, pool_size=(2, 2))(x[-1]))
                d *= 2
            else:
                x.append(MaxPool2D(pool_size=(2, 2))(x[-1]))

            if VGG_mode:
                n_conv_filters *= 2

            rf_counter = rf_counter // 2

            if multires:
                layers_to_concat.append(len(x) - 1)

    if multires:
        c = []
        for l in layers_to_concat:
            output_shape = x[l].get_shape().as_list()
            target_shape = x[-1].get_shape().as_list()

            row_crop = int(output_shape[row_axis] - target_shape[row_axis])
            if row_crop % 2 == 0:
                row_crop = (row_crop // 2, row_crop // 2)
            else:
                row_crop = (row_crop // 2, row_crop // 2 + 1)

            col_crop = int(output_shape[col_axis] - target_shape[col_axis])
            if col_crop % 2 == 0:
                col_crop = (col_crop // 2, col_crop // 2)
            else:
                col_crop = (col_crop // 2, col_crop // 2 + 1)

            cropping = (row_crop, col_crop)

            c.append(Cropping2D(cropping=cropping)(x[l]))
        x.append(Concatenate(axis=channel_axis)(c))

    x.append(Conv2D(n_dense_filters, (rf_counter, rf_counter), dilation_rate=d, kernel_initializer=init, padding='valid', kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    x.append(TensorProduct(n_dense_filters, kernel_initializer=init, kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    x.append(TensorProduct(n_features, kernel_initializer=init, kernel_regularizer=l2(reg))(x[-1]))

    if not dilated:
        x.append(Flatten()(x[-1]))

    if include_top:
        x.append(Softmax(axis=channel_axis)(x[-1]))

    model = Model(inputs=x[0], outputs=x[-1])

    return model
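
# Hedged usage sketch (assumption): with the default dilated=False the network
# classifies one receptive-field-sized patch per forward pass; dilated=True
# (which forces padding) yields dense per-pixel predictions for a whole image.
patch_model = bn_feature_net_2D(receptive_field=61, n_features=3, n_channels=1)
dense_model = bn_feature_net_2D(receptive_field=61, n_features=3, n_channels=1,
                                dilated=True, input_shape=(256, 256, 1))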
Code example #7
def Deeplabv3(weights='pascal_voc',
              input_tensor=None,
              input_shape=(512, 512, 3),
              classes=21,
              backbone='mobilenetv2',
              OS=16,
              alpha=1.,
              activation=None):
    """ Instantiates the Deeplabv3+ architecture
    Optionally loads weights pre-trained
    on PASCAL VOC or Cityscapes. This model is available for TensorFlow only.
    # Arguments
        weights: one of 'pascal_voc' (pre-trained on PASCAL VOC),
            'cityscapes' (pre-trained on Cityscapes) or None (random initialization)
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: shape of the input image, in HxWxC format.
            The PASCAL VOC model was trained on (512, 512, 3) images. None is
            allowed for the spatial dimensions.
        classes: number of desired classes. PASCAL VOC has 21 classes, Cityscapes has 19.
            If the number of classes does not match the weights used, the last
            layer is initialized randomly.
        backbone: backbone to use, one of {'xception', 'mobilenetv2'}
        activation: optional activation to add to the top of the network.
            One of 'softmax', 'sigmoid' or None
        OS: determines input_shape/feature_extractor_output ratio. One of {8,16}.
            Used only for xception backbone.
        alpha: controls the width of the MobileNetV2 network. This is known as the
            width multiplier in the MobileNetV2 paper.
                - If `alpha` < 1.0, proportionally decreases the number
                    of filters in each layer.
                - If `alpha` > 1.0, proportionally increases the number
                    of filters in each layer.
                - If `alpha` = 1, default number of filters from the paper
                    are used at each layer.
            Used only for the mobilenetv2 backbone. Pretrained weights are only available for alpha=1.
    # Returns
        A Keras model instance.
    # Raises
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
        ValueError: in case of invalid argument for `weights` or `backbone`
    """

    if weights not in {'pascal_voc', 'cityscapes', None}:
        raise ValueError(
            'The `weights` argument should be either '
            '`None` (random initialization), `pascal_voc`, or `cityscapes` '
            '(pre-trained on PASCAL VOC or Cityscapes, respectively)')

    if backbone not in {'xception', 'mobilenetv2'}:
        raise ValueError('The `backbone` argument should be either '
                         '`xception` or `mobilenetv2`')

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    if backbone == 'xception':
        if OS == 8:
            entry_block3_stride = 1
            middle_block_rate = 2  # ! Not mentioned in paper, but required
            exit_block_rates = (2, 4)
            atrous_rates = (12, 24, 36)
        else:
            entry_block3_stride = 2
            middle_block_rate = 1
            exit_block_rates = (1, 2)
            atrous_rates = (6, 12, 18)

        x = Conv2D(32, (3, 3),
                   strides=(2, 2),
                   name='entry_flow_conv1_1',
                   use_bias=False,
                   padding='same')(img_input)
        x = BatchNormalization(name='entry_flow_conv1_1_BN')(x)
        x = Activation(tf.nn.relu)(x)

        x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
        x = BatchNormalization(name='entry_flow_conv1_2_BN')(x)
        x = Activation(tf.nn.relu)(x)

        x = _xception_block(x, [128, 128, 128],
                            'entry_flow_block1',
                            skip_connection_type='conv',
                            stride=2,
                            depth_activation=False)
        x, skip1 = _xception_block(x, [256, 256, 256],
                                   'entry_flow_block2',
                                   skip_connection_type='conv',
                                   stride=2,
                                   depth_activation=False,
                                   return_skip=True)

        x = _xception_block(x, [728, 728, 728],
                            'entry_flow_block3',
                            skip_connection_type='conv',
                            stride=entry_block3_stride,
                            depth_activation=False)
        for i in range(16):
            x = _xception_block(x, [728, 728, 728],
                                'middle_flow_unit_{}'.format(i + 1),
                                skip_connection_type='sum',
                                stride=1,
                                rate=middle_block_rate,
                                depth_activation=False)

        x = _xception_block(x, [728, 1024, 1024],
                            'exit_flow_block1',
                            skip_connection_type='conv',
                            stride=1,
                            rate=exit_block_rates[0],
                            depth_activation=False)
        x = _xception_block(x, [1536, 1536, 2048],
                            'exit_flow_block2',
                            skip_connection_type='none',
                            stride=1,
                            rate=exit_block_rates[1],
                            depth_activation=True)

    else:
        OS = 8
        first_block_filters = _make_divisible(32 * alpha, 8)
        x = Conv2D(first_block_filters,
                   kernel_size=3,
                   strides=(2, 2),
                   padding='same',
                   use_bias=False,
                   name='Conv')(img_input)
        x = BatchNormalization(epsilon=1e-3, momentum=0.999, name='Conv_BN')(x)
        x = Activation(tf.nn.relu6, name='Conv_Relu6')(x)

        x = _inverted_res_block(x,
                                filters=16,
                                alpha=alpha,
                                stride=1,
                                expansion=1,
                                block_id=0,
                                skip_connection=False)

        x = _inverted_res_block(x,
                                filters=24,
                                alpha=alpha,
                                stride=2,
                                expansion=6,
                                block_id=1,
                                skip_connection=False)
        x = _inverted_res_block(x,
                                filters=24,
                                alpha=alpha,
                                stride=1,
                                expansion=6,
                                block_id=2,
                                skip_connection=True)

        x = _inverted_res_block(x,
                                filters=32,
                                alpha=alpha,
                                stride=2,
                                expansion=6,
                                block_id=3,
                                skip_connection=False)
        x = _inverted_res_block(x,
                                filters=32,
                                alpha=alpha,
                                stride=1,
                                expansion=6,
                                block_id=4,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=32,
                                alpha=alpha,
                                stride=1,
                                expansion=6,
                                block_id=5,
                                skip_connection=True)

        # stride in block 6 changed from 2 -> 1, so we need to use rate = 2
        x = _inverted_res_block(
            x,
            filters=64,
            alpha=alpha,
            stride=1,  # 1!
            expansion=6,
            block_id=6,
            skip_connection=False)
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=7,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=8,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=9,
                                skip_connection=True)

        x = _inverted_res_block(x,
                                filters=96,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=10,
                                skip_connection=False)
        x = _inverted_res_block(x,
                                filters=96,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=11,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=96,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=12,
                                skip_connection=True)

        x = _inverted_res_block(
            x,
            filters=160,
            alpha=alpha,
            stride=1,
            rate=2,  # 1!
            expansion=6,
            block_id=13,
            skip_connection=False)
        x = _inverted_res_block(x,
                                filters=160,
                                alpha=alpha,
                                stride=1,
                                rate=4,
                                expansion=6,
                                block_id=14,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=160,
                                alpha=alpha,
                                stride=1,
                                rate=4,
                                expansion=6,
                                block_id=15,
                                skip_connection=True)

        x = _inverted_res_block(x,
                                filters=320,
                                alpha=alpha,
                                stride=1,
                                rate=4,
                                expansion=6,
                                block_id=16,
                                skip_connection=False)

    # end of feature extractor

    # branching for Atrous Spatial Pyramid Pooling

    # Image Feature branch
    shape_before = tf.shape(x)
    b4 = GlobalAveragePooling2D()(x)
    # from (b_size, channels)->(b_size, 1, 1, channels)
    b4 = Lambda(lambda x: K.expand_dims(x, 1))(b4)
    b4 = Lambda(lambda x: K.expand_dims(x, 1))(b4)
    b4 = Conv2D(256, (1, 1),
                padding='same',
                use_bias=False,
                name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation(tf.nn.relu)(b4)
    # upsample. have to use compat because of the option align_corners
    size_before = tf.keras.backend.int_shape(x)
    b4 = Lambda(lambda x: tf.compat.v1.image.resize(
        x, size_before[1:3], method='bilinear', align_corners=True))(b4)
    # simple 1x1
    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation(tf.nn.relu, name='aspp0_activation')(b0)

    # the MobileNetV2 variant keeps only two ASPP branches (image pooling and
    # the 1x1 conv); the official implementation omits the atrous branches here
    if backbone == 'xception':
        # rate = 6 (12)
        b1 = SepConv_BN(x,
                        256,
                        'aspp1',
                        rate=atrous_rates[0],
                        depth_activation=True,
                        epsilon=1e-5)
        # rate = 12 (24)
        b2 = SepConv_BN(x,
                        256,
                        'aspp2',
                        rate=atrous_rates[1],
                        depth_activation=True,
                        epsilon=1e-5)
        # rate = 18 (36)
        b3 = SepConv_BN(x,
                        256,
                        'aspp3',
                        rate=atrous_rates[2],
                        depth_activation=True,
                        epsilon=1e-5)

        # concatenate ASPP branches & project
        x = Concatenate()([b4, b0, b1, b2, b3])
    else:
        x = Concatenate()([b4, b0])

    x = Conv2D(256, (1, 1),
               padding='same',
               use_bias=False,
               name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = Activation(tf.nn.relu)(x)
    x = Dropout(0.1)(x)
    # DeepLab v.3+ decoder

    if backbone == 'xception':
        # Feature projection
        # x4 (x2) block
        skip_size = tf.keras.backend.int_shape(skip1)
        x = Lambda(lambda xx: tf.compat.v1.image.resize(
            xx, skip_size[1:3], method='bilinear', align_corners=True))(x)

        dec_skip1 = Conv2D(48, (1, 1),
                           padding='same',
                           use_bias=False,
                           name='feature_projection0')(skip1)
        dec_skip1 = BatchNormalization(name='feature_projection0_BN',
                                       epsilon=1e-5)(dec_skip1)
        dec_skip1 = Activation(tf.nn.relu)(dec_skip1)
        x = Concatenate()([x, dec_skip1])
        x = SepConv_BN(x,
                       256,
                       'decoder_conv0',
                       depth_activation=True,
                       epsilon=1e-5)
        x = SepConv_BN(x,
                       256,
                       'decoder_conv1',
                       depth_activation=True,
                       epsilon=1e-5)

    # you can use it with an arbitrary number of classes
    if (weights == 'pascal_voc' and classes == 21) or (weights == 'cityscapes'
                                                       and classes == 19):
        last_layer_name = 'logits_semantic'
    else:
        last_layer_name = 'custom_logits_semantic'

    x = Conv2D(classes, (1, 1), padding='same', name=last_layer_name)(x)
    size_before3 = tf.keras.backend.int_shape(img_input)
    x = Lambda(lambda xx: tf.compat.v1.image.resize(
        xx, size_before3[1:3], method='bilinear', align_corners=True))(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    if activation in {'softmax', 'sigmoid'}:
        x = tf.keras.layers.Activation(activation)(x)

    model = Model(inputs, x, name='deeplabv3plus')

    # load weights

    if weights == 'pascal_voc':
        if backbone == 'xception':
            weights_path = get_file(
                'deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH_X,
                cache_subdir='models')
        else:
            weights_path = get_file(
                'deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH_MOBILE,
                cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
    elif weights == 'cityscapes':
        if backbone == 'xception':
            weights_path = get_file(
                'deeplabv3_xception_tf_dim_ordering_tf_kernels_cityscapes.h5',
                WEIGHTS_PATH_X_CS,
                cache_subdir='models')
        else:
            weights_path = get_file(
                'deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels_cityscapes.h5',
                WEIGHTS_PATH_MOBILE_CS,
                cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
    return model
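
# Hedged usage sketch (assumption): instantiate DeepLabv3+ with the MobileNetV2
# backbone and PASCAL VOC weights, then run one dummy image through it; the
# output carries one logit channel per class at full input resolution.
import numpy as np
deeplab = Deeplabv3(weights='pascal_voc', backbone='mobilenetv2',
                    input_shape=(512, 512, 3), classes=21)
dummy = np.zeros((1, 512, 512, 3), dtype='float32')
logits = deeplab.predict(dummy)  # shape: (1, 512, 512, 21)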
# Step 1: Pre-process the data
from tensorflow.python import keras
from tensorflow.python.keras.datasets import cifar10
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense

n_classes = 10  # CIFAR-10 has 10 classes

(X_train, y_train), (X_test, y_test) = cifar10.load_data()
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train = X_train / 255.0
X_test = X_test / 255.0
y_train = keras.utils.to_categorical(y_train, n_classes)
y_test = keras.utils.to_categorical(y_test, n_classes)

# Step 2: Create the Model
model = Sequential()
model.add(
    Conv2D(32, (3, 3),
           activation='relu',
           input_shape=(32, 32, 3),
           padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='softmax'))
#print(model.summary())

# Step 3: Compile the Model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
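
# Step 4 (sketch, not in the original snippet): train and evaluate; the batch
# size and epoch count below are illustrative assumptions.
model.fit(X_train, y_train, batch_size=64, epochs=10,
          validation_data=(X_test, y_test))
loss, acc = model.evaluate(X_test, y_test)
print('Test accuracy: {:.3f}'.format(acc))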
def _build_normal_block(x, config):
    x = Conv2D(**config)(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    return x
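
# Hedged usage sketch (assumption): `config` is just a dict of Conv2D keyword
# arguments, so a conv-BN-ReLU block can be stacked on any 4D feature tensor.
from tensorflow.keras.layers import Input
feat = Input(shape=(32, 32, 3))
feat = _build_normal_block(feat, {"filters": 64, "kernel_size": (3, 3), "padding": "same"})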
Code example #10
x = x/255.0

# Grid-search three hyperparameters, trying three candidate values for each:
# the number of dense layers, the nodes per layer, and the number of conv layers.
dense_layers = [0, 1, 2]
layer_sizes = [32, 64, 128]
conv_layers = [1, 2, 3]

for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:
            name = "{}-conv-{}-nodes-{}-dense-{}".format(conv_layer, layer_size, dense_layer, int(time.time()))
            print(name)

            model = Sequential()
            model.add(Conv2D(layer_size, (3,3), input_shape = x.shape[1:]))
            model.add(Activation("relu"))
            model.add(MaxPooling2D(pool_size=(2,2)))

            for l in range(conv_layer-1):
                model.add(Conv2D(layer_size, (3,3)))
                model.add(Activation("relu"))
                model.add(MaxPooling2D(pool_size=(2,2)))

            model.add(Flatten())
            for l in range(dense_layer):
                model.add(Dense(layer_size))
                model.add(Activation("relu"))

            model.add(Dense(1))
            model.add(Activation('sigmoid'))
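
            # Sketch of the missing training step (assumption, not in the
            # original snippet): compile each candidate so the grid-searched
            # runs can be trained and compared under `name`.
            model.compile(loss="binary_crossentropy",
                          optimizer="adam",
                          metrics=["accuracy"])
            # model.fit(x, y, batch_size=32, epochs=10, validation_split=0.3,
            #           callbacks=[TensorBoard(log_dir="logs/{}".format(name))])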
Code example #11
    p2.ChangeDutyCycle(7.5)  # rotate 90 degrees
    time.sleep(1)


img_rows = 128
img_cols = 128
num_channel = 1
num_epoch = 20

# Define the number of classes
num_classes = 3
labels_name = {'center': 0, 'left': 1, 'right': 2}

model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
# Viewing model_configuration
Code example #12
# Parameterize the image shape
num_chanels=1
# For RGB or HSV images -> num_chanels=3
img_shape=(img_size,img_size,num_chanels)
num_clases=17
limiteImagenesPrueba=167
imagenes,etiquetas,probabilidades=cargarDatos("dataset/",num_clases,limiteImagenesPrueba)

model=Sequential()
# Input layer
model.add(InputLayer(input_shape=(img_size_flat,)))
# Reshape the flat input back into an image
model.add(Reshape(img_shape))

# Convolutional layers
model.add(Conv2D(kernel_size=5,strides=1,filters=16,padding='same',activation='relu',name='capa_convolucion_1'))
model.add(MaxPooling2D(pool_size=2,strides=2))

model.add(Conv2D(kernel_size=5,strides=1,filters=36,padding='same',activation='relu',name='capa_convolucion_2'))
model.add(MaxPooling2D(pool_size=2,strides=2))

model.add(Conv2D(kernel_size=5,strides=1,filters=48,padding='same',activation='relu',name='capa_convolucion_3'))
model.add(MaxPooling2D(pool_size=2,strides=2))

# Flatten the image
model.add(Flatten())
# Dense layer
model.add(Dense(128,activation='relu'))


# Output layer
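# Sketch of the missing line (assumption): a softmax classifier over the
# num_clases categories would normally complete the model here.
model.add(Dense(num_clases,activation='softmax'))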
Code example #13
def train(x_train, y_train, x_test, y_test, epochs):

    #  calculate classes
    if np.unique(y_train).shape[0] == np.unique(y_test).shape[0]:
        #
        num_classes = np.unique(y_train).shape[0]
    else:
        print('Error in class data...')
        return -2

    # set validation data
    '''val_size = int(0.1 * x_train.shape[0])
    r = np.random.randint(0, x_train.shape[0], size=val_size)
    x_val = x_train[r, :, :]
    y_val = y_train[r]
    x_train = np.delete(x_train, r, axis=0)
    y_train = np.delete(y_train, r, axis=0)'''
    step = int(x_train.shape[0] * 0.005)
    length = int(x_train.shape[0] * 0.1 * 0.005)
    r = []
    for i in range(0, x_train.shape[0] - length, step):
        r.extend(range(i, i + length))
    x_val = x_train[r, :, :]
    y_val = y_train[r]
    x_train = np.delete(x_train, r, axis=0)
    y_train = np.delete(y_train, r, axis=0)

    print('\nInitializing CNN2D...')
    print('\nclasses:', num_classes)
    print('x train shape:', x_train.shape)
    print('x val shape:', x_val.shape)
    print('x test shape:', x_test.shape)
    print('y train shape:', y_train.shape)
    print('y val shape:', y_val.shape)
    print('y test shape:', y_test.shape)
    print("\nTrain split with mean|std {:.2f}|{:.2f}".format(
        np.mean(x_train), np.std(x_train)))
    print("Test split with mean|std {:.2f}|{:.2f}".format(
        np.mean(x_test), np.std(x_test)))

    # shape data
    x_train = x_train.reshape(x_train.shape[0], x_train.shape[1],
                              x_train.shape[2], 1)
    x_val = x_val.reshape(x_val.shape[0], x_val.shape[1], x_val.shape[2], 1)
    x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], x_test.shape[2],
                            1)
    y_train = tf.keras.utils.to_categorical(y_train, num_classes)
    y_val = tf.keras.utils.to_categorical(y_val, num_classes)
    y_test = tf.keras.utils.to_categorical(y_test, num_classes)

    # define the model
    activation = 'elu'
    regularizer = 0.0000
    dropout = 0.25

    # preprocessing
    '''
    offset = 1.0 * np.std(x_train)
    dc0 = (x)
    dc1 = GaussianNoise(offset*0.1)(x)
    dc2 = GaussianDropout(dropout)(x)
    dc3 = Lambda(lambda r: r + __import__('keras').backend.random_uniform((1,), -offset, offset))(x)
    dc4 = Lambda(lambda r: r + __import__('keras').backend.random_uniform((1,), -offset, offset))(x)
    m = Concatenate()([dc0, dc1, dc2, dc3, dc4])
    m = Lambda(lambda r: r - __import__('keras').backend.mean(r))(x)
    '''

    # sequential

    model = Sequential()
    model.add(
        Conv2D(16,
               kernel_size=(3, 3),
               strides=(2, 1),
               activation='elu',
               kernel_regularizer=regularizers.l2(regularizer),
               input_shape=(x_train.shape[1], x_train.shape[2], 1)))
    model.add(EntropyPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout))
    model.add(
        Conv2D(32,
               kernel_size=(3, 3),
               strides=(1, 1),
               activation='elu',
               kernel_regularizer=regularizers.l2(regularizer)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout))
    model.add(
        Conv2D(64,
               kernel_size=(3, 3),
               strides=(1, 1),
               activation='elu',
               kernel_regularizer=regularizers.l2(regularizer)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout))
    # model.add(Conv2D(128, kernel_size=(3, 3), strides=(1, 1), activation='elu', kernel_regularizer=regularizers.l2(regularizer)))
    # model.add(MaxPooling2D(pool_size=(1, 2)))
    # model.add(Dropout(dropout))
    model.add(Flatten())
    model.add(
        Dense(64,
              activation='elu',
              kernel_regularizer=regularizers.l2(regularizer)))
    model.add(Dropout(dropout))
    model.add(Dense(num_classes, activation='softmax'))

    # functional
    '''
    x = Input((x_train.shape[1], x_train.shape[2], x_train.shape[3]))
    m = Conv2D(16, 3, activation=activation , kernel_regularizer=regularizers.l2(regularizer))(x)
    m = EntropyPooling2D((2, 2))(m)
    m = Dropout(dropout)(m)
    m = Conv2D(32, 3, activation=activation, kernel_regularizer=regularizers.l2(regularizer))(m)
    m = EntropyPooling2D((2, 2))(m)
    m = Dropout(dropout)(m)
    m = Conv2D(64, 3, activation=activation, kernel_regularizer=regularizers.l2(regularizer))(m)
    m = EntropyPooling2D((2, 2))(m)
    m = Dropout(dropout)(m)
    if x_train.shape[1] < 50:
        #
        m = Flatten()(m)
    else:
        m = Conv2D(128, 3, activation=activation, kernel_regularizer=regularizers.l2(regularizer))(m)
        m = GlobalAveragePooling2D()(m)
        m = Dropout(dropout)(m)
    m = (Dense(64, activation=activation, kernel_regularizer=regularizers.l2(regularizer)))(m)
    m = Dropout(dropout)(m)
    y = Dense(num_classes, activation='softmax')(m)
    model = Model(inputs=[x], outputs=[y])
    '''

    # summarize model: save an architecture diagram and print the layer table
    plot_model(model, to_file='Models\\model_cnn2d.png')
    print(' ')
    model.summary()

    # compile, fit evaluate
    callback = [
        callbacks.EarlyStopping(monitor='val_acc',
                                min_delta=0.01,
                                patience=10,
                                restore_best_weights=True)
    ]
    model.compile(loss=tf.keras.losses.categorical_crossentropy,
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=['accuracy'])
    model.fit(x_train,
              y_train,
              batch_size=256,
              epochs=epochs,
              verbose=2,
              validation_data=(x_val, y_val),
              callbacks=callback)
    score = model.evaluate(x_test, y_test, verbose=2)

    # evaluate on larger frames
    aggr_size = 5
    for i in range(0, y_test.shape[0] - aggr_size, aggr_size):
        if i == 0:
            y_pred = model.predict(x_test)
            y_pred = np.argmax(y_pred, axis=1)
            y_test = np.argmax(y_test, axis=1)
            y_aggr_test = []
            y_aggr_pred = []
        if np.unique(y_test[i:i + aggr_size]).shape[0] == 1:
            y_aggr_test.append(stats.mode(y_test[i:i + aggr_size])[0][0])
            y_aggr_pred.append(stats.mode(y_pred[i:i + aggr_size])[0][0])
    # print(confusion_matrix(np.argmax(y_test, axis=1), np.argmax(y_pred, axis=1)))
    scipy_score = classification_report(y_aggr_test,
                                        y_aggr_pred,
                                        output_dict=True)['accuracy']
    print('short {:.2f} and aggr {:.2f}'.format(score[1], scipy_score))

    # save model
    open("Models\\model_cnn2d.json", "w").write(model.to_json())
    pickle.dump(model.get_config(), open("Models\\model_cnn2d.pickle", "wb"))
    model.save_weights("Models\\model_cnn2d.h5")

    # results
    return score[1]
Code example #14
test_x = train[-2000:]
test_label = train_data.label[-2000:]
test_onehot = train_data.onehot[-2000:]


class_weight = compute_class_weight(class_weight='balanced',
                                    classes=np.unique(train_data.label),
                                    y=train_data.label)


#CNN model

inputs = Input(shape=(48,48,1))

# First convolutional layer with ReLU-activation and max-pooling.
net = Conv2D(kernel_size=5, strides=1, filters=64, padding='same',
             activation='relu', name='layer_conv1')(inputs)
net = MaxPooling2D(pool_size=2, strides=2)(net)
net = BatchNormalization(axis = -1)(net)
net = Dropout(0.25)(net)

# Second convolutional layer with ReLU-activation and max-pooling.
net = Conv2D(kernel_size=5, strides=1, filters=128, padding='same',
             activation='relu', name='layer_conv2')(net)
net = MaxPooling2D(pool_size=2, strides=2)(net)
net = BatchNormalization(axis = -1)(net)
net = Dropout(0.25)(net)

# Third convolutional layer with ReLU-activation and max-pooling.
net = Conv2D(kernel_size=5, strides=1, filters=256, padding='same',
             activation='relu', name='layer_conv3')(net)
net = MaxPooling2D(pool_size=2, strides=2)(net)
Code example #15
from tensorflow.python import keras
from tensorflow.python.keras import layers
from tensorflow.python.keras.layers import Dense, Activation, Flatten, Conv2D, MaxPool2D, AvgPool2D, GlobalMaxPool2D, GlobalAvgPool2D, BatchNormalization, add, Input
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.optimizers import Adam

image_size = 224
input_object = Input(shape=(image_size, image_size, 3))

output = Conv2D(256, kernel_size=1, strides=1, padding="same")(input_object)
output = Activation("relu")(output)

output = Conv2D(256, kernel_size=1, strides=1, padding="same")(output)
output = Activation("relu")(output)

output = Conv2D(256, kernel_size=1, strides=1, padding="same")(output)
output = Activation("relu")(output)

output = Conv2D(256, kernel_size=1, strides=1, padding="same")(output)
output = Activation("relu")(output)

output = GlobalAvgPool2D()(output)

#output = Flatten()(output)

#output = Dense(1000)(output)
#output = Activation("relu")(output)
#output = MaxPool2D(pool_size=(3,3), strides=(2,2))(output)

model = Model(inputs=input_object, outputs=output)
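
# Hedged follow-up sketch (assumption): with GlobalAvgPool2D over the 256
# 1x1-conv filters, the model maps each 224x224 image to a 256-dim vector.
model.compile(optimizer=Adam(), loss="mse")
print(model.output_shape)  # (None, 256)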
Code example #16
 def __call__(self, net, training=None):
     net = Conv2D(self.filters, self.kernelSize, strides=self.strides, padding='same')(net)
     net = BatchNormalization()(net, training=training)
     net = LeakyReLU()(net)
     return net
Code example #17
def ResNet(input_shape=None,
           classes=10,
           block='bottleneck',
           residual_unit='v2',
           repetitions=None,
           initial_filters=64,
           activation='softmax',
           include_top=True,
           input_tensor=None,
           dropout=None,
           transition_dilation_rate=(1, 1),
           initial_strides=(2, 2),
           initial_kernel_size=(7, 7),
           initial_pooling='max',
           final_pooling=None,
           top='classification'):
    """Builds a custom ResNet like architecture. Defaults to ResNet50 v2.
    Args:
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False; otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` dim ordering)
            or `(3, 224, 224)` (with `channels_first` dim ordering).
            It should have exactly 3 dimensions,
            and width and height should be no smaller than 8.
            E.g. `(224, 224, 3)` would be one valid value.
        classes: The number of outputs at final softmax layer
        block: The block function to use. This is either `'basic'` or `'bottleneck'`.
            The original paper used `basic` for layers < 50.
        repetitions: Number of repetitions of various block units.
            At each block unit, the number of filters are doubled and the input size
            is halved. Default of None implies the ResNet50v2 values of [3, 4, 6, 3].
        residual_unit: the basic residual unit, 'v1' for conv bn relu, 'v2' for bn relu
            conv. See [Identity Mappings in
            Deep Residual Networks](https://arxiv.org/abs/1603.05027)
            for details.
        dropout: None for no dropout, otherwise rate of dropout from 0 to 1.
            Based on the [Wide Residual Networks](https://arxiv.org/abs/1605.07146) paper.
        transition_dilation_rate: Dilation rate for transition layers. For semantic
            segmentation of images use a dilation rate of (2, 2).
        initial_strides: Stride of the very first residual unit and MaxPooling2D call,
            with default (2, 2), set to (1, 1) for small images like cifar.
        initial_kernel_size: kernel size of the very first convolution, (7, 7) for
            imagenet and (3, 3) for small image datasets like tiny imagenet and cifar.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
        initial_pooling: Determine if there will be an initial pooling layer,
            'max' for imagenet and None for small image datasets.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
        final_pooling: Optional pooling mode for feature extraction at the final
            model layer when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        top: Defines final layers to evaluate based on a specific problem type. Options
            are 'classification' for ImageNet style problems, 'segmentation' for
            problems like the Pascal VOC dataset, and None to exclude these layers
            entirely.
    Returns:
        The keras `Model`.
    """
    if activation not in ['softmax', 'sigmoid', None]:
        raise ValueError(
            'activation must be one of "softmax", "sigmoid", or None')
    if activation == 'sigmoid' and classes != 1:
        raise ValueError(
            'sigmoid activation can only be used when classes = 1')
    if repetitions is None:
        repetitions = [3, 4, 6, 3]
    # Determine proper input shape
    # input_shape = _obtain_input_shape(input_shape,
    #                                   default_size=32,
    #                                   min_size=8,
    #                                   data_format=K.image_data_format(),
    #                                   require_flatten=include_top)
    _handle_dim_ordering()
    if len(input_shape) != 3:
        raise Exception(
            "Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")

    if block == 'basic':
        block_fn = basic_block
    elif block == 'bottleneck':
        block_fn = bottleneck
    elif isinstance(block, six.string_types):
        block_fn = _string_to_function(block)
    else:
        block_fn = block

    if residual_unit == 'v2':
        residual_unit = _bn_relu_conv
    elif residual_unit == 'v1':
        residual_unit = _conv_bn_relu
    elif isinstance(residual_unit, six.string_types):
        residual_unit = _string_to_function(residual_unit)
    else:
        pass  # assume a residual-unit callable was passed in directly

    # Permute dimension order if necessary
    if K.image_data_format() == 'channels_first':
        input_shape = (input_shape[1], input_shape[2], input_shape[0])
    # Determine proper input shape
    # input_shape = _obtain_input_shape(input_shape,
    #                                   default_size=32,
    #                                   min_size=8,
    #                                   data_format=K.image_data_format(),
    #                                   require_flatten=include_top)

    img_input = Input(shape=input_shape, tensor=input_tensor)
    x = _conv_bn_relu(filters=initial_filters,
                      kernel_size=initial_kernel_size,
                      strides=initial_strides)(img_input)
    if initial_pooling == 'max':
        x = MaxPooling2D(pool_size=(3, 3),
                         strides=initial_strides,
                         padding="same")(x)

    block = x
    filters = initial_filters
    for i, r in enumerate(repetitions):
        transition_dilation_rates = [transition_dilation_rate] * r
        transition_strides = [(1, 1)] * r
        if transition_dilation_rate == (1, 1):
            transition_strides[0] = (2, 2)
        block = _residual_block(
            block_fn,
            filters=filters,
            stage=i,
            blocks=r,
            is_first_layer=(i == 0),
            dropout=dropout,
            transition_dilation_rates=transition_dilation_rates,
            transition_strides=transition_strides,
            residual_unit=residual_unit)(block)
        filters *= 2

    # Last activation
    x = _bn_relu(block)

    # Classifier block
    if include_top and top == 'classification':
        x = GlobalAveragePooling2D()(x)
        x = Dense(units=classes,
                  activation=activation,
                  kernel_initializer="he_normal")(x)
    elif include_top and top == 'segmentation':
        x = Conv2D(classes, (1, 1), activation='linear', padding='same')(x)

        if K.image_data_format() == 'channels_first':
            channel, row, col = input_shape
        else:
            row, col, channel = input_shape

        x = Reshape((row * col, classes))(x)
        x = Activation(activation)(x)
        x = Reshape((row, col, classes))(x)
    elif final_pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif final_pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    model = Model(inputs=img_input, outputs=x)
    return model
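
# Hedged usage sketches (assumptions): the defaults give a ResNet50-v2-style
# classifier, while small-image settings with basic blocks give a ResNet18-style
# CIFAR variant, per the argument docs above.
imagenet_style = ResNet(input_shape=(224, 224, 3), classes=1000)
cifar_style = ResNet(input_shape=(32, 32, 3), classes=10,
                     block='basic', repetitions=[2, 2, 2, 2],
                     initial_kernel_size=(3, 3), initial_strides=(1, 1),
                     initial_pooling=None)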
Code example #18
File: convert.py  Project: markgordon/keras-yolo3
def make_model(model_file,
               weights_file,
               anchor_file,
               end_step,
               initial_sparsity,
               end_sparsity,
               frequency,
               **kwargs):
    annotation_path = 'model_data/combined1.txt'
    log_dir = 'logs/000/'
    classes_path = 'model_data/classes.txt'
    anchors_path = 'model_data/yolo_anchors.txt'
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    anchors = np.load(anchor_file,allow_pickle=True)
    model_path = 'model_data/'
    init_model = model_path + 'pelee3'
    new_pruned_keras_file = model_path + 'pruned_' + os.path.basename(init_model)
    epochs = 100
    batch_size = 16
    init_epoch = 50
    input_shape = (384,288) # multiple of 32, hw
    log_dir = 'logs/000/'
    config_path = model_file
    weights_path = weights_file
    output_path = model_file + '.tf'
    output_root = os.path.splitext(output_path)[0]
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines)*val_split)
    num_train = len(lines) - num_val
    # Load weights and config.
    print('Loading weights.')
    weights_file = open(weights_path, 'rb')
    major, minor, revision = np.ndarray(
        shape=(3, ), dtype='int32', buffer=weights_file.read(12))
    if (major*10+minor)>=2 and major<1000 and minor<1000:
        seen = np.ndarray(shape=(1,), dtype='int64', buffer=weights_file.read(8))
    else:
        seen = np.ndarray(shape=(1,), dtype='int32', buffer=weights_file.read(4))
    print('Weights Header: ', major, minor, revision, seen)

    print('Parsing Darknet config.')
    unique_config_file = unique_config_sections(config_path)
    cfg_parser = configparser.ConfigParser()
    cfg_parser.read_file(unique_config_file)
    first_layer = True
    print('Creating Keras model.')
    all_layers = []
    weight_decay = float(cfg_parser['net_0']['decay']
                         ) if 'net_0' in cfg_parser.sections() else 5e-4
    count = 0
    out_index = []
    pruning_params = {
        'pruning_schedule':tfmot.sparsity.keras.PolynomialDecay(initial_sparsity = initial_sparsity,
                                                     final_sparsity = end_sparsity,
                                                     begin_step = 0,
                                                     end_step = end_step,
                                                     frequency = frequency)
    }
    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))
        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            padding = 'same' if pad == 1 and stride == 1 else 'valid'

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
            prev_layer_shape = K.int_shape(prev_layer)

            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            weights_size = np.product(weights_shape)

            print('conv2d', 'bn'
                  if batch_normalize else '  ', activation, weights_shape)

            conv_bias = np.ndarray(
                shape=(filters, ),
                dtype='float32',
                buffer=weights_file.read(filters * 4))
            count += filters

            if batch_normalize:
                bn_weights = np.ndarray(
                    shape=(3, filters),
                    dtype='float32',
                    buffer=weights_file.read(filters * 12))
                count += 3 * filters

                bn_weight_list = [
                    bn_weights[0],  # scale gamma
                    conv_bias,  # shift beta
                    bn_weights[1],  # running mean
                    bn_weights[2]  # running var
                ]

            conv_weights = np.ndarray(
                shape=darknet_w_shape,
                dtype='float32',
                buffer=weights_file.read(weights_size * 4))
            count += weights_size

            # DarkNet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # We would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
            conv_weights = [conv_weights] if batch_normalize else [
                conv_weights, conv_bias
            ]

            # Handle activation.
            act_fn = None
            if activation in ('leaky', 'swish'):
                pass  # advanced activations are added after the conv layer below
            elif activation != 'linear':
                raise ValueError(
                    'Unknown activation function `{}` in section {}'.format(
                        activation, section))

            # Create Conv2D layer
            if stride>1:
                # Darknet uses left and top padding instead of 'same' mode
                prev_layer = ZeroPadding2D(((1,0),(1,0)))(prev_layer)
            if(first_layer):
                conv_layer = Conv2D(
                    filters, (size, size),
                    strides=(stride, stride),
                    kernel_regularizer=l2(weight_decay),
                    use_bias=not batch_normalize,
                    weights=conv_weights,
                    activation=act_fn,
                    padding=padding)(prev_layer)
            else:
                conv_layer =  prune.prune_low_magnitude(Conv2D(
                        filters, (size, size),
                        strides=(stride, stride),
                        kernel_regularizer=l2(weight_decay),
                        use_bias=not batch_normalize,
                        weights=conv_weights,
                        activation=act_fn,
                        padding=padding),
                        **pruning_params)(prev_layer)
            if batch_normalize:
                conv_layer = BatchNormalization(
                    weights=bn_weight_list)(conv_layer)
            prev_layer = conv_layer
            first_layer=False
            if activation == 'linear':
                all_layers.append(prev_layer)
            elif activation == 'leaky':
                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                prev_layer = act_layer
                all_layers.append(act_layer)
            elif activation == 'swish':
                # swish(x) = x * sigmoid(x); the original applied sigmoid alone,
                # which is a bug (assumes tf.keras >= 2.2 for the built-in 'swish')
                act_layer = Activation('swish')(prev_layer)
                prev_layer = act_layer
                all_layers.append(act_layer)

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            layers = [all_layers[i] for i in ids]
            if len(layers) > 1:
                print('Concatenating route layers:', layers)
                concatenate_layer = Concatenate()(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(
                    pool_size=(size, size),
                    strides=(stride, stride),
                    padding='same')(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('shortcut'):
            index = int(cfg_parser[section]['from'])
            activation = cfg_parser[section]['activation']
            all_layers.append(Add()([all_layers[index], prev_layer]))
            prev_layer = all_layers[-1]
            all_layers.append(LeakyReLU(alpha=0.1)(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('upsample'):
            stride = int(cfg_parser[section]['stride'])
            assert stride == 2, 'Only stride=2 supported.'
            all_layers.append(UpSampling2D(stride)(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('yolo'):
            out_index.append(len(all_layers)-1)
            all_layers.append(None)
            prev_layer = all_layers[-1]

        elif section.startswith('net'):
            height = int(cfg_parser[section]['height'])
            width = int(cfg_parser[section]['width'])
            input_layer = Input(shape=(height, width, 3))
            prev_layer = input_layer
            output_size = (width, height)

        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    # Create and save model.
    if len(out_index) == 0:
        out_index.append(len(all_layers) - 1)
    num_anchors = len(anchors[0])
    num_layers = len(out_index)
    if num_layers > 0:
        shape = K.int_shape(all_layers[out_index[0]])
        y1_reshape = KLayer.Reshape((shape[1], shape[2], num_anchors, 5 + num_classes), name='l1')(all_layers[out_index[0]])
    if num_layers > 1:
        shape = K.int_shape(all_layers[out_index[1]])
        y2_reshape = KLayer.Reshape((shape[1], shape[2], num_anchors, 5 + num_classes), name='l2')(all_layers[out_index[1]])
    yolo_model = Model(inputs=input_layer, outputs=[all_layers[i] for i in out_index])
    if num_layers > 1:
        yolo_model_wrapper = Model(input_layer, [y1_reshape, y2_reshape])
    else:
        yolo_model_wrapper = Model(input_layer, [y1_reshape])
    print(yolo_model.summary())
    return yolo_model, yolo_model_wrapper, output_size

    if False:
        if args.weights_only:
            model.save_weights('{}'.format(output_path))
            print('Saved Keras weights to {}'.format(output_path))
        else:
            model.save('{}'.format(output_path), save_format='tf')
            print('Saved Keras model to {}'.format(output_path))

        # Check to see if all weights have been read.
        remaining_weights = len(weights_file.read()) / 4
        weights_file.close()
        print('Read {} of {} from Darknet weights.'.format(count, count +
                                                           remaining_weights))
        if remaining_weights > 0:
            print('Warning: {} unused weights'.format(remaining_weights))

    if True:
        model = create_model(model, anchors, num_classes, input_shape, input_layer, layers, out_index)
        # `compile()` does not accept callbacks; collect the pruning
        # callbacks here and pass them to the training call instead.
        pruning_callbacks = [
            sparsity.keras.pruning_callbacks.UpdatePruningStep(),
            sparsity.keras.pruning_callbacks.PruningSummaries(log_dir=log_dir, profile_batch=0)
        ]
        yolo_model_wrapper.compile(
            loss=tf.keras.losses.categorical_crossentropy,
            optimizer='adam',
            metrics=['accuracy'])
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        model.compile(optimizer=Adam(lr=1e-3), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change
        print('Unfreeze all of the layers.')
        print(model.summary())

        batch_size = 16 # note that more GPU memory is required after unfreezing the body
        print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
        model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
            steps_per_epoch=max(1, num_train//batch_size),
            validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),
            validation_steps=max(1, num_val//batch_size),
            epochs=5,
            initial_epoch=0,
            callbacks=pruning_callbacks)  # the pruning step must be updated during training


        #m2train.m2train(args,model)
        #score = model.evaluate(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
        #                       class_names, verbose=0)
        #print('Test loss:', score[0])
        #print('Test accuracy:', score[1])
    final_model = sparsity.keras.prune.strip_pruning(model)
    final_model.summary()
    print('Saving pruned model to: ', output_path)
    final_model.save('{}'.format(output_path), save_format='tf')
    tflite_model_file = model_path + "sparse.tflite"
    converter = tf.lite.TFLiteConverter.from_keras_model(final_model)
    converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
    tflite_model = converter.convert()
    with open(tflite_model_file, 'wb') as f:
        f.write(tflite_model)
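As a follow-up, a minimal sketch of running the exported TFLite model with the standard tf.lite.Interpreter API (the zero-filled input below is only an assumption to show the call sequence):

import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path=tflite_model_file)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Feed a dummy tensor of the right shape and fetch the raw predictions.
dummy = np.zeros(input_details[0]['shape'], dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], dummy)
interpreter.invoke()
predictions = interpreter.get_tensor(output_details[0]['index'])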
Code example #19
File: submodel5.py Project: Adamantios/NN-Train
def cifar100_complicated_ensemble_submodel5(
        input_shape=None,
        input_tensor=None,
        n_classes=None,
        weights_path: Union[None, str] = None) -> Model:
    """
    Defines a cifar100 network.

    :param n_classes: the number of classes for the softmax output layer.
    :param input_shape: the input shape of the network. Can be omitted if input_tensor is used.
    :param input_tensor: the input tensor of the network. Can be omitted if input_shape is used.
    :param weights_path: a path to a trained custom network's weights.
    :return: Keras functional API Model.
    """
    inputs = create_inputs(input_shape, input_tensor)

    # Define a weight decay for the regularisation.
    weight_decay = 1e-5

    # Block 1.
    x = Conv2D(64, (3, 3),
               padding='same',
               activation='elu',
               name='block1_conv1',
               kernel_regularizer=l2(weight_decay))(inputs)
    x = BatchNormalization(name='block1_batch-norm1')(x)
    x = Conv2D(64, (3, 3),
               padding='same',
               activation='elu',
               name='block1_conv2',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(name='block1_batch-norm2')(x)
    x = MaxPooling2D(pool_size=(2, 2), name='block1_pool')(x)
    x = Dropout(0.2, name='block1_dropout', seed=0)(x)

    # Block 2.
    x = Conv2D(128, (3, 3),
               padding='same',
               activation='elu',
               name='block2_conv1',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(name='block2_batch-norm1')(x)
    x = Conv2D(128, (3, 3),
               padding='same',
               activation='elu',
               name='block2_conv2',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(name='block2_batch-norm2')(x)
    x = MaxPooling2D(pool_size=(2, 2), name='block2_pool')(x)
    x = Dropout(0.3, name='block2_dropout', seed=0)(x)

    # Block 3.
    x = Conv2D(256, (3, 3),
               padding='same',
               activation='elu',
               name='block3_conv1',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(name='block3_batch-norm1')(x)
    x = Conv2D(256, (3, 3),
               padding='same',
               activation='elu',
               name='block3_conv2',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(name='block3_batch-norm2')(x)
    x = MaxPooling2D(pool_size=(2, 2), name='block3_pool')(x)
    x = Dropout(0.5, name='block3_dropout', seed=0)(x)

    # Add top layers.
    x = Flatten(name='flatten')(x)
    outputs = Dense(n_classes, activation='softmax', name='softmax_outputs')(x)

    # Create Submodel 5.
    model = Model(inputs,
                  outputs,
                  name='cifar100_complicated_ensemble_submodel5')
    # Load weights, if they exist.
    load_weights(weights_path, model)

    return model
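A hypothetical usage sketch (the input shape, class count and compile settings below are assumptions, not part of the original file):

model = cifar100_complicated_ensemble_submodel5(input_shape=(32, 32, 3), n_classes=20)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()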
Code example #20
File: intermediate_4.py Project: Saswati08/Projects-
def upsample_1(x, factor, **kwargs):
    # `num_filters` and `pixel_shuffle` are defined elsewhere in the file.
    x = Conv2D(num_filters * (factor ** 2), 3, padding='same', **kwargs)(x)
    return Lambda(pixel_shuffle(scale=factor))(x)
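The pixel_shuffle helper is not shown in this excerpt; in EDSR-style super-resolution code it is typically a thin wrapper around tf.nn.depth_to_space, along these lines (a sketch under that assumption):

import tensorflow as tf

def pixel_shuffle(scale):
    # Rearranges (H, W, C*scale^2) feature maps into (H*scale, W*scale, C).
    return lambda x: tf.nn.depth_to_space(x, scale)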
Code example #21
x_test = x_test.astype('float32') / 255
y_train = to_categorical(y_train, 6)
y_test = to_categorical(y_test, 6)

# input = Input(shape=(128, 128, 3))
# model = VGG16(weights=None, include_top=False, input_tensor=input, pooling='None')
# x = model.output
# x = Flatten()(x)
# x = Dense(4096, activation='relu')(x)
# x = Dense(4096, activation='relu')(x)
# predictions = Dense(6, activation='softmax')(x)
# model = tf.keras.Model(inputs=model.input, outputs=predictions)

k_size = (3, 3)
inputs = Input(shape=(128, 128, 3))  # renamed from `input` to avoid shadowing the builtin
x = Conv2D(32, k_size, padding='same', strides=2)(inputs)
x = BatchNormalization()(x)
x = Activation('relu')(x)

x = Conv2D(64, k_size, padding='same', strides=1)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)

x = MaxPool2D((2,2))(x)

x = Conv2D(128, k_size, padding='same', strides=2)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)

x = Conv2D(256, k_size, padding='same', strides=2)(x)
x = BatchNormalization()(x)
Code example #22
for i in range(input_X.shape[0]):
    mx = np.random.randint(-50, 50)
    my = np.random.randint(-50, 50)
    input_X[i] = move_image(input_X[i].reshape(96, 96) * 255,
                            (mx, my)).reshape(96, 96, 1) / 255.0
    labels[i][np.arange(0, 30, 2)] = labels[i][np.arange(0, 30, 2)] + my + 50
    labels[i][np.arange(1, 30, 2)] = labels[i][np.arange(1, 30, 2)] + mx + 50
    labels[i] = labels[i] / (96 + 50)

from_i = 0
to_i = from_i + 1500
model = tf.keras.models.Sequential()
model.add(
    Conv2D(input_shape=(96, 96, 1),
           filters=128,
           kernel_size=(5, 5),
           activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
model.add(Dropout(0.25))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
model.add(Dropout(0.25))
model.add(Conv2D(filters=48, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(500, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(90, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(30))
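The example stops before training; a plausible continuation, treating the 30 outputs as 15 (x, y) keypoint coordinates regressed with mean squared error (the optimizer, batch size and epoch count are assumptions):

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
model.fit(input_X[from_i:to_i], labels[from_i:to_i],
          batch_size=32, epochs=10, validation_split=0.2)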
Code example #23
def reconstruction(x, ch='ddfn', l2_coeff=0):
    if ch == 'ddfn':
        return Conv2D(3, 1, name='recons', kernel_regularizer=l2(l2_coeff))(x)
Code example #24
try:
    from tensorflow.python.keras.models import Sequential
    from tensorflow.python.keras.layers import Conv2D, MaxPooling2D
    from tensorflow.python.keras.layers import BatchNormalization
except ImportError:
    # Fall back to the old tf.contrib paths on pre-1.4 TensorFlow.
    from tensorflow.contrib.keras.python.keras.layers import Conv2D, MaxPooling2D
    from tensorflow.contrib.keras.python.keras.models import Sequential
    from tensorflow.contrib.keras.python.keras.layers.normalization import BatchNormalization
from tfutils import Squeeze

model = Sequential([
    Conv2D(64,
           kernel_size=(3, 3),
           activation='relu',
           input_shape=(None, None, 1)),
    BatchNormalization(),
    Conv2D(64, kernel_size=(4, 4), activation='relu'),
    MaxPooling2D(),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(),
    Conv2D(200, kernel_size=(4, 4), activation='relu'),
    Conv2D(200, kernel_size=(1, 1), activation='relu'),
    Conv2D(3, kernel_size=(1, 1),
           activation='softmax'),  # filters num == # of labels
    Squeeze(3),
])
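Because every layer is convolutional, the model accepts inputs of any spatial size; a sketch of inference on one grayscale image (Squeeze comes from the local tfutils module, assumed here to drop a size-1 axis):

import numpy as np

img = np.random.rand(1, 256, 256, 1).astype('float32')  # arbitrary H and W
per_location_probs = model.predict(img)  # one 3-way softmax per output location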
Code example #25
    def build_model3(self):
        vae_input = Input(shape=self.input_dim)
        #print("vae_input shape " + str(vae_input.shape))

        vae_c1 = Conv2D(filters=16,
                        kernel_size=3,
                        padding='same',
                        activation='relu')(vae_input)
        vae_m1 = MaxPooling2D((2, 2), padding='same')(vae_c1)
        vae_c2 = Conv2D(filters=16,
                        kernel_size=3,
                        padding='same',
                        activation='relu')(vae_m1)
        vae_m2 = MaxPooling2D((2, 2), padding='same')(vae_c2)
        vae_c3 = Conv2D(filters=16,
                        kernel_size=3,
                        padding='same',
                        activation='relu')(vae_m2)
        vae_m3 = MaxPooling2D((2, 2), padding='same')(vae_c3)
        vae_c4 = Conv2D(filters=16,
                        kernel_size=3,
                        padding='same',
                        activation='relu')(vae_m3)
        vae_m4 = MaxPooling2D((2, 2), padding='same')(vae_c4)
        vae_c5 = Conv2D(filters=8,
                        kernel_size=3,
                        padding='same',
                        activation='relu')(vae_m4)
        vae_m5 = MaxPooling2D((2, 2), padding='same')(vae_c5)
        vae_c6 = Conv2D(filters=8,
                        kernel_size=3,
                        padding='same',
                        activation='relu')(vae_m5)
        vae_m6 = MaxPooling2D((2, 2), padding='same')(vae_c6)

        #print("vae_c1 shape " + str(vae_c1.shape))
        #print("vae_c2 shape " + str(vae_c2.shape))
        #print("vae_c3 shape " + str(vae_c3.shape))
        #print("vae_c4 shape " + str(vae_c4.shape))
        # print("vae_c5 shape " + str(vae_c5.shape))
        #print("vae_m1 shape " + str(vae_m1.shape))
        #print("vae_m2 shape " + str(vae_m2.shape))
        #print("vae_m3 shape " + str(vae_m3.shape))
        #print("vae_m4 shape " + str(vae_m4.shape))
        #print("vae_m5 shape " + str(vae_m5.shape))
        #print("vae_m6 shape " + str(vae_m6.shape))

        vae_z_in = Flatten()(vae_m6)
        print("vae_z_in shape " + str(vae_z_in.shape))

        vae_z_mean = Dense(self.z_dim)(vae_z_in)
        vae_z_log_var = Dense(self.z_dim)(vae_z_in)
        #print("vae_z_mean shape " + str(vae_z_mean.shape))
        #print("vae_z_log_var shape " + str(vae_z_log_var.shape))

        vae_z = Lambda(self.sampling)([vae_z_mean, vae_z_log_var])
        vae_z_input = Input(shape=(self.z_dim, ))
        #print("vae_z shape " + str(vae_z.shape))
        #print("vae_z_input shape " + str(vae_z_input.shape))

        vae_z_out = Reshape((5, 5, 1))
        vae_z_out_model = vae_z_out(vae_z)
        #print("vae_z_out_model shape " + str(vae_z_out_model.shape))

        #vae_d1 = Conv2D( filters=8, kernel_size=(3, 3), padding='same', activation='relu')
        vae_u1 = UpSampling2D((3, 4))
        vae_d2 = Conv2D(filters=16,
                        kernel_size=(3, 3),
                        padding='same',
                        activation='relu')
        vae_u2 = UpSampling2D((2, 2))
        vae_d3 = Conv2D(filters=16,
                        kernel_size=(3, 3),
                        padding='same',
                        activation='relu')
        vae_u3 = UpSampling2D((2, 2))
        vae_d4 = Conv2D(filters=8,
                        kernel_size=(3, 3),
                        padding='same',
                        activation='relu')
        vae_u4 = UpSampling2D((2, 2))
        vae_d5 = Conv2D(filters=4,
                        kernel_size=(3, 3),
                        padding='same',
                        activation='relu')
        vae_u5 = UpSampling2D((2, 2))
        vae_d6 = Conv2D(filters=1,
                        kernel_size=(3, 3),
                        padding='same',
                        activation='sigmoid')

        # vae_d1_model = vae_d1(vae_z_out_model)
        vae_u1_model = vae_u1(vae_z_out_model)
        vae_d2_model = vae_d2(vae_u1_model)
        vae_u2_model = vae_u2(vae_d2_model)
        vae_d3_model = vae_d3(vae_u2_model)
        vae_u3_model = vae_u3(vae_d3_model)
        vae_d4_model = vae_d4(vae_u3_model)
        vae_u4_model = vae_u4(vae_d4_model)
        vae_d5_model = vae_d5(vae_u4_model)
        vae_u5_model = vae_u5(vae_d5_model)
        vae_d6_model = vae_d6(vae_u5_model)
        #print("vae_d1_model shape " + str(vae_d1_model.shape))
        #print("vae_u1_model shape " + str(vae_u1_model.shape))
        #print("vae_d2_model shape " + str(vae_d2_model.shape))
        #print("vae_u2_model shape " + str(vae_u2_model.shape))
        #print("vae_d3_model shape " + str(vae_d3_model.shape))
        #print("vae_u3_model shape " + str(vae_u3_model.shape))
        #print("vae_d4_model shape " + str(vae_d4_model.shape))
        #print("vae_u4_model shape " + str(vae_u4_model.shape))
        #print("vae_d5_model shape " + str(vae_d5_model.shape))

        #240 120 60 30 15
        #320 160 80 40 20

        vae_dense_decoder = vae_z_input
        vae_z_out_decoder = vae_z_out(vae_dense_decoder)

        #vae_d1_decoder = vae_d1(vae_z_out_decoder)
        vae_u1_decoder = vae_u1(vae_z_out_decoder)
        vae_d2_decoder = vae_d2(vae_u1_decoder)
        vae_u2_decoder = vae_u2(vae_d2_decoder)
        vae_d3_decoder = vae_d3(vae_u2_decoder)
        vae_u3_decoder = vae_u3(vae_d3_decoder)
        vae_d4_decoder = vae_d4(vae_u3_decoder)
        vae_u4_decoder = vae_u4(vae_d4_decoder)
        vae_d5_decoder = vae_d5(vae_u4_decoder)
        vae_u5_decoder = vae_u5(vae_d5_decoder)
        vae_d6_decoder = vae_d6(vae_u5_decoder)
        print("vae_d1_decoder shape " + str(vae_u1_decoder.shape))
        print("vae_d2_decoder shape " + str(vae_d2_decoder.shape))
        print("vae_d3_decoder shape " + str(vae_d3_decoder.shape))
        print("vae_d4_decoder shape " + str(vae_d4_decoder.shape))
        print("vae_d5_decoder shape " + str(vae_d5_decoder.shape))

        # Models
        vae = Model(vae_input, vae_d6_model)
        vae_encoder = Model(vae_input, vae_z)
        vae_decoder = Model(vae_z_input, vae_d6_decoder)

        def vae_r_loss(y_true, y_pred):

            y_true_flat = K.flatten(y_true)
            y_pred_flat = K.flatten(y_pred)

            return 10 * K.mean(K.square(y_true_flat - y_pred_flat), axis=-1)

        def vae_kl_loss(y_true, y_pred):
            return -0.5 * K.mean(1 + vae_z_log_var - K.square(vae_z_mean) -
                                 K.exp(vae_z_log_var),
                                 axis=-1)

        def vae_loss(y_true, y_pred):
            return vae_r_loss(y_true, y_pred) + vae_kl_loss(y_true, y_pred)

        #vae.compile(optimizer='rmsprop', loss = vae_loss,  metrics = [vae_r_loss, vae_kl_loss])
        vae.compile(optimizer=Adam(lr=0.005),
                    loss=vae_loss,
                    metrics=[vae_r_loss, vae_kl_loss])
        vae.summary()

        return (vae, vae_encoder, vae_decoder)
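The self.sampling function used for the latent layer is not part of this excerpt; in a VAE it is normally the reparameterisation trick, along these lines (a sketch under that assumption, not the file's actual code):

    def sampling(self, args):
        z_mean, z_log_var = args
        # Draw epsilon ~ N(0, I), then shift and scale it so the sample
        # is differentiable with respect to the encoder outputs.
        epsilon = K.random_normal(shape=K.shape(z_mean))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon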
Code example #26
def build_fpn(backbone,
              fpn_layers,
              classes=21,
              activation='softmax',
              upsample_rates=(2, 2, 2),
              last_upsample=4,
              pyramid_filters=256,
              segmentation_filters=128,
              use_batchnorm=False,
              dropout=None,
              interpolation='bilinear'):
    """
    Implementation of FPN head for segmentation models according to:
        http://presentations.cocodataset.org/COCO17-Stuff-FAIR.pdf

    Args:
        backbone: Keras `Model`, some classification model without top
        fpn_layers: list of layer names or indexes, used for pyramid building
        classes: int, number of output feature maps
        activation: activation in last layer, e.g. 'sigmoid' or 'softmax'
        upsample_rates: tuple of integers, scaling rates between pyramid blocks
        pyramid_filters: int, number of filters in `M` blocks of top-down FPN branch
        segmentation_filters: int, number of filters in `P` blocks of FPN
        last_upsample: rate for upsampling the concatenated pyramid predictions to
            match the spatial resolution of the input data
        interpolation: 'nearest' or 'bilinear', interpolation used when resizing
        dropout: float [0, 1), dropout rate
        use_batchnorm: bool, include batch normalization between `conv`
            and `relu` layers in the FPN

    Returns:
        model: Keras `Model`
    """

    if len(upsample_rates) != len(fpn_layers):
        raise ValueError(
            'Number of intermediate feature maps and upsample steps should match'
        )

    # extract model layer outputs
    outputs = extract_outputs(backbone, fpn_layers, include_top=True)

    # add upsample rate `1` for first block
    upsample_rates = [1] + list(upsample_rates)

    # top - down path, build pyramid
    m = None
    pyramid = []
    for i, c in enumerate(outputs):
        m, p = pyramid_block(pyramid_filters=pyramid_filters,
                             segmentation_filters=segmentation_filters,
                             upsample_rate=upsample_rates[i],
                             use_batchnorm=use_batchnorm,
                             stage=i)(c, m)
        pyramid.append(p)

    # upsample and concatenate all pyramid layers
    upsampled_pyramid = []

    for i, p in enumerate(pyramid[::-1]):
        if upsample_rates[i] > 1:
            upsample_rate = to_tuple(np.prod(upsample_rates[:i + 1]))
            p = ResizeImage(upsample_rate, interpolation=interpolation)(p)
        upsampled_pyramid.append(p)

    x = Concatenate()(upsampled_pyramid)

    # final convolution
    n_filters = segmentation_filters * len(pyramid)
    x = Conv2DBlock(n_filters, (3, 3),
                    use_batchnorm=use_batchnorm,
                    padding='same')(x)
    if dropout is not None:
        x = SpatialDropout2D(dropout)(x)

    x = Conv2D(classes, (3, 3), padding='same')(x)

    # upsampling to original spatial resolution
    x = ResizeImage(to_tuple(last_upsample), interpolation=interpolation)(x)

    # activation
    x = Activation(activation)(x)

    model = Model(backbone.input, x)
    return model
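Helpers such as extract_outputs, pyramid_block, ResizeImage, Conv2DBlock and to_tuple live elsewhere in the package; to_tuple in particular just normalises a scalar rate into an (h, w) pair, roughly:

def to_tuple(x):
    # Accept either a scalar rate or an already-formed (h, w) pair.
    if isinstance(x, (tuple, list)) and len(x) == 2:
        return tuple(x)
    return (int(x), int(x))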
Code example #27
                                        target_size=(sz, sz),
                                        batch_size=16,
                                        class_mode='categorical')
test_gen = datagen.flow_from_directory(test_dir,
                                       target_size=(sz, sz),
                                       batch_size=16,
                                       class_mode='categorical')

label_map = train_gen.class_indices
print(label_map)

label_map1 = test_gen.class_indices
print(label_map1)

model = Sequential()
model.add(Conv2D(64, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
# model.add(Dense(64))
# model.add(Activation('softmax'))
# model.add(Dropout(0.5))
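The example is truncated here; a typical completion of such a classifier, consistent with the commented-out head above (all layer sizes and training settings below are assumptions):

model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(label_map), activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(train_gen, validation_data=test_gen, epochs=10)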
Code example #28
    def build(self, input_shape):
        input_tensor = Input(shape=input_shape)
        # vgg block 1
        x = Conv2D(64,
                   3,
                   padding='same',
                   activation='relu',
                   name='block1_conv1')(input_tensor)
        x = Conv2D(64,
                   3,
                   padding='same',
                   activation='relu',
                   name='block1_conv2')(x)
        x = MaxPooling2D(2, 2, padding='same', name='block1_pool')(x)

        # vgg block 2
        x = Conv2D(128,
                   3,
                   padding='same',
                   activation='relu',
                   name='block2_conv1')(x)
        x = Conv2D(128,
                   3,
                   padding='same',
                   activation='relu',
                   name='block2_conv2')(x)
        x = MaxPooling2D(2, 2, padding='same', name='block2_pool')(x)

        # vgg block 3
        x = Conv2D(256,
                   3,
                   padding='same',
                   activation='relu',
                   name='block3_conv1')(x)
        x = Conv2D(256,
                   3,
                   padding='same',
                   activation='relu',
                   name='block3_conv2')(x)
        x = Conv2D(256,
                   3,
                   padding='same',
                   activation='relu',
                   name='block3_conv3')(x)
        x = MaxPooling2D(2, 2, padding='same', name='block3_pool')(x)

        # vgg block 4
        x = Conv2D(512,
                   3,
                   padding='same',
                   activation='relu',
                   name='block4_conv1')(x)
        x = Conv2D(512,
                   3,
                   padding='same',
                   activation='relu',
                   name='block4_conv2')(x)
        conv43 = Conv2D(512,
                        3,
                        padding='same',
                        activation='relu',
                        name='block4_conv3')(x)
        x = MaxPooling2D(2, 2, padding='same', name='block4_pool')(conv43)

        # vgg block 5
        x = Conv2D(512,
                   3,
                   padding='same',
                   activation='relu',
                   name='block5_conv1')(x)
        x = Conv2D(512,
                   3,
                   padding='same',
                   activation='relu',
                   name='block5_conv2')(x)
        x = Conv2D(512,
                   3,
                   padding='same',
                   activation='relu',
                   name='block5_conv3')(x)
        x = MaxPooling2D(3, 1, padding='same', name='block5_pool')(x)

        # vgg fc modified as conv
        conv6 = Conv2D(1024,
                       3,
                       dilation_rate=6,
                       activation='relu',
                       padding='same',
                       name='conv6')(x)
        conv7 = Conv2D(1024,
                       1,
                       activation='relu',
                       padding='same',
                       name='conv7')(conv6)

        # extra feature map layers
        extra1 = ConvBn(256, 1, name='extra1_shrink')(conv7)
        extra1 = ConvBn(512, 3, strides=2, padding='same',
                        name='extra1')(extra1)

        extra2 = ConvBn(128, 1, name='extra2_shrink')(extra1)
        extra2 = ConvBn(256, 3, strides=2, padding='same',
                        name='extra2')(extra2)

        extra3 = ConvBn(128, 1, name='extra3_shrink')(extra2)
        extra3 = ConvBn(256, 3, name='extra3')(extra3)

        extra4 = ConvBn(128, 1, name='extra4_shrink')(extra3)
        extra4 = ConvBn(256, 3, name='extra4')(extra4)

        extra5 = ConvBn(128, 1, name='extra5_shrink')(extra4)
        extra5 = ConvBn(256, 4, name='extra5')(extra5)

        # heads
        feature_maps = [conv43, conv7, extra1, extra2, extra3, extra4, extra5]
        confs, locs, anchors = [], [], []
        for i in range(len(feature_maps)):
            fmap = feature_maps[i]  # renamed from `map` to avoid shadowing the builtin
            conf = self.conf_layers[i](fmap)
            loc = self.loc_layers[i](fmap)
            anchor = self.anchor_layers[i](fmap)
            confs.append(conf)
            locs.append(loc)
            anchors.append(anchor)
        confs_reshaped = [Reshape((-1, 1))(conf) for conf in confs]
        locs_reshaped = [Reshape((-1, 4))(loc) for loc in locs]
        anchors_reshaped = [Reshape((-1, 4))(db) for db in anchors]

        conf_concat = Concatenate(axis=1, name='scores')(confs_reshaped)
        loc_concat = Concatenate(axis=1, name='offsets')(locs_reshaped)
        anchor_concat = Concatenate(axis=1,
                                    name='default_boxes')(anchors_reshaped)

        self.model = Model(input_tensor,
                           [conf_concat, loc_concat, anchor_concat],
                           name='ssd_vgg16')
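ConvBn is defined elsewhere in the file; from how it is called it is presumably a conv + batch-norm + ReLU block, along these lines (a sketch, not the original definition):

def ConvBn(filters, kernel_size, strides=1, padding='valid', name=None):
    # Conv2D followed by BatchNormalization and ReLU, usable as a layer factory.
    def block(x):
        x = Conv2D(filters, kernel_size, strides=strides,
                   padding=padding, name=name)(x)
        x = BatchNormalization(name=name and name + '_bn')(x)
        return Activation('relu')(x)
    return block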
Code example #29
def MobileNetV2(classes=1000,
                input_tensor=None,
                input_shape=(512, 512, 3),
                weights_info=None,
                OS=16,
                alpha=1.,
                include_top=True):
    """ Instantiates the Deeplabv3+ architecture

    Optionally loads weights pre-trained
    on PASCAL VOC or Cityscapes. This model is available for TensorFlow only.
    # Arguments
        classes: Integer, optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: shape of the input image in HxWxC format.
            The PASCAL VOC model was trained on (512, 512, 3) images. None is allowed for height/width.
        weights_info: a dict consisting of `classes` and `weights`.
            `classes` is the number of output units of `weights`.
            `weights` is one of 'imagenet' (pre-training on ImageNet), 'pascal_voc', 'cityscapes',
            an original weights path (pre-training on original data) or None (random initialization)
        OS: determines input_shape/feature_extractor_output ratio. One of {8,16}.
            Used only for xception backbone.
        alpha: controls the width of the MobileNetV2 network. This is known as the
            width multiplier in the MobileNetV2 paper.
                - If `alpha` < 1.0, proportionally decreases the number
                    of filters in each layer.
                - If `alpha` > 1.0, proportionally increases the number
                    of filters in each layer.
                - If `alpha` = 1, default number of filters from the paper
                    are used at each layer.
            Used only for mobilenetv2 backbone. Pretrained is only available for alpha=1.
        include_top: Boolean, whether to include the fully-connected
            layer at the top of the network. Defaults to `True`.

    # Returns
        A Keras model instance.

    """

    if weights_info is not None:
        if weights_info.get("weights") is None:
            weights = None

        elif weights_info["weights"] in {
                'imagenet', 'pascal_voc', 'cityscapes', None
        }:
            weights = weights_info["weights"]

        elif os.path.exists(weights_info["weights"]) and weights_info.get(
                "classes") is not None:
            classes = int(weights_info["classes"])
            weights = weights_info["weights"]

        else:
            raise ValueError(
                'The `weights` should be either '
                '`None` (random initialization), `imagenet`, `pascal_voc`, `cityscapes`, '
                'original weights path (pre-training on original data), '
                'or the path to the weights file to be loaded, and '
                '`classes` should be the number of output units of the original weights')

    else:
        weights = None
        if classes is None:
            raise ValueError('`classes` must be specified when `weights_info` is not given')

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    # Determine the spatial dimensions of the input according to the data format.
    if backend.image_data_format() == 'channels_first':
        rows = input_shape[1]
        cols = input_shape[2]
    else:
        rows = input_shape[0]
        cols = input_shape[1]

    if rows == cols and rows in [96, 128, 160, 192, 224]:
        default_size = rows
    else:
        default_size = 224

    if weights == 'imagenet':
        if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of `0.35`, `0.50`, `0.75`, '
                             '`1.0`, `1.3` or `1.4` only.')

        if rows != cols or rows not in [96, 128, 160, 192, 224]:
            rows = 224

    OS = 8  # NOTE: the output stride is hard-coded to 8 here, overriding the OS argument
    first_block_filters = _make_divisible(32 * alpha, 8)
    x = Conv2D(first_block_filters,
               kernel_size=3,
               strides=(2, 2),
               padding='same',
               use_bias=False,
               name='Conv')(img_input)
    x = BatchNormalization(epsilon=1e-3, momentum=0.999, name='Conv_BN')(x)
    x = Activation(relu6, name='Conv_Relu6')(x)

    x = _inverted_res_block(x,
                            filters=16,
                            alpha=alpha,
                            stride=1,
                            expansion=1,
                            block_id=0,
                            skip_connection=False)

    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=1,
                            skip_connection=False)
    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=2,
                            skip_connection=True)

    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=3,
                            skip_connection=False)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=4,
                            skip_connection=True)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=5,
                            skip_connection=True)

    # stride in block 6 changed from 2 -> 1, so we need to use rate = 2
    x = _inverted_res_block(
        x,
        filters=64,
        alpha=alpha,
        stride=1,  # 1!
        expansion=6,
        block_id=6,
        skip_connection=False)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            rate=2,
                            expansion=6,
                            block_id=7,
                            skip_connection=True)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            rate=2,
                            expansion=6,
                            block_id=8,
                            skip_connection=True)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            rate=2,
                            expansion=6,
                            block_id=9,
                            skip_connection=True)

    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            rate=2,
                            expansion=6,
                            block_id=10,
                            skip_connection=False)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            rate=2,
                            expansion=6,
                            block_id=11,
                            skip_connection=True)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            rate=2,
                            expansion=6,
                            block_id=12,
                            skip_connection=True)

    x = _inverted_res_block(
        x,
        filters=160,
        alpha=alpha,
        stride=1,
        rate=2,  # 1!
        expansion=6,
        block_id=13,
        skip_connection=False)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            rate=4,
                            expansion=6,
                            block_id=14,
                            skip_connection=True)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            rate=4,
                            expansion=6,
                            block_id=15,
                            skip_connection=True)

    x = _inverted_res_block(x,
                            filters=320,
                            alpha=alpha,
                            stride=1,
                            rate=4,
                            expansion=6,
                            block_id=16,
                            skip_connection=False)

    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)
    else:
        last_block_filters = 1280

    x = Conv2D(last_block_filters,
               kernel_size=1,
               use_bias=False,
               name='Conv_1')(x)
    x = BatchNormalization(epsilon=1e-3, momentum=0.999, name='Conv_1_bn')(x)
    x = Activation(relu6, name='out_relu')(x)

    x = GlobalAveragePooling2D()(x)
    x = Dense(classes, activation='softmax')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='mobilenetv2')

    # Load weights.
    if weights == 'imagenet':
        print("movilenetv2 load model imagenet")
        model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                      str(alpha) + '_' + str(rows) + '.h5')
        weight_path = BASE_WEIGHT_PATH + model_name
        weights_path = data_utils.get_file(model_name,
                                           weight_path,
                                           cache_subdir='models')
        model.load_weights(weights_path)
    elif weights not in {'pascal_voc', 'cityscapes', None}:
        model.load_weights(weights)

    if include_top:
        return model
    else:
        # get last _inverted_res_block layer
        no_top_model = Model(inputs=model.input,
                             outputs=model.get_layer(index=-6).output)
        return no_top_model
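_make_divisible, relu6 and _inverted_res_block are helpers from the rest of the file; _make_divisible is almost certainly the standard MobileNet rounding utility:

def _make_divisible(v, divisor, min_value=None):
    # Round v to the nearest multiple of divisor, never going below
    # min_value and never dropping more than 10% below v.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v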
Code example #30
# Start building the Keras Sequential model.
model = Sequential()

# Add an input layer, which works like a feed_dict in TensorFlow.
# Note that the input shape must be a tuple containing the image size.
model.add(InputLayer(input_shape=img_shape_full))

# The input is a flattened array with 784 elements (img_size * img_size),
# but the convolutional layers expect images of shape (28, 28, 1), so we reshape
model.add(Reshape(img_shape_full))

# First convolutional layer with ReLU activation and max-pooling.
model.add(
    Conv2D(kernel_size=5,
           strides=1,
           filters=16,
           padding='same',
           activation='relu',
           name='layer_conv1'))
model.add(MaxPooling2D(pool_size=2, strides=2))

# Second convolutional layer with ReLU activation and max-pooling.
model.add(
    Conv2D(kernel_size=5,
           strides=1,
           filters=36,
           padding='same',
           activation='relu',
           name='layer_conv2'))
model.add(MaxPooling2D(pool_size=2, strides=2))

# Flatten the 4-rank output of the convolutional layers