def ssd_512(image_size,
            n_classes,
            mode='training',
            l2_regularization=0.0005,
            min_scale=None,
            max_scale=None,
            scales=None,
            aspect_ratios_global=None,
            aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5],
                                     [1.0, 2.0, 0.5]],
            two_boxes_for_ar1=True,
            steps=[8, 16, 32, 64, 128, 256, 512],
            offsets=None,
            clip_boxes=False,
            variances=[0.1, 0.1, 0.2, 0.2],
            coords='centroids',
            normalize_coords=True,
            subtract_mean=[123, 117, 104],
            divide_by_stddev=None,
            swap_channels=[2, 1, 0],
            confidence_thresh=0.01,
            iou_threshold=0.45,
            top_k=200,
            nms_max_output_size=400,
            return_predictor_sizes=False):
    """
    Build a Keras model with SSD512 architecture, see references.

    The base network is a reduced atrous VGG-16, extended by the SSD architecture,
    as described in the paper.

    Most of the arguments that this function takes are only needed for the anchor
    box layers. In case you're training the network, the parameters passed here must
    be the same as the ones used to set up `SSDBoxEncoder`. In case you're loading
    trained weights, the parameters passed here must be the same as the ones used
    to produce the trained weights.

    Some of these arguments are explained in more detail in the documentation of the
    `SSDBoxEncoder` class.

    Note: Requires Keras v2.0 or later. Currently works only with the
    TensorFlow backend (v1.0 or later).

    Note: The list-valued defaults in the signature are only read, never mutated,
    so sharing them between calls is safe.

    Arguments:
        image_size (tuple): The input image size in the format `(height, width, channels)`.
        n_classes (int): The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
            The background class is added internally.
        mode (str, optional): One of 'training', 'inference' and 'inference_fast'. In 'training' mode,
            the model outputs the raw prediction tensor, while in 'inference' and 'inference_fast' modes,
            the raw predictions are decoded into absolute coordinates and filtered via confidence thresholding,
            non-maximum suppression, and top-k filtering. The difference between the latter two modes is that
            'inference' follows the exact procedure of the original Caffe implementation, while
            'inference_fast' uses a faster prediction decoding procedure.
        l2_regularization (float, optional): The L2-regularization rate. Applies to all convolutional layers.
            Set to zero to deactivate L2-regularization.
        min_scale (float, optional): The smallest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images.
        max_scale (float, optional): The largest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images. All scaling factors between the smallest and the
            largest will be linearly interpolated. Note that the second to last of the linearly interpolated
            scaling factors will actually be the scaling factor for the last predictor layer, while the last
            scaling factor is used for the second box for aspect ratio 1 in the last predictor layer
            if `two_boxes_for_ar1` is `True`.
        scales (list, optional): A list of floats containing scaling factors per convolutional predictor layer.
            This list must be one element longer than the number of predictor layers. The first `k` elements are the
            scaling factors for the `k` predictor layers, while the last element is used for the second box
            for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
            last scaling factor must be passed either way, even if it is not being used. If a list is passed,
            this argument overrides `min_scale` and `max_scale`. All scaling factors must be greater than zero.
            Any sized sequence (including a Numpy array) is accepted.
        aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be
            generated. This list is valid for all prediction layers.
        aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each prediction layer.
            This allows you to set the aspect ratios for each predictor layer individually, which is the case for the
            original SSD512 implementation. If a list is passed, it overrides `aspect_ratios_global`.
        two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise.
            If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
            using the scaling factor for the respective layer, the second one will be generated using
            geometric mean of said scaling factor and next bigger scaling factor.
        steps (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many
            pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over
            the image. If the list contains ints/floats, then that value will be used for both spatial dimensions.
            If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`.
            If no steps are provided, then they will be computed such that the anchor box center points will form an
            equidistant grid within the image dimensions.
        offsets (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either floats or tuples of two floats. These numbers represent for each predictor layer how many
            pixels from the top and left borders of the image the top-most and left-most anchor box center points should be
            as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions
            of the step size specified in the `steps` argument. If the list contains floats, then that value will
            be used for both spatial dimensions. If the list contains tuples of two floats, then they represent
            `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size.
        clip_boxes (bool, optional): If `True`, clips the anchor box coordinates to stay within image boundaries.
        variances (list, optional): A list of 4 floats >0. The anchor box offset for each coordinate will be divided by
            its respective variance value.
        coords (str, optional): The box coordinate format to be used internally by the model (i.e. this is not the input format
            of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width,
            and height), 'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model is supposed to use relative instead of absolute coordinates,
            i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates.
        subtract_mean (array-like, optional): `None` or an array-like object of integers or floating point values
            of any shape that is broadcast-compatible with the image shape. The elements of this array will be
            subtracted from the image pixel intensity values. For example, pass a list of three integers
            to perform per-channel mean normalization for color images.
        divide_by_stddev (array-like, optional): `None` or an array-like object of non-zero integers or
            floating point values of any shape that is broadcast-compatible with the image shape. The image pixel
            intensity values will be divided by the elements of this array. For example, pass a list
            of three integers to perform per-channel standard deviation normalization for color images.
        swap_channels (list, optional): Either `False` or a list of integers representing the desired order in which the input
            image channels should be swapped.
        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
            positive class in order to be considered for the non-maximum suppression stage for the respective class.
            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
            stage, while a larger value will result in a larger part of the selection process happening in the confidence
            thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes that have a Jaccard similarity of greater than `iou_threshold`
            with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
            to the box's confidence score.
        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
            non-maximum suppression stage.
        nms_max_output_size (int, optional): The maximal number of predictions that will be left over after the NMS stage.
        return_predictor_sizes (bool, optional): If `True`, this function not only returns the model, but also
            a list containing the spatial dimensions of the predictor layers. This isn't strictly necessary since
            you can always get their sizes easily via the Keras API, but it's convenient and less error-prone
            to get them this way. They are only relevant for training anyway (SSDBoxEncoder needs to know the
            spatial dimensions of the predictor layers), for inference you don't need them.

    Returns:
        model: The Keras SSD512 model.
        predictor_sizes (optional): A Numpy array containing the `(height, width)` portion
            of the output tensor shape for each convolutional predictor layer. During
            training, the generator function needs this in order to transform
            the ground truth labels into tensors of identical structure as the
            output tensors of the model, which is in turn needed for the cost
            function.

    Raises:
        ValueError: If neither aspect ratio argument is usable, if the per-layer aspect
            ratios / `scales` / `steps` / `offsets` have the wrong length, if neither
            `scales` nor both `min_scale` and `max_scale` are given, if `variances` does
            not consist of 4 positive values, or if `mode` is not one of the supported
            modes. All of these are raised before any layer is created.

    References:
        https://arxiv.org/abs/1512.02325v5
    """

    def is_provided(value):
        # `True` for a non-empty sized object. Unlike a bare truthiness test this
        # also works for Numpy arrays, whose truth value is ambiguous.
        if value is None:
            return False
        try:
            return len(value) > 0
        except TypeError:
            # Not a sized object (e.g. the `False` flag accepted by `swap_channels`).
            return bool(value)

    n_predictor_layers = 7  # The number of predictor conv layers in the network is 7 for the original SSD512
    n_classes += 1  # Account for the background class.
    l2_reg = l2_regularization  # Make the internal name shorter.
    img_height, img_width, img_channels = image_size[0], image_size[1], image_size[2]
    input_shape = (img_height, img_width, img_channels)

    ############################################################################
    # Get a few exceptions out of the way.
    ############################################################################

    per_layer_given = is_provided(aspect_ratios_per_layer)
    scales_given = is_provided(scales)

    if aspect_ratios_global is None and not per_layer_given:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. "
            "At least one needs to be specified.")
    if per_layer_given and len(aspect_ratios_per_layer) != n_predictor_layers:
        raise ValueError(
            "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, "
            "but len(aspect_ratios_per_layer) == {}."
            .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and not scales_given:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified.")
    if scales_given:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "It must be either scales is None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:
        # If no explicit list of scaling factors was passed, compute the list of
        # scaling factors from `min_scale` and `max_scale`.
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

    if len(variances) != 4:
        raise ValueError(
            "4 variance values must be pased, but {} values were received.".format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError(
            "All variances must be >0, but the variances given are {}".format(variances))

    if steps is not None and len(steps) != n_predictor_layers:
        raise ValueError(
            "You must provide at least one step value per predictor layer.")

    if offsets is not None and len(offsets) != n_predictor_layers:
        raise ValueError(
            "You must provide at least one offset value per predictor layer.")

    # Reject an unknown mode now rather than after the whole graph has been built.
    if mode not in ('training', 'inference', 'inference_fast'):
        raise ValueError(
            "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'."
            .format(mode))

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if per_layer_given:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    # An aspect ratio of 1 yields a second box when `two_boxes_for_ar1` is set.
    n_boxes = [
        len(ar) + 1 if (1 in ar) and two_boxes_for_ar1 else len(ar)
        for ar in aspect_ratios
    ]

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    ############################################################################
    # Define functions for the Lambda layers below.
    ############################################################################

    def identity_layer(tensor):
        return tensor

    def input_mean_normalization(tensor):
        return tensor - np.array(subtract_mean)

    def input_stddev_normalization(tensor):
        return tensor / np.array(divide_by_stddev)

    def input_channel_swap(tensor):
        # Re-stack the channels in the requested order. Works for any number of
        # channels; the original hand-written cases covered only 3 and 4.
        return K.stack([tensor[..., channel] for channel in swap_channels], axis=-1)

    ############################################################################
    # Small layer factories shared by the network definition below.
    ############################################################################

    def relu_conv(tensor, filters, kernel_size, name, padding='same', **extra):
        # ReLU convolution with the initializer/regularizer used throughout the network.
        return Conv2D(filters, kernel_size,
                      activation='relu',
                      padding=padding,
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name=name,
                      **extra)(tensor)

    def max_pool(tensor, name, pool_size=(2, 2), strides=(2, 2)):
        return MaxPooling2D(pool_size=pool_size, strides=strides,
                            padding='same', name=name)(tensor)

    def vgg_block(tensor, block, filters, n_convs):
        # `n_convs` stacked 3x3 convolutions named `conv<block>_<i>`.
        for i in range(1, n_convs + 1):
            tensor = relu_conv(tensor, filters, (3, 3), 'conv{}_{}'.format(block, i))
        return tensor

    def extra_block(tensor, block, squeeze_filters, out_filters, kernel_size, strides):
        # 1x1 bottleneck, explicit zero padding, then a 'valid' convolution.
        tensor = relu_conv(tensor, squeeze_filters, (1, 1), 'conv{}_1'.format(block))
        tensor = ZeroPadding2D(padding=((1, 1), (1, 1)),
                               name='conv{}_padding'.format(block))(tensor)
        return relu_conv(tensor, out_filters, kernel_size, 'conv{}_2'.format(block),
                         padding='valid', strides=strides)

    def predictor_conv(tensor, depth, name):
        # Linear 3x3 convolution used for the confidence and localization heads.
        return Conv2D(depth, (3, 3),
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name=name)(tensor)

    ############################################################################
    # Build the network.
    ############################################################################

    x = Input(shape=input_shape)

    # The following identity layer is only needed so that the subsequent lambda layers can be optional.
    x1 = Lambda(identity_layer, output_shape=input_shape, name='identity_layer')(x)
    if subtract_mean is not None:
        x1 = Lambda(input_mean_normalization, output_shape=input_shape,
                    name='input_mean_normalization')(x1)
    if divide_by_stddev is not None:
        x1 = Lambda(input_stddev_normalization, output_shape=input_shape,
                    name='input_stddev_normalization')(x1)
    if is_provided(swap_channels):
        x1 = Lambda(input_channel_swap, output_shape=input_shape,
                    name='input_channel_swap')(x1)

    # Reduced VGG-16 base network.
    conv1_2 = vgg_block(x1, 1, 64, 2)
    pool1 = max_pool(conv1_2, 'pool1')

    conv2_2 = vgg_block(pool1, 2, 128, 2)
    pool2 = max_pool(conv2_2, 'pool2')

    conv3_3 = vgg_block(pool2, 3, 256, 3)
    pool3 = max_pool(conv3_3, 'pool3')

    conv4_3 = vgg_block(pool3, 4, 512, 3)
    pool4 = max_pool(conv4_3, 'pool4')

    conv5_3 = vgg_block(pool4, 5, 512, 3)
    pool5 = max_pool(conv5_3, 'pool5', pool_size=(3, 3), strides=(1, 1))

    # The former fully connected layers, converted to (atrous) convolutions.
    fc6 = relu_conv(pool5, 1024, (3, 3), 'fc6', dilation_rate=(6, 6))
    fc7 = relu_conv(fc6, 1024, (1, 1), 'fc7')

    # Extra SSD feature layers.
    conv6_2 = extra_block(fc7, 6, 256, 512, (3, 3), (2, 2))
    conv7_2 = extra_block(conv6_2, 7, 128, 256, (3, 3), (2, 2))
    conv8_2 = extra_block(conv7_2, 8, 128, 256, (3, 3), (2, 2))
    conv9_2 = extra_block(conv8_2, 9, 128, 256, (3, 3), (2, 2))
    conv10_2 = extra_block(conv9_2, 10, 128, 256, (4, 4), (1, 1))

    # Feed conv4_3 into the L2 normalization layer
    conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3)

    # The feature maps the predictors are attached to, in predictor-layer order.
    # The name is the prefix of every layer belonging to that predictor.
    sources = [
        ('conv4_3_norm', conv4_3_norm),
        ('fc7', fc7),
        ('conv6_2', conv6_2),
        ('conv7_2', conv7_2),
        ('conv8_2', conv8_2),
        ('conv9_2', conv9_2),
        ('conv10_2', conv10_2),
    ]

    ### Build the convolutional predictor layers on top of the base network

    # We predict `n_classes` confidence values for each box, hence the confidence predictors have depth `n_boxes * n_classes`
    # Output shape of the confidence layers: `(batch, height, width, n_boxes * n_classes)`
    mbox_conf_layers = [
        predictor_conv(feature_map, n_boxes[i] * n_classes, prefix + '_mbox_conf')
        for i, (prefix, feature_map) in enumerate(sources)
    ]
    # We predict 4 box coordinates for each box, hence the localization predictors have depth `n_boxes * 4`
    # Output shape of the localization layers: `(batch, height, width, n_boxes * 4)`
    mbox_loc_layers = [
        predictor_conv(feature_map, n_boxes[i] * 4, prefix + '_mbox_loc')
        for i, (prefix, feature_map) in enumerate(sources)
    ]

    ### Generate the anchor boxes (called "priors" in the original Caffe/C++ implementation, so I'll keep their layer names)

    # Output shape of anchors: `(batch, height, width, n_boxes, 8)`
    mbox_priorbox_layers = [
        AnchorBoxes(img_height, img_width,
                    this_scale=scales[i],
                    next_scale=scales[i + 1],
                    aspect_ratios=aspect_ratios[i],
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    this_steps=steps[i],
                    this_offsets=offsets[i],
                    clip_boxes=clip_boxes,
                    variances=variances,
                    coords=coords,
                    normalize_coords=normalize_coords,
                    name=prefix + '_mbox_priorbox')(mbox_loc_layers[i])
        for i, (prefix, _) in enumerate(sources)
    ]

    ### Reshape

    # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
    # We want the classes isolated in the last axis to perform softmax on them
    mbox_conf_reshaped = [
        Reshape((-1, n_classes), name=prefix + '_mbox_conf_reshape')(mbox_conf_layers[i])
        for i, (prefix, _) in enumerate(sources)
    ]
    # Reshape the box predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`
    # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss
    mbox_loc_reshaped = [
        Reshape((-1, 4), name=prefix + '_mbox_loc_reshape')(mbox_loc_layers[i])
        for i, (prefix, _) in enumerate(sources)
    ]
    # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    mbox_priorbox_reshaped = [
        Reshape((-1, 8), name=prefix + '_mbox_priorbox_reshape')(mbox_priorbox_layers[i])
        for i, (prefix, _) in enumerate(sources)
    ]

    ### Concatenate the predictions from the different layers

    # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions,
    # so we want to concatenate along axis 1, the number of boxes per layer
    # Output shape of `mbox_conf`: (batch, n_boxes_total, n_classes)
    mbox_conf = Concatenate(axis=1, name='mbox_conf')(mbox_conf_reshaped)

    # Output shape of `mbox_loc`: (batch, n_boxes_total, 4)
    mbox_loc = Concatenate(axis=1, name='mbox_loc')(mbox_loc_reshaped)

    # Output shape of `mbox_priorbox`: (batch, n_boxes_total, 8)
    mbox_priorbox = Concatenate(axis=1, name='mbox_priorbox')(mbox_priorbox_reshaped)

    # The box coordinate predictions will go into the loss function just the way they are,
    # but for the class predictions, we'll apply a softmax activation layer first
    mbox_conf_softmax = Activation('softmax', name='mbox_conf_softmax')(mbox_conf)

    # Concatenate the class and box predictions and the anchors to one large predictions vector
    # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
    predictions = Concatenate(axis=2, name='predictions')(
        [mbox_conf_softmax, mbox_loc, mbox_priorbox])

    if mode == 'training':
        outputs = predictions
    else:
        # `mode` was validated above, so it is 'inference' or 'inference_fast' here.
        decoder = DecodeDetections if mode == 'inference' else DecodeDetectionsFast
        outputs = decoder(confidence_thresh=confidence_thresh,
                          iou_threshold=iou_threshold,
                          top_k=top_k,
                          nms_max_output_size=nms_max_output_size,
                          coords=coords,
                          normalize_coords=normalize_coords,
                          img_height=img_height,
                          img_width=img_width,
                          name='decoded_predictions')(predictions)
    model = Model(inputs=x, outputs=outputs)

    if return_predictor_sizes:
        # `(height, width)` of every confidence predictor, in predictor-layer order.
        predictor_sizes = np.array(
            [conf_layer._keras_shape[1:3] for conf_layer in mbox_conf_layers])
        return model, predictor_sizes
    return model
def build_model(self):
    """Build the convolutional VAE plus stand-alone encoder and decoder models.

    Reads the following attributes from ``self``:

    * ``input_dim``  -- shape passed to the encoder ``Input``.
    * ``z_dim``      -- size of the latent vector.
    * ``dense_size`` -- width of the first decoder layer, which is reshaped
      to ``(1, 1, dense_size)`` before the transposed convolutions.
    * ``sampling``   -- callable wrapped in a ``Lambda`` layer; it receives
      ``[z_mean, z_log_var]`` and produces the sampled latent.

    Every decoder layer is instantiated once and applied twice: to the
    sampled latent (giving the full VAE) and to a separate latent ``Input``
    (giving the stand-alone decoder), so both models share weights.

    Returns:
        tuple: ``(vae, vae_encoder, vae_decoder)``. Only ``vae`` is compiled
        (RMSprop, reconstruction + KL loss, both terms also reported as
        metrics).
    """
    # ---- Encoder: four stride-2 convolutions followed by a flatten.
    vae_input = Input(shape=self.input_dim)
    vae_c1 = Conv2D(filters=32, kernel_size=4, strides=2,
                    activation='relu')(vae_input)
    vae_c2 = Conv2D(filters=64, kernel_size=4, strides=2,
                    activation='relu')(vae_c1)
    vae_c3 = Conv2D(filters=64, kernel_size=4, strides=2,
                    activation='relu')(vae_c2)
    vae_c4 = Conv2D(filters=128, kernel_size=4, strides=2,
                    activation='relu')(vae_c3)

    vae_z_in = Flatten()(vae_c4)

    vae_z_mean = Dense(self.z_dim)(vae_z_in)
    vae_z_log_var = Dense(self.z_dim)(vae_z_in)

    vae_z = Lambda(self.sampling)([vae_z_mean, vae_z_log_var])
    vae_z_input = Input(shape=(self.z_dim, ))
    print("vae_z shape " + str(vae_z.shape))

    # ---- Decoder layers (shared between `vae` and `vae_decoder`).
    # The dense width must equal the number of elements expected by the
    # reshape below, so it is taken from `self.dense_size` instead of being
    # hard-coded to 1024.
    vae_dense = Dense(self.dense_size)
    vae_z_out = Reshape((1, 1, self.dense_size))
    vae_d1 = Conv2DTranspose(filters=64, kernel_size=(3, 4), strides=2,
                             activation='relu')
    vae_d2 = Conv2DTranspose(filters=64, kernel_size=(9, 11), strides=3,
                             activation='relu')
    vae_d3 = Conv2DTranspose(filters=32, kernel_size=(4, 4), strides=4,
                             activation='relu')
    vae_d4 = Conv2DTranspose(filters=1, kernel_size=(4, 4), strides=4,
                             activation='sigmoid')

    # Decoder applied to the sampled latent (end-to-end VAE path).
    vae_dense_model = vae_dense(vae_z)
    vae_z_out_model = vae_z_out(vae_dense_model)
    vae_d1_model = vae_d1(vae_z_out_model)
    vae_d2_model = vae_d2(vae_d1_model)
    vae_d3_model = vae_d3(vae_d2_model)
    vae_d4_model = vae_d4(vae_d3_model)

    # Decoder applied to an externally supplied latent (stand-alone decoder).
    vae_dense_decoder = vae_dense(vae_z_input)
    vae_z_out_decoder = vae_z_out(vae_dense_decoder)
    vae_d1_decoder = vae_d1(vae_z_out_decoder)
    vae_d2_decoder = vae_d2(vae_d1_decoder)
    vae_d3_decoder = vae_d3(vae_d2_decoder)
    vae_d4_decoder = vae_d4(vae_d3_decoder)

    # Models
    vae = Model(vae_input, vae_d4_model)
    vae_encoder = Model(vae_input, vae_z)
    vae_decoder = Model(vae_z_input, vae_d4_decoder)

    def vae_r_loss(y_true, y_pred):
        """Reconstruction term: 10 x mean squared error over all elements."""
        y_true_flat = K.flatten(y_true)
        y_pred_flat = K.flatten(y_pred)
        return 10 * K.mean(K.square(y_true_flat - y_pred_flat), axis=-1)

    def vae_kl_loss(y_true, y_pred):
        """KL term computed from the encoder's mean / log-variance tensors.

        ``y_true`` and ``y_pred`` are ignored; they exist only so the
        function has the signature Keras expects of a loss/metric.
        """
        return -0.5 * K.mean(1 + vae_z_log_var - K.square(vae_z_mean) -
                             K.exp(vae_z_log_var),
                             axis=-1)

    def vae_loss(y_true, y_pred):
        """Total loss: reconstruction term plus KL term."""
        return vae_r_loss(y_true, y_pred) + vae_kl_loss(y_true, y_pred)

    vae.compile(optimizer='rmsprop',
                loss=vae_loss,
                metrics=[vae_r_loss, vae_kl_loss])
    vae.summary()
    return (vae, vae_encoder, vae_decoder)
def get_test_model_full():
    """Build, compile and briefly fit a large multi-input/multi-output model.

    The model applies many layer types, with many parameter combinations, to
    eight inputs whose shapes are listed in ``input_shapes``. Every layer
    result is appended to ``outputs``, so the order of the statements below
    defines the order of the model's outputs.

    The model is compiled with MSE loss and the Nadam optimizer and fitted
    for 10 epochs on one sample produced by ``generate_input_data`` /
    ``generate_output_data``.

    Returns:
        The fitted Keras ``Model`` named ``'test_model_full'``.
    """
    # NOTE(review): the loop nesting in this function was restored from
    # whitespace-collapsed source -- confirm it against the upstream file.
    input_shapes = [
        (26, 28, 3),
        (4, 4, 3),
        (4, 4, 3),
        (4, ),
        (2, 3),
        (27, 29, 1),
        (17, 1),
        (17, 4),
    ]
    inputs = [Input(shape=s) for s in input_shapes]
    outputs = []

    # 1D layers, applied to the two rank-2 inputs (17, 1) and (17, 4).
    for inp in inputs[6:8]:
        for padding in ['valid', 'same']:
            for s in range(1, 6):
                for out_channels in [1, 2]:
                    for d in range(1, 4):
                        outputs.append(
                            Conv1D(out_channels,
                                   s,
                                   padding=padding,
                                   dilation_rate=d)(inp))
        for padding_size in range(0, 5):
            outputs.append(ZeroPadding1D(padding_size)(inp))
        for crop_left in range(0, 2):
            for crop_right in range(0, 2):
                outputs.append(Cropping1D((crop_left, crop_right))(inp))
        for upsampling_factor in range(1, 5):
            outputs.append(UpSampling1D(upsampling_factor)(inp))
        for padding in ['valid', 'same']:
            for pool_factor in range(1, 6):
                for s in range(1, 4):
                    outputs.append(
                        MaxPooling1D(pool_factor, strides=s,
                                     padding=padding)(inp))
                    outputs.append(
                        AveragePooling1D(pool_factor,
                                         strides=s,
                                         padding=padding)(inp))
        outputs.append(GlobalMaxPooling1D()(inp))
        outputs.append(GlobalAveragePooling1D()(inp))

    # 2D convolutions / pooling, applied to the (26, 28, 3) and (27, 29, 1)
    # inputs with kernels that are h x 1 first and 1 x w afterwards.
    for inp in [inputs[0], inputs[5]]:
        for padding in ['valid', 'same']:
            for h in range(1, 6):
                for out_channels in [1, 2]:
                    for d in range(1, 4):
                        outputs.append(
                            Conv2D(out_channels, (h, 1),
                                   padding=padding,
                                   dilation_rate=(d, 1))(inp))
                        outputs.append(
                            SeparableConv2D(out_channels, (h, 1),
                                            padding=padding,
                                            dilation_rate=(d, 1))(inp))
                    for sy in range(1, 4):
                        outputs.append(
                            Conv2D(out_channels, (h, 1),
                                   strides=(1, sy),
                                   padding=padding)(inp))
                        outputs.append(
                            SeparableConv2D(out_channels, (h, 1),
                                            strides=(sy, sy),
                                            padding=padding)(inp))
                for sy in range(1, 4):
                    outputs.append(
                        MaxPooling2D((h, 1), strides=(1, sy),
                                     padding=padding)(inp))
            for w in range(1, 6):
                for out_channels in [1, 2]:
                    # NOTE(review): `sy` is whatever the loops above left
                    # behind. With the nesting shown here that is presumably
                    # always 3, so only d == 1 would run -- confirm intent.
                    for d in range(1, 4) if sy == 1 else [1]:
                        outputs.append(
                            Conv2D(out_channels, (1, w),
                                   padding=padding,
                                   dilation_rate=(1, d))(inp))
                        outputs.append(
                            SeparableConv2D(out_channels, (1, w),
                                            padding=padding,
                                            dilation_rate=(1, d))(inp))
                    for sx in range(1, 4):
                        outputs.append(
                            Conv2D(out_channels, (1, w),
                                   strides=(sx, 1),
                                   padding=padding)(inp))
                        outputs.append(
                            SeparableConv2D(out_channels, (1, w),
                                            strides=(sx, sx),
                                            padding=padding)(inp))
                for sx in range(1, 4):
                    outputs.append(
                        MaxPooling2D((1, w), strides=(1, sx),
                                     padding=padding)(inp))

    # Single-shot 2D layers on the first input, covering the int / pair /
    # pair-of-pairs argument forms of padding and cropping.
    outputs.append(ZeroPadding2D(2)(inputs[0]))
    outputs.append(ZeroPadding2D((2, 3))(inputs[0]))
    outputs.append(ZeroPadding2D(((1, 2), (3, 4)))(inputs[0]))
    outputs.append(Cropping2D(2)(inputs[0]))
    outputs.append(Cropping2D((2, 3))(inputs[0]))
    outputs.append(Cropping2D(((1, 2), (3, 4)))(inputs[0]))
    for y in range(1, 3):
        for x in range(1, 3):
            outputs.append(UpSampling2D(size=(y, x))(inputs[0]))
    outputs.append(GlobalAveragePooling2D()(inputs[0]))
    outputs.append(GlobalMaxPooling2D()(inputs[0]))
    outputs.append(AveragePooling2D((2, 2))(inputs[0]))
    outputs.append(MaxPooling2D((2, 2))(inputs[0]))
    outputs.append(UpSampling2D((2, 2))(inputs[0]))
    outputs.append(keras.layers.concatenate([inputs[0], inputs[0]]))
    outputs.append(Dropout(0.5)(inputs[0]))
    outputs.append(BatchNormalization()(inputs[0]))
    outputs.append(BatchNormalization(center=False)(inputs[0]))
    outputs.append(BatchNormalization(scale=False)(inputs[0]))
    outputs.append(Conv2D(2, (3, 3), use_bias=True)(inputs[0]))
    outputs.append(Conv2D(2, (3, 3), use_bias=False)(inputs[0]))
    outputs.append(SeparableConv2D(2, (3, 3), use_bias=True)(inputs[0]))
    outputs.append(SeparableConv2D(2, (3, 3), use_bias=False)(inputs[0]))
    outputs.append(Dense(2, use_bias=True)(inputs[3]))
    outputs.append(Dense(2, use_bias=False)(inputs[3]))

    # Layers that are instantiated once and called on several tensors.
    shared_conv = Conv2D(1, (1, 1),
                         padding='valid',
                         name='shared_conv',
                         activation='relu')
    up_scale_2 = UpSampling2D((2, 2))
    x1 = shared_conv(up_scale_2(inputs[1]))  # (1, 8, 8)
    x2 = shared_conv(up_scale_2(inputs[2]))  # (1, 8, 8)
    x3 = Conv2D(1, (1, 1), padding='valid')(up_scale_2(inputs[2]))  # (1, 8, 8)
    x = keras.layers.concatenate([x1, x2, x3])  # (3, 8, 8)
    outputs.append(x)
    x = Conv2D(3, (1, 1), padding='same', use_bias=False)(x)  # (3, 8, 8)
    outputs.append(x)
    x = Dropout(0.5)(x)
    outputs.append(x)
    x = keras.layers.concatenate(
        [MaxPooling2D((2, 2))(x),
         AveragePooling2D((2, 2))(x)])  # (6, 4, 4)
    outputs.append(x)
    x = Flatten()(x)  # (1, 1, 96)
    x = Dense(4, use_bias=False)(x)
    outputs.append(x)
    x = Dense(3)(x)  # (1, 1, 3)
    outputs.append(x)

    # A functional sub-model and a Sequential sub-model, each compiled on its
    # own and then called like a layer inside the outer graph.
    intermediate_input_shape = (3, )
    intermediate_in = Input(intermediate_input_shape)
    intermediate_x = intermediate_in
    intermediate_x = Dense(8)(intermediate_x)
    intermediate_x = Dense(5)(intermediate_x)
    intermediate_model = Model(inputs=[intermediate_in],
                               outputs=[intermediate_x],
                               name='intermediate_model')
    intermediate_model.compile(loss='mse', optimizer='nadam')
    x = intermediate_model(x)  # (1, 1, 5)
    intermediate_model_2 = Sequential()
    intermediate_model_2.add(Dense(7, input_shape=(5, )))
    intermediate_model_2.add(Dense(5))
    intermediate_model_2.compile(optimizer='rmsprop',
                                 loss='categorical_crossentropy')
    x = intermediate_model_2(x)  # (1, 1, 5)
    x = Dense(3)(x)  # (1, 1, 3)

    # Activations; `shared_activation` is one layer object used on two inputs.
    # Two raw inputs are also passed straight through as outputs.
    shared_activation = Activation('tanh')
    outputs = outputs + [
        Activation('tanh')(inputs[3]),
        Activation('hard_sigmoid')(inputs[3]),
        Activation('selu')(inputs[3]),
        Activation('sigmoid')(inputs[3]),
        Activation('softplus')(inputs[3]),
        Activation('softmax')(inputs[3]),
        Activation('relu')(inputs[3]),
        LeakyReLU()(inputs[3]),
        ELU()(inputs[3]),
        shared_activation(inputs[3]),
        inputs[4],
        inputs[1],
        x,
        shared_activation(x),
    ]
    print('Model has {} outputs.'.format(len(outputs)))
    model = Model(inputs=inputs, outputs=outputs, name='test_model_full')
    model.compile(loss='mse', optimizer='nadam')
    # fit to dummy data
    training_data_size = 1
    batch_size = 1
    epochs = 10
    data_in = generate_input_data(training_data_size, input_shapes)
    data_out = generate_output_data(training_data_size, outputs)
    model.fit(data_in, data_out, epochs=epochs, batch_size=batch_size)
    return model
def build_model():
    """Assemble the VGG16-style backbone with its three regression heads.

    The backbone is five blocks of 3x3 "same" convolutions (He-normal
    initialisation, L2 weight penalty of 1e-4), each convolution followed by
    a ReLU and each block closed by a stride-2 max-pool. The flattened
    features feed three heads:

    * ``dimensions``  -- Dense(512) -> Dense(3), leaky-ReLU activations.
    * ``orientation`` -- Dense(256) -> Dense(BIN * 2), reshaped to
      ``(BIN, -1)`` and passed through ``VGG16.l2_normalize``.
    * ``confidence``  -- Dense(256) -> softmax over ``BIN`` units.

    Returns:
        A ``tf.keras.Model`` mapping a ``(NORM_H, NORM_W, 3)`` input to
        ``[dimensions, orientation, confidence]``.
    """
    # (number of filters, number of conv layers) for blocks 1..5.
    block_layout = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    inputs = Input(shape=(NORM_H, NORM_W, 3))

    x = inputs
    for block_no, (n_filters, n_convs) in enumerate(block_layout, start=1):
        for conv_no in range(1, n_convs + 1):
            x = Conv2D(
                n_filters,
                (3, 3),
                padding="same",
                kernel_initializer="he_normal",
                kernel_regularizer=l2(1e-4),
                name="block{}_conv{}".format(block_no, conv_no),
            )(x)
            x = Activation("relu")(x)
        x = MaxPooling2D(
            strides=(2, 2), name="block{}_pool".format(block_no)
        )(x)

    features = Flatten(name="Flatten")(x)

    def hidden_fc(units, name):
        # First stage shared by every head: Dense -> LeakyReLU -> Dropout.
        hidden = Dense(units, name=name)(features)
        hidden = LeakyReLU(alpha=0.1)(hidden)
        return Dropout(0.5)(hidden)

    # Dimensions branch
    dimensions = hidden_fc(512, "d_fc_1")
    dimensions = Dense(3, name="d_fc_2")(dimensions)
    dimensions = LeakyReLU(alpha=0.1, name="dimensions")(dimensions)

    # Orientation branch
    orientation = hidden_fc(256, "o_fc_1")
    orientation = Dense(BIN * 2, name="o_fc_2")(orientation)
    orientation = LeakyReLU(alpha=0.1)(orientation)
    orientation = Reshape((BIN, -1))(orientation)
    orientation = Lambda(VGG16.l2_normalize, name="orientation")(orientation)

    # Confidence branch
    confidence = hidden_fc(256, "c_fc_1")
    confidence = Dense(BIN, activation="softmax", name="confidence")(
        confidence
    )

    return tf.keras.Model(inputs, [dimensions, orientation, confidence])
def cifar100_student_strong(n_classes: int,
                            input_shape=None,
                            input_tensor=None,
                            weights_path: Union[None, str] = None) -> Model:
    """
    Builds the strong CIFAR-100 student network.

    Three blocks of (conv -> batch-norm) x 2 -> max-pool -> dropout, followed
    by a flatten, a dense layer of size n_classes and a softmax.

    :param n_classes: the number of classes (size of the final dense layer).
    :param input_shape: the input shape of the network. Can be omitted if input_tensor is used.
    :param input_tensor: the input tensor of the network. Can be omitted if input_shape is used.
    :param weights_path: path to trained weights for this network; handed to `load_weights`
        together with the freshly built model.
    :return: Keras functional Model.
    """
    # L2 penalty applied to every convolution kernel.
    weight_decay = 1e-4

    # Per block: (filters of conv1, filters of conv2, dropout rate).
    block_specs = (
        (32, 64, 0.2),
        (128, 128, 0.3),
        (256, 256, 0.4),
    )

    inputs = create_inputs(input_shape, input_tensor)

    x = inputs
    for block_no, (filters_a, filters_b, drop_rate) in enumerate(block_specs, start=1):
        prefix = 'block{}'.format(block_no)
        for conv_no, n_filters in enumerate((filters_a, filters_b), start=1):
            x = Conv2D(n_filters, (3, 3),
                       padding='same',
                       activation='elu',
                       name='{}_conv{}'.format(prefix, conv_no),
                       kernel_regularizer=l2(weight_decay))(x)
            x = BatchNormalization(
                name='{}_batch-norm{}'.format(prefix, conv_no))(x)
        x = MaxPooling2D(pool_size=(2, 2), name=prefix + '_pool')(x)
        # Fixed seed keeps the dropout masks reproducible.
        x = Dropout(drop_rate, name=prefix + '_dropout', seed=0)(x)

    # Classifier head.
    x = Flatten()(x)
    x = Dense(n_classes)(x)
    outputs = Activation('softmax', name='softmax')(x)

    model = Model(inputs, outputs, name='cifar100_student_strong')
    load_weights(weights_path, model)
    return model
def bn_feature_net_2D(receptive_field=61,
                      input_shape=(256, 256, 1),
                      n_features=3,
                      n_channels=1,
                      reg=1e-5,
                      n_conv_filters=64,
                      n_dense_filters=200,
                      VGG_mode=False,
                      init='he_normal',
                      norm_method='std',
                      location=False,
                      dilated=False,
                      padding=False,
                      padding_mode='reflect',
                      multires=False,
                      include_top=True):
    """Build a batch-normalised 2D feature network.

    Convolution / batch-norm / ReLU triples are stacked, with a 2x2 max-pool
    after every second triple, until the remaining receptive field is 4 or
    smaller. A convolution spanning the remainder and two ``TensorProduct``
    layers follow.

    Args:
        receptive_field: Side length of the receptive field. Drives the
            number of layers, the normalisation filter size, the padding
            amount and (when not ``dilated``) the input size.
        input_shape: Input shape; only used when ``dilated`` is true,
            otherwise it is rebuilt from ``receptive_field``/``n_channels``.
        n_features: Units in the final ``TensorProduct`` layer.
        n_channels: Input channels; only used when not ``dilated``.
        reg: L2 factor for every kernel regulariser.
        n_conv_filters: Filters in the convolution layers.
        n_dense_filters: Filters in the last convolution and the first
            ``TensorProduct`` layer.
        VGG_mode: If true, ``n_conv_filters`` doubles after each pooling.
        init: Kernel initialiser.
        norm_method: Forwarded to ``ImageNormalization2D``.
        location: If true, a ``Location2D`` output is concatenated to the
            features along the channel axis.
        dilated: If true, pooling uses ``DilatedMaxPool2D`` with a dilation
            rate that doubles at each pooling, padding is forced on and the
            final ``Flatten`` is skipped.
        padding: Whether to pad the normalised input by half the window.
        padding_mode: ``'reflect'`` or ``'zero'``; any other value adds no
            padding layer.
        multires: If true, the pooled outputs are cropped to the final
            spatial size and concatenated before the last convolution.
        include_top: If true, a ``Softmax`` over the channel axis is added.

    Returns:
        A Keras ``Model`` from the input layer to the last layer created.
    """
    # Dilated networks always pad.
    if dilated:
        padding = True

    half_window = (receptive_field - 1) // 2

    channels_first = K.image_data_format() == 'channels_first'
    if channels_first:
        channel_axis, row_axis, col_axis = 1, 2, 3
    else:
        channel_axis, row_axis, col_axis = -1, 1, 2

    # Patch-wise (non-dilated) networks take one receptive field as input.
    if not dilated:
        if channels_first:
            input_shape = (n_channels, receptive_field, receptive_field)
        else:
            input_shape = (receptive_field, receptive_field, n_channels)

    def split_evenly(total):
        # Split `total` into two parts; the second gets the extra unit.
        first = total // 2
        return (first, total - first)

    # Every tensor is kept, in creation order, so that multi-resolution mode
    # can refer back to earlier outputs by index.
    tensors = [Input(shape=input_shape)]
    tensors.append(
        ImageNormalization2D(norm_method=norm_method,
                             filter_size=receptive_field)(tensors[-1]))

    if padding:
        pad = (half_window, half_window)
        if padding_mode == 'reflect':
            tensors.append(ReflectionPadding2D(padding=pad)(tensors[-1]))
        elif padding_mode == 'zero':
            tensors.append(ZeroPadding2D(padding=pad)(tensors[-1]))

    if location:
        loc_shape = tuple(tensors[-1].shape.as_list()[1:])
        tensors.append(Location2D(in_shape=loc_shape)(tensors[-1]))
        tensors.append(
            Concatenate(axis=channel_axis)([tensors[-2], tensors[-1]]))

    pooled_indices = []  # positions in `tensors` of the pooling outputs
    remaining_rf = receptive_field
    n_triples = 0
    dilation = 1

    while remaining_rf > 4:
        kernel = 3 if remaining_rf % 2 == 0 else 4
        tensors.append(
            Conv2D(n_conv_filters, (kernel, kernel),
                   dilation_rate=dilation,
                   kernel_initializer=init,
                   padding='valid',
                   kernel_regularizer=l2(reg))(tensors[-1]))
        tensors.append(BatchNormalization(axis=channel_axis)(tensors[-1]))
        tensors.append(Activation('relu')(tensors[-1]))

        n_triples += 1
        remaining_rf -= kernel - 1

        # Pool only after every second conv triple.
        if n_triples % 2 != 0:
            continue

        if dilated:
            tensors.append(
                DilatedMaxPool2D(dilation_rate=dilation,
                                 pool_size=(2, 2))(tensors[-1]))
            dilation *= 2
        else:
            tensors.append(MaxPool2D(pool_size=(2, 2))(tensors[-1]))

        if VGG_mode:
            n_conv_filters *= 2

        remaining_rf = remaining_rf // 2

        if multires:
            pooled_indices.append(len(tensors) - 1)

    if multires:
        cropped = []
        for idx in pooled_indices:
            source_shape = tensors[idx].get_shape().as_list()
            final_shape = tensors[-1].get_shape().as_list()
            row_excess = int(source_shape[row_axis] - final_shape[row_axis])
            col_excess = int(source_shape[col_axis] - final_shape[col_axis])
            cropping = (split_evenly(row_excess), split_evenly(col_excess))
            cropped.append(Cropping2D(cropping=cropping)(tensors[idx]))
        tensors.append(Concatenate(axis=channel_axis)(cropped))

    tensors.append(
        Conv2D(n_dense_filters, (remaining_rf, remaining_rf),
               dilation_rate=dilation,
               kernel_initializer=init,
               padding='valid',
               kernel_regularizer=l2(reg))(tensors[-1]))
    tensors.append(BatchNormalization(axis=channel_axis)(tensors[-1]))
    tensors.append(Activation('relu')(tensors[-1]))

    tensors.append(
        TensorProduct(n_dense_filters,
                      kernel_initializer=init,
                      kernel_regularizer=l2(reg))(tensors[-1]))
    tensors.append(BatchNormalization(axis=channel_axis)(tensors[-1]))
    tensors.append(Activation('relu')(tensors[-1]))

    tensors.append(
        TensorProduct(n_features,
                      kernel_initializer=init,
                      kernel_regularizer=l2(reg))(tensors[-1]))

    if not dilated:
        tensors.append(Flatten()(tensors[-1]))

    if include_top:
        tensors.append(Softmax(axis=channel_axis)(tensors[-1]))

    return Model(inputs=tensors[0], outputs=tensors[-1])
def Deeplabv3(weights='pascal_voc',
              input_tensor=None,
              input_shape=(512, 512, 3),
              classes=21,
              backbone='mobilenetv2',
              OS=16,
              alpha=1.,
              activation=None):
    """ Instantiates the Deeplabv3+ architecture

    Optionally loads weights pre-trained on PASCAL VOC or Cityscapes.
    This model is available for TensorFlow only.

    # Arguments
        weights: one of 'pascal_voc' (pre-trained on pascal voc),
            'cityscapes' (pre-trained on cityscape) or None (random
            initialization)
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: shape of input image. format HxWxC
            PASCAL VOC model was trained on (512,512,3) images.
            Ignored when `input_tensor` is given.
        classes: number of desired classes. PASCAL VOC has 21 classes,
            Cityscapes has 19 classes. If number of classes not aligned with
            the weights used, the last layer gets a different name and is
            therefore skipped by the by-name weight loading (i.e. it stays
            randomly initialized).
        backbone: backbone to use. one of {'xception','mobilenetv2'}
        activation: optional activation to add to the top of the network.
            One of 'softmax', 'sigmoid' or None. Any other value adds no
            activation layer.
        OS: determines input_shape/feature_extractor_output ratio.
            One of {8,16}. Used only for xception backbone.
        alpha: controls the width of the MobileNetV2 network. This is known
            as the width multiplier in the MobileNetV2 paper.
                - If `alpha` < 1.0, proportionally decreases the number
                    of filters in each layer.
                - If `alpha` > 1.0, proportionally increases the number
                    of filters in each layer.
                - If `alpha` = 1, default number of filters from the paper
                    are used at each layer.
            Used only for mobilenetv2 backbone. Pretrained is only available
            for alpha=1.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights` or `backbone`
    """
    if weights not in {'pascal_voc', 'cityscapes', None}:
        raise ValueError(
            'The `weights` argument should be either '
            '`None` (random initialization), `pascal_voc` '
            '(pre-trained on PASCAL VOC), or `cityscapes` '
            '(pre-trained on Cityscapes)')

    if backbone not in {'xception', 'mobilenetv2'}:
        raise ValueError('The `backbone` argument should be either '
                         '`xception`  or `mobilenetv2` ')

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    if backbone == 'xception':
        if OS == 8:
            entry_block3_stride = 1
            middle_block_rate = 2  # ! Not mentioned in paper, but required
            exit_block_rates = (2, 4)
            atrous_rates = (12, 24, 36)
        else:
            entry_block3_stride = 2
            middle_block_rate = 1
            exit_block_rates = (1, 2)
            atrous_rates = (6, 12, 18)

        x = Conv2D(32, (3, 3),
                   strides=(2, 2),
                   name='entry_flow_conv1_1',
                   use_bias=False,
                   padding='same')(img_input)
        x = BatchNormalization(name='entry_flow_conv1_1_BN')(x)
        x = Activation(tf.nn.relu)(x)

        x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
        x = BatchNormalization(name='entry_flow_conv1_2_BN')(x)
        x = Activation(tf.nn.relu)(x)

        x = _xception_block(x, [128, 128, 128],
                            'entry_flow_block1',
                            skip_connection_type='conv',
                            stride=2,
                            depth_activation=False)
        # `skip1` is the low-level feature map reused by the decoder below.
        x, skip1 = _xception_block(x, [256, 256, 256],
                                   'entry_flow_block2',
                                   skip_connection_type='conv',
                                   stride=2,
                                   depth_activation=False,
                                   return_skip=True)

        x = _xception_block(x, [728, 728, 728],
                            'entry_flow_block3',
                            skip_connection_type='conv',
                            stride=entry_block3_stride,
                            depth_activation=False)
        for i in range(16):
            x = _xception_block(x, [728, 728, 728],
                                'middle_flow_unit_{}'.format(i + 1),
                                skip_connection_type='sum',
                                stride=1,
                                rate=middle_block_rate,
                                depth_activation=False)

        x = _xception_block(x, [728, 1024, 1024],
                            'exit_flow_block1',
                            skip_connection_type='conv',
                            stride=1,
                            rate=exit_block_rates[0],
                            depth_activation=False)
        x = _xception_block(x, [1536, 1536, 2048],
                            'exit_flow_block2',
                            skip_connection_type='none',
                            stride=1,
                            rate=exit_block_rates[1],
                            depth_activation=True)

    else:
        # The MobileNetV2 path has a fixed layout; the `OS` argument is not
        # consulted here.
        first_block_filters = _make_divisible(32 * alpha, 8)
        x = Conv2D(first_block_filters,
                   kernel_size=3,
                   strides=(2, 2),
                   padding='same',
                   use_bias=False,
                   name='Conv')(img_input)
        x = BatchNormalization(epsilon=1e-3, momentum=0.999,
                               name='Conv_BN')(x)
        x = Activation(tf.nn.relu6, name='Conv_Relu6')(x)

        x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1,
                                expansion=1, block_id=0,
                                skip_connection=False)

        x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2,
                                expansion=6, block_id=1,
                                skip_connection=False)
        x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1,
                                expansion=6, block_id=2,
                                skip_connection=True)

        x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2,
                                expansion=6, block_id=3,
                                skip_connection=False)
        x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
                                expansion=6, block_id=4,
                                skip_connection=True)
        x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
                                expansion=6, block_id=5,
                                skip_connection=True)

        # stride in block 6 changed from 2 -> 1, so we need to use rate = 2
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,  # 1!
                                expansion=6, block_id=6,
                                skip_connection=False)
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=7,
                                skip_connection=True)
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=8,
                                skip_connection=True)
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=9,
                                skip_connection=True)

        x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=10,
                                skip_connection=False)
        x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=11,
                                skip_connection=True)
        x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=12,
                                skip_connection=True)

        x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1,
                                rate=2,  # 1!
                                expansion=6, block_id=13,
                                skip_connection=False)
        x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=4,
                                expansion=6, block_id=14,
                                skip_connection=True)
        x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=4,
                                expansion=6, block_id=15,
                                skip_connection=True)

        x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, rate=4,
                                expansion=6, block_id=16,
                                skip_connection=False)

    # end of feature extractor

    # branching for Atrous Spatial Pyramid Pooling

    # Image Feature branch
    b4 = GlobalAveragePooling2D()(x)
    # from (b_size, channels)->(b_size, 1, 1, channels)
    b4 = Lambda(lambda x: K.expand_dims(x, 1))(b4)
    b4 = Lambda(lambda x: K.expand_dims(x, 1))(b4)
    b4 = Conv2D(256, (1, 1),
                padding='same',
                use_bias=False,
                name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation(tf.nn.relu)(b4)
    # upsample. have to use compat because of the option align_corners
    size_before = tf.keras.backend.int_shape(x)
    b4 = Lambda(lambda x: tf.compat.v1.image.resize(
        x, size_before[1:3], method='bilinear', align_corners=True))(b4)

    # simple 1x1
    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation(tf.nn.relu, name='aspp0_activation')(b0)

    # there are only 2 branches in mobilenetV2. not sure why
    if backbone == 'xception':
        # rate = 6 (12)
        b1 = SepConv_BN(x, 256, 'aspp1',
                        rate=atrous_rates[0],
                        depth_activation=True,
                        epsilon=1e-5)
        # rate = 12 (24)
        b2 = SepConv_BN(x, 256, 'aspp2',
                        rate=atrous_rates[1],
                        depth_activation=True,
                        epsilon=1e-5)
        # rate = 18 (36)
        b3 = SepConv_BN(x, 256, 'aspp3',
                        rate=atrous_rates[2],
                        depth_activation=True,
                        epsilon=1e-5)

        # concatenate ASPP branches & project
        x = Concatenate()([b4, b0, b1, b2, b3])
    else:
        x = Concatenate()([b4, b0])

    x = Conv2D(256, (1, 1),
               padding='same',
               use_bias=False,
               name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = Activation(tf.nn.relu)(x)
    x = Dropout(0.1)(x)

    # DeepLab v.3+ decoder
    if backbone == 'xception':
        # Feature projection
        # x4 (x2) block
        skip_size = tf.keras.backend.int_shape(skip1)
        x = Lambda(lambda xx: tf.compat.v1.image.resize(
            xx, skip_size[1:3], method='bilinear', align_corners=True))(x)

        dec_skip1 = Conv2D(48, (1, 1),
                           padding='same',
                           use_bias=False,
                           name='feature_projection0')(skip1)
        dec_skip1 = BatchNormalization(name='feature_projection0_BN',
                                       epsilon=1e-5)(dec_skip1)
        dec_skip1 = Activation(tf.nn.relu)(dec_skip1)
        x = Concatenate()([x, dec_skip1])
        x = SepConv_BN(x, 256, 'decoder_conv0',
                       depth_activation=True,
                       epsilon=1e-5)
        x = SepConv_BN(x, 256, 'decoder_conv1',
                       depth_activation=True,
                       epsilon=1e-5)

    # you can use it with arbitary number of classes
    # The layer name decides whether the by-name weight loading below
    # touches the final layer.
    if (weights == 'pascal_voc' and classes == 21) or \
            (weights == 'cityscapes' and classes == 19):
        last_layer_name = 'logits_semantic'
    else:
        last_layer_name = 'custom_logits_semantic'

    x = Conv2D(classes, (1, 1), padding='same', name=last_layer_name)(x)
    # Resize the logits back to the spatial size of the network input.
    size_before3 = tf.keras.backend.int_shape(img_input)
    x = Lambda(lambda xx: tf.compat.v1.image.resize(
        xx, size_before3[1:3], method='bilinear', align_corners=True))(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    if activation in {'softmax', 'sigmoid'}:
        x = tf.keras.layers.Activation(activation)(x)

    model = Model(inputs, x, name='deeplabv3plus')

    # load weights
    if weights == 'pascal_voc':
        if backbone == 'xception':
            weights_path = get_file(
                'deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH_X,
                cache_subdir='models')
        else:
            weights_path = get_file(
                'deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH_MOBILE,
                cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
    elif weights == 'cityscapes':
        if backbone == 'xception':
            weights_path = get_file(
                'deeplabv3_xception_tf_dim_ordering_tf_kernels_cityscapes.h5',
                WEIGHTS_PATH_X_CS,
                cache_subdir='models')
        else:
            weights_path = get_file(
                'deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels_cityscapes.h5',
                WEIGHTS_PATH_MOBILE_CS,
                cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
    return model
# Step 1: Pre-process the data
# Import through the public `tensorflow.keras` namespace; the
# `tensorflow.python.*` path is TensorFlow-internal and not a stable API.
from tensorflow.keras.datasets import cifar10

(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Convert to float32 and scale by 1/255.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# One-hot encode the labels; `n_classes` is defined earlier in the file.
y_train = keras.utils.to_categorical(y_train, n_classes)
y_test = keras.utils.to_categorical(y_test, n_classes)

# Step 2: Create the Model
# Two conv/pool stages, then dropout, a 128-unit dense layer and a softmax.
model = Sequential([
    Conv2D(32, (3, 3),
           activation='relu',
           input_shape=(32, 32, 3),
           padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(n_classes, activation='softmax'),
])

# Step 3: Compile the Model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
def _build_normal_block(x, config):
    """Apply a Conv2D -> BatchNormalization -> ReLU stack to ``x``.

    Args:
        x: Input tensor.
        config: Mapping of keyword arguments forwarded to ``Conv2D``.

    Returns:
        The tensor produced by the ReLU activation.
    """
    convolved = Conv2D(**config)(x)
    normalized = BatchNormalization()(convolved)
    return Activation("relu")(normalized)
# Scale the inputs by 1/255 (presumably 8-bit image data -- verify upstream).
x = x / 255.0

# Grid search: three candidate values for each of three hyper-parameters;
# one model is built per combination.
dense_layers = [0, 1, 2]
layer_sizes = [32, 64, 128]
conv_layers = [1, 2, 3]

for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:
            # Run name encodes the configuration plus a timestamp.
            name = "{}-conv-{}-nodes-{}-dense-{}".format(
                conv_layer, layer_size, dense_layer, int(time.time()))
            print(name)

            # First conv stage carries the input shape.
            stack = [
                Conv2D(layer_size, (3, 3), input_shape=x.shape[1:]),
                Activation("relu"),
                MaxPooling2D(pool_size=(2, 2)),
            ]

            # Remaining conv stages.
            for _ in range(conv_layer - 1):
                stack += [
                    Conv2D(layer_size, (3, 3)),
                    Activation("relu"),
                    MaxPooling2D(pool_size=(2, 2)),
                ]

            stack.append(Flatten())

            # Optional hidden dense layers.
            for _ in range(dense_layer):
                stack += [Dense(layer_size), Activation("relu")]

            # Single sigmoid output unit.
            stack += [Dense(1), Activation('sigmoid')]

            model = Sequential(stack)
# Rotate to 90 degrees, then wait a second (presumably for a servo on this
# PWM channel to settle -- confirm against the hardware setup).
p2.ChangeDutyCycle(7.5)
time.sleep(1)

# Input geometry and training length.
img_rows = 128
img_cols = 128
num_channel = 1
num_epoch = 20

# Define the number of classes
num_classes = 3
labels_name = {'center': 0, 'left': 1, 'right': 2}

# Small CNN: two conv layers + pool, one more conv + pool, then a dense
# classifier with a softmax over `num_classes`.
model = Sequential([
    Conv2D(32, (3, 3),
           activation='relu',
           input_shape=(img_rows, img_cols, num_channel)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.5),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.5),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# NOTE(review): trained with MSE + Adam; a categorical cross-entropy /
# RMSprop configuration was previously used here and left disabled.
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
# Viewing model_configuration
# Parameterise the image shape
num_chanels = 1  # RGB, HSV -> num_chanels=3
img_shape = (img_size, img_size, num_chanels)

num_clases = 17
limiteImagenesPrueba = 167

# Load images, labels and the third value returned by the loader.
imagenes, etiquetas, probabilidades = cargarDatos(
    "dataset/", num_clases, limiteImagenesPrueba)

model = Sequential([
    # Input layer: flat vectors of length img_size_flat
    InputLayer(input_shape=(img_size_flat,)),
    # Reshape each vector back into an image
    Reshape(img_shape),
    # Convolutional layers, each followed by 2x2 max-pooling
    Conv2D(filters=16, kernel_size=5, strides=1, padding='same',
           activation='relu', name='capa_convolucion_1'),
    MaxPooling2D(pool_size=2, strides=2),
    Conv2D(filters=36, kernel_size=5, strides=1, padding='same',
           activation='relu', name='capa_convolucion_2'),
    MaxPooling2D(pool_size=2, strides=2),
    Conv2D(filters=48, kernel_size=5, strides=1, padding='same',
           activation='relu', name='capa_convolucion_3'),
    MaxPooling2D(pool_size=2, strides=2),
    # Flatten the feature maps
    Flatten(),
    # Dense layer
    Dense(128, activation='relu'),
])
# Output layer
def train(x_train, y_train, x_test, y_test, epochs):
    """Train, evaluate and save a small 2-D CNN classifier.

    The inputs are indexed as ``x[r, :, :]`` and reshaped to
    ``(n, shape[1], shape[2], 1)``, so ``x_train``/``x_test`` must be 3-D
    arrays; ``y_train``/``y_test`` hold integer class labels (they are
    one-hot encoded here with ``to_categorical``).

    A validation set is carved out of ``x_train`` by taking short contiguous
    runs at regular intervals. The model is trained with early stopping,
    scored on the test set, scored again on non-overlapping windows of five
    consecutive test samples (majority vote per window), and finally written
    to ``Models\\model_cnn2d.{json,pickle,h5}``.

    Args:
        x_train: Training samples.
        y_train: Training labels.
        x_test: Test samples.
        y_test: Test labels.
        epochs: Maximum number of training epochs.

    Returns:
        The test-set accuracy (``score[1]``), or ``-2`` when the number of
        distinct labels differs between ``y_train`` and ``y_test``.
    """
    # calculate classes
    n_train_classes = np.unique(y_train).shape[0]
    if n_train_classes != np.unique(y_test).shape[0]:
        print('Error in class data...')
        return -2
    num_classes = n_train_classes

    # set validation data: every `step` samples, move the next `length`
    # samples from the training set into the validation set.
    # NOTE: with fewer than 200 training samples `step` is 0 and range()
    # raises ValueError.
    n_samples = x_train.shape[0]
    step = int(n_samples * 0.005)
    length = int(n_samples * 0.1 * 0.005)
    r = []
    for i in range(0, n_samples - length, step):
        r.extend(range(i, i + length))
    x_val = x_train[r, :, :]
    y_val = y_train[r]
    x_train = np.delete(x_train, r, axis=0)
    y_train = np.delete(y_train, r, axis=0)

    print('\nInitializing CNN2D...')
    print('\nclasses:', num_classes)
    print('x train shape:', x_train.shape)
    print('x val shape:', x_val.shape)
    print('x test shape:', x_test.shape)
    print('y train shape:', y_train.shape)
    print('y val shape:', y_val.shape)
    print('y test shape:', y_test.shape)
    print("\nTrain split with mean|std {:.2f}|{:.2f}".format(
        np.mean(x_train), np.std(x_train)))
    print("Test split with mean|std {:.2f}|{:.2f}".format(
        np.mean(x_test), np.std(x_test)))

    # shape data: add a trailing channel axis and one-hot encode the labels
    x_train = x_train.reshape(x_train.shape[0], x_train.shape[1],
                              x_train.shape[2], 1)
    x_val = x_val.reshape(x_val.shape[0], x_val.shape[1], x_val.shape[2], 1)
    x_test = x_test.reshape(x_test.shape[0], x_test.shape[1],
                            x_test.shape[2], 1)
    y_train = tf.keras.utils.to_categorical(y_train, num_classes)
    y_val = tf.keras.utils.to_categorical(y_val, num_classes)
    y_test = tf.keras.utils.to_categorical(y_test, num_classes)

    # define the model
    activation = 'elu'
    regularizer = 0.0000
    dropout = 0.25

    model = Sequential()
    model.add(
        Conv2D(16,
               kernel_size=(3, 3),
               strides=(2, 1),
               activation=activation,
               kernel_regularizer=regularizers.l2(regularizer),
               input_shape=(x_train.shape[1], x_train.shape[2], 1)))
    model.add(EntropyPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout))
    for n_filters in (32, 64):
        model.add(
            Conv2D(n_filters,
                   kernel_size=(3, 3),
                   strides=(1, 1),
                   activation=activation,
                   kernel_regularizer=regularizers.l2(regularizer)))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(dropout))
    model.add(Flatten())
    model.add(
        Dense(64,
              activation=activation,
              kernel_regularizer=regularizers.l2(regularizer)))
    model.add(Dropout(dropout))
    model.add(Dense(num_classes, activation='softmax'))

    # summarize model
    plot_model(model, to_file='Models\\model_cnn2d.png')
    print(' ')
    model.summary()

    # compile, fit evaluate
    # NOTE(review): the monitored key is 'val_acc'; newer tf.keras versions
    # report this metric as 'val_accuracy' -- confirm against the TF version
    # in use.
    callback = [
        callbacks.EarlyStopping(monitor='val_acc',
                                min_delta=0.01,
                                patience=10,
                                restore_best_weights=True)
    ]
    model.compile(loss=tf.keras.losses.categorical_crossentropy,
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=['accuracy'])
    model.fit(x_train,
              y_train,
              batch_size=256,
              epochs=epochs,
              verbose=2,
              validation_data=(x_val, y_val),
              callbacks=callback)
    score = model.evaluate(x_test, y_test, verbose=2)

    # evaluate on larger frames: majority vote over windows of `aggr_size`
    # consecutive test samples, keeping only windows with a single true label.
    aggr_size = 5
    n_test = y_test.shape[0]
    y_aggr_test = []
    y_aggr_pred = []
    if n_test > aggr_size:
        y_pred = np.argmax(model.predict(x_test), axis=1)
        y_test = np.argmax(y_test, axis=1)
        for i in range(0, n_test - aggr_size, aggr_size):
            true_window = y_test[i:i + aggr_size]
            if np.unique(true_window).shape[0] == 1:
                # All true labels agree, so the window's mode is its first
                # element.
                y_aggr_test.append(true_window[0])
                # bincount().argmax() is the smallest most frequent label,
                # the same value scipy.stats.mode returns, without depending
                # on the SciPy version's return shape.
                y_aggr_pred.append(
                    np.bincount(y_pred[i:i + aggr_size]).argmax())

    if y_aggr_test:
        scipy_score = classification_report(y_aggr_test,
                                            y_aggr_pred,
                                            output_dict=True)['accuracy']
    else:
        # No usable window (e.g. very small test set).
        scipy_score = float('nan')
    print('short {:.2f} and aggr {:.2f}'.format(score[1], scipy_score))

    # save model (context managers make sure the files are flushed/closed)
    with open("Models\\model_cnn2d.json", "w") as json_file:
        json_file.write(model.to_json())
    with open("Models\\model_cnn2d.pickle", "wb") as pickle_file:
        pickle.dump(model.get_config(), pickle_file)
    model.save_weights("Models\\model_cnn2d.h5")

    # results
    return score[1]
# Hold out the last 2000 samples for testing.
holdout = slice(-2000, None)
test_x = train[holdout]
test_label = train_data.label[holdout]
test_onehot = train_data.onehot[holdout]

# Per-class weights that compensate for label imbalance.
class_weight = compute_class_weight(class_weight='balanced',
                                    classes=np.unique(train_data.label),
                                    y=train_data.label)

# CNN model
inputs = Input(shape=(48, 48, 1))

# First and second convolutional stages: 5x5 ReLU convolution, 2x2 max-pooling,
# batch normalisation over the last axis, then dropout.
net = inputs
for n_filters, conv_name in ((64, 'layer_conv1'), (128, 'layer_conv2')):
    net = Conv2D(kernel_size=5, strides=1, filters=n_filters, padding='same',
                 activation='relu', name=conv_name)(net)
    net = MaxPooling2D(pool_size=2, strides=2)(net)
    net = BatchNormalization(axis=-1)(net)
    net = Dropout(0.25)(net)

# Third convolutional layer with ReLU-activation and max-pooling.
net = Conv2D(kernel_size=5, strides=1, filters=256, padding='same',
             activation='relu', name='layer_conv3')(net)
net = MaxPooling2D(pool_size=2, strides=2)(net)
from tensorflow.python import keras
from tensorflow.python.keras import layers
from tensorflow.python.keras.layers import Dense, Activation, Flatten, Conv2D, MaxPool2D, AvgPool2D, GlobalMaxPool2D, GlobalAvgPool2D, BatchNormalization, add, Input
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.optimizers import Adam

image_size = 224
input_object = Input(shape=(image_size, image_size, 3))

# Four identical 1x1 convolutions (256 filters), each followed by a ReLU.
output = input_object
for _ in range(4):
    output = Conv2D(256, kernel_size=1, strides=1, padding="same")(output)
    output = Activation("relu")(output)

# Collapse the spatial dimensions into one 256-vector per image.
output = GlobalAvgPool2D()(output)

model = Model(inputs=input_object, outputs=output)
def __call__(self, net, training=None):
    """Apply convolution -> batch normalisation -> LeakyReLU to ``net``.

    The convolution uses ``self.filters``, ``self.kernelSize`` and
    ``self.strides`` with 'same' padding; ``training`` is forwarded to the
    batch-normalisation layer. New layer objects are created on every call.
    """
    conv = Conv2D(self.filters, self.kernelSize, strides=self.strides,
                  padding='same')
    convolved = conv(net)
    normalised = BatchNormalization()(convolved, training=training)
    return LeakyReLU()(normalised)
def ResNet(input_shape=None, classes=10, block='bottleneck',
           residual_unit='v2', repetitions=None, initial_filters=64,
           activation='softmax', include_top=True, input_tensor=None,
           dropout=None, transition_dilation_rate=(1, 1),
           initial_strides=(2, 2), initial_kernel_size=(7, 7),
           initial_pooling='max', final_pooling=None, top='classification'):
    """Builds a custom ResNet like architecture. Defaults to ResNet50 v2.

    Args:
        input_shape: shape tuple with exactly 3 entries. Despite the `None`
            default a value is required. When the backend image data format
            is `channels_first` the tuple is re-ordered from
            `(channels, rows, cols)` to `(rows, cols, channels)` before the
            input layer is created.
        classes: The number of outputs at final softmax layer
        block: The block function to use. This is either `'basic'` or
            `'bottleneck'`, the name of another block function, or the
            function itself. The original paper used `basic` for
            layers < 50.
        residual_unit: the basic residual unit, 'v1' for conv bn relu, 'v2'
            for bn relu conv, the name of another function, or the function
            itself. See [Identity Mappings in Deep Residual Networks]
            (https://arxiv.org/abs/1603.05027) for details.
        repetitions: Number of repetitions of various block units.
            At each block unit, the number of filters are doubled and the
            input size is halved. Default of None implies the ResNet50v2
            values of [3, 4, 6, 3].
        initial_filters: number of filters of the first convolution; doubled
            after every block unit.
        activation: activation of the top layers; one of 'softmax',
            'sigmoid' (only with `classes == 1`) or None.
        include_top: whether to add the layers selected by `top`.
        input_tensor: optional tensor passed to the `Input` layer.
        dropout: None for no dropout, otherwise rate of dropout from 0 to 1.
            Based on [Wide Residual Networks]
            (https://arxiv.org/pdf/1605.07146) paper.
        transition_dilation_rate: Dilation rate for transition layers. For
            semantic segmentation of images use a dilation rate of (2, 2).
            With the default (1, 1) the first unit of every block uses
            stride (2, 2).
        initial_strides: Stride of the very first residual unit and
            MaxPooling2D call, with default (2, 2), set to (1, 1) for small
            images like cifar.
        initial_kernel_size: kernel size of the very first convolution,
            (7, 7) for imagenet and (3, 3) for small image datasets like
            tiny imagenet and cifar.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for
            details.
        initial_pooling: Determine if there will be an initial pooling
            layer, 'max' for imagenet and None for small image datasets.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for
            details.
        final_pooling: Optional pooling mode for feature extraction at the
            final model layer when the classification/segmentation top is
            not added.
            - `None` means that the output of the model will be the 4D
              tensor output of the last convolutional layer.
            - `avg` means that global average pooling will be applied to
              the output of the last convolutional layer, and thus the
              output of the model will be a 2D tensor.
            - `max` means that global max pooling will be applied.
        top: Defines final layers to evaluate based on a specific problem
            type. Options are 'classification' for ImageNet style problems,
            'segmentation' for problems like the Pascal VOC dataset, and
            None to exclude these layers entirely.

    Returns:
        The keras `Model`.

    Raises:
        ValueError: if `activation` is not supported, if 'sigmoid' is used
            with `classes != 1`, or if `input_shape` is missing or does not
            have exactly 3 entries.
    """
    if activation not in ['softmax', 'sigmoid', None]:
        raise ValueError(
            'activation must be one of "softmax", "sigmoid", or None')
    if activation == 'sigmoid' and classes != 1:
        raise ValueError(
            'sigmoid activation can only be used when classes = 1')
    if repetitions is None:
        repetitions = [3, 4, 6, 3]

    _handle_dim_ordering()
    # A missing shape used to fail inside len() with an opaque TypeError.
    if input_shape is None or len(input_shape) != 3:
        raise ValueError(
            "Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")

    # Resolve the block function from its name (or accept a callable).
    if block == 'basic':
        block_fn = basic_block
    elif block == 'bottleneck':
        block_fn = bottleneck
    elif isinstance(block, six.string_types):
        block_fn = _string_to_function(block)
    else:
        block_fn = block

    # Resolve the residual unit the same way; a callable is used as given.
    if residual_unit == 'v2':
        residual_unit = _bn_relu_conv
    elif residual_unit == 'v1':
        residual_unit = _conv_bn_relu
    elif isinstance(residual_unit, six.string_types):
        residual_unit = _string_to_function(residual_unit)

    # Permute dimension order if necessary
    if K.image_data_format() == 'channels_first':
        input_shape = (input_shape[1], input_shape[2], input_shape[0])

    img_input = Input(shape=input_shape, tensor=input_tensor)
    x = _conv_bn_relu(filters=initial_filters,
                      kernel_size=initial_kernel_size,
                      strides=initial_strides)(img_input)
    if initial_pooling == 'max':
        x = MaxPooling2D(pool_size=(3, 3), strides=initial_strides,
                         padding="same")(x)

    block = x
    filters = initial_filters
    for i, r in enumerate(repetitions):
        transition_dilation_rates = [transition_dilation_rate] * r
        transition_strides = [(1, 1)] * r
        if transition_dilation_rate == (1, 1):
            # Without dilation, downsample in the first unit of the block.
            transition_strides[0] = (2, 2)
        block = _residual_block(
            block_fn, filters=filters, stage=i, blocks=r,
            is_first_layer=(i == 0), dropout=dropout,
            transition_dilation_rates=transition_dilation_rates,
            transition_strides=transition_strides,
            residual_unit=residual_unit)(block)
        filters *= 2

    # Last activation
    x = _bn_relu(block)

    # Classifier block. Strings are compared with ==; identity comparison
    # (`is`) against a literal is not guaranteed to match equal strings.
    if include_top and top == 'classification':
        x = GlobalAveragePooling2D()(x)
        x = Dense(units=classes, activation=activation,
                  kernel_initializer="he_normal")(x)
    elif include_top and top == 'segmentation':
        x = Conv2D(classes, (1, 1), activation='linear', padding='same')(x)
        if K.image_data_format() == 'channels_first':
            channel, row, col = input_shape
        else:
            row, col, channel = input_shape
        # Flatten the spatial axes so the activation runs per pixel, then
        # restore the 2-D layout.
        x = Reshape((row * col, classes))(x)
        x = Activation(activation)(x)
        x = Reshape((row, col, classes))(x)
    elif final_pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif final_pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    model = Model(inputs=img_input, outputs=x)
    return model
def make_model(model_file, weights_file, anchor_file, end_step,
               initial_sparsity, end_sparsity, frequency, **kwargs):
    """Build a Keras model from a Darknet config/weights pair, with pruning.

    The Darknet config is parsed section by section. Every convolution except
    the first one is wrapped with ``prune.prune_low_magnitude`` using a
    polynomial-decay sparsity schedule. The tensors feeding the ``yolo``
    sections (or the last layer when there is none) become the model outputs.

    The training / export code that used to follow the ``return`` statement
    was unreachable (and referenced undefined names) and has been removed,
    together with the annotation-file loading that only served it.

    Args:
        model_file: Path to the Darknet ``.cfg`` file.
        weights_file: Path to the Darknet weights file.
        anchor_file: Path passed to ``np.load`` to obtain the anchors;
            ``len(anchors[0])`` is used as the number of anchors per output.
        end_step: ``end_step`` of the pruning schedule.
        initial_sparsity: ``initial_sparsity`` of the pruning schedule.
        end_sparsity: ``final_sparsity`` of the pruning schedule.
        frequency: ``frequency`` of the pruning schedule.
        **kwargs: Accepted for call compatibility; not used.

    Returns:
        Tuple ``(yolo_model, yolo_model_wrapper, output_size)``: the raw
        model, a model whose first one or two outputs are reshaped to
        ``(h, w, num_anchors, 5 + num_classes)``, and ``(width, height)``
        from the ``net`` section.

    Raises:
        ValueError: On an unsupported section type or convolution activation.
    """
    classes_path = 'model_data/classes.txt'
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    anchors = np.load(anchor_file, allow_pickle=True)

    supported_activations = ('linear', 'leaky', 'swish')

    # Load weights and config.
    print('Loading weights.')
    with open(weights_file, 'rb') as weights_fh:
        major, minor, revision = np.ndarray(
            shape=(3, ), dtype='int32', buffer=weights_fh.read(12))
        # The width of the `seen` counter depends on the file version.
        if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
            seen = np.ndarray(shape=(1,), dtype='int64',
                              buffer=weights_fh.read(8))
        else:
            seen = np.ndarray(shape=(1,), dtype='int32',
                              buffer=weights_fh.read(4))
        print('Weights Header: ', major, minor, revision, seen)

        print('Parsing Darknet config.')
        unique_config_file = unique_config_sections(model_file)
        cfg_parser = configparser.ConfigParser()
        cfg_parser.read_file(unique_config_file)

        first_layer = True
        print('Creating Keras model.')
        all_layers = []

        weight_decay = float(cfg_parser['net_0']['decay']
                             ) if 'net_0' in cfg_parser.sections() else 5e-4
        count = 0  # number of float32 weights consumed so far
        out_index = []
        pruning_params = {
            'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
                initial_sparsity=initial_sparsity,
                final_sparsity=end_sparsity,
                begin_step=0,
                end_step=end_step,
                frequency=frequency)
        }

        for section in cfg_parser.sections():
            print('Parsing section {}'.format(section))
            if section.startswith('convolutional'):
                filters = int(cfg_parser[section]['filters'])
                size = int(cfg_parser[section]['size'])
                stride = int(cfg_parser[section]['stride'])
                pad = int(cfg_parser[section]['pad'])
                activation = cfg_parser[section]['activation']
                batch_normalize = 'batch_normalize' in cfg_parser[section]

                # Previously an unknown activation slipped through silently
                # and the layer was never appended to `all_layers`.
                if activation not in supported_activations:
                    raise ValueError(
                        'Unknown activation function `{}` in section {}'.format(
                            activation, section))

                padding = 'same' if pad == 1 and stride == 1 else 'valid'

                # Setting weights.
                # Darknet serializes convolutional weights as:
                # [bias/beta, [gamma, mean, variance], conv_weights]
                prev_layer_shape = K.int_shape(prev_layer)

                weights_shape = (size, size, prev_layer_shape[-1], filters)
                darknet_w_shape = (filters, weights_shape[2], size, size)
                weights_size = np.prod(weights_shape)

                print('conv2d', 'bn'
                      if batch_normalize else ' ', activation, weights_shape)

                conv_bias = np.ndarray(
                    shape=(filters, ),
                    dtype='float32',
                    buffer=weights_fh.read(filters * 4))
                count += filters

                if batch_normalize:
                    bn_weights = np.ndarray(
                        shape=(3, filters),
                        dtype='float32',
                        buffer=weights_fh.read(filters * 12))
                    count += 3 * filters

                    bn_weight_list = [
                        bn_weights[0],  # scale gamma
                        conv_bias,  # shift beta
                        bn_weights[1],  # running mean
                        bn_weights[2]  # running var
                    ]

                conv_weights = np.ndarray(
                    shape=darknet_w_shape,
                    dtype='float32',
                    buffer=weights_fh.read(weights_size * 4))
                count += weights_size

                # DarkNet conv_weights are serialized Caffe-style:
                # (out_dim, in_dim, height, width)
                # We would like to set these to Tensorflow order:
                # (height, width, in_dim, out_dim)
                conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
                conv_weights = [conv_weights] if batch_normalize else [
                    conv_weights, conv_bias
                ]

                # Activations are added as separate layers below.
                act_fn = None

                # Create Conv2D layer
                if stride > 1:
                    # Darknet uses left and top padding instead of 'same' mode
                    prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)
                conv = Conv2D(
                    filters, (size, size),
                    strides=(stride, stride),
                    kernel_regularizer=l2(weight_decay),
                    use_bias=not batch_normalize,
                    weights=conv_weights,
                    activation=act_fn,
                    padding=padding)
                if not first_layer:
                    # Only the very first convolution is left unpruned.
                    conv = prune.prune_low_magnitude(conv, **pruning_params)
                conv_layer = conv(prev_layer)

                if batch_normalize:
                    conv_layer = BatchNormalization(
                        weights=bn_weight_list)(conv_layer)
                prev_layer = conv_layer
                first_layer = False

                if activation == 'linear':
                    all_layers.append(prev_layer)
                elif activation == 'leaky':
                    act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                    prev_layer = act_layer
                    all_layers.append(act_layer)
                elif activation == 'swish':
                    # NOTE(review): this applies `sigmoid` alone, not
                    # x * sigmoid(x) -- confirm this is intended.
                    act_layer = sigmoid(prev_layer)
                    prev_layer = act_layer
                    all_layers.append(act_layer)

            elif section.startswith('route'):
                ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
                layers = [all_layers[i] for i in ids]
                if len(layers) > 1:
                    print('Concatenating route layers:', layers)
                    concatenate_layer = Concatenate()(layers)
                    all_layers.append(concatenate_layer)
                    prev_layer = concatenate_layer
                else:
                    skip_layer = layers[0]  # only one layer to route
                    all_layers.append(skip_layer)
                    prev_layer = skip_layer

            elif section.startswith('maxpool'):
                size = int(cfg_parser[section]['size'])
                stride = int(cfg_parser[section]['stride'])
                all_layers.append(
                    MaxPooling2D(
                        pool_size=(size, size),
                        strides=(stride, stride),
                        padding='same')(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('shortcut'):
                index = int(cfg_parser[section]['from'])
                # The configured activation is read but a LeakyReLU is always
                # applied after the addition.
                activation = cfg_parser[section]['activation']
                all_layers.append(Add()([all_layers[index], prev_layer]))
                prev_layer = all_layers[-1]
                all_layers.append(LeakyReLU(alpha=0.1)(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('upsample'):
                stride = int(cfg_parser[section]['stride'])
                assert stride == 2, 'Only stride=2 supported.'
                all_layers.append(UpSampling2D(stride)(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('yolo'):
                # The layer just before a yolo section is a model output; a
                # placeholder keeps config indices aligned with all_layers.
                out_index.append(len(all_layers) - 1)
                all_layers.append(None)
                prev_layer = all_layers[-1]

            elif section.startswith('net'):
                height = int(cfg_parser[section]['height'])
                width = int(cfg_parser[section]['width'])
                input_layer = Input(shape=(height, width, 3))
                prev_layer = input_layer
                output_size = (width, height)

            else:
                raise ValueError(
                    'Unsupported section header type: {}'.format(section))

        # Check to see if all weights have been read.
        remaining_weights = len(weights_fh.read()) / 4
    print('Read {} of {} from Darknet weights.'.format(
        count, count + remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    # Create the models.
    if len(out_index) == 0:
        out_index.append(len(all_layers) - 1)

    num_anchors = len(anchors[0])
    num_layers = len(out_index)

    shape = K.int_shape(all_layers[out_index[0]])
    y1_reshape = KLayer.Reshape(
        (shape[1], shape[2], num_anchors, 5 + num_classes),
        name='l1')(all_layers[out_index[0]])
    if num_layers > 1:
        shape = K.int_shape(all_layers[out_index[1]])
        y2_reshape = KLayer.Reshape(
            (shape[1], shape[2], num_anchors, 5 + num_classes),
            name='l2')(all_layers[out_index[1]])

    yolo_model = Model(inputs=input_layer,
                       outputs=[all_layers[i] for i in out_index])
    if num_layers > 1:
        yolo_model_wrapper = Model(input_layer, [y1_reshape, y2_reshape])
    else:
        yolo_model_wrapper = Model(input_layer, [y1_reshape])
    yolo_model.summary()
    return yolo_model, yolo_model_wrapper, output_size
def cifar100_complicated_ensemble_submodel5(
        input_shape=None,
        input_tensor=None,
        n_classes=None,
        weights_path: Union[None, str] = None) -> Model:
    """
    Defines a cifar100 network.

    Three convolutional blocks (64, 128 and 256 filters). Each block holds two
    3x3 ELU convolutions with L2 regularisation, each followed by batch
    normalisation, then 2x2 max-pooling and dropout (0.2, 0.3, 0.5). The
    flattened features feed a softmax layer with `n_classes` units.

    :param n_classes: number of units of the softmax output layer.
    :param input_shape: the input shape of the network. Can be omitted if input_tensor is used.
    :param input_tensor: the input tensor of the network. Can be omitted if input_shape is used.
    :param weights_path: a path to a trained custom network's weights.
    :return: Keras functional API Model.
    """
    inputs = create_inputs(input_shape, input_tensor)

    # Define a weight decay for the regularisation.
    weight_decay = 1e-5

    # (filters, dropout rate) for blocks 1..3.
    block_specs = ((64, 0.2), (128, 0.3), (256, 0.5))

    x = inputs
    for block_no, (n_filters, drop_rate) in enumerate(block_specs, start=1):
        prefix = 'block{}'.format(block_no)
        for conv_no in (1, 2):
            x = Conv2D(n_filters, (3, 3), padding='same', activation='elu',
                       name='{}_conv{}'.format(prefix, conv_no),
                       kernel_regularizer=l2(weight_decay))(x)
            x = BatchNormalization(
                name='{}_batch-norm{}'.format(prefix, conv_no))(x)
        x = MaxPooling2D(pool_size=(2, 2), name=prefix + '_pool')(x)
        x = Dropout(drop_rate, name=prefix + '_dropout', seed=0)(x)

    # Add top layers.
    x = Flatten(name='flatten')(x)
    outputs = Dense(n_classes, activation='softmax', name='softmax_outputs')(x)

    # Create Submodel 5.
    model = Model(inputs, outputs, name='cifar100_complicated_ensemble_submodel5')
    # Load weights, if they exist.
    load_weights(weights_path, model)

    return model
def upsample_1(x, factor, **kwargs):
    """Convolve ``x`` to ``num_filters * factor**2`` channels, then apply
    ``pixel_shuffle(scale=factor)`` through a ``Lambda`` layer.

    ``num_filters`` is taken from the enclosing scope; ``kwargs`` are passed
    on to the 3x3 'same' ``Conv2D``.
    """
    n_channels = num_filters * (factor ** 2)
    expanded = Conv2D(n_channels, 3, padding='same', **kwargs)(x)
    shuffle = Lambda(pixel_shuffle(scale=factor))
    return shuffle(expanded)
# Convert the test images to float32 and divide by 255.
x_test = x_test.astype('float32') / 255

# One-hot encode the labels (6 classes).
y_train = to_categorical(y_train, 6)
y_test = to_categorical(y_test, 6)

# (A VGG16-based variant with two 4096-unit dense layers was tried here
# previously; the custom network below is used instead.)

k_size = (3, 3)
input = Input(shape=(128, 128, 3))

# Stem: two conv -> batch-norm -> ReLU stages (stride 2, then stride 1).
x = input
for n_filters, stride in ((32, 2), (64, 1)):
    x = Conv2D(n_filters, k_size, padding='same', strides=stride)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

x = MaxPool2D((2, 2))(x)

# Third stage, stride 2.
x = Conv2D(128, k_size, padding='same', strides=2)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)

# Fourth stage, stride 2.
x = Conv2D(256, k_size, padding='same', strides=2)(x)
x = BatchNormalization()(x)
# Augmentation: move every image by a random offset drawn from [-50, 50) per
# axis and adjust its 30 label values accordingly.
even_idx = np.arange(0, 30, 2)
odd_idx = np.arange(1, 30, 2)
for i in range(input_X.shape[0]):
    mx = np.random.randint(-50, 50)
    my = np.random.randint(-50, 50)

    # move_image works on the 96x96 image scaled by 255; scale back afterwards.
    shifted = move_image(input_X[i].reshape(96, 96) * 255, (mx, my))
    input_X[i] = shifted.reshape(96, 96, 1) / 255.0

    # Even-indexed labels get `my`, odd-indexed labels get `mx`, both plus 50,
    # then everything is divided by 96 + 50.
    labels[i][even_idx] = labels[i][even_idx] + my + 50
    labels[i][odd_idx] = labels[i][odd_idx] + mx + 50
    labels[i] = labels[i] / (96 + 50)

from_i = 0
to_i = from_i + 1500

# Three conv / pool / dropout stages followed by a dense regressor with
# 30 linear outputs.
model = tf.keras.models.Sequential([
    Conv2D(input_shape=(96, 96, 1),
           filters=128,
           kernel_size=(5, 5),
           activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),
    Dropout(0.25),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),
    Dropout(0.25),
    Conv2D(filters=48, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),
    Dropout(0.25),
    Flatten(),
    Dense(500, activation='relu'),
    Dropout(0.25),
    Dense(90, activation='relu'),
    Dropout(0.25),
    Dense(30),
])
def reconstruction(x, ch='ddfn', l2_coeff=0):
    """Apply the 'recons' 1x1 convolution (3 filters, L2-regularised) to ``x``.

    Only ``ch == 'ddfn'`` is handled; any other value yields ``None``.
    """
    if ch != 'ddfn':
        return None
    recons_layer = Conv2D(3, 1, name='recons',
                          kernel_regularizer=l2(l2_coeff))
    return recons_layer(x)
# Keras lives in different packages depending on the TensorFlow version; fall
# back to the tf.contrib location only when the import itself fails, so that
# unrelated errors (e.g. KeyboardInterrupt) are not swallowed.
try:
    from tensorflow.python.keras.models import Sequential
    from tensorflow.python.keras.layers import Conv2D, MaxPooling2D
    from tensorflow.python.keras.layers import BatchNormalization
except ImportError:
    from tensorflow.contrib.keras.python.keras.layers import Conv2D, MaxPooling2D
    from tensorflow.contrib.keras.python.keras.models import Sequential
    from tensorflow.contrib.keras.python.keras.layers.normalization import BatchNormalization

from tfutils import Squeeze

# Convolutional network over single-channel images of arbitrary size,
# ending in a 3-way softmax.
model = Sequential([
    Conv2D(64, kernel_size=(3, 3), activation='relu',
           input_shape=(None, None, 1)),
    BatchNormalization(),
    Conv2D(64, kernel_size=(4, 4), activation='relu'),
    MaxPooling2D(),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(),
    Conv2D(200, kernel_size=(4, 4), activation='relu'),
    Conv2D(200, kernel_size=(1, 1), activation='relu'),
    Conv2D(3, kernel_size=(1, 1), activation='softmax'),  # filters num == # of labels
    Squeeze(3),
])
def build_model3(self):
    """Build the convolutional VAE, its encoder and its decoder.

    The encoder has six conv + max-pool stages, a flatten and two dense heads
    (mean and log-variance) sampled through ``self.sampling``. The latent
    vector is reshaped to (5, 5, 1) -- presumably ``self.z_dim`` must be 25,
    confirm -- and passed through a stack of up-sampling / convolution layers
    shared between the full VAE and the stand-alone decoder.

    Returns:
        Tuple ``(vae, vae_encoder, vae_decoder)``; ``vae`` is compiled with
        Adam and the sum of reconstruction and KL losses.
    """
    # ---- Encoder ----
    vae_input = Input(shape=self.input_dim)
    encoded = vae_input
    for n_filters in (16, 16, 16, 16, 8, 8):
        encoded = Conv2D(filters=n_filters, kernel_size=3, padding='same',
                         activation='relu')(encoded)
        encoded = MaxPooling2D((2, 2), padding='same')(encoded)

    vae_z_in = Flatten()(encoded)
    print("vae_z_in shape " + str(vae_z_in.shape))

    vae_z_mean = Dense(self.z_dim)(vae_z_in)
    vae_z_log_var = Dense(self.z_dim)(vae_z_in)
    vae_z = Lambda(self.sampling)([vae_z_mean, vae_z_log_var])
    vae_z_input = Input(shape=(self.z_dim, ))

    # ---- Decoder layers (shared by both decoding paths) ----
    vae_z_out = Reshape((5, 5, 1))
    vae_z_out_model = vae_z_out(vae_z)

    decoder_stack = [UpSampling2D((3, 4))]
    for n_filters in (16, 16, 8, 4):
        decoder_stack.append(
            Conv2D(filters=n_filters, kernel_size=(3, 3), padding='same',
                   activation='relu'))
        decoder_stack.append(UpSampling2D((2, 2)))
    decoder_stack.append(
        Conv2D(filters=1, kernel_size=(3, 3), padding='same',
               activation='sigmoid'))

    def run_decoder(tensor):
        # Apply the shared stack, keeping every intermediate tensor.
        stages = []
        for layer in decoder_stack:
            tensor = layer(tensor)
            stages.append(tensor)
        return stages

    # Path 1: encoder output -> reconstruction (full VAE).
    vae_d6_model = run_decoder(vae_z_out_model)[-1]

    # Path 2: external latent input -> reconstruction (decoder only).
    decoder_stages = run_decoder(vae_z_out(vae_z_input))
    vae_d6_decoder = decoder_stages[-1]

    # Stage order is [u1, d2, u2, d3, u3, d4, u4, d5, u5, d6]; the first line
    # reports the first up-sampling output under the "d1" label.
    print("vae_d1_decoder shape " + str(decoder_stages[0].shape))
    print("vae_d2_decoder shape " + str(decoder_stages[1].shape))
    print("vae_d3_decoder shape " + str(decoder_stages[3].shape))
    print("vae_d4_decoder shape " + str(decoder_stages[5].shape))
    print("vae_d5_decoder shape " + str(decoder_stages[7].shape))

    # Models
    vae = Model(vae_input, vae_d6_model)
    vae_encoder = Model(vae_input, vae_z)
    vae_decoder = Model(vae_z_input, vae_d6_decoder)

    def vae_r_loss(y_true, y_pred):
        # Reconstruction term: 10 x mean squared error over flattened tensors.
        diff = K.flatten(y_true) - K.flatten(y_pred)
        return 10 * K.mean(K.square(diff), axis=-1)

    def vae_kl_loss(y_true, y_pred):
        # KL term computed from the encoder's mean / log-variance tensors.
        kl_terms = 1 + vae_z_log_var - K.square(vae_z_mean) - K.exp(vae_z_log_var)
        return -0.5 * K.mean(kl_terms, axis=-1)

    def vae_loss(y_true, y_pred):
        return vae_r_loss(y_true, y_pred) + vae_kl_loss(y_true, y_pred)

    vae.compile(optimizer=Adam(lr=0.005),
                loss=vae_loss,
                metrics=[vae_r_loss, vae_kl_loss])
    vae.summary()

    return (vae, vae_encoder, vae_decoder)
def build_fpn(backbone,
              fpn_layers,
              classes=21,
              activation='softmax',
              upsample_rates=(2, 2, 2),
              last_upsample=4,
              pyramid_filters=256,
              segmentation_filters=128,
              use_batchnorm=False,
              dropout=None,
              interpolation='bilinear'):
    """
    Implementation of FPN head for segmentation models according to:
        http://presentations.cocodataset.org/COCO17-Stuff-FAIR.pdf

    Args:
        backbone: Keras `Model`, some classification model without top
        fpn_layers: list of layer names or indexes, used for pyramid building
        classes: int, number of output feature maps
        activation: activation in last layer, e.g. 'sigmoid' or 'softmax'
        upsample_rates: tuple of integers, scaling rates between pyramid
            blocks; must contain exactly one rate per entry of `fpn_layers`
        last_upsample: rate for upsampling the concatenated pyramid
            predictions to match the spatial resolution of the input data
        pyramid_filters: int, number of filters in `M` blocks of the
            top-down FPN branch
        segmentation_filters: int, number of filters in `P` blocks of FPN
        use_batchnorm: bool, forwarded to the pyramid blocks and to the
            final convolution block
        dropout: float [0, 1), dropout rate; `None` disables the
            `SpatialDropout2D` layer
        interpolation: interpolation mode forwarded to every `ResizeImage`
            layer (default 'bilinear')

    Returns:
        model: Keras `Model` mapping `backbone.input` to the activated
            segmentation map

    Raises:
        ValueError: if `upsample_rates` and `fpn_layers` differ in length
    """
    if len(upsample_rates) != len(fpn_layers):
        raise ValueError(
            'Number of intermediate feature maps and upsample steps should match'
        )

    # extract model layer outputs (the top output is included, so there is
    # one more output than there are entries in `fpn_layers`)
    outputs = extract_outputs(backbone, fpn_layers, include_top=True)

    # add upsample rate `1` for first block
    upsample_rates = [1] + list(upsample_rates)

    # top - down path, build pyramid
    m = None
    pyramid = []
    for i, c in enumerate(outputs):
        m, p = pyramid_block(pyramid_filters=pyramid_filters,
                             segmentation_filters=segmentation_filters,
                             upsample_rate=upsample_rates[i],
                             use_batchnorm=use_batchnorm,
                             stage=i)(c, m)
        pyramid.append(p)

    # upsample and concatenate all pyramid layers.
    # The pyramid is walked from its last stage back to its first one. The
    # i-th level in that order still has to be enlarged by the rates of the
    # `i` stages that come after it, i.e. the LAST `i` entries of
    # `upsample_rates`. The previous code multiplied the FIRST entries and
    # skipped the resize whenever `upsample_rates[i]` was 1, which only
    # gives the right factor when all rates are equal.
    # NOTE(review): assumes `pyramid_block` enlarges the previous level by
    # `upsample_rate` before merging it - confirm against its definition.
    n_stages = len(upsample_rates)
    upsampled_pyramid = []
    for i, p in enumerate(pyramid[::-1]):
        if i > 0:
            scale = np.prod(upsample_rates[n_stages - i:])
            if scale > 1:
                p = ResizeImage(to_tuple(scale),
                                interpolation=interpolation)(p)
        upsampled_pyramid.append(p)

    x = Concatenate()(upsampled_pyramid)

    # final convolution
    n_filters = segmentation_filters * len(pyramid)
    x = Conv2DBlock(n_filters, (3, 3),
                    use_batchnorm=use_batchnorm,
                    padding='same')(x)
    if dropout is not None:
        x = SpatialDropout2D(dropout)(x)

    x = Conv2D(classes, (3, 3), padding='same')(x)

    # upsampling to original spatial resolution
    x = ResizeImage(to_tuple(last_upsample), interpolation=interpolation)(x)

    # activation
    x = Activation(activation)(x)

    model = Model(backbone.input, x)
    return model
target_size=(sz, sz), batch_size=16, class_mode='categorical') test_gen = datagen.flow_from_directory(test_dir, target_size=(sz, sz), batch_size=16, class_mode='categorical') label_map = train_gen.class_indices print(label_map) label_map1 = test_gen.class_indices print(label_map1) model = Sequential() model.add(Conv2D(64, (3, 3), input_shape=input_shape)) model.add(Activation('relu')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Conv2D(32, (3, 3))) model.add(Activation('relu')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Conv2D(32, (3, 3))) model.add(Activation('relu')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Flatten()) # model.add(Dense(64)) # model.add(Activation('softmax')) # model.add(Dropout(0.5))
def build(self, input_shape):
    """Assemble the detection network and store it in ``self.model``.

    The graph is a VGG-style convolutional trunk, followed by two
    convolutions named ``conv6``/``conv7`` and five ``ConvBn`` "extra"
    stages. Seven feature maps (``block4_conv3``, ``conv7`` and the five
    extras) are each passed through the matching entries of
    ``self.conf_layers``, ``self.loc_layers`` and ``self.anchor_layers``.

    Args:
        input_shape: shape handed to ``Input(shape=...)``.

    Returns:
        None. The resulting ``Model`` (named ``'ssd_vgg16'``) has three
        outputs, concatenated along axis 1 and named ``'scores'``,
        ``'offsets'`` and ``'default_boxes'``.
    """

    def conv3x3(filters, name):
        # every trunk convolution is a 3x3, same-padded, ReLU-activated layer
        return Conv2D(filters, 3, padding='same', activation='relu',
                      name=name)

    input_tensor = Input(shape=input_shape)

    # vgg blocks 1-3: a stack of convolutions followed by a 2x2/2 max-pool
    net = input_tensor
    trunk_spec = [(64, 2), (128, 2), (256, 3)]
    for block_no, (filters, depth) in enumerate(trunk_spec, start=1):
        for conv_no in range(1, depth + 1):
            net = conv3x3(filters,
                          'block%d_conv%d' % (block_no, conv_no))(net)
        net = MaxPooling2D(2, 2, padding='same',
                           name='block%d_pool' % block_no)(net)

    # vgg block 4: the third convolution is kept as the first feature map
    net = conv3x3(512, 'block4_conv1')(net)
    net = conv3x3(512, 'block4_conv2')(net)
    conv43 = conv3x3(512, 'block4_conv3')(net)
    net = MaxPooling2D(2, 2, padding='same', name='block4_pool')(conv43)

    # vgg block 5: pooled with a 3x3 window and stride 1
    for conv_no in (1, 2, 3):
        net = conv3x3(512, 'block5_conv%d' % conv_no)(net)
    net = MaxPooling2D(3, 1, padding='same', name='block5_pool')(net)

    # vgg fc modified as conv
    conv6 = Conv2D(1024, 3, dilation_rate=6, activation='relu',
                   padding='same', name='conv6')(net)
    conv7 = Conv2D(1024, 1, activation='relu', padding='same',
                   name='conv7')(conv6)

    # extra feature map layers: a 1x1 "shrink" ConvBn followed by a wider
    # ConvBn. Only the first two stages pass strides/padding explicitly;
    # the others rely on ConvBn's own defaults.
    extra_spec = [
        (256, 512, 3, {'strides': 2, 'padding': 'same'}),
        (128, 256, 3, {'strides': 2, 'padding': 'same'}),
        (128, 256, 3, {}),
        (128, 256, 3, {}),
        (128, 256, 4, {}),
    ]
    extras = []
    tail = conv7
    for stage_no, (shrink, filters, kernel, opts) in enumerate(extra_spec,
                                                               start=1):
        stage_name = 'extra%d' % stage_no
        tail = ConvBn(shrink, 1, name=stage_name + '_shrink')(tail)
        tail = ConvBn(filters, kernel, name=stage_name, **opts)(tail)
        extras.append(tail)

    # heads: apply the per-level conf / loc / anchor layers in that order
    feature_maps = [conv43, conv7] + extras
    confs, locs, anchors = [], [], []
    for level, fmap in enumerate(feature_maps):
        confs.append(self.conf_layers[level](fmap))
        locs.append(self.loc_layers[level](fmap))
        anchors.append(self.anchor_layers[level](fmap))

    def as_rows(tensors, width):
        # reshape each head output to (-1, width) so levels can be stacked
        return [Reshape((-1, width))(tensor) for tensor in tensors]

    confs_reshaped = as_rows(confs, 1)
    locs_reshaped = as_rows(locs, 4)
    anchors_reshaped = as_rows(anchors, 4)

    conf_concat = Concatenate(axis=1, name='scores')(confs_reshaped)
    loc_concat = Concatenate(axis=1, name='offsets')(locs_reshaped)
    anchor_concat = Concatenate(axis=1,
                                name='default_boxes')(anchors_reshaped)

    self.model = Model(input_tensor,
                       [conf_concat, loc_concat, anchor_concat],
                       name='ssd_vgg16')
def MobileNetV2(classes=1000,
                input_tensor=None,
                input_shape=(512, 512, 3),
                weights_info=None,
                OS=16,
                alpha=1.,
                include_top=True):
    """ Instantiates a MobileNetV2 network with dilated later blocks.

    The stride of block 6 is 1 and blocks 7-16 use dilation rates 2 and 4
    instead of further striding. A global-average-pooling + softmax
    classifier is always built; with `include_top=False` the returned model
    is cut before it.

    # Arguments
        classes: Integer, number of units of the final `Dense` layer.
            Overridden by `weights_info["classes"]` when `weights_info`
            points to a weights file on disk.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`) to use as image input for the model.
        input_shape: shape of input image, format HxWxC (or CxHxW when the
            backend image data format is 'channels_first'). May be `None`
            when `input_tensor` is given; the shape is then read from the
            tensor.
        weights_info: optional dict with the keys `weights` and `classes`.
            `weights` is one of 'imagenet' (ImageNet weights are downloaded
            and loaded), 'pascal_voc' or 'cityscapes' (accepted, but this
            function loads no weights for them), the path of an existing
            weights file (loaded with `load_weights`) or None (random
            initialization). `classes` is required together with a weights
            path and gives the number of output units of that file.
        OS: not read by this function; kept so existing callers that pass
            it keep working.
        alpha: controls the width of the MobileNetV2 network. This is known
            as the width multiplier in the MobileNetV2 paper.
                - If `alpha` < 1.0, proportionally decreases the number
                    of filters in each layer.
                - If `alpha` > 1.0, proportionally increases the number
                    of filters in each layer.
                - If `alpha` = 1, default number of filters from the paper
                    are used at each layer.
            With 'imagenet' weights only 0.35, 0.50, 0.75, 1.0, 1.3 and 1.4
            are accepted.
        include_top: Boolean. If True the full classifier is returned,
            otherwise a model ending at the layer with index -6 of the full
            model.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: if `weights_info["weights"]` is neither a known name
            nor an existing path accompanied by `classes`, if `classes` is
            None, or if `alpha` is not supported by the ImageNet weights.
    """
    weights = None
    if weights_info is not None:
        requested = weights_info.get("weights")
        if requested is None:
            weights = None
        elif requested in {'imagenet', 'pascal_voc', 'cityscapes'}:
            weights = requested
        elif (os.path.exists(requested)
              and weights_info.get("classes") is not None):
            # custom weights file: the head must match the file's output size
            classes = int(weights_info["classes"])
            weights = requested
        else:
            raise ValueError(
                'The `weights` should be either '
                '`None` (random initialization), `imagenet`, `pascal_voc`, `cityscapes`, '
                'original weights path (pre-training on original data), '
                'or the path to the weights file to be loaded and'
                '`classes` should be number of original weights output units')

    if classes is None:
        raise ValueError('`classes` should be any number')

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    # If input_shape is None, infer shape from input_tensor (dropping the
    # leading batch dimension). Previously this case crashed on indexing.
    if input_shape is None:
        input_shape = backend.int_shape(img_input)[1:]

    if backend.image_data_format() == 'channels_first':
        rows = input_shape[1]
        cols = input_shape[2]
    else:
        rows = input_shape[0]
        cols = input_shape[1]

    if weights == 'imagenet':
        if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of `0.35`, `0.50`, `0.75`, '
                             '`1.0`, `1.3` or `1.4` only.')

        # `rows` only selects the ImageNet weights file below; fall back to
        # the 224 variant for any other resolution.
        if rows != cols or rows not in [96, 128, 160, 192, 224]:
            rows = 224

    first_block_filters = _make_divisible(32 * alpha, 8)
    x = Conv2D(first_block_filters,
               kernel_size=3,
               strides=(2, 2),
               padding='same',
               use_bias=False,
               name='Conv')(img_input)
    x = BatchNormalization(epsilon=1e-3, momentum=0.999, name='Conv_BN')(x)
    x = Activation(relu6, name='Conv_Relu6')(x)

    x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1,
                            expansion=1, block_id=0, skip_connection=False)

    x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2,
                            expansion=6, block_id=1, skip_connection=False)
    x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1,
                            expansion=6, block_id=2, skip_connection=True)

    x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2,
                            expansion=6, block_id=3, skip_connection=False)
    x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
                            expansion=6, block_id=4, skip_connection=True)
    x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
                            expansion=6, block_id=5, skip_connection=True)

    # stride in block 6 changed from 2 -> 1, so we need to use rate = 2
    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,  # 1!
                            expansion=6, block_id=6, skip_connection=False)
    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                            expansion=6, block_id=7, skip_connection=True)
    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                            expansion=6, block_id=8, skip_connection=True)
    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                            expansion=6, block_id=9, skip_connection=True)

    x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                            expansion=6, block_id=10, skip_connection=False)
    x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                            expansion=6, block_id=11, skip_connection=True)
    x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                            expansion=6, block_id=12, skip_connection=True)

    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1,
                            rate=2,  # 1!
                            expansion=6, block_id=13, skip_connection=False)
    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=4,
                            expansion=6, block_id=14, skip_connection=True)
    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=4,
                            expansion=6, block_id=15, skip_connection=True)

    x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, rate=4,
                            expansion=6, block_id=16, skip_connection=False)

    # the last 1x1 convolution is only widened for alpha > 1
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)
    else:
        last_block_filters = 1280

    x = Conv2D(last_block_filters,
               kernel_size=1,
               use_bias=False,
               name='Conv_1')(x)
    x = BatchNormalization(epsilon=1e-3, momentum=0.999, name='Conv_1_bn')(x)
    x = Activation(relu6, name='out_relu')(x)

    x = GlobalAveragePooling2D()(x)
    x = Dense(classes, activation='softmax')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='mobilenetv2')

    # Load weights.
    if weights == 'imagenet':
        print("movilenetv2 load model imagenet")
        model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                      str(alpha) + '_' + str(rows) + '.h5')
        weight_path = BASE_WEIGHT_PATH + model_name
        weights_path = data_utils.get_file(model_name,
                                           weight_path,
                                           cache_subdir='models')
        model.load_weights(weights_path)
    elif weights not in {'pascal_voc', 'cityscapes', None}:
        # anything else that passed validation is a weights file on disk
        model.load_weights(weights)

    if include_top:
        return model

    # get last _inverted_res_block layer
    no_top_model = Model(inputs=model.input,
                         outputs=model.get_layer(index=-6).output)
    return no_top_model
# Comienza la construcción del modelo Keras Sequential. model = Sequential() # Agrega una capa de entrada que es similar a un feed_dict en TensorFlow. # Tenga en cuenta que la forma de entrada debe ser una tupla que contenga el tamaño de la imagen. model.add(InputLayer(input_shape=img_shape_full)) # La entrada es una matriz aplanada con 784 elementos (img_size * img_size), # pero las capas convolucionales esperan imágenes con forma (28, 28, 1), por tanto hacemos un reshape model.add(Reshape(img_shape_full)) # Primera capa convolucional con ReLU-activation y max-pooling. model.add( Conv2D(kernel_size=5, strides=1, filters=16, padding='same', activation='relu', name='layer_conv1')) model.add(MaxPooling2D(pool_size=2, strides=2)) # Segunda capa convolucional con ReLU-activation y max-pooling. model.add( Conv2D(kernel_size=5, strides=1, filters=36, padding='same', activation='relu', name='layer_conv2')) model.add(MaxPooling2D(pool_size=2, strides=2)) # Aplanar la salida de 4 niveles de las capas convolucionales