def __init__(self, model_file, num_classes, input_image_size):
    """Set up the MobileNet feature extractor and the softmax heads.

    Args:
        model_file: Path of the cached MobileNet model. Its existence is
            checked with ``gfile.Exists``; it is written with ``save`` and
            read back with ``models.load_model``.
        num_classes: Number of units of the ``final_output`` softmax layers.
        input_image_size: Input shape handed to ``MobileNet``. Any value
            other than ``(224, 224, 3)`` bypasses the cache and rebuilds
            (and re-saves) the network.

    Sets ``mobile_net_model``, ``inputs``, ``outputs``, ``one_layer_model``
    and ``final_model`` on the instance.
    """

    def truncate_at_pooling(full_model):
        # Keep only the part of MobileNet up to its pooling layer so the
        # extractor emits features rather than ImageNet class scores.
        return models.Model(
            inputs=full_model.input,
            outputs=full_model.get_layer('global_average_pooling2d').output)

    if not gfile.Exists(model_file) or input_image_size != (224, 224, 3):
        print("need download the model")
        mobile_net = MobileNet(weights='imagenet',
                               input_shape=input_image_size)
        self.mobile_net_model = truncate_at_pooling(mobile_net)
        print("save the downloaded model for reuse")
        mobile_net.save(model_file)
    else:
        # The cache file holds the *complete* MobileNet (that is what gets
        # saved above), so it must be truncated exactly like a fresh model.
        # Previously the loaded model was used as-is, which made the cached
        # path produce classifier outputs instead of pooled features.
        self.mobile_net_model = truncate_at_pooling(
            models.load_model(model_file))

    # Stand-alone classifier operating on pre-computed feature vectors.
    # NOTE(review): the width 1024 presumably matches the pooled MobileNet
    # features - confirm for non-default input sizes.
    self.inputs = layers.Input(shape=(1024, ))
    self.outputs = layers.Dense(num_classes,
                                activation='softmax',
                                name='final_output')(self.inputs)
    self.one_layer_model = models.Model(inputs=[self.inputs],
                                        outputs=[self.outputs])

    # End-to-end model: feature extractor followed by its own softmax layer.
    # This is a second Dense instance; it only shares the layer *name* with
    # the one used in `one_layer_model`.
    final_output = layers.Dense(num_classes,
                                activation='softmax',
                                name='final_output')(
                                    self.mobile_net_model.output)
    self.final_model = models.Model(inputs=self.mobile_net_model.inputs,
                                    outputs=final_output)
def build():
    """Assemble a MobileNet encoder followed by a five-stage upsampling decoder.

    The encoder is an ImageNet-pretrained MobileNet for 224x224 RGB input
    with its classifier removed. Each decoder stage applies a 5x5
    convolution and a 2x nearest-neighbour upsampling; three of the stages
    additionally add an intermediate encoder activation (skip connection).
    A final 1x1 convolution reduces the result to a single channel.

    Returns:
        A Keras ``Model`` from the MobileNet input to the decoder output.
    """
    encoder = MobileNet(
        input_shape=(224, 224, 3),
        alpha=1.0,
        depth_multiplier=1,
        dropout=1e-3,
        include_top=False,
        weights='imagenet',
        input_tensor=None,
        pooling=None)

    # One entry per decoder stage: (conv filters, encoder layer to add after
    # upsampling, or None when the stage has no skip connection).
    decoder_plan = (
        (512, None),
        (256, "conv_pw_5_relu"),
        (128, "conv_pw_3_relu"),
        (64, "conv_pw_1_relu"),
        (32, None),
    )

    tensor = encoder.layers[-1].output
    for filters, skip_name in decoder_plan:
        tensor = Conv2D(filters, (5, 5), padding="same")(tensor)
        tensor = UpSampling2D(size=(2, 2), interpolation="nearest")(tensor)
        if skip_name is not None:
            skip = encoder.get_layer(skip_name).output
            tensor = Add()([tensor, skip])

    # Pointwise convolution down to one output channel.
    tensor = Conv2D(1, (1, 1), padding="same")(tensor)

    return Model(inputs=encoder.input, outputs=tensor)
def create_object_basic_model():
    """Build an 8-way softmax classifier on top of a headless MobileNet.

    The backbone uses ImageNet weights and a square RGB input of side
    ``IMAGE_SIZE``. The ``conv_pw_13_relu`` activation is globally
    average-pooled and fed to a ``Dense(8, softmax)`` layer.

    Returns:
        A Keras ``Model`` from the backbone input to the 8 class scores.
    """
    backbone = MobileNet(weights='imagenet',
                         include_top=False,
                         input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    features = backbone.get_layer('conv_pw_13_relu').output
    pooled = GlobalAveragePooling2D()(features)
    scores = Dense(8, activation='softmax')(pooled)
    return Model(inputs=backbone.input, outputs=scores)
def mobilenet_v1(num_classes, inputs, modifier=None):
    """Wrap MobileNet as a multi-output feature backbone.

    Args:
        num_classes: Not used by this function.
        inputs: Tensor used both as MobileNet's ``input_tensor`` and as the
            input of the returned model.
        modifier: Optional callable applied to the finished backbone; its
            return value is returned instead of the backbone.

    Returns:
        A Keras model (named like the underlying MobileNet) whose outputs are
        the ``conv_pw_5_relu``, ``conv_pw_11_relu`` and ``conv_pw_13_relu``
        activations, or whatever ``modifier`` returns for it.
    """
    from tensorflow.keras.applications import MobileNet

    feature_layers = ('conv_pw_5_relu', 'conv_pw_11_relu', 'conv_pw_13_relu')

    base = MobileNet(input_tensor=inputs, include_top=False, pooling=None)
    taps = []
    for layer_name in feature_layers:
        taps.append(base.get_layer(layer_name).output)

    wrapped = keras.models.Model(inputs=inputs, outputs=taps, name=base.name)
    if modifier:
        return modifier(wrapped)
    return wrapped
def _get_encoder(self):
    """Return a frozen MobileNet exposing five intermediate activations.

    The backbone is built for ``self.input_shape`` with ImageNet weights and
    no classifier. The returned model outputs, in this order, the
    ``conv_pw_1_relu``, ``conv_pw_3_relu``, ``conv_pw_5_relu``,
    ``conv_pw_11_relu`` and ``conv_pw_13_relu`` activations and has
    ``trainable`` set to ``False``.
    """
    skip_names = (
        'conv_pw_1_relu',
        'conv_pw_3_relu',
        'conv_pw_5_relu',
        'conv_pw_11_relu',
        'conv_pw_13_relu',
    )
    backbone = MobileNet(weights='imagenet',
                         include_top=False,
                         input_shape=self.input_shape)

    taps = []
    for skip_name in skip_names:
        taps.append(backbone.get_layer(skip_name).output)

    encoder = tf.keras.Model(inputs=backbone.input, outputs=taps)
    encoder.trainable = False
    return encoder
def mobilenet_encoder(input_shape=[224, 224, 3]):
    """Build a frozen multi-output MobileNet encoder.

    Args:
        input_shape: Input shape passed to ``MobileNet``.

    Returns:
        A ``tf.keras.Model`` with ``trainable = False`` whose outputs are the
        ``conv_pw_1_relu``, ``conv_pw_3_relu``, ``conv_pw_5_relu``,
        ``conv_pw_11_relu`` and ``conv_pw_13_relu`` activations of an
        ImageNet-pretrained, headless MobileNet.
    """
    backbone = MobileNet(weights='imagenet',
                         include_top=False,
                         input_shape=input_shape)
    backbone.trainable = False

    taps = []
    for block in (1, 3, 5, 11, 13):
        taps.append(backbone.get_layer(f'conv_pw_{block}_relu').output)

    encoder = tf.keras.Model(inputs=backbone.input, outputs=taps)
    # Freeze the wrapper as well as the backbone it was carved from.
    encoder.trainable = False
    return encoder
def get_mobilenet_SSD(image_size, num_classes):
    """Build an SSD-style detector with six prediction heads on MobileNet.

    Args:
        image_size: Input shape passed to ``MobileNet``.
        num_classes: Size of the last axis of the confidence output.

    Returns:
        A ``tf.keras.Model`` with two outputs, ``[confs, locs]``: the
        per-head confidence tensors reshaped to ``(boxes, num_classes)`` and
        the per-head location tensors reshaped to ``(boxes, 4)``, each
        concatenated over the six heads along axis 1.
    """
    mobilenet = MobileNet(input_shape=image_size,
                          include_top=False,
                          weights="imagenet")
    # Every backbone layer gets a '_base' suffix; the lookups below use the
    # suffixed names.
    for base_layer in mobilenet.layers:
        base_layer._name = base_layer.name + '_base'

    conf_outputs = []
    loc_outputs = []

    def attach_heads(feature_map, boxes_per_cell):
        # Confidence head: 3x3 conv, then flatten to (boxes, num_classes).
        conf = layers.Conv2D(boxes_per_cell * num_classes,
                             kernel_size=3,
                             padding='same')(feature_map)
        conf = layers.Reshape(
            (conf.shape[1] * conf.shape[2] * conf.shape[3] // num_classes,
             num_classes))(conf)
        # Location head: 3x3 conv, then flatten to (boxes, 4).
        loc = layers.Conv2D(boxes_per_cell * 4, kernel_size=3,
                            padding='same')(feature_map)
        loc = layers.Reshape(
            (loc.shape[1] * loc.shape[2] * loc.shape[3] // 4, 4))(loc)
        conf_outputs.append(conf)
        loc_outputs.append(loc)

    # Head 1: batch-normalised output of the 'conv_pad_6' backbone layer.
    x = layers.BatchNormalization(
        beta_initializer='glorot_uniform',
        gamma_initializer='glorot_uniform')(
            mobilenet.get_layer(name='conv_pad_6_base').output)
    attach_heads(x, 4)

    # Head 2: pooled 'conv_pad_12' output followed by a dilated 3x3 and a
    # 1x1 convolution.
    x = layers.MaxPool2D(3, 1, padding='same')(
        mobilenet.get_layer(name='conv_pad_12_base').output)
    x = layers.Conv2D(1024,
                      3,
                      padding='same',
                      dilation_rate=6,
                      activation='relu')(x)
    x = layers.Conv2D(1024, 1, padding='same', activation='relu')(x)
    attach_heads(x, 6)

    # Heads 3-6: a 1x1 conv then a 3x3 conv; the first two stages stride by
    # 2 with 'same' padding, the last two use the Conv2D defaults.
    extra_stages = (
        (256, 512, {'strides': 2, 'padding': 'same'}, 6),
        (128, 256, {'strides': 2, 'padding': 'same'}, 6),
        (128, 256, {}, 4),
        (128, 256, {}, 4),
    )
    for squeeze, expand, conv_kwargs, boxes_per_cell in extra_stages:
        x = layers.Conv2D(squeeze, 1, activation='relu')(x)
        x = layers.Conv2D(expand, 3, activation='relu', **conv_kwargs)(x)
        attach_heads(x, boxes_per_cell)

    confs = layers.concatenate(conf_outputs, axis=1)
    locs = layers.concatenate(loc_outputs, axis=1)
    return tf.keras.Model(inputs=mobilenet.layers[0].output,
                          outputs=[confs, locs])
def create_model(opt,
                 metrics,
                 loss,
                 trainable_pretrained=True,
                 input_shape=(224, 224, 3)):
    """Build a MobileNet encoder/decoder with a 2-channel softmax output.

    Args:
        opt: Optimizer; when truthy the model is compiled with it.
        metrics: Metrics forwarded to ``compile``.
        loss: Loss forwarded to ``compile``.
        trainable_pretrained: Value assigned to the backbone's ``trainable``.
        input_shape: Input shape passed to ``MobileNet``.

    Returns:
        A Keras ``Model`` whose output is the 2-channel softmax map
        concatenated with the unmodified input image.
    """
    backbone = MobileNet(input_shape=input_shape,
                         weights='imagenet',
                         include_top=False)
    backbone.trainable = trainable_pretrained

    # Identity pass-through so the input image can be appended to the output.
    original_image = Lambda(lambda x: x, name='original_image')(backbone.input)

    # Backbone activations used as skip connections, deepest first, each
    # projected by a 1x1 convolution to the listed number of filters.
    skip_specs = (
        ("conv_pw_11_relu", 1024),
        ("conv_pw_5_relu", 64),
        ("conv_pw_3_relu", 64),
        ("conv_pw_1_relu", 64),
    )
    skips = []
    for idx, (layer_name, filters) in enumerate(skip_specs):
        projection = Conv2D(filters,
                            kernel_size=1,
                            name=f'skip_hair_conv_{idx}')
        skips.append(projection(backbone.get_layer(name=layer_name).output))
    skips.append(None)  # the last decoder stage has no skip connection

    x = backbone.output
    for stage, skip in enumerate(skips):
        x = UpSampling2D(name=f'upsampling_hair_{stage}')(x)
        if skip is not None:
            x = Add(name=f'skip_hair_add_{stage}')([x, skip])
        x = DepthwiseConv2D(
            kernel_size=3,
            padding='same',
            name=f'depth_conv2d_hair_{stage}',
            kernel_initializer=GlorotNormal(seed=(stage + 1)),
        )(x)
        x = Conv2D(
            64,
            kernel_size=1,
            padding='same',
            name=f'conv2d_hair_{stage}',
            kernel_regularizer=L2(2e-5),
            kernel_initializer=GlorotNormal(seed=11 * (stage + 1)),
        )(x)
        x = ReLU(name=f'relu_hair_{stage}')(x)

    x = Conv2D(2,
               kernel_size=1,
               padding='same',
               name='conv2d_hair_final',
               kernel_regularizer=L2(2e-5),
               kernel_initializer=GlorotNormal(seed=0))(x)
    # The layer is a Softmax even though its name says "sigmoid".
    x = Softmax(name='sigmoid_hair_final')(x)
    x = Concatenate()([x, original_image])

    model = Model(backbone.input, x)
    if opt:
        model.compile(
            optimizer=opt,
            loss=loss,
            metrics=metrics,
        )
    return model
def SSD_MOBILENET(
    config,
    label_maps,
    num_predictions=10,
    is_training=True,
):
    """ Construct an SSD network that uses MobileNetV1 backbone.

    Args:
        - config: python dict as read from the config file. The keys read
            here are all under ``config["model"]``: ``input_size``,
            ``l2_regularization``, ``kernel_initializer``, ``default_boxes``
            (with ``min_scale``, ``max_scale``, ``variances`` and ``layers``,
            each layer having ``name`` and ``aspect_ratios``),
            ``extra_box_for_ar_1``, ``width_multiplier`` and
            ``depth_multiplier``.
        - label_maps: A python list containing the classes
        - num_predictions: The number of predictions to produce as final output
        - is_training: whether the model is constructed for training purpose or inference purpose

    Returns:
        - A keras version of SSD300 with MobileNetV1 as backbone network.
          With ``is_training`` the output is the raw ``predictions`` tensor
          (softmax confidences, box offsets and default boxes concatenated on
          the last axis); otherwise it is passed through
          ``DecodeSSDPredictions`` first.

    Code References:
        - https://github.com/chuanqi305/MobileNet-SSD
    """
    model_config = config["model"]
    input_shape = (model_config["input_size"], model_config["input_size"], 3)
    num_classes = len(label_maps) + 1  # for background class
    l2_reg = model_config["l2_regularization"]
    kernel_initializer = model_config["kernel_initializer"]
    default_boxes_config = model_config["default_boxes"]
    extra_box_for_ar_1 = model_config["extra_box_for_ar_1"]
    box_layers = default_boxes_config["layers"]

    # construct the base network and extra feature layers
    base_network = MobileNet(
        input_shape=input_shape,
        alpha=config["model"]["width_multiplier"],
        depth_multiplier=config["model"]["depth_multiplier"],
        classes=num_classes,
        weights='imagenet',
        include_top=False)

    # Rename the input layer by position instead of looking it up as
    # "input_1": Keras numbers auto-named Input layers per process
    # (input_1, input_2, ...), so the by-name lookup raised as soon as any
    # other Input layer had been created before this function ran.
    base_network.layers[0]._name = "input"

    for layer in base_network.layers:
        # NOTE(review): these assign private attributes on already-built
        # layers; whether Keras honours them is not evident from this file.
        layer._kernel_initializer = "he_normal"
        layer._kernel_regularizer = l2(l2_reg)
        layer.trainable = False  # each layer of the base network should not be trainable

    conv13 = base_network.get_layer("conv_pw_13_relu").output

    def conv_block_1(x, filters, name):
        # 1x1 convolution -> batch norm -> ReLU (channel reduction).
        x = Conv2D(filters=filters,
                   kernel_size=(1, 1),
                   padding="valid",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name=name,
                   use_bias=False)(x)
        x = BatchNormalization(name=f"{name}/bn")(x)
        x = ReLU(name=f"{name}/relu")(x)
        return x

    def conv_block_2(x, filters, name):
        # 3x3 stride-2 convolution -> batch norm -> ReLU (downsampling).
        x = Conv2D(filters=filters,
                   kernel_size=(3, 3),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name=name,
                   use_bias=False,
                   strides=(2, 2))(x)
        x = BatchNormalization(name=f"{name}/bn")(x)
        x = ReLU(name=f"{name}/relu")(x)
        return x

    conv14_1 = conv_block_1(x=conv13, filters=256, name="conv14_1")
    conv14_2 = conv_block_2(x=conv14_1, filters=512, name="conv14_2")
    conv15_1 = conv_block_1(x=conv14_2, filters=128, name="conv15_1")
    conv15_2 = conv_block_2(x=conv15_1, filters=256, name="conv15_2")
    conv16_1 = conv_block_1(x=conv15_2, filters=128, name="conv16_1")
    conv16_2 = conv_block_2(x=conv16_1, filters=256, name="conv16_2")
    conv17_1 = conv_block_1(x=conv16_2, filters=128, name="conv17_1")
    conv17_2 = conv_block_2(x=conv17_1, filters=256, name="conv17_2")

    # Intermediate model used only to look feature layers up by name below.
    model = Model(inputs=base_network.input, outputs=conv17_2)

    # construct the prediction layers (conf, loc, & default_boxes)
    scales = np.linspace(default_boxes_config["min_scale"],
                         default_boxes_config["max_scale"],
                         len(box_layers))
    mbox_conf_layers = []
    mbox_loc_layers = []
    mbox_default_boxes_layers = []
    for i, layer in enumerate(box_layers):
        num_default_boxes = get_number_default_boxes(
            layer["aspect_ratios"], extra_box_for_ar_1=extra_box_for_ar_1)
        x = model.get_layer(layer["name"]).output
        layer_name = layer["name"]

        layer_mbox_conf = Conv2D(filters=num_default_boxes * num_classes,
                                 kernel_size=(3, 3),
                                 padding='same',
                                 kernel_initializer=kernel_initializer,
                                 kernel_regularizer=l2(l2_reg),
                                 name=f"{layer_name}_mbox_conf")(x)
        layer_mbox_conf_reshape = Reshape(
            (-1, num_classes),
            name=f"{layer_name}_mbox_conf_reshape")(layer_mbox_conf)

        layer_mbox_loc = Conv2D(filters=num_default_boxes * 4,
                                kernel_size=(3, 3),
                                padding='same',
                                kernel_initializer=kernel_initializer,
                                kernel_regularizer=l2(l2_reg),
                                name=f"{layer_name}_mbox_loc")(x)
        layer_mbox_loc_reshape = Reshape(
            (-1, 4), name=f"{layer_name}_mbox_loc_reshape")(layer_mbox_loc)

        # The last feature layer has no successor scale, so it uses 1.
        next_scale = scales[i + 1] if i + 1 < len(box_layers) else 1
        layer_default_boxes = DefaultBoxes(
            image_shape=input_shape,
            scale=scales[i],
            next_scale=next_scale,
            aspect_ratios=layer["aspect_ratios"],
            variances=default_boxes_config["variances"],
            extra_box_for_ar_1=extra_box_for_ar_1,
            name=f"{layer_name}_default_boxes")(x)
        layer_default_boxes_reshape = Reshape(
            (-1, 8),
            name=f"{layer_name}_default_boxes_reshape")(layer_default_boxes)

        mbox_conf_layers.append(layer_mbox_conf_reshape)
        mbox_loc_layers.append(layer_mbox_loc_reshape)
        mbox_default_boxes_layers.append(layer_default_boxes_reshape)

    # concatenate class confidence predictions from different feature map layers
    mbox_conf = Concatenate(axis=-2, name="mbox_conf")(mbox_conf_layers)
    mbox_conf_softmax = Activation('softmax',
                                   name='mbox_conf_softmax')(mbox_conf)
    # concatenate object location predictions from different feature map layers
    mbox_loc = Concatenate(axis=-2, name="mbox_loc")(mbox_loc_layers)
    # concatenate default boxes from different feature map layers
    mbox_default_boxes = Concatenate(
        axis=-2, name="mbox_default_boxes")(mbox_default_boxes_layers)
    # concatenate confidence score predictions, bounding box predictions, and default boxes
    predictions = Concatenate(axis=-1, name='predictions')(
        [mbox_conf_softmax, mbox_loc, mbox_default_boxes])

    if is_training:
        return Model(inputs=base_network.input, outputs=predictions)

    decoded_predictions = DecodeSSDPredictions(
        input_size=model_config["input_size"],
        num_predictions=num_predictions,
        name="decoded_predictions")(predictions)
    return Model(inputs=base_network.input, outputs=decoded_predictions)
# ... (the start of this comment is outside the visible source) the neural
# network has learned.
from tensorflow.keras.applications import MobileNet, mobilenet
from tensorflow.keras import Model
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, save_img, img_to_array
import tensorflow as tf

img_w = 300
img_h = 300
crop_img = 5

# Instantiate the model and expose the activation of its third-to-last layer.
model = MobileNet(weights="imagenet", include_top=False)
layer = model.get_layer(index=-3)
feature_extractor = Model(inputs=model.inputs, outputs=layer.output)


# Author's note: the following function was copied shamelessly from Chollet;
# it is the only part the author did not fully understand.
def compute_loss(input_image, layer_filter):
    """Return the mean activation of one filter for ``input_image``.

    Args:
        input_image: Input passed to ``feature_extractor``.
        layer_filter: Index of the filter on the last axis of the activation.

    Returns:
        ``tf.reduce_mean`` of the selected filter's activation with a
        two-element border removed on axes 1 and 2.
    """
    border = 2  # the 2s are there because of the borders
    inner = slice(border, -border)

    # This is the output image, using the model as a function.
    feature_maps = feature_extractor(input_image)
    cropped = feature_maps[:, inner, inner, layer_filter]
    return tf.reduce_mean(cropped)
def load_backbone(backbone_type="resnet50",
                  backbone_outputs=('C3', 'C4', 'C5', 'P6', 'P7'),
                  num_features=256):
    """Build a frozen feature-extraction backbone with extra P6/P7 levels.

    Args:
        backbone_type: Backbone name, matched case-insensitively against the
            supported recipes below.
        backbone_outputs: Names of the feature levels to expose. Keys of
            ``BACKBONE_LAYERS[backbone_type]`` plus ``'P6'`` / ``'P7'`` are
            considered.
        num_features: Number of filters of the extra P6 and P7 convolutions.

    Returns:
        A ``Model`` named ``backbone_type`` from the ``images`` input to the
        list of selected feature tensors.

    Raises:
        NotImplementedError: If ``backbone_type`` is not a supported backbone.
    """
    inputs = Input((None, None, 3), name='images')
    kind = backbone_type.lower()

    def from_classifiers(pre, **extra):
        # Backbones obtained from the `Classifiers` registry under `kind`.
        factory, _ = Classifiers.get(kind)
        return factory(input_tensor=pre,
                       include_top=False,
                       weights='imagenet',
                       **extra)

    def with_original_input(pre):
        return from_classifiers(pre, original_input=inputs)

    # kind -> ((rgb, mean_shift, normalize) for BackBonePreProcess, builder).
    # Builders are lambdas so that each backbone's constructor name is only
    # resolved when that backbone is actually requested.
    recipes = {
        'resnet50': ((False, True, 0),
                     lambda pre: ResNet50(input_tensor=pre,
                                          include_top=False)),
        'resnet50v2': ((True, True, 2), from_classifiers),
        'resnet101v2': ((True, False, 2),
                        lambda pre: ResNet101V2(input_tensor=pre,
                                                include_top=False,
                                                backend=tf.keras.backend,
                                                layers=tf.keras.layers,
                                                models=tf.keras.models,
                                                utils=tf.keras.utils)),
        'resnext50': ((True, True, 2),
                      lambda pre: ResNeXt50(input_tensor=pre,
                                            include_top=False)),
        'seresnet50': ((True, True, 3), with_original_input),
        'seresnet34': ((True, False, 0), with_original_input),
        'seresnext50': ((True, True, 3), with_original_input),
        'vgg16': ((False, True, 0),
                  lambda pre: VGG16(input_tensor=pre, include_top=False)),
        'mobilenet': ((False, False, 2),
                      lambda pre: MobileNet(input_tensor=pre,
                                            include_top=False,
                                            alpha=1.0)),
        'efficientnetb2': ((True, True, 3),
                           lambda pre: efn.EfficientNetB2(
                               input_tensor=pre,
                               include_top=False,
                               weights='imagenet')),
        'efficientnetb3': ((True, True, 3),
                           lambda pre: efn.EfficientNetB3(
                               input_tensor=pre,
                               include_top=False,
                               weights='imagenet')),
        'efficientnetb4': ((True, True, 3),
                           lambda pre: efn.EfficientNetB4(
                               input_tensor=pre,
                               include_top=False,
                               weights='imagenet')),
    }
    if kind not in recipes:
        raise NotImplementedError(
            f"backbone_type은 {BACKBONE_LAYERS.keys()} 중에서 하나가 되어야 합니다.")

    (rgb, mean_shift, normalize), build_model = recipes[kind]
    preprocess = BackBonePreProcess(rgb=rgb,
                                    mean_shift=mean_shift,
                                    normalize=normalize)(inputs)
    model = build_model(preprocess)
    model.trainable = False

    # Fetch the block layers requested by the caller.
    features = []
    for key, layer_name in BACKBONE_LAYERS[kind].items():
        if key not in backbone_outputs:
            continue
        layer_tensor = model.get_layer(layer_name).output
        features.append(Identity(name=key)(layer_tensor))

    def extra_level(tensor, tag):
        # Stride-2 3x3 convolution creating one additional pyramid level.
        # MobileNet pads explicitly on the bottom/right and convolves with
        # 'valid' padding; every other backbone uses 'same' padding.
        if kind == "mobilenet":
            tensor = ZeroPadding2D(((0, 1), (0, 1)),
                                   name=tag + '_zeropadding')(tensor)
            pad_mode = 'valid'
        else:
            pad_mode = 'same'
        level = Conv2D(num_features, (3, 3),
                       strides=(2, 2),
                       padding=pad_mode,
                       activation='relu',
                       name=tag + '_conv')(tensor)
        if tag in backbone_outputs:
            features.append(Identity(name=tag)(level))
        return level

    # Extra layers for feature extraction: P6 is built on the last selected
    # feature, P7 on the group-normalised P6.
    p6 = extra_level(features[-1], 'P6')
    extra_level(GroupNormalization(name='P6_norm')(p6), 'P7')

    return Model(inputs, features, name=backbone_type)
def ssd_300(mode, image_size, n_classes, l2_regularization=0.0005, min_scale=None, max_scale=None, scales=None, aspect_ratios_global=None, aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=[8, 16, 32, 64, 100, 300], offsets=None, clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2], coords='centroids', normalize_coords=False, subtract_mean=[123, 117, 104], divide_by_stddev=None, swap_channels=True, return_predictor_sizes=False): n_predictor_layers = 6 # The number of predictor conv layers in the network is 6 for the original SSD300. n_classes += 1 # Account for the background class. l2_reg = l2_regularization # Make the internal name shorter. img_height, img_width, img_channels = image_size[0], image_size[ 1], image_size[2] if aspect_ratios_global is None and aspect_ratios_per_layer is None: raise ValueError( "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified." ) if aspect_ratios_per_layer: if len(aspect_ratios_per_layer) != n_predictor_layers: raise ValueError( "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}." .format(n_predictor_layers, len(aspect_ratios_per_layer))) if (min_scale is None or max_scale is None) and scales is None: raise ValueError( "Either `min_scale` and `max_scale` or `scales` need to be specified." ) if scales: if len(scales) != n_predictor_layers + 1: raise ValueError( "It must be either scales is None or len(scales) == {}, but len(scales) == {}." 
.format(n_predictor_layers + 1, len(scales))) else: # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale` scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1) if len(variances) != 4: raise ValueError( "4 variance values must be pased, but {} values were received.". format(len(variances))) variances = np.array(variances) if np.any(variances <= 0): raise ValueError( "All variances must be >0, but the variances given are {}".format( variances)) if (not (steps is None)) and (len(steps) != n_predictor_layers): raise ValueError( "You must provide at least one step value per predictor layer.") if (not (offsets is None)) and (len(offsets) != n_predictor_layers): raise ValueError( "You must provide at least one offset value per predictor layer.") ############################################################################ # Compute the anchor box parameters. ############################################################################ # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers. if aspect_ratios_per_layer: aspect_ratios = aspect_ratios_per_layer else: aspect_ratios = [aspect_ratios_global] * n_predictor_layers # Compute the number of boxes to be predicted per cell for each predictor layer. # We need this so that we know how many channels the predictor layers need to have. 
if aspect_ratios_per_layer: n_boxes = [] for ar in aspect_ratios_per_layer: if (1 in ar) & two_boxes_for_ar1: n_boxes.append(len(ar) + 1) # +1 for the second box for aspect ratio 1 else: n_boxes.append(len(ar)) else: # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer if (1 in aspect_ratios_global) & two_boxes_for_ar1: n_boxes = len(aspect_ratios_global) + 1 else: n_boxes = len(aspect_ratios_global) n_boxes = [n_boxes] * n_predictor_layers if steps is None: steps = [None] * n_predictor_layers if offsets is None: offsets = [None] * n_predictor_layers x = Input(shape=(img_height, img_width, img_channels)) # The following identity layer is only needed so that the subsequent lambda layers can be optional. x1 = Lambda(lambda z: z, output_shape=(img_height, img_width, img_channels), name='identity_layer')(x) if not (subtract_mean is None): x1 = Lambda(lambda z: z - np.array(subtract_mean), output_shape=(img_height, img_width, img_channels), name='input_mean_normalization')(x1) if not (divide_by_stddev is None): x1 = Lambda(lambda z: z / np.array(divide_by_stddev), output_shape=(img_height, img_width, img_channels), name='input_stddev_normalization')(x1) #if swap_channels and (img_channels == 3): # x1 = Lambda(lambda z: z[..., ::-1], output_shape=(img_height, img_width, img_channels), # name='input_channel_swap')(x1) #conv4_3_norm , fc7 ,test= mobilenet(input_tensor=x1) mobilenet = MobileNet(input_shape=(224, 224, 3), include_top=False, weights='imagenet') FeatureExtractor = Model(inputs=mobilenet.input, outputs=[ mobilenet.get_layer('conv_pw_11_relu').output, mobilenet.get_layer('conv_pw_13_relu').output ]) conv4_3_norm, fc7 = FeatureExtractor(x1) print("conv11 shape: ", conv4_3_norm.shape) print("conv13 shape: ", fc7.shape) conv6_1 = Conv2D(256, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv14_1', use_bias=False)(fc7) conv6_1 = BatchNormalization(momentum=0.99, 
epsilon=0.00001, name='conv14_1/bn')(conv6_1) conv6_1 = Activation('relu', name='relu_conv6_1')(conv6_1) conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)), name='conv6_padding')(conv6_1) conv6_2 = Conv2D(512, (3, 3), strides=(2, 2), padding='valid', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv14_2', use_bias=False)(conv6_1) conv6_2 = BatchNormalization(momentum=0.99, epsilon=0.00001, name='conv14_2/bn')(conv6_2) conv6_2 = Activation('relu', name='relu_conv6_2')(conv6_2) print('conv14 shape', conv6_2.shape) conv7_1 = Conv2D(128, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv15_1', use_bias=False)(conv6_2) conv7_1 = BatchNormalization(momentum=0.99, epsilon=0.00001, name='conv15_1/bn')(conv7_1) conv7_1 = Activation('relu', name='relu_conv7_1')(conv7_1) conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)), name='conv7_padding')(conv7_1) conv7_2 = Conv2D(256, (3, 3), strides=(2, 2), padding='valid', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv15_2', use_bias=False)(conv7_1) conv7_2 = BatchNormalization(momentum=0.99, epsilon=0.00001, name='conv15_2/bn')(conv7_2) conv7_2 = Activation('relu', name='relu_conv7_2')(conv7_2) print('conv15 shape', conv7_2.shape) conv8_1 = Conv2D(128, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv16_1', use_bias=False)(conv7_2) conv8_1 = BatchNormalization(momentum=0.99, epsilon=0.00001, name='conv16_1/bn')(conv8_1) conv8_1 = Activation('relu', name='relu_conv8_1')(conv8_1) conv8_1 = ZeroPadding2D(padding=((1, 1), (1, 1)), name='conv8_padding')(conv8_1) conv8_2 = Conv2D(256, (3, 3), strides=(2, 2), padding='valid', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv16_2', use_bias=False)(conv8_1) conv8_2 = BatchNormalization(momentum=0.99, epsilon=0.00001, name='conv16_2/bn')(conv8_2) conv8_2 = Activation('relu', name='relu_conv8_2')(conv8_2) print('conv16 shape', 
conv8_2.shape) conv9_1 = Conv2D(64, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv17_1', use_bias=False)(conv8_2) conv9_1 = BatchNormalization(momentum=0.99, epsilon=0.00001, name='conv17_1/bn')(conv9_1) conv9_1 = Activation('relu', name='relu_conv9_1')(conv9_1) conv9_1 = ZeroPadding2D(padding=((1, 1), (1, 1)), name='conv9_padding')(conv9_1) conv9_2 = Conv2D(128, (3, 3), strides=(2, 2), padding='valid', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv17_2', use_bias=False)(conv9_1) conv9_2 = BatchNormalization(momentum=0.99, epsilon=0.00001, name='conv17_2/bn')(conv9_2) conv9_2 = Activation('relu', name='relu_conv9_2')(conv9_2) print('conv17 shape', conv9_2.shape) # Feed conv4_3 into the L2 normalization layer # conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3_norm) conv4_3_norm_mbox_conf = Conv2D(n_boxes[0] * n_classes, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv11_mbox_conf')(conv4_3_norm) fc7_mbox_conf = Conv2D(n_boxes[1] * n_classes, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv13_mbox_conf')(fc7) conv6_2_mbox_conf = Conv2D(n_boxes[2] * n_classes, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv14_2_mbox_conf')(conv6_2) conv7_2_mbox_conf = Conv2D(n_boxes[3] * n_classes, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv15_2_mbox_conf')(conv7_2) conv8_2_mbox_conf = Conv2D(n_boxes[4] * n_classes, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv16_2_mbox_conf')(conv8_2) conv9_2_mbox_conf = Conv2D(n_boxes[5] * n_classes, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv17_2_mbox_conf')(conv9_2) # We predict 4 box coordinates for each box, hence the localization predictors 
have depth `n_boxes * 4` # Output shape of the localization layers: `(batch, height, width, n_boxes * 4)` conv4_3_norm_mbox_loc = Conv2D(n_boxes[0] * 4, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv11_mbox_loc')(conv4_3_norm) fc7_mbox_loc = Conv2D(n_boxes[1] * 4, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv13_mbox_loc')(fc7) conv6_2_mbox_loc = Conv2D(n_boxes[2] * 4, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv14_2_mbox_loc')(conv6_2) conv7_2_mbox_loc = Conv2D(n_boxes[3] * 4, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv15_2_mbox_loc')(conv7_2) conv8_2_mbox_loc = Conv2D(n_boxes[4] * 4, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv16_2_mbox_loc')(conv8_2) conv9_2_mbox_loc = Conv2D(n_boxes[5] * 4, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv17_2_mbox_loc')(conv9_2) ### Generate the anchor boxes (called "priors" in the original Caffe/C++ implementation, so I'll keep their layer names) # Output shape of anchors: `(batch, height, width, n_boxes, 8)` conv4_3_norm_mbox_priorbox = AnchorBoxes( img_height, img_width, this_scale=scales[0], next_scale=scales[1], aspect_ratios=aspect_ratios[0], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[0], this_offsets=offsets[0], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv4_3_norm_mbox_priorbox')(conv4_3_norm_mbox_loc) fc7_mbox_priorbox = AnchorBoxes(img_height, img_width, this_scale=scales[1], next_scale=scales[2], aspect_ratios=aspect_ratios[1], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[1], this_offsets=offsets[1], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='fc7_mbox_priorbox')(fc7_mbox_loc) 
conv6_2_mbox_priorbox = AnchorBoxes( img_height, img_width, this_scale=scales[2], next_scale=scales[3], aspect_ratios=aspect_ratios[2], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[2], this_offsets=offsets[2], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv6_2_mbox_priorbox')(conv6_2_mbox_loc) conv7_2_mbox_priorbox = AnchorBoxes( img_height, img_width, this_scale=scales[3], next_scale=scales[4], aspect_ratios=aspect_ratios[3], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[3], this_offsets=offsets[3], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv7_2_mbox_priorbox')(conv7_2_mbox_loc) conv8_2_mbox_priorbox = AnchorBoxes( img_height, img_width, this_scale=scales[4], next_scale=scales[5], aspect_ratios=aspect_ratios[4], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[4], this_offsets=offsets[4], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv8_2_mbox_priorbox')(conv8_2_mbox_loc) conv9_2_mbox_priorbox = AnchorBoxes( img_height, img_width, this_scale=scales[5], next_scale=scales[6], aspect_ratios=aspect_ratios[5], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[5], this_offsets=offsets[5], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv9_2_mbox_priorbox')(conv9_2_mbox_loc) ### Reshape # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)` # We want the classes isolated in the last axis to perform softmax on them conv4_3_norm_mbox_conf_reshape = Reshape( (-1, n_classes), name='conv4_3_norm_mbox_conf_reshape')(conv4_3_norm_mbox_conf) fc7_mbox_conf_reshape = Reshape( (-1, n_classes), name='fc7_mbox_conf_reshape')(fc7_mbox_conf) conv6_2_mbox_conf_reshape = Reshape( (-1, n_classes), name='conv6_2_mbox_conf_reshape')(conv6_2_mbox_conf) conv7_2_mbox_conf_reshape = 
Reshape( (-1, n_classes), name='conv7_2_mbox_conf_reshape')(conv7_2_mbox_conf) conv8_2_mbox_conf_reshape = Reshape( (-1, n_classes), name='conv8_2_mbox_conf_reshape')(conv8_2_mbox_conf) conv9_2_mbox_conf_reshape = Reshape( (-1, n_classes), name='conv9_2_mbox_conf_reshape')(conv9_2_mbox_conf) # Reshape the box predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)` # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss conv4_3_norm_mbox_loc_reshape = Reshape( (-1, 4), name='conv4_3_norm_mbox_loc_reshape')(conv4_3_norm_mbox_loc) fc7_mbox_loc_reshape = Reshape((-1, 4), name='fc7_mbox_loc_reshape')(fc7_mbox_loc) conv6_2_mbox_loc_reshape = Reshape( (-1, 4), name='conv6_2_mbox_loc_reshape')(conv6_2_mbox_loc) conv7_2_mbox_loc_reshape = Reshape( (-1, 4), name='conv7_2_mbox_loc_reshape')(conv7_2_mbox_loc) conv8_2_mbox_loc_reshape = Reshape( (-1, 4), name='conv8_2_mbox_loc_reshape')(conv8_2_mbox_loc) conv9_2_mbox_loc_reshape = Reshape( (-1, 4), name='conv9_2_mbox_loc_reshape')(conv9_2_mbox_loc) # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)` conv4_3_norm_mbox_priorbox_reshape = Reshape( (-1, 8), name='conv4_3_norm_mbox_priorbox_reshape')(conv4_3_norm_mbox_priorbox) fc7_mbox_priorbox_reshape = Reshape( (-1, 8), name='fc7_mbox_priorbox_reshape')(fc7_mbox_priorbox) conv6_2_mbox_priorbox_reshape = Reshape( (-1, 8), name='conv6_2_mbox_priorbox_reshape')(conv6_2_mbox_priorbox) conv7_2_mbox_priorbox_reshape = Reshape( (-1, 8), name='conv7_2_mbox_priorbox_reshape')(conv7_2_mbox_priorbox) conv8_2_mbox_priorbox_reshape = Reshape( (-1, 8), name='conv8_2_mbox_priorbox_reshape')(conv8_2_mbox_priorbox) conv9_2_mbox_priorbox_reshape = Reshape( (-1, 8), name='conv9_2_mbox_priorbox_reshape')(conv9_2_mbox_priorbox) ### Concatenate the predictions from the different layers # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions, # so we 
want to concatenate along axis 1, the number of boxes per layer # Output shape of `mbox_conf`: (batch, n_boxes_total, n_classes) mbox_conf = Concatenate(axis=1, name='mbox_conf')([ conv4_3_norm_mbox_conf_reshape, fc7_mbox_conf_reshape, conv6_2_mbox_conf_reshape, conv7_2_mbox_conf_reshape, conv8_2_mbox_conf_reshape, conv9_2_mbox_conf_reshape ]) # Output shape of `mbox_loc`: (batch, n_boxes_total, 4) mbox_loc = Concatenate(axis=1, name='mbox_loc')([ conv4_3_norm_mbox_loc_reshape, fc7_mbox_loc_reshape, conv6_2_mbox_loc_reshape, conv7_2_mbox_loc_reshape, conv8_2_mbox_loc_reshape, conv9_2_mbox_loc_reshape ]) # Output shape of `mbox_priorbox`: (batch, n_boxes_total, 8) mbox_priorbox = Concatenate(axis=1, name='mbox_priorbox')([ conv4_3_norm_mbox_priorbox_reshape, fc7_mbox_priorbox_reshape, conv6_2_mbox_priorbox_reshape, conv7_2_mbox_priorbox_reshape, conv8_2_mbox_priorbox_reshape, conv9_2_mbox_priorbox_reshape ]) # The box coordinate predictions will go into the loss function just the way they are, # but for the class predictions, we'll apply a softmax activation layer first mbox_conf_softmax = Activation('softmax', name='mbox_conf_softmax')(mbox_conf) # Concatenate the class and box predictions and the anchors to one large predictions vector # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8) predictions = Concatenate(axis=2, name='predictions')( [mbox_conf_softmax, mbox_loc, mbox_priorbox]) if mode == 'training': model = Model(inputs=x, outputs=predictions) elif mode == 'inference': decoded_predictions = DecodeDetections( confidence_thresh=confidence_thresh, iou_threshold=iou_threshold, top_k=top_k, nms_max_output_size=nms_max_output_size, coords=coords, normalize_coords=normalize_coords, img_height=img_height, img_width=img_width, name='decoded_predictions')(predictions) model = Model(inputs=x, outputs=decoded_predictions) elif mode == 'inference_fast': decoded_predictions = DecodeDetectionsFast( confidence_thresh=confidence_thresh, 
iou_threshold=iou_threshold, top_k=top_k, nms_max_output_size=nms_max_output_size, coords=coords, normalize_coords=normalize_coords, img_height=img_height, img_width=img_width, name='decoded_predictions')(predictions) model = Model(inputs=x, outputs=decoded_predictions) else: raise ValueError( "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'." .format(mode)) if return_predictor_sizes: predictor_sizes = np.array([ conv4_3_norm_mbox_conf._keras_shape[1:3], fc7_mbox_conf._keras_shape[1:3], conv6_2_mbox_conf._keras_shape[1:3], conv7_2_mbox_conf._keras_shape[1:3], conv8_2_mbox_conf._keras_shape[1:3], conv9_2_mbox_conf._keras_shape[1:3] ]) return model, predictor_sizes else: return model
def create_lightweight_mobilenet(
        layer_name,
        number,
        *,
        train_dir='/home/dhaval/piyush/Usecases_dataset/voc_dataset_created/training_data',
        val_dir='/home/dhaval/piyush/Usecases_dataset/voc_dataset_created/validation_data',
        num_classes=20,
        epochs=200,
        batch_size=32,
        learning_rate=0.01):
    """Train a truncated MobileNet classifier and save it to disk.

    An ImageNet-pretrained MobileNet (without its top) is cut at
    ``layer_name``. A global-average-pooling layer, four fully-connected
    ReLU layers (1024, 1024, 1024, 512 units) and a softmax output layer are
    stacked on top of that intermediate output. Every layer of the MobileNet
    base is frozen, so only the newly added head is trained. Training uses
    plain SGD with categorical cross-entropy and logs to TensorBoard under
    ``logs/<timestamp>``.

    The trained model is written to the current working directory as
    ``mobilenet_model_voc_<num_classes>class_ep_<epochs>_sgd_layer_<N>.h5``
    where ``N`` is the number of layers in the final model.

    Args:
        layer_name: Name of the MobileNet layer whose output feeds the new
            classification head (for example ``'conv_dw_12_relu'``).
        number: Not used by this function; kept so existing callers keep
            working.
        train_dir: Directory passed to ``flow_from_directory`` for the
            training images.
        val_dir: Directory passed to ``flow_from_directory`` for the
            validation images.
        num_classes: Number of units in the softmax output layer.
        epochs: Number of training epochs.
        batch_size: Batch size used by both image generators.
        learning_rate: Learning rate of the SGD optimizer.

    Returns:
        The trained Keras model (the same object that was saved to disk).
    """
    # Pretrained feature extractor; the ImageNet classifier head is dropped.
    base_model = MobileNet(weights='imagenet', include_top=False)

    # Cut the network at the requested layer and attach the new head.
    x = base_model.get_layer(layer_name).output
    x = GlobalAveragePooling2D()(x)
    for units in (1024, 1024, 1024, 512):
        x = Dense(units, activation='relu')(x)
    preds = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=preds)

    # Dump both layer lists so the truncation point can be checked by eye.
    for i, layer in enumerate(base_model.layers):
        print('Original Model*****', i, layer.name)
    for i, layer in enumerate(model.layers):
        print('Final Model*****', i, layer.name)
    print(len(model.layers))

    # Freeze every MobileNet layer: only the randomly initialised head
    # layers added above remain trainable.
    for layer in base_model.layers:
        layer.trainable = False

    # Plain SGD (no momentum), categorical cross-entropy loss, accuracy metric.
    model.compile(optimizer=SGD(learning_rate=learning_rate,
                                momentum=0.0,
                                nesterov=False,
                                name='SGD'),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # The same preprocessing is applied to training and validation images.
    train_datagen = ImageDataGenerator(
        preprocessing_function=preprocess_input)
    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(224, 224),
        color_mode='rgb',
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True)
    validation_generator = train_datagen.flow_from_directory(
        val_dir,
        target_size=(224, 224),
        color_mode='rgb',
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True)

    # Floor division yields 0 when a dataset holds fewer images than one
    # batch; clamp to 1 so such a dataset still gets one (partial) step.
    step_size_train = max(
        1, train_generator.n // train_generator.batch_size)
    step_size_val = max(
        1, validation_generator.n // validation_generator.batch_size)

    tensorboard = TensorBoard(log_dir="logs/{}".format(time()),
                              update_freq='epoch',
                              profile_batch=0)

    model.fit(train_generator,
              steps_per_epoch=step_size_train,
              epochs=epochs,
              validation_data=validation_generator,
              validation_steps=step_size_val,
              callbacks=[tensorboard])

    # The filename is derived from the actual settings so it cannot drift
    # from them; with the default arguments it matches the historical name.
    model.save('mobilenet_model_voc_{}class_ep_{}_sgd_layer_{}.h5'.format(
        num_classes, epochs, len(model.layers)))
    return model