def inception_block(x, filters=256):
    shrinkaged_filters = int(filters * INCEPTION_ENABLE_DEPTHWISE_SEPARABLE_CONV_SHRINKAGE)
    b0 = conv_bn_relu(x, filters=filters, kernel_size=(1, 1, 1))

    b1 = conv_bn_relu(x, filters=shrinkaged_filters, kernel_size=(1, 1, 1))
    b1 = conv_bn_relu(b1, filters=filters, kernel_size=(3, 3, 3))

    b2 = conv_bn_relu(x, filters=shrinkaged_filters, kernel_size=(1, 1, 1))
    b2 = conv_bn_relu(b2, filters=filters, kernel_size=(3, 3, 3))
    b2 = conv_bn_relu(b2, filters=filters, kernel_size=(3, 3, 3))

    b3 = AveragePooling3D(pool_size=(3, 3, 3), strides=(1, 1, 1), padding='same')(x)
    b3 = conv_bn_relu(b3, filters=filters, kernel_size=(1, 1, 1))

    bs = [b0, b1, b2, b3]

    print('inception_block')
    print(b0.get_shape())
    print(b1.get_shape())
    print(b2.get_shape())
    print(b3.get_shape())

    if INCEPTION_ENABLE_SPATIAL_SEPARABLE_CONV:
        b4 = conv_bn_relu(x, filters=shrinkaged_filters, kernel_size=(1, 1, 1))
        b4 = conv_bn_relu(b4, filters=filters, kernel_size=(5, 1, 1))
        b4 = conv_bn_relu(b4, filters=filters, kernel_size=(1, 5, 1))
        b4 = conv_bn_relu(b4, filters=filters, kernel_size=(1, 1, 5))
        bs.append(b4)
        print(b4.get_shape())

    x = Concatenate(axis=4)(bs)
    print(x.get_shape())

    return x
def reduction_block(x, filters=256):
    b0 = conv_bn_relu(x, filters=filters, kernel_size=(3, 3, 3), strides=(2, 2, 2), padding='same')

    b1 = conv_bn_relu(x, filters=filters, kernel_size=(1, 1, 1))
    b1 = conv_bn_relu(b1, filters=filters, kernel_size=(3, 3, 3))
    b1 = conv_bn_relu(b1, filters=filters, kernel_size=(3, 3, 3), strides=(2, 2, 2), padding='same')

    b2 = MaxPooling3D(pool_size=(3, 3, 3), strides=(2, 2, 2), padding='same')(x)
    b2 = conv_bn_relu(b2, filters=filters, kernel_size=(1, 1, 1))

    bs = [b0, b1, b2]

    print('reduction_block')
    print(b0.get_shape())
    print(b1.get_shape())
    print(b2.get_shape())

    if INCEPTION_ENABLE_SPATIAL_SEPARABLE_CONV:
        b3 = conv_bn_relu(x, filters=filters, kernel_size=(1, 1, 1))
        b3 = conv_bn_relu(b3, filters=filters, kernel_size=(5, 1, 1))
        b3 = conv_bn_relu(b3, filters=filters, kernel_size=(1, 5, 1))
        b3 = conv_bn_relu(b3, filters=filters, kernel_size=(1, 1, 5))
        b3 = conv_bn_relu(b3, filters=filters, kernel_size=(3, 3, 3), strides=(2, 2, 2), padding='same')
        bs.append(b3)
        print(b3.get_shape())

    x = Concatenate(axis=4)(bs)
    print(x.get_shape())

    return x
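Both blocks above call a conv_bn_relu helper that is not shown in this snippet. A minimal sketch of what such a helper typically looks like (Conv3D followed by batch normalization and ReLU, channels-last); the kernel size, padding and other defaults are assumptions, not the original code:

from keras.layers import Conv3D, BatchNormalization, Activation

def conv_bn_relu(x, filters, kernel_size=(3, 3, 3), strides=(1, 1, 1), padding='same'):
    # Conv3D -> BatchNorm -> ReLU; assumed helper, defaults may differ from the original
    x = Conv3D(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding)(x)
    x = BatchNormalization(axis=4)(x)  # channels-last, matching the Concatenate(axis=4) calls above
    return Activation('relu')(x)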
    def build_model_combine_features(self, load_weight=False):
        # CNN branch over the 11x11x3 image patch (functional API, so the two
        # branches can be merged with Concatenate below)
        image_input = Input(shape=(11, 11, 3))
        x = Conv2D(filters=16, kernel_size=5, strides=1, padding='valid', activation='relu', name='Conv1')(image_input)
        x = Conv2D(filters=24, kernel_size=3, strides=1, padding='same', activation='relu', name='Conv2')(x)
        x = Conv2D(filters=32, kernel_size=3, strides=1, padding='same', activation='relu', name='Conv3')(x)
        x = MaxPooling2D(pool_size=(3, 3), strides=2)(x)
        x = Conv2D(filters=64, kernel_size=3, strides=1, padding='same', activation='relu', name='Conv4')(x)
        x = MaxPooling2D(pool_size=(3, 3), strides=2)(x)
        x = Conv2D(filters=96, kernel_size=3, strides=1, padding='same', activation='relu', name='Conv5')(x)
        cnn_features = Flatten()(x)

        # Location branch over the (x, y) coordinates
        location_input = Input(shape=(2,))
        location_features = Dense(2, activation='relu')(location_input)

        # Combine both feature vectors and classify
        combined = Concatenate()([location_features, cnn_features])
        combined = Dense(500, activation='relu')(combined)
        output = Dense(2, activation='softmax')(combined)

        model = Model(inputs=[location_input, image_input], outputs=output)
        model.compile(optimizer=Adam(lr=self.lr), loss='categorical_crossentropy', metrics=['accuracy'])

        if load_weight:
            print("Loading weight...")
            model.load_weights(WEIGHT_DIR + "")
            print("Weight loaded.")

        return model
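A quick smoke-test sketch for the two-input model above; the array names and the wrapper object are hypothetical:

import numpy as np

patches = np.random.rand(8, 11, 11, 3).astype('float32')      # image patches
locations = np.random.rand(8, 2).astype('float32')             # (x, y) locations
labels = np.eye(2)[np.random.randint(0, 2, size=8)]            # one-hot labels, 2 classes

# model = wrapper.build_model_combine_features()               # `wrapper` is hypothetical
# model.fit([locations, patches], labels, batch_size=4, epochs=1)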
def inception_base(x):
    x = conv_bn_relu(x, filters=32)
    x = conv_bn_relu(x, filters=32)
    x = conv_bn_relu(x, filters=64)

    b0 = MaxPooling3D(pool_size=(2, 2, 2))(x)
    b1 = conv_bn_relu(x, 64, strides=(2, 2, 2))
    x = Concatenate(axis=4)([b0, b1])

    print('inception_base')
    print(b0.get_shape())
    print(b1.get_shape())
    print(x.get_shape())

    return x
def dense_block(x):
    print('dense block')
    print(x.get_shape())

    for _ in range(DENSE_NET_BLOCK_LAYERS):
        y = x

        if DENSE_NET_ENABLE_BOTTLENETCK:
            y = bn_relu_conv(y, filters=DENSE_NET_GROWTH_RATE, kernel_size=(1, 1, 1))

        y = bn_relu_conv(y, filters=DENSE_NET_GROWTH_RATE, kernel_size=(3, 3, 3))
        x = Concatenate(axis=4)([x, y])
        print(x.get_shape())

    return x
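The dense block uses a bn_relu_conv helper (pre-activation ordering, as in DenseNet) that is not shown here. A minimal sketch under that assumption:

from keras.layers import BatchNormalization, Activation, Conv3D

def bn_relu_conv(x, filters, kernel_size=(3, 3, 3)):
    # BatchNorm -> ReLU -> Conv3D, the pre-activation order used by DenseNet; assumed helper
    x = BatchNormalization(axis=4)(x)
    x = Activation('relu')(x)
    return Conv3D(filters=filters, kernel_size=kernel_size, padding='same')(x)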
    def use_pretrained_model(self):
        # For using a pre-trained model:
        # duplicate of the train function, except that model.fit is replaced by loading saved weights

        ###########
        ### ENCODER
        embedding_layer = Embedding(
            params['LEN_WORD2IDX_INPUTS'] + 1,
            params['EMBEDDING_DIM'],
            #weights=[embedding_matrix],
            input_length=params['MAX_LEN_INPUT'],
            # trainable=True
        )
        encoder_inputs_placeholder = Input(shape=(params['MAX_LEN_INPUT'], ))
        x = embedding_layer(encoder_inputs_placeholder)
        encoder = Bidirectional(
            LSTM(
                params['LATENT_DIM'],
                return_sequences=True,
                # dropout=0.5 # dropout not available on gpu
            ))
        encoder_outputs = encoder(x)

        ###########
        ### DECODER
        # this word embedding will not use pre-trained vectors, although you could
        decoder_embedding = Embedding(params['LEN_WORD2IDX_OUTPUTS'] + 1,
                                      params['EMBEDDING_DIM'])
        decoder_inputs_placeholder = Input(
            shape=(params['MAX_LEN_TARGET'], ))  # teacher forcing input
        decoder_inputs_x = decoder_embedding(decoder_inputs_placeholder)

        #############
        ### ATTENTION
        # Attention layers need to be global (i.e. shared) because they will be repeated Ty times at the decoder
        attn_repeat_layer = RepeatVector(params['MAX_LEN_INPUT'])
        attn_concat_layer = Concatenate(axis=-1)
        attn_dense1 = Dense(10, activation='tanh')
        attn_dense2 = Dense(1, activation=self._softmax_over_time)
        attn_dot = Dot(
            axes=1)  # to perform the weighted sum of alpha[t] * h[t]

        def _one_step_attention(h, st_1):
            # h = h(1), ..., h(Tx), shape = (Tx, LATENT_DIM * 2)
            # st_1 = s(t-1), shape = (LATENT_DIM_DECODER,)

            # copy s(t-1) Tx times
            # now shape = (Tx, LATENT_DIM_DECODER)
            st_1 = attn_repeat_layer(st_1)

            # Concatenate all h(t)'s with s(t-1)
            # Now of shape (Tx, LATENT_DIM_DECODER + LATENT_DIM * 2)
            x = attn_concat_layer([h, st_1])

            # Neural net first layer
            x = attn_dense1(x)

            # Neural net second layer with special softmax over time
            alphas = attn_dense2(x)

            # "Dot" the alphas and the h's
            # Remember a.dot(b) = sum over a[t] * b[t]
            context = attn_dot([alphas, h])

            return context

        # define the rest of the decoder (after attention)
        decoder_lstm = LSTM(params['LATENT_DIM_DECODER'], return_state=True)
        decoder_dense = Dense(params['LEN_WORD2IDX_OUTPUTS'] + 1,
                              activation='softmax')

        initial_s = Input(shape=(params['LATENT_DIM_DECODER'], ), name='s0')
        initial_c = Input(shape=(params['LATENT_DIM_DECODER'], ), name='c0')
        context_last_word_concat_layer = Concatenate(
            axis=2)  # for teacher forcing

        # Unlike the previous seq2seq, we cannot get the output all in one step.
        # Instead we need to do Ty steps, and in each of those steps we need to consider all Tx h's.

        # s, c will be re-assigned in each iteration of the loop
        s = initial_s
        c = initial_c

        # collect outputs in a list at first
        outputs = []
        # An LSTM normally runs a loop like this internally; here we unroll it manually so the attention context can be computed at every step.
        for t in range(params['MAX_LEN_TARGET']):  # Ty times

            ######################################################
            ## `one_step_attention` function !
            # get the context using attention
            context = _one_step_attention(encoder_outputs, s)

            # we need a different layer for each time step
            selector = Lambda(
                lambda x: x[:, t:t + 1]
            )  # extract only the vector for this time step; a Lambda is used because everything has to be expressed layer-wise
            xt = selector(decoder_inputs_x)

            # combine
            decoder_lstm_input = context_last_word_concat_layer([context, xt])

            # pass the combined [context, last word] into the LSTM
            # along with [s, c]
            # get the new [s, c] and output
            o, s, c = decoder_lstm(decoder_lstm_input, initial_state=[s, c])

            # final dense layer to get next word prediction
            decoder_outputs = decoder_dense(o)
            outputs.append(decoder_outputs)

        # make it a layer
        stacker = Lambda(self._stack_and_transpose)
        outputs = stacker(outputs)

        ################
        ### CREATE MODEL
        self.model = Model(inputs=[
            encoder_inputs_placeholder,
            decoder_inputs_placeholder,
            initial_s,
            initial_c,
        ],
                           outputs=outputs)

        # compile the model
        self.model.compile(optimizer='rmsprop',
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])

        ## load pretrained model
        self.model.load_weights(PRETRAINED_MODEL_PATH)

        ##### Make predictions #####
        # As with the poetry example, we need to create another model
        # that can take in the RNN state and previous word as input
        # and accept a T=1 sequence.
        #encoder_inputs_placeholder = Input(shape=(params['MAX_LEN_INPUT'],))

        # The encoder will be stand-alone
        # From this we will get our initial decoder hidden state
        # i.e. h(1), ..., h(Tx)
        self.encoder_model = Model(encoder_inputs_placeholder, encoder_outputs)

        # next we define a T=1 decoder model
        encoder_outputs_as_input = Input(shape=(
            params['MAX_LEN_INPUT'],
            params['LATENT_DIM'] * 2,
        ))
        decoder_inputs_single = Input(shape=(1, ))
        decoder_inputs_single_x = decoder_embedding(decoder_inputs_single)

        # no need to loop over attention steps this time because there is only one step
        context = _one_step_attention(encoder_outputs_as_input, initial_s)

        # combine context with last word
        decoder_lstm_input = context_last_word_concat_layer(
            [context, decoder_inputs_single_x])

        # lstm and final dense
        o, s, c = decoder_lstm(decoder_lstm_input,
                               initial_state=[initial_s, initial_c])
        decoder_outputs = decoder_dense(o)

        # note: we don't really need the final stack and transpose
        # because there's only 1 output
        # it is already of size N x D
        # no need to make it 1 x N x D --> N x 1 x D
        # the time dimension is 1 and is squeezed away automatically, so stack_and_transpose is not needed here

        # create the model object
        self.decoder_model = Model(inputs=[
            decoder_inputs_single, encoder_outputs_as_input, initial_s,
            initial_c
        ],
                                   outputs=[decoder_outputs, s, c])
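The method above refers to two small helpers, self._softmax_over_time and self._stack_and_transpose, that are not shown in this excerpt. Minimal sketches of what such helpers typically look like in this style of attention decoder (assumed implementations, not taken from the original source; assuming the usual "from keras import backend as K" import):

    def _softmax_over_time(self, x):
        # softmax over the time axis (axis=1) rather than the last axis,
        # so the attention weights over all Tx input positions sum to 1
        assert K.ndim(x) > 2
        e = K.exp(x - K.max(x, axis=1, keepdims=True))
        return e / K.sum(e, axis=1, keepdims=True)

    def _stack_and_transpose(self, x):
        # x is a list of Ty tensors of shape (N, D); stack to (Ty, N, D),
        # then permute to (N, Ty, D) to match the target tensor layout
        x = K.stack(x)
        return K.permute_dimensions(x, pattern=(1, 0, 2))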
Example #7
    def build_model_alt(self,
                        num_layers,
                        n_base_filters,
                        deconvolution,
                        use_bn=False):
        """
        Create a 3D Unet model with a variable number of layers and initial number of filters
        :param num_layers: number of layers (i.e. number of skip connections + 1)
        :param n_base_filters: number of filters to use in the first conv layer
        :param deconvolution: True for Deconvolution3D, False for UpSampling3D
        :param use_bn: True to use BatchNormalisation, False otherwise
        :return: Keras model
        """
        POOL_SIZE = (2, 2, 2)
        POOL_STRIDE = (2, 2, 2)
        CONV_KERNEL = (3, 3, 3)
        CONV_STRIDE = (1, 1, 1)
        DECONV_KERNEL = (2, 2, 2)
        DECONV_STRIDE = (2, 2, 2)
        UPSAMPLE_SIZE = (2, 2, 2)
        FEATURE_AXIS = -1

        self._title = "UNet3D_{}layer_{}flt_deconv{}".format(
            num_layers, n_base_filters, int(deconvolution))
        self._title += "_BN" if use_bn else ""

        inputs = self._input
        current_layer = inputs
        layers = list()

        # Contracting path
        for layer_ix in range(num_layers):
            # Two conv layers, note the difference in the number of filters
            contr_conv1 = Conv3D(filters=n_base_filters * (2**layer_ix),
                                 kernel_size=CONV_KERNEL,
                                 strides=CONV_STRIDE,
                                 padding="same",
                                 activation="relu",
                                 kernel_initializer="he_normal")(current_layer)
            if use_bn:
                contr_conv1 = BatchNormalization(
                    axis=FEATURE_AXIS)(contr_conv1)

            contr_conv2 = Conv3D(filters=n_base_filters * (2**layer_ix) * 2,
                                 kernel_size=CONV_KERNEL,
                                 strides=CONV_STRIDE,
                                 padding="same",
                                 activation="relu",
                                 kernel_initializer="he_normal")(contr_conv1)
            if use_bn:
                contr_conv2 = BatchNormalization(
                    axis=FEATURE_AXIS)(contr_conv2)

            # Do not include maxpooling in the final bottom layer
            if layer_ix < num_layers - 1:
                current_layer = MaxPooling3D(pool_size=POOL_SIZE,
                                             strides=POOL_STRIDE,
                                             padding="same")(contr_conv2)
                layers.append([contr_conv1, contr_conv2, current_layer])
            else:
                current_layer = contr_conv2
                layers.append([contr_conv1, contr_conv2])

        # Expanding path
        for layer_ix in range(num_layers - 2, -1, -1):
            if deconvolution:
                exp_deconv = Deconvolution3D(
                    filters=current_layer._keras_shape[-1],
                    kernel_size=DECONV_KERNEL,
                    strides=DECONV_STRIDE)(current_layer)
            else:
                exp_deconv = UpSampling3D(size=UPSAMPLE_SIZE)(current_layer)

            concat_layer = Concatenate(axis=FEATURE_AXIS)(
                [exp_deconv, layers[layer_ix][1]])
            current_layer = Conv3D(
                filters=layers[layer_ix][1]._keras_shape[FEATURE_AXIS],
                kernel_size=CONV_KERNEL,
                strides=CONV_STRIDE,
                padding="same",
                activation="relu",
                kernel_initializer="he_normal")(concat_layer)
            if use_bn:
                current_layer = BatchNormalization(
                    axis=FEATURE_AXIS)(current_layer)

            current_layer = Conv3D(
                filters=layers[layer_ix][1]._keras_shape[FEATURE_AXIS],
                kernel_size=CONV_KERNEL,
                strides=CONV_STRIDE,
                padding="same",
                activation="relu",
                kernel_initializer="he_normal")(current_layer)
            if use_bn:
                current_layer = BatchNormalization(
                    axis=FEATURE_AXIS)(current_layer)

        act = Conv3D(self._num_classes, (1, 1, 1),
                     activation="softmax",
                     padding="same",
                     kernel_initializer="he_normal")(current_layer)

        self._model = Model(inputs=[inputs], outputs=[act])

        return self._model
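A portability note: _keras_shape and Deconvolution3D come from older Keras releases. Rough modern equivalents of the lookups used above, for reference:

from keras import backend as K
from keras.layers import Conv3DTranspose  # newer name for Deconvolution3D

# filters = current_layer._keras_shape[-1]   # old-Keras internal attribute
# filters = K.int_shape(current_layer)[-1]   # backend helper returning the same static channel count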
Example #8
    def get_generator(self):
        # Generator will take in the patched image, mask, sketch info, color_info and random noise
        inp = Input(shape=(self.vars.INP_SHAPE[0], self.vars.INP_SHAPE[1], 9))
        cnum = 64
        x1, mask1 = self.GatedConv2D(inp, cnum, (7, 7), (2, 2), use_lrn=False)
        x2, mask2 = self.GatedConv2D(x1, 2 * cnum, (5, 5), (2, 2))
        x3, mask3 = self.GatedConv2D(x2, 4 * cnum, (5, 5), (2, 2))
        x4, mask4 = self.GatedConv2D(x3, 8 * cnum, (3, 3), (2, 2))
        x5, mask5 = self.GatedConv2D(x4, 8 * cnum, (3, 3), (2, 2))
        x6, mask6 = self.GatedConv2D(x5, 8 * cnum, (3, 3), (2, 2))
        x7, mask7 = self.GatedConv2D(x6, 8 * cnum, (3, 3), (2, 2))

        x7, _ = self.GatedConv2D(x7, 8 * cnum, (3, 3), (1, 1), dilation=2)
        x7, _ = self.GatedConv2D(x7, 8 * cnum, (3, 3), (1, 1), dilation=4)
        x7, _ = self.GatedConv2D(x7, 8 * cnum, (3, 3), (1, 1), dilation=8)
        x7, _ = self.GatedConv2D(x7, 8 * cnum, (3, 3), (1, 1), dilation=16)

        x8, _ = self.GatedDeConv2D(x7, [
            self.vars.TRAIN_BATCH_SIZE,
            int(self.vars.INP_SHAPE[0] / 64),
            int(self.vars.INP_SHAPE[1] / 64), 8 * cnum
        ])
        x8 = Concatenate(axis=0)([x6, x8])
        x8, mask8 = self.GatedConv2D(x8, 8 * cnum, (3, 3), (1, 1))

        x9, _ = self.GatedDeConv2D(x8, [
            self.vars.TRAIN_BATCH_SIZE,
            int(self.vars.INP_SHAPE[0] / 32),
            int(self.vars.INP_SHAPE[1] / 32), 8 * cnum
        ])
        x9 = Concatenate(axis=0)([x5, x9])
        x9, mask9 = self.GatedConv2D(x9, 8 * cnum, (3, 3), (1, 1))

        x10, _ = self.GatedDeConv2D(x9, [
            self.vars.TRAIN_BATCH_SIZE,
            int(self.vars.INP_SHAPE[0] / 16),
            int(self.vars.INP_SHAPE[1] / 16), 8 * cnum
        ])
        x10 = Concatenate(axis=0)([x4, x10])
        x10, mask10 = self.GatedConv2D(x10, 8 * cnum, (3, 3), (1, 1))

        x11, _ = self.GatedDeConv2D(x10, [
            self.vars.TRAIN_BATCH_SIZE,
            int(self.vars.INP_SHAPE[0] / 8),
            int(self.vars.INP_SHAPE[1] / 8), 4 * cnum
        ])
        x11 = Concatenate(axis=0)([x3, x11])
        x11, mask11 = self.GatedConv2D(x11, 4 * cnum, (3, 3), (1, 1))

        x12, _ = self.GatedDeConv2D(x11, [
            self.vars.TRAIN_BATCH_SIZE,
            int(self.vars.INP_SHAPE[0] / 4),
            int(self.vars.INP_SHAPE[1] / 4), 2 * cnum
        ])
        x12 = Concatenate(axis=0)([x2, x12])
        x12, mask12 = self.GatedConv2D(x12, 2 * cnum, (3, 3), (1, 1))

        x13, _ = self.GatedDeConv2D(x12, [
            self.vars.TRAIN_BATCH_SIZE,
            int(self.vars.INP_SHAPE[0] / 2),
            int(self.vars.INP_SHAPE[1] / 2), cnum
        ])
        x13 = Concatenate(axis=0)([x1, x13])
        x13, mask13 = self.GatedConv2D(x13, cnum, (3, 3), (1, 1))

        x14, _ = self.GatedDeConv2D(x13, [
            self.vars.TRAIN_BATCH_SIZE,
            int(self.vars.INP_SHAPE[0]),
            int(self.vars.INP_SHAPE[1]), 9
        ])
        x14 = Concatenate(axis=0)([inp, x14])
        x14, mask14 = self.GatedConv2D(x14, 3, (3, 3), (1, 1))

        x14 = Activation('tanh')(x14)

        model = Model(inputs=inp, outputs=[x14, mask14])

        return model
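The generator relies on GatedConv2D / GatedDeConv2D methods defined elsewhere in the class. A minimal sketch of a gated convolution (a feature path modulated by a learned sigmoid gate, as used in free-form inpainting); this is an assumed stand-in, not the original helper, and the use_lrn flag is ignored here:

from keras.layers import Conv2D, LeakyReLU, Multiply

def GatedConv2D(self, x, filters, kernel_size, strides, dilation=1, use_lrn=False):
    # feature branch and gating branch share the same spatial configuration
    feat = Conv2D(filters, kernel_size, strides=strides, dilation_rate=dilation, padding='same')(x)
    gate = Conv2D(filters, kernel_size, strides=strides, dilation_rate=dilation,
                  padding='same', activation='sigmoid')(x)
    feat = LeakyReLU()(feat)
    out = Multiply()([feat, gate])  # gated feature map
    return out, gate                # the sigmoid gate doubles as the returned "mask"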
Example #9
def Deeplabv3(weights='pascal_voc',
              input_tensor=None,
              infer=False,
              input_shape=(512, 512, 3),
              classes=21,
              backbone='mobilenetv2',
              OS=16,
              alpha=1.):
    """ Instantiates the Deeplabv3+ architecture
    Optionally loads weights pre-trained
    on PASCAL VOC. This model is available for TensorFlow only,
    and can only be used with inputs following the TensorFlow
    data format `(width, height, channels)`.
    # Arguments
        weights: one of 'pascal_voc' (pre-trained on pascal voc)
            or None (random initialization)
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        infer: if True, apply the final softmax over the spatial logits directly
            (inference); if False, reshape to (H*W, classes) before the softmax (training).
        input_shape: shape of input image. format HxWxC
            PASCAL VOC model was trained on (512,512,3) images
        classes: number of desired classes. If classes != 21,
            last layer is initialized randomly
        backbone: backbone to use. one of {'xception','mobilenetv2'}
        OS: determines input_shape/feature_extractor_output ratio. One of {8,16}.
            Used only for xception backbone.
        alpha: controls the width of the MobileNetV2 network. This is known as the
            width multiplier in the MobileNetV2 paper.
                - If `alpha` < 1.0, proportionally decreases the number
                    of filters in each layer.
                - If `alpha` > 1.0, proportionally increases the number
                    of filters in each layer.
                - If `alpha` = 1, default number of filters from the paper
                    are used at each layer.
            Used only for mobilenetv2 backbone
    # Returns
        A Keras model instance.
    # Raises
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
        ValueError: in case of invalid argument for `weights` or `backbone`
    """

    if not (weights in {'pascal_voc', None}):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    if K.backend() != 'tensorflow':
        raise RuntimeError('The Deeplabv3+ model is only available with '
                           'the TensorFlow backend.')

    if not (backbone in {'xception', 'mobilenetv2'}):
        raise ValueError('The `backbone` argument should be either '
                         '`xception`  or `mobilenetv2` ')

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    batches_input = Lambda(lambda x: x / 127.5 - 1)(img_input)

    if backbone == 'xception':
        if OS == 8:
            entry_block3_stride = 1
            middle_block_rate = 2  # ! Not mentioned in paper, but required
            exit_block_rates = (2, 4)
            atrous_rates = (12, 24, 36)
        else:
            entry_block3_stride = 2
            middle_block_rate = 1
            exit_block_rates = (1, 2)
            atrous_rates = (6, 12, 18)
        x = Conv2D(32, (3, 3),
                   strides=(2, 2),
                   name='entry_flow_conv1_1',
                   use_bias=False,
                   padding='same')(batches_input)

        x = BatchNormalization(name='entry_flow_conv1_1_BN')(x)
        x = Activation('relu')(x)

        x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
        x = BatchNormalization(name='entry_flow_conv1_2_BN')(x)
        x = Activation('relu')(x)

        x = _xception_block(x, [128, 128, 128],
                            'entry_flow_block1',
                            skip_connection_type='conv',
                            stride=2,
                            depth_activation=False)
        x, skip1 = _xception_block(x, [256, 256, 256],
                                   'entry_flow_block2',
                                   skip_connection_type='conv',
                                   stride=2,
                                   depth_activation=False,
                                   return_skip=True)

        x = _xception_block(x, [728, 728, 728],
                            'entry_flow_block3',
                            skip_connection_type='conv',
                            stride=entry_block3_stride,
                            depth_activation=False)
        for i in range(16):
            x = _xception_block(x, [728, 728, 728],
                                'middle_flow_unit_{}'.format(i + 1),
                                skip_connection_type='sum',
                                stride=1,
                                rate=middle_block_rate,
                                depth_activation=False)

        x = _xception_block(x, [728, 1024, 1024],
                            'exit_flow_block1',
                            skip_connection_type='conv',
                            stride=1,
                            rate=exit_block_rates[0],
                            depth_activation=False)
        x = _xception_block(x, [1536, 1536, 2048],
                            'exit_flow_block2',
                            skip_connection_type='none',
                            stride=1,
                            rate=exit_block_rates[1],
                            depth_activation=True)

    else:
        OS = 8
        first_block_filters = _make_divisible(32 * alpha, 8)
        x = Conv2D(first_block_filters,
                   kernel_size=3,
                   strides=(2, 2),
                   padding='same',
                   use_bias=False,
                   name='Conv')(batches_input)
        x = BatchNormalization(epsilon=1e-3, momentum=0.999, name='Conv_BN')(x)

        x = Lambda(lambda x: relu(x, max_value=6.))(x)

        x = _inverted_res_block(x,
                                filters=16,
                                alpha=alpha,
                                stride=1,
                                expansion=1,
                                block_id=0,
                                skip_connection=False)

        x = _inverted_res_block(x,
                                filters=24,
                                alpha=alpha,
                                stride=2,
                                expansion=6,
                                block_id=1,
                                skip_connection=False)
        x = _inverted_res_block(x,
                                filters=24,
                                alpha=alpha,
                                stride=1,
                                expansion=6,
                                block_id=2,
                                skip_connection=True)

        x = _inverted_res_block(x,
                                filters=32,
                                alpha=alpha,
                                stride=2,
                                expansion=6,
                                block_id=3,
                                skip_connection=False)
        x = _inverted_res_block(x,
                                filters=32,
                                alpha=alpha,
                                stride=1,
                                expansion=6,
                                block_id=4,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=32,
                                alpha=alpha,
                                stride=1,
                                expansion=6,
                                block_id=5,
                                skip_connection=True)

        # stride in block 6 changed from 2 -> 1, so we need to use rate = 2
        x = _inverted_res_block(
            x,
            filters=64,
            alpha=alpha,
            stride=1,  # 1!
            expansion=6,
            block_id=6,
            skip_connection=False)
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=7,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=8,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=9,
                                skip_connection=True)

        x = _inverted_res_block(x,
                                filters=96,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=10,
                                skip_connection=False)
        x = _inverted_res_block(x,
                                filters=96,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=11,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=96,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=12,
                                skip_connection=True)

        x = _inverted_res_block(
            x,
            filters=160,
            alpha=alpha,
            stride=1,
            rate=2,  # 1!
            expansion=6,
            block_id=13,
            skip_connection=False)
        x = _inverted_res_block(x,
                                filters=160,
                                alpha=alpha,
                                stride=1,
                                rate=4,
                                expansion=6,
                                block_id=14,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=160,
                                alpha=alpha,
                                stride=1,
                                rate=4,
                                expansion=6,
                                block_id=15,
                                skip_connection=True)

        x = _inverted_res_block(x,
                                filters=320,
                                alpha=alpha,
                                stride=1,
                                rate=4,
                                expansion=6,
                                block_id=16,
                                skip_connection=False)

    # end of feature extractor

    # branching for Atrous Spatial Pyramid Pooling

    # Image Feature branch
    #out_shape = int(np.ceil(input_shape[0] / OS))
    b4 = AveragePooling2D(pool_size=(int(np.ceil(input_shape[0] / OS)),
                                     int(np.ceil(input_shape[1] / OS))))(x)

    b4 = Conv2D(256, (1, 1),
                padding='same',
                use_bias=False,
                name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation('relu')(b4)

    b4 = Lambda(lambda x: K.tf.image.resize_bilinear(
        x,
        size=(int(np.ceil(input_shape[0] / OS)),
              int(np.ceil(input_shape[1] / OS)))))(b4)

    # simple 1x1
    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation('relu', name='aspp0_activation')(b0)

    # there are only 2 branches in mobilenetV2. not sure why
    if backbone == 'xception':
        # rate = 6 (12)
        b1 = SepConv_BN(x,
                        256,
                        'aspp1',
                        rate=atrous_rates[0],
                        depth_activation=True,
                        epsilon=1e-5)
        # rate = 12 (24)
        b2 = SepConv_BN(x,
                        256,
                        'aspp2',
                        rate=atrous_rates[1],
                        depth_activation=True,
                        epsilon=1e-5)
        # rate = 18 (36)
        b3 = SepConv_BN(x,
                        256,
                        'aspp3',
                        rate=atrous_rates[2],
                        depth_activation=True,
                        epsilon=1e-5)

        # concatenate ASPP branches & project
        x = Concatenate()([b4, b0, b1, b2, b3])
    else:
        x = Concatenate()([b4, b0])

    x = Conv2D(256, (1, 1),
               padding='same',
               use_bias=False,
               name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = Activation('relu')(x)
    x = Dropout(0.1)(x)

    # DeepLab v.3+ decoder

    if backbone == 'xception':
        # Feature projection
        # x4 (x2) block

        x = Lambda(lambda x: K.tf.image.resize_bilinear(
            x,
            size=(int(np.ceil(input_shape[0] / 4)),
                  int(np.ceil(input_shape[1] / 4)))))(x)

        dec_skip1 = Conv2D(48, (1, 1),
                           padding='same',
                           use_bias=False,
                           name='feature_projection0')(skip1)
        dec_skip1 = BatchNormalization(name='feature_projection0_BN',
                                       epsilon=1e-5)(dec_skip1)
        dec_skip1 = Activation('relu')(dec_skip1)
        x = Concatenate()([x, dec_skip1])
        x = SepConv_BN(x,
                       256,
                       'decoder_conv0',
                       depth_activation=True,
                       epsilon=1e-5)
        x = SepConv_BN(x,
                       256,
                       'decoder_conv1',
                       depth_activation=True,
                       epsilon=1e-5)

    # you can use it with an arbitrary number of classes
    if classes == 21:
        last_layer_name = 'logits_semantic'
    else:
        last_layer_name = 'custom_logits_semantic'

    x = Conv2D(classes, (1, 1), padding='same', name=last_layer_name)(x)
    x = Lambda(lambda x: K.tf.image.resize_bilinear(
        x, size=(input_shape[0], input_shape[1])))(x)
    if infer:
        x = Activation('softmax')(x)
    else:
        x = Reshape((input_shape[0] * input_shape[1], classes))(x)
        x = Activation('softmax')(x)
    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs, x, name='deeplabv3p')

    # load weights

    if weights == 'pascal_voc':
        if backbone == 'xception':
            weights_path = get_file(
                'deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH_X,
                cache_subdir='models')
        else:
            weights_path = get_file(
                'deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH_MOBILE,
                cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
    return model
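A short usage sketch for the function above; the pre-trained weight files are fetched automatically when weights='pascal_voc':

# MobileNetV2 variant for the 21 PASCAL VOC classes
model = Deeplabv3(weights='pascal_voc', input_shape=(512, 512, 3),
                  classes=21, backbone='mobilenetv2')
model.summary()

# For a custom dataset, the last layer is re-initialized automatically when classes != 21, e.g.:
# model = Deeplabv3(weights='pascal_voc', input_shape=(512, 512, 3), classes=4, backbone='xception')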
Example #10
                    activation=relu)(branch2)

    branch2 = AveragePooling2D()(branch2)

    branch2 = Conv2D(filters = 128,
                    kernel_size = (4, 2),
                    strides = (1,  1),
                    padding='same',
                    activation=relu)(branch2)

    branch2 = AveragePooling2D(pool_size = (5,1))(branch2)

    branch2 = Flatten()(branch2)

    # axis=1: concatenate the two flattened branch feature vectors along the feature axis
    layer = Concatenate(axis=1)([branch1, branch2])

    layer = Dropout(0.25)(layer)

    layer = Dense(units=200, activation=relu)(layer)

    layer = Dense(num_classes, activation='softmax')(layer)

    model = Model(input_layer, layer)

    

    # (the softmax is already applied by the final Dense layer above)

    model.summary()
Example #11
# var_out = dense_layer(var_in)
# ```
#     * [Activation()](https://keras.io/layers/core/#activation)
# ```Python
# activation = activation_layer(var_in)
# ```
#     * [Dot()](https://keras.io/layers/merge/#dot)
# ```Python
# dot_product = dot_layer([var1,var2])
# ```

# In[6]:

# Defined shared layers as global variables
repeator = RepeatVector(Tx)
concatenator = Concatenate(axis=-1)
densor1 = Dense(10, activation="tanh")
densor2 = Dense(1, activation="relu")
activator = Activation(
    softmax, name='attention_weights'
)  # We are using a custom softmax(axis = 1) loaded in this notebook
dotor = Dot(axes=1)

# In[7]:

# GRADED FUNCTION: one_step_attention


def one_step_attention(a, s_prev):
    """
    Performs one step of attention: Outputs a context vector computed as a dot product of the attention weights
Example #12
def get_model(n_classes, input_height=224, input_width=224, weights=None):
    n_filters = 32
    kernel_size = 3
    dropout = 0.25

    img_input = Input(shape=(input_height, input_width, 3))  # Assume 224,224,3

    # Start with 224x224x3. Apply a 3x3 convolution with 32 filters ('same' padding) and 2x2 pooling.
    # New dimensions: 112x112x32
    c1 = conv2d_block(n_filters=n_filters * 1,
                      kernel_size=kernel_size,
                      input_tensor=img_input)
    p1 = MaxPooling2D((2, 2))(c1)
    d1 = Dropout(dropout)(p1)

    # 112x112x32. Apply a 3x3 convolution with 64 filters ('same' padding) and 2x2 pooling.
    # New dimensions: 56x56x64
    c2 = conv2d_block(n_filters=n_filters * 2,
                      kernel_size=kernel_size,
                      input_tensor=d1)
    p2 = MaxPooling2D((2, 2))(c2)
    d2 = Dropout(dropout)(p2)

    # 56x56x64. Apply a 3x3 convolution with 128 filters ('same' padding) and 2x2 pooling.
    # New dimensions: 28x28x128
    c3 = conv2d_block(n_filters=n_filters * 4,
                      kernel_size=kernel_size,
                      input_tensor=d2)
    p3 = MaxPooling2D((2, 2))(c3)
    d3 = Dropout(dropout)(p3)

    # 28x28x128. Apply a 3x3 convolution with 256 filters ('same' padding) and 2x2 pooling.
    # New dimensions: 14x14x256
    c4 = conv2d_block(n_filters=n_filters * 8,
                      kernel_size=kernel_size,
                      input_tensor=d3)
    p4 = MaxPooling2D((2, 2))(c4)
    d4 = Dropout(dropout)(p4)

    # 14x14x256. Apply a 3x3 convolution with 512 filters ('same' padding). New dimensions: 14x14x512
    c5 = conv2d_block(n_filters=n_filters * 16,
                      kernel_size=kernel_size,
                      input_tensor=d4)

    # Upsampling part starts here
    # Start with dimensions 14x14x512
    u6 = Conv2DTranspose(n_filters * 8,
                         kernel_size=(kernel_size, kernel_size),
                         strides=(2, 2),
                         padding='same')(c5)
    u6 = Concatenate()([u6, c4])
    d6 = Dropout(dropout)(u6)
    c6 = conv2d_block(n_filters * 8, kernel_size=3, input_tensor=d6)

    u7 = Conv2DTranspose(n_filters * 4,
                         kernel_size=(kernel_size, kernel_size),
                         strides=(2, 2),
                         padding='same')(c6)
    u7 = Concatenate()([u7, c3])
    d7 = Dropout(dropout)(u7)
    c7 = conv2d_block(n_filters * 4, kernel_size=3, input_tensor=d7)

    u8 = Conv2DTranspose(n_filters * 2,
                         kernel_size=(kernel_size, kernel_size),
                         strides=(2, 2),
                         padding='same')(c7)
    u8 = Concatenate()([u8, c2])
    d8 = Dropout(dropout)(u8)
    c8 = conv2d_block(n_filters * 2, kernel_size=3, input_tensor=d8)

    u9 = Conv2DTranspose(n_filters * 1,
                         kernel_size=(kernel_size, kernel_size),
                         strides=(2, 2),
                         padding='same')(c8)
    u9 = Concatenate()([u9, c1])
    d9 = Dropout(dropout)(u9)
    c9 = conv2d_block(n_filters * 1, kernel_size=3, input_tensor=d9)

    # Apply a 1x1 transposed convolution to produce per-pixel class probabilities
    outputs = Conv2DTranspose(n_classes, (1, 1), activation='softmax')(c9)
    model = Model(inputs=[img_input], outputs=[outputs])

    model.summary()

    if weights is not None:
        model.load_weights(weights)

    opt = Adam(lr=1E-5, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    return model
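get_model calls a conv2d_block helper that is not included in this snippet. A minimal sketch of the usual U-Net building block (two same-padded convolutions, each followed by batch normalization and ReLU), under the assumption that this is what the original does:

from keras.layers import Conv2D, BatchNormalization, Activation

def conv2d_block(n_filters, kernel_size, input_tensor):
    # two Conv2D layers with 'same' padding, each followed by BatchNorm + ReLU (assumed helper)
    x = Conv2D(n_filters, (kernel_size, kernel_size), padding='same',
               kernel_initializer='he_normal')(input_tensor)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(n_filters, (kernel_size, kernel_size), padding='same',
               kernel_initializer='he_normal')(x)
    x = BatchNormalization()(x)
    return Activation('relu')(x)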
Example #13
embedding = Embedding(input_dim=vocabulary_size,
                      output_dim=embedding_dim,
                      input_length=sequence_length)(inputs)

conv_list = list()
for ftr in filter_sizes:
    conv = Conv1D(num_filters,
                  kernel_size=ftr,
                  padding='same',
                  kernel_initializer='normal',
                  activation='relu')(embedding)
    maxpool = MaxPooling1D(pool_size=(sequence_length - ftr + 1),
                           padding='valid')(conv)
    conv_list.append(maxpool)

concatenated_tensor = Concatenate(axis=1)(conv_list)

flatten = Flatten()(concatenated_tensor)
dropout = Dropout(drop)(flatten)
output = Dense(units=2, activation='softmax')(dropout)

model = Model(inputs=inputs, outputs=output)

checkpoint = ModelCheckpoint('weights.{epoch:03d}-{val_acc:.4f}.hdf5',
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True,
                             mode='auto')
adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
Example #14
                       activation='relu')(xc14b)
xct1b = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(xct1)
xct2 = Conv2DTranspose(512, (3, 3),
                       strides=(1, 1),
                       padding='same',
                       data_format=None,
                       activation='relu')(xct1b)
xct2b = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(xct2)

inception1_12 = Conv2D(512, (1, 1),
                       padding='same',
                       activation='relu',
                       strides=(1, 1))(xmp12)
inception1_12b = BatchNormalization(axis=-1, momentum=0.99,
                                    epsilon=0.001)(inception1_12)
concat1 = Concatenate(axis=-1)([xct2b, inception1_12b])
xup3 = UpSampling2D(size=(2, 2), data_format=None)(concat1)
xct4 = Conv2DTranspose(512, (3, 3),
                       strides=(1, 1),
                       padding='same',
                       data_format=None,
                       activation='relu')(xup3)
xct4b = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(xct4)
xct5 = Conv2DTranspose(512, (3, 3),
                       strides=(1, 1),
                       padding='same',
                       data_format=None,
                       activation='relu')(xct4b)
xct5b = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(xct5)

inception1_9 = Conv2D(256, (1, 1),
Example #15
def DeeplabEncoder(input_shape=[(256, 256, 4), (128, 128, 4)],
                   classes=2,
                   backbone='xception',
                   OS=16,
                   alpha=1.):
    """ Instantiates the Deeplabv3+ architecture

    Optionally loads weights pre-trained
    on PASCAL VOC. This model is available for TensorFlow only,
    and can only be used with inputs following the TensorFlow
    data format `(width, height, channels)`.
    # Arguments
        weights: one of 'pascal_voc' (pre-trained on pascal voc)
            or None (random initialization)
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: shape of input image. format HxWxC
            PASCAL VOC model was trained on (512,512,3) images
        classes: number of desired classes. If classes != 21,
            last layer is initialized randomly
        backbone: backbone to use. one of {'xception','mobilenetv2'}
        OS: determines input_shape/feature_extractor_output ratio. One of {8,16}.
            Used only for xception backbone.
        alpha: controls the width of the MobileNetV2 network. This is known as the
            width multiplier in the MobileNetV2 paper.
                - If `alpha` < 1.0, proportionally decreases the number
                    of filters in each layer.
                - If `alpha` > 1.0, proportionally increases the number
                    of filters in each layer.
                - If `alpha` = 1, default number of filters from the paper
                    are used at each layer.
            Used only for mobilenetv2 backbone

    # Returns
        A Keras model instance.

    # Raises
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
        ValueError: in case of invalid argument for `weights` or `backbone`

    """

    imgMainInput = Input(shape=(256, 256, 4), name='Input10')
    imgSideInput = Input(shape=(128, 128, 4), name='Input20')

    if backbone == 'xception':
        if OS == 8:
            entry_block3_stride = 1
            middle_block_rate = 2  # ! Not mentioned in paper, but required
            exit_block_rates = (2, 4)
            atrous_rates = (12, 24, 36)
        else:
            entry_block3_stride = 2
            middle_block_rate = 1
            exit_block_rates = (1, 2)
            atrous_rates = (6, 12, 18)

        x = Conv2D(32, (3, 3),
                   strides=(2, 2),
                   name='entry_flow_conv1_1',
                   use_bias=False,
                   padding='same')(imgMainInput)
        x = BatchNormalization(name='entry_flow_conv1_1_BN')(x)
        x = Activation('relu')(x)

        x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
        x = BatchNormalization(name='entry_flow_conv1_2_BN')(x)
        x = Activation('relu')(x)

        x = _xception_block(x, [128, 128, 128],
                            'entry_flow_block1',
                            skip_connection_type='conv',
                            stride=2,
                            depth_activation=False)
        x, skip1 = _xception_block(x, [256, 256, 256],
                                   'entry_flow_block2',
                                   skip_connection_type='conv',
                                   stride=2,
                                   depth_activation=False,
                                   return_skip=True)

        x = _xception_block(x, [728, 728, 728],
                            'entry_flow_block3',
                            skip_connection_type='conv',
                            stride=entry_block3_stride,
                            depth_activation=False)
        for i in range(8):
            x = _xception_block(x, [728, 728, 728],
                                'middle_flow_unit_{}'.format(i + 1),
                                skip_connection_type='sum',
                                stride=1,
                                rate=middle_block_rate,
                                depth_activation=False)

        x = _xception_block(x, [728, 1024, 1024],
                            'exit_flow_block1',
                            skip_connection_type='conv',
                            stride=1,
                            rate=exit_block_rates[0],
                            depth_activation=False)
        x = _xception_block(x, [1536, 1536, 2048],
                            'exit_flow_block2',
                            skip_connection_type='none',
                            stride=1,
                            rate=exit_block_rates[1],
                            depth_activation=True)

    # end of feature extractor

    # branching for Atrous Spatial Pyramid Pooling

    # Image Feature branch
    #out_shape = int(np.ceil(input_shape[0] / OS))
    b4 = AveragePooling2D(pool_size=(int(np.ceil(256 / OS)),
                                     int(np.ceil(256 / OS))))(x)
    b4 = Conv2D(256, (1, 1),
                padding='same',
                use_bias=False,
                name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation('relu')(b4)
    b4 = BilinearUpsampling(
        (int(np.ceil(256 / OS)), int(np.ceil(256 / OS))))(b4)

    # simple 1x1
    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation('relu', name='aspp0_activation')(b0)

    # there are only 2 branches in mobilenetV2. not sure why
    if backbone == 'xception':
        # rate = 6 (12)
        b1 = SepConv_BN(x,
                        256,
                        'aspp1',
                        rate=atrous_rates[0],
                        depth_activation=True,
                        epsilon=1e-5)
        # rate = 12 (24)
        b2 = SepConv_BN(x,
                        256,
                        'aspp2',
                        rate=atrous_rates[1],
                        depth_activation=True,
                        epsilon=1e-5)
        # rate = 18 (36)
        b3 = SepConv_BN(x,
                        256,
                        'aspp3',
                        rate=atrous_rates[2],
                        depth_activation=True,
                        epsilon=1e-5)

        # concatenate ASPP branches & project
        x = Concatenate()([b4, b0, b1, b2, b3])

    x = Conv2D(256, (1, 1),
               padding='same',
               use_bias=False,
               name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = GlobalAveragePooling2D()(x)

    x = Dense(2, activation='softmax')(x)

    model = Model([imgMainInput, imgSideInput], x, name='deeplabEncoder')

    return model
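A brief usage sketch; note that imgSideInput is declared as a second model input but is not connected to the computation in this excerpt:

model = DeeplabEncoder(backbone='xception', OS=16)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()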
Example #16
    def _build_network(self, network_input, network_output,
                       additional_network_outputs):
        cluster_counts = list(self.data_provider.get_cluster_counts())

        # The simple loss cluster NN requires a specific output: a list of softmax distributions
        # First in this list are all softmax distributions for k=k_min for each object, then for k=k_min+1 for each
        # object etc. At the end, there is the cluster count output.

        # First we get an embedding for the network inputs
        embeddings = self._get_embedding(network_input)

        # Reshape all embeddings to 1d vectors
        # embedding_shape = self._embedding_nn.model.layers[-1].output_shape
        # embedding_size = np.prod(embedding_shape[1:])
        embedding_shape = embeddings[0].shape
        embedding_size = int(str(np.prod(embedding_shape[1:])))
        embedding_reshaper = self._s_layer(
            'embedding_reshape', lambda name: Reshape(
                (1, embedding_size), name=name))
        embeddings_reshaped = [
            embedding_reshaper(embedding) for embedding in embeddings
        ]

        # Merge all embeddings to one tensor
        embeddings_merged = self._s_layer(
            'embeddings_merge',
            lambda name: Concatenate(axis=1, name=name))(embeddings_reshaped)

        self._add_additional_prediction_output(embeddings_merged, 'Embeddings')

        # Use now some LSTM-layer to process all embeddings
        processed = embeddings_merged
        for i in range(self.__lstm_layers):
            processed = self._s_layer(
                'LSTM_proc_{}'.format(i),
                lambda name: Bidirectional(LSTM(self.__lstm_units,
                                                return_sequences=True),
                                           name=name))(processed)
            processed = self._s_layer(
                'LSTM_proc_{}_batch'.format(i),
                lambda name: BatchNormalization(name=name))(processed)

        # Split the tensor into separate layers
        embeddings_processed = [
            self._s_layer('slice_{}'.format(i),
                          lambda name: slice_layer(processed, i, name))
            for i in range(len(network_input))
        ]

        # Create now two outputs: The cluster count and for each cluster count / object combination a softmax distribution.
        # These outputs are independent of each other, therefore it doesn't matter which is calculated first. Let us start
        # with the cluster count / object combinations.

        # First prepare some generally required layers
        layers = []
        for i in range(self.__output_dense_layers):
            layers += [
                self._s_layer(
                    'output_dense{}'.format(i),
                    lambda name: Dense(self.__output_dense_units, name=name)),
                self._s_layer('output_batch'.format(i),
                              lambda name: BatchNormalization(name=name)),
                LeakyReLU()
                # self._s_layer('output_relu'.format(i), lambda name: Activation(LeakyReLU(), name=name))
            ]
        cluster_softmax = {
            k: self._s_layer(
                'softmax_cluster_{}'.format(k),
                lambda name: Dense(k, activation='softmax', name=name))
            for k in cluster_counts
        }

        # Create now the outputs
        clusters_output = additional_network_outputs['clusters'] = {}
        for i in range(len(embeddings_processed)):
            embedding_proc = embeddings_processed[i]

            # Add the required layers
            for layer in layers:
                embedding_proc = layer(embedding_proc)

            input_clusters_output = clusters_output['input{}'.format(i)] = {}
            for k in cluster_counts:

                # Create now the required softmax distributions
                output_classifier = cluster_softmax[k](embedding_proc)
                input_clusters_output['cluster{}'.format(
                    k)] = output_classifier
                network_output.append(output_classifier)

        # Calculate the real cluster count
        cluster_count = self._s_layer(
            'cluster_count_LSTM_merge',
            lambda name: Bidirectional(LSTM(self.__lstm_units), name=name)
            (embeddings_merged))
        cluster_count = self._s_layer(
            'cluster_count_LSTM_merge_batch',
            lambda name: BatchNormalization(name=name))(cluster_count)
        for i in range(self.__cluster_count_dense_layers):
            cluster_count = self._s_layer(
                'cluster_count_dense{}'.format(i),
                lambda name: Dense(self.__cluster_count_dense_units, name=name
                                   ))(cluster_count)
            cluster_count = self._s_layer(
                'cluster_count_batch{}'.format(i),
                lambda name: BatchNormalization(name=name))(cluster_count)
            cluster_count = LeakyReLU()(cluster_count)
            # cluster_count = self._s_layer('cluster_count_relu{}'.format(i), lambda name: Activation(LeakyReLU(), name=name))(cluster_count)

        # The next layer is an output-layer, therefore the name must not be formatted
        cluster_count = self._s_layer(
            'cluster_count_output',
            lambda name: Dense(
                len(cluster_counts), activation='softmax', name=name),
            format_name=False)(cluster_count)
        additional_network_outputs['cluster_count_output'] = cluster_count

        network_output.append(cluster_count)

        return True
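slice_layer, used above to split the merged (batch, n_inputs, features) tensor back into per-object tensors, is not shown in this excerpt. A minimal sketch under that assumption:

from keras.layers import Lambda

def slice_layer(x, i, name=None):
    # pick the i-th item along the object axis, keeping the axis so downstream
    # Dense/softmax layers see shape (batch, 1, features)
    return Lambda(lambda t: t[:, i:i + 1, :], name=name)(x)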
Example #17
def build_model(input_layer, start_neurons):
    # 128 -> 64
    conv1 = input_layer
    for _ in range(11):  # eleven stacked 3x3 convolution layers
        conv1 = Conv2D(start_neurons * 1, (3, 3),
                       activation="relu",
                       padding="same")(conv1)
    conv1 = BatchNormalization()(conv1)
    conv1 = Activation('relu')(conv1)
    pool1 = MaxPooling2D((2, 2))(conv1)

    # 64 -> 32
    pool1 = Conv2D(start_neurons * 2, (1, 1), padding="same",
                   use_bias=False)(pool1)
    res = Conv2D(start_neurons * 2, (1, 1),
                 strides=(2, 2),
                 padding="same",
                 use_bias=False)(pool1)
    res = BatchNormalization()(res)
    pool1 = DepthwiseConv2D((3, 3),
                            strides=(2, 2),
                            padding="same",
                            use_bias=False)(pool1)
    conv2 = Conv2D(start_neurons * 2, (3, 3),
                   activation="relu",
                   padding="same")(pool1)
    conv2 = Conv2D(start_neurons * 2, (3, 3),
                   activation="relu",
                   padding="same")(conv2)
    conv2 = Conv2D(start_neurons * 2, (3, 3),
                   activation="relu",
                   padding="same")(conv2)
    pool2 = add([conv2, res])
    # pool2 = MaxPooling2D((2, 2))(conv2)
    # pool2 = Dropout(0.5)(pool2)

    # 32 -> 16
    skip = BatchNormalization()(pool2)
    res3 = Conv2D(start_neurons * 4, (1, 1),
                  strides=(2, 2),
                  padding="same",
                  use_bias=False)(pool2)
    res3 = BatchNormalization()(res3)
    pool2 = DepthwiseConv2D((3, 3),
                            strides=(2, 2),
                            padding="same",
                            use_bias=False)(pool2)
    conv3 = Conv2D(start_neurons * 4, (3, 3),
                   activation="relu",
                   padding="same")(pool2)
    conv3 = Conv2D(start_neurons * 4, (1, 1),
                   activation="relu",
                   padding="same")(conv3)
    conv3 = Conv2D(start_neurons * 4, (1, 1),
                   activation="relu",
                   padding="same")(conv3)

    conv3 = Activation('relu')(conv3)
    # pool3 = MaxPooling2D((2, 2))(conv3)
    # pool3 = Dropout(0.5)(pool3)
    pool3 = add([conv3, res3])

    # 16 -> 8
    conv4 = Conv2D(start_neurons * 8, (3, 3),
                   activation="relu",
                   padding="same")(pool3)
    conv4 = Conv2D(start_neurons * 8, (3, 3),
                   activation="relu",
                   padding="same")(conv4)
    conv4 = Conv2D(start_neurons * 8, (1, 1),
                   activation="relu",
                   padding="same")(conv4)
    conv4 = BatchNormalization()(conv4)
    conv4 = Activation('relu')(conv4)
    x = MaxPooling2D((2, 2))(conv4)
    # pool4 = Dropout(0.5)(pool4)
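    # NOTE: res_xception_block, aspp (with its input_shape / out_stride arguments) and
    # BilinearUpsampling are helpers defined elsewhere in the original project; they are
    # not included in this snippet.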
    for i in range(6):
        x = res_xception_block(x, 256)
    # aspp
    x = aspp(x, input_shape, out_stride)
    x = Conv2D(256, (1, 1), padding="same", use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    # x = Dropout(0.9)(x)

    ##decoder
    x = BilinearUpsampling((4, 4))(x)
    dec_skip = Conv2D(256, (1, 1), padding="same", use_bias=False)(skip)
    dec_skip = BatchNormalization()(dec_skip)
    dec_skip = Activation("relu")(dec_skip)
    x = Concatenate()([x, dec_skip])

    x = DepthwiseConv2D((3, 3), padding="same", use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Conv2D(256, (1, 1), padding="same", use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    x = DepthwiseConv2D((3, 3), padding="same", use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Conv2D(256, (1, 1), padding="same", use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    x = Conv2D(1, (1, 1), padding="same")(x)
    x = BilinearUpsampling((4, 4))(x)
    x = Conv2D(1, (1, 1), padding="same", activation="sigmoid")(x)
    return x
Example #18
def build_model(lr, l2, activation='sigmoid'):
    ##############
    # BRANCH MODEL
    ##############
    regul = regularizers.l2(l2)
    optim = Adam(lr=lr)
    kwargs = {'padding': 'same', 'kernel_regularizer': regul}

    inp = Input(shape=img_shape)  # 384x384x1
    x = Conv2D(64, (9, 9), strides=2, activation='relu', **kwargs)(inp)

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 96x96x64
    for _ in range(2):
        x = BatchNormalization()(x)
        x = Conv2D(64, (3, 3), activation='relu', **kwargs)(x)

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 48x48x64
    x = BatchNormalization()(x)
    x = Conv2D(128, (1, 1), activation='relu', **kwargs)(x)  # 48x48x128
    for _ in range(4):
        x = subblock(x, 64, **kwargs)

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 24x24x128
    x = BatchNormalization()(x)
    x = Conv2D(256, (1, 1), activation='relu', **kwargs)(x)  # 24x24x256
    for _ in range(4):
        x = subblock(x, 64, **kwargs)

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 12x12x256
    x = BatchNormalization()(x)
    x = Conv2D(384, (1, 1), activation='relu', **kwargs)(x)  # 12x12x384
    for _ in range(4):
        x = subblock(x, 96, **kwargs)

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 6x6x384
    x = BatchNormalization()(x)
    x = Conv2D(512, (1, 1), activation='relu', **kwargs)(x)  # 6x6x512
    for _ in range(4):
        x = subblock(x, 128, **kwargs)

    x = GlobalMaxPooling2D()(x)  # 512
    branch_model = Model(inp, x)

    ############
    # HEAD MODEL
    ############
    mid = 32
    xa_inp = Input(shape=branch_model.output_shape[1:])
    xb_inp = Input(shape=branch_model.output_shape[1:])
    x1 = Lambda(lambda x: x[0] * x[1])([xa_inp, xb_inp])
    x2 = Lambda(lambda x: x[0] + x[1])([xa_inp, xb_inp])
    x3 = Lambda(lambda x: K.abs(x[0] - x[1]))([xa_inp, xb_inp])
    x4 = Lambda(lambda x: K.square(x))(x3)
    x = Concatenate()([x1, x2, x3, x4])
    x = Reshape((4, branch_model.output_shape[1], 1), name='reshape1')(x)

    # Per feature NN with shared weight is implemented using CONV2D with appropriate stride.
    x = Conv2D(mid, (4, 1), activation='relu', padding='valid')(x)
    x = Reshape((branch_model.output_shape[1], mid, 1))(x)
    x = Conv2D(1, (1, mid), activation='linear', padding='valid')(x)
    x = Flatten(name='flatten')(x)
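    # Shape flow for the 512-d branch embedding: concat -> (batch, 2048), reshape ->
    # (batch, 4, 512, 1), Conv2D (4, 1) -> (batch, 1, 512, mid), reshape ->
    # (batch, 512, mid, 1), Conv2D (1, mid) -> (batch, 512, 1, 1), flatten -> (batch, 512).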

    # Weighted sum implemented as a Dense layer.
    x = Dense(1, use_bias=True, activation=activation,
              name='weighted-average')(x)
    head_model = Model([xa_inp, xb_inp], x, name='head')

    ########################
    # SIAMESE NEURAL NETWORK
    ########################
    # Complete model is constructed by calling the branch model on each input image,
    # and then the head model on the resulting 512-vectors.
    img_a = Input(shape=img_shape)
    img_b = Input(shape=img_shape)
    xa = branch_model(img_a)
    xb = branch_model(img_b)
    x = head_model([xa, xb])
    model = Model([img_a, img_b], x)
    model.compile(optim,
                  loss='binary_crossentropy',
                  metrics=['binary_crossentropy', 'acc'])
    return model, branch_model, head_model
Example #19
#==================================== Initializing Model =====================

image_model = Sequential()
image_model.add(Dense(embedding_size, input_shape=(2048, ), activation='relu'))
image_model.add(RepeatVector(max_len))
language_model = Sequential()
language_model.add(
    Embedding(input_dim=vocab_size,
              output_dim=embedding_size,
              input_length=max_len))
language_model.add(LSTM(256, return_sequences=True))
language_model.add(TimeDistributed(Dense(embedding_size)))

# Concatenating our models

conca = Concatenate()([image_model.output, language_model.output])
x = LSTM(128, return_sequences=True)(conca)
x = LSTM(512, return_sequences=False)(x)
x = Dense(vocab_size)(x)
out = Activation('softmax')(x)
model = Model(inputs=[image_model.input, language_model.input], outputs=out)

model.compile(loss='categorical_crossentropy',
              optimizer='RMSprop',
              metrics=['accuracy'])

# loading weights of our pre-trained model
model.load_weights('model.h5')
print("=" * 150)
print("MODEL LOADED")
Example #20
File: sdne.py Project: adalisan/GEM
    def learn_embedding(self, graph=None, edge_f=None,
                        is_weighted=False, no_python=False):
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        S = nx.to_scipy_sparse_matrix(graph)
        
        print ("S graph adj mat ")
        print (S.shape)
        print ("S graph adj mat  maximum")
        print (np.amax(S))
        self._node_num = graph.number_of_nodes()

        print ("S graph nodes")
        print (self._node_num)
        t1 = time()
        S = (S + S.T) / 2
        # Generate encoder, decoder and autoencoder
        self._num_iter = self._n_iter
        # If cannot use previous step information, initialize new models
        self._encoder = get_encoder(self._node_num, self._d,
                                    self._K, self._n_units,
                                    self._nu1, self._nu2,
                                    self._actfn)
        self._decoder = get_decoder(self._node_num, self._d,
                                    self._K, self._n_units,
                                    self._nu1, self._nu2,
                                    self._actfn)
        self._autoencoder = get_autoencoder(self._encoder, self._decoder)

        # Initialize self._model
        # Input
        x_in = Input(shape=(2 * self._node_num,), name='x_in')
        x1 = Lambda(
            lambda x: x[:, 0:self._node_num],
            output_shape=(self._node_num,)
        )(x_in)
        #x1= x_in[:, 0:self._node_num]
        x2 = Lambda(
            lambda x: x[:, self._node_num:2 * self._node_num],
            output_shape=(self._node_num,)
        )(x_in)
        #x2= x_in[:, self._node_num:2 * self._node_num]

        # Process inputs
        [x_hat1, y1] = self._autoencoder(x1)
        [x_hat2, y2] = self._autoencoder(x2)
        # Outputs
        # x_diff1 = merge([x_hat1, x1],
        #                 mode=lambda ab: ab[0] - ab[1],
        #                 output_shape=lambda L: L[1])
        # x_diff1 = Subtract() ([x_hat1, x1])
                        
        # x_diff2 = merge([x_hat2, x2],
        #                 mode=lambda ab: ab[0] - ab[1],
        #                 output_shape=lambda L: L[1])
        # y_diff = merge([y2, y1],
        #                mode=lambda ab: ab[0] - ab[1],
        #                output_shape=lambda L: L[1])

        x_diff1 = Subtract() ([x_hat1, x1])
        x_diff2 = Subtract() ([x_hat2, x2])
        y_diff = Subtract() ([y2, y1])
        #dummy_1 = KBack.constant(0.0,shape=(1, None))
        #dummy_2 = KBack.constant(0.0,shape=(1, None))
        dummy_1=KBack.sum(x_diff1,axis=1,keepdims=True)
        dummy_2=KBack.sum(x_diff2,axis=1,keepdims=True)
        #dummy_2=KBack.zeros(shape=(1,None))
        z_diff1 = Concatenate(axis = 1)([x_diff1,dummy_1])
        z_diff2 = Concatenate(axis = 1)([x_diff2,dummy_2])
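        # The appended dummy column widens z_diff1 / z_diff2 to node_num + 1 entries,
        # matching the width of the [b, deg] targets consumed by weighted_mse_x below.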
        #print (x_diff1.get_config())
        #print (x_diff2.get_config())
        #print (y_diff._keras_shape)
        #tf.Print (x_diff2, [KBack.shape(x_diff2)])
        #tf.Print (y_diff, [KBack.shape(y_diff)])
        # Objectives
        def weighted_mse_x(y_true, y_pred):
            ''' Hack: This fn doesn't accept additional arguments.
                      We use y_true to pass them.
                y_pred: Contains x_hat - x
                y_true: Contains [b, deg]
            '''
            return KBack.sum(
                KBack.square(y_pred * y_true[:, 0:self._node_num]),
                axis=-1) / y_true[:, self._node_num]

        def weighted_mse_y(y_true, y_pred):
            ''' Hack: This fn doesn't accept additional arguments.
                      We use y_true to pass them.
            y_pred: Contains y2 - y1
            y_true: Contains s12
            '''
            min_batch_size = KBack.shape(y_true)[0]
            return KBack.reshape(
                KBack.sum(KBack.square(y_pred), axis=-1),
                [min_batch_size, 1]
            ) * y_true

        # Model
        self._model = Model(input=x_in, output=[z_diff1, z_diff2, y_diff])
        #print (self._model.summary())
        
        # print (self._model.get_layer('merge_1').input_shape)
        # print (self._model.get_layer('merge_1').output_shape)
        # print (self._model.get_layer('merge_2').input_shape)
        # print (self._model.get_layer('merge_2').output_shape)
        # print (self._model.get_layer('model_3').output_shape)
        sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
        # adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        self._model.compile(
            optimizer=sgd,
            loss=[weighted_mse_x, weighted_mse_x, weighted_mse_y],
            loss_weights=[1, 1, self._alpha]
        )

        self._model.fit_generator(
            generator=batch_generator_sdne(S, self._beta, self._n_batch, True),
            nb_epoch=self._num_iter,
            samples_per_epoch=S.nonzero()[0].shape[0] // self._n_batch,
            verbose=1
        )
        # Get embedding for all points
        self._Y = model_batch_predictor(self._autoencoder, S, self._n_batch)
        t2 = time()
        # Save the autoencoder and its weights
        if(self._weightfile is not None):
            saveweights(self._encoder, self._weightfile[0])
            saveweights(self._decoder, self._weightfile[1])
        if(self._modelfile is not None):
            savemodel(self._encoder, self._modelfile[0])
            savemodel(self._decoder, self._modelfile[1])
        if(self._savefilesuffix is not None):
            saveweights(
                self._encoder,
                'encoder_weights_' + self._savefilesuffix + '.hdf5'
            )
            saveweights(
                self._decoder,
                'decoder_weights_' + self._savefilesuffix + '.hdf5'
            )
            savemodel(
                self._encoder,
                'encoder_model_' + self._savefilesuffix + '.json'
            )
            savemodel(
                self._decoder,
                'decoder_model_' + self._savefilesuffix + '.json'
            )
            # Save the embedding
            np.savetxt('embedding_' + self._savefilesuffix + '.txt', self._Y)
        return self._Y, (t2 - t1)
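The weighted_mse_x / weighted_mse_y objectives above use a common Keras idiom: a custom loss only receives (y_true, y_pred), so any extra per-sample quantities (here the penalty matrix b and the node degree, presumably packed by batch_generator_sdne) are smuggled in through y_true and sliced back out inside the loss. Below is a minimal, self-contained sketch of that idiom with illustrative names that are not part of SDNE; it relies on Keras not enforcing a strict y_true/output shape match for user-defined losses, which holds for the Keras versions this code targets but is worth checking on yours.

import numpy as np
import keras.backend as K
from keras.layers import Input, Dense
from keras.models import Model

n = 4  # feature width (illustrative)

def weighted_mse(y_true, y_pred):
    # First n columns of y_true are the real targets, the last column is a
    # per-sample weight, mirroring the [b, deg] packing used above.
    target = y_true[:, :n]
    weight = y_true[:, n]
    return K.sum(K.square(y_pred - target), axis=-1) * weight

inp = Input(shape=(n,))
out = Dense(n)(inp)
toy = Model(inp, out)
toy.compile(optimizer='sgd', loss=weighted_mse)

x = np.random.rand(8, n).astype('float32')
targets = np.random.rand(8, n).astype('float32')
weights = np.full((8, 1), 2.0, dtype='float32')
toy.fit(x, np.hstack([targets, weights]), epochs=1, verbose=0)  # targets and weights ride in y_true together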
def _main(args):
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    weights_file = open(weights_path, 'rb')
    major, minor, revision = np.ndarray(
        shape=(3, ), dtype='int32', buffer=weights_file.read(12))
    if (major*10+minor)>=2 and major<1000 and minor<1000:
        seen = np.ndarray(shape=(1,), dtype='int64', buffer=weights_file.read(8))
    else:
        seen = np.ndarray(shape=(1,), dtype='int32', buffer=weights_file.read(4))
    print('Weights Header: ', major, minor, revision, seen)

    print('Parsing Darknet config.')
    unique_config_file = unique_config_sections(config_path)
    cfg_parser = configparser.ConfigParser()
    cfg_parser.read_file(unique_config_file)

    print('Creating Keras model.')
    input_layer = Input(shape=(None, None, 3))
    prev_layer = input_layer
    all_layers = []

    weight_decay = float(cfg_parser['net_0']['decay']
                         ) if 'net_0' in cfg_parser.sections() else 5e-4
    count = 0
    out_index = []
    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))
        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            padding = 'same' if pad == 1 and stride == 1 else 'valid'

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
            prev_layer_shape = K.int_shape(prev_layer)

            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            weights_size = np.product(weights_shape)

            print('conv2d', 'bn'
                  if batch_normalize else '  ', activation, weights_shape)

            conv_bias = np.ndarray(
                shape=(filters, ),
                dtype='float32',
                buffer=weights_file.read(filters * 4))
            count += filters

            if batch_normalize:
                bn_weights = np.ndarray(
                    shape=(3, filters),
                    dtype='float32',
                    buffer=weights_file.read(filters * 12))
                count += 3 * filters

                bn_weight_list = [
                    bn_weights[0],  # scale gamma
                    conv_bias,  # shift beta
                    bn_weights[1],  # running mean
                    bn_weights[2]  # running var
                ]

            conv_weights = np.ndarray(
                shape=darknet_w_shape,
                dtype='float32',
                buffer=weights_file.read(weights_size * 4))
            count += weights_size

            # DarkNet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # We would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
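            # e.g. a Darknet kernel stored as (filters=32, in_dim=16, 3, 3)
            # comes out as (3, 3, 16, 32), which is the layout Keras' Conv2D expects.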
            conv_weights = [conv_weights] if batch_normalize else [
                conv_weights, conv_bias
            ]

            # Handle activation.
            act_fn = None
            if activation == 'leaky':
                pass  # Add advanced activation later.
            elif activation != 'linear':
                raise ValueError(
                    'Unknown activation function `{}` in section {}'.format(
                        activation, section))

            # Create Conv2D layer
            if stride>1:
                # Darknet uses left and top padding instead of 'same' mode
                prev_layer = ZeroPadding2D(((1,0),(1,0)))(prev_layer)
            conv_layer = (Conv2D(
                filters, (size, size),
                strides=(stride, stride),
                kernel_regularizer=l2(weight_decay),
                use_bias=not batch_normalize,
                weights=conv_weights,
                activation=act_fn,
                padding=padding))(prev_layer)

            if batch_normalize:
                conv_layer = (BatchNormalization(
                    weights=bn_weight_list))(conv_layer)
            prev_layer = conv_layer

            if activation == 'linear':
                all_layers.append(prev_layer)
            elif activation == 'leaky':
                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                prev_layer = act_layer
                all_layers.append(act_layer)

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            layers = [all_layers[i] for i in ids]
            if len(layers) > 1:
                print('Concatenating route layers:', layers)
                concatenate_layer = Concatenate()(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(
                    pool_size=(size, size),
                    strides=(stride, stride),
                    padding='same')(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('shortcut'):
            index = int(cfg_parser[section]['from'])
            activation = cfg_parser[section]['activation']
            assert activation == 'linear', 'Only linear activation supported.'
            all_layers.append(Add()([all_layers[index], prev_layer]))
            prev_layer = all_layers[-1]

        elif section.startswith('upsample'):
            stride = int(cfg_parser[section]['stride'])
            assert stride == 2, 'Only stride=2 supported.'
            all_layers.append(UpSampling2D(stride)(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('yolo'):
            out_index.append(len(all_layers)-1)
            all_layers.append(None)
            prev_layer = all_layers[-1]

        elif section.startswith('net'):
            pass

        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    # Create and save model.
    if len(out_index)==0: out_index.append(len(all_layers)-1)
    model = Model(inputs=input_layer, outputs=[all_layers[i] for i in out_index])
    print(model.summary())
    if args.weights_only:
        model.save_weights('{}'.format(output_path))
        print('Saved Keras weights to {}'.format(output_path))
    else:
        model.save('{}'.format(output_path))
        print('Saved Keras model to {}'.format(output_path))

    # Check to see if all weights have been read.
    remaining_weights = len(weights_file.read()) / 4
    weights_file.close()
    print('Read {} of {} from Darknet weights.'.format(count, count +
                                                       remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))
Example #22
    def get_unet(self):
        inputs = Input((self.img_rows, self.img_cols, 1))
        # Network architecture definition
        '''
		#unet with crop(because padding = valid) 

		conv1 = Conv2D(64, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(inputs)
		print "conv1 shape:",conv1.shape
		conv1 = Conv2D(64, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(conv1)
		print "conv1 shape:",conv1.shape
		crop1 = Cropping2D(cropping=((90,90),(90,90)))(conv1)
		print "crop1 shape:",crop1.shape
		pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
		print "pool1 shape:",pool1.shape

		conv2 = Conv2D(128, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(pool1)
		print "conv2 shape:",conv2.shape
		conv2 = Conv2D(128, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(conv2)
		print "conv2 shape:",conv2.shape
		crop2 = Cropping2D(cropping=((41,41),(41,41)))(conv2)
		print "crop2 shape:",crop2.shape
		pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
		print "pool2 shape:",pool2.shape

		conv3 = Conv2D(256, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(pool2)
		print "conv3 shape:",conv3.shape
		conv3 = Conv2D(256, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(conv3)
		print "conv3 shape:",conv3.shape
		crop3 = Cropping2D(cropping=((16,17),(16,17)))(conv3)
		print "crop3 shape:",crop3.shape
		pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
		print "pool3 shape:",pool3.shape

		conv4 = Conv2D(512, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(pool3)
		conv4 = Conv2D(512, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(conv4)
		drop4 = Dropout(0.5)(conv4)
		crop4 = Cropping2D(cropping=((4,4),(4,4)))(drop4)
		pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

		conv5 = Conv2D(1024, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(pool4)
		conv5 = Conv2D(1024, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(conv5)
		drop5 = Dropout(0.5)(conv5)

		up6 = Conv2D(512, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(drop5))
		merge6 = merge([crop4,up6], mode = 'concat', concat_axis = 3)
		conv6 = Conv2D(512, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(merge6)
		conv6 = Conv2D(512, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(conv6)

		up7 = Conv2D(256, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv6))
		merge7 = merge([crop3,up7], mode = 'concat', concat_axis = 3)
		conv7 = Conv2D(256, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(merge7)
		conv7 = Conv2D(256, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(conv7)

		up8 = Conv2D(128, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv7))
		merge8 = merge([crop2,up8], mode = 'concat', concat_axis = 3)
		conv8 = Conv2D(128, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(merge8)
		conv8 = Conv2D(128, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(conv8)

		up9 = Conv2D(64, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(conv8))
		merge9 = merge([crop1,up9], mode = 'concat', concat_axis = 3)
		conv9 = Conv2D(64, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(merge9)
		conv9 = Conv2D(64, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(conv9)
		conv9 = Conv2D(2, 3, activation = 'relu', padding = 'valid', kernel_initializer = 'he_normal')(conv9)
		'''

        conv1 = Conv2D(64,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(inputs)
        print("conv1 shape:", conv1.shape)
        conv1 = Conv2D(64,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(conv1)
        print("conv1 shape:", conv1.shape)
        pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
        print("pool1 shape:", pool1.shape)

        conv2 = Conv2D(128,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(pool1)
        print("conv2 shape:", conv2.shape)
        conv2 = Conv2D(128,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(conv2)
        print("conv2 shape:", conv2.shape)
        pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
        print("pool2 shape:", pool2.shape)

        conv3 = Conv2D(256,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(pool2)
        print("conv3 shape:", conv3.shape)
        conv3 = Conv2D(256,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(conv3)
        print("conv3 shape:", conv3.shape)
        pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
        print("pool3 shape:", pool3.shape)

        conv4 = Conv2D(512,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(pool3)
        conv4 = Conv2D(512,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(conv4)
        drop4 = Dropout(0.5)(conv4)
        pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

        conv5 = Conv2D(1024,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(pool4)
        conv5 = Conv2D(1024,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(conv5)
        drop5 = Dropout(0.5)(conv5)

        up6 = Conv2D(512,
                     2,
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal')(
                         UpSampling2D(size=(2, 2))(drop5))
        merge6 = Concatenate(axis=3)([drop4, up6])
        conv6 = Conv2D(512,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(merge6)
        conv6 = Conv2D(512,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(conv6)

        up7 = Conv2D(256,
                     2,
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal')(
                         UpSampling2D(size=(2, 2))(conv6))
        merge7 = Concatenate(axis=3)([conv3, up7])
        conv7 = Conv2D(256,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(merge7)
        conv7 = Conv2D(256,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(conv7)

        up8 = Conv2D(128,
                     2,
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal')(
                         UpSampling2D(size=(2, 2))(conv7))
        merge8 = Concatenate(axis=3)([conv2, up8])
        conv8 = Conv2D(128,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(merge8)
        conv8 = Conv2D(128,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(conv8)

        up9 = Conv2D(64,
                     2,
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal')(
                         UpSampling2D(size=(2, 2))(conv8))
        merge9 = Concatenate(axis=3)([conv1, up9])
        conv9 = Conv2D(64,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(merge9)
        conv9 = Conv2D(64,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(conv9)
        conv9 = Conv2D(2,
                       3,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(conv9)
        conv10 = Conv2D(1, 1, activation='sigmoid')(conv9)

        model = Model(input=inputs, output=conv10)
        model.compile(optimizer=Adam(lr=1e-4),
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        return model
Example #23
                            WORD_EMBEDDING_LENGTH,
                            border_mode='valid',
                            activation='relu',
                            name='hate_anger_conv_4')(reshape)
    maxpool_4 = MaxPooling2D(pool_size=(INPUT_WORDS - 4 + 1, 1),
                             name='hate_anger_pooling_4')(ngram_4)
    ngram_5 = Convolution2D(50,
                            5,
                            WORD_EMBEDDING_LENGTH,
                            border_mode='valid',
                            activation='relu',
                            name='hate_anger_conv_5')(reshape)
    maxpool_5 = MaxPooling2D(pool_size=(INPUT_WORDS - 5 + 1, 1),
                             name='hate_anger_pooling_5')(ngram_5)
    #1 branch again
    merged = Concatenate(axis=2)([maxpool_2, maxpool_3, maxpool_4, maxpool_5])
    flatten = Flatten(name='hate_anger_flatten')(merged)
    #    batch_norm = BatchNormalization()(flatten)
    dense_1 = Dense(256, activation='relu', name='hate_anger_dense_1')(flatten)
    drop_1 = Dropout(0.8, name='hate_anger_drop_1')(dense_1)
    dense_2 = Dense(256, activation='relu', name='hate_anger_dense_2')(drop_1)
    drop_2 = Dropout(0.8, name='hate_anger_drop_2')(dense_2)
    output_irony = Dense(3, activation='softmax', name='main_output')(drop_2)
    model = Model(input=[input_words], output=[output_irony])
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', 'mse', 'mae'])
    print('Model built')
    print(model.summary())
    sys.stdout.flush()
Example #24
actor.add(Flatten(input_shape=(1, ) + observation_shape))
actor.add(Dense(400))
actor.add(Activation('relu'))
actor.add(Dense(300))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('tanh'))
print(actor.summary())

action_input = Input(shape=(nb_actions, ), name='action_input')
observation_input = Input(shape=(1, ) + observation_shape,
                          name='observation_input')
flattened_observation = Flatten()(observation_input)
x = Dense(400)(flattened_observation)
x = Activation('relu')(x)
x = Concatenate()([x, action_input])
x = Dense(300)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                          theta=.15,
                                          mu=0.,
                                          sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions,
def build_full_contour_integration_model(weights_file=None,
                                         rf_size=35,
                                         inner_leaky_relu_alpha=0.9,
                                         outer_leaky_relu_alpha=1.,
                                         l1_reg_loss_weight=0.0005):
    """

    Build the full contour integration AlexNet model.
    Note: [1] The model needs to be compiled before use.
          [2] The names of the layers after the contour integration layer are changed from AlexNet
              so that AlexNet weights can be loaded safely.

    :param weights_file:
    :param rf_size:
    :param inner_leaky_relu_alpha:
    :param outer_leaky_relu_alpha:
    :param l1_reg_loss_weight:
    :return:
    """
    input_layer = Input(shape=(3, 227, 227))

    conv_1 = Conv2D(96, (11, 11),
                    strides=(4, 4),
                    activation='relu',
                    name='conv_1')(input_layer)

    contour_integrate_layer = ContourIntegrationLayer3D(
        tgt_filt_idx=0,  # not important for full model
        rf_size=rf_size,
        inner_leaky_relu_alpha=inner_leaky_relu_alpha,
        outer_leaky_relu_alpha=outer_leaky_relu_alpha,
        l1_reg_loss_weight=l1_reg_loss_weight,
        name='contour_integration_layer')(conv_1)

    conv_2 = MaxPooling2D((3, 3), strides=(2, 2))(contour_integrate_layer)
    conv_2 = alex_net.crosschannelnormalization(
        name='Contrast_Normalization')(conv_2)
    conv_2 = ZeroPadding2D((2, 2))(conv_2)

    conv_2_1 = Conv2D(128, (5, 5), activation='relu', name='conv_22_1') \
        (alex_net.splittensor(ratio_split=2, id_split=0)(conv_2))
    conv_2_2 = Conv2D(128, (5, 5), activation='relu', name='conv_22_2') \
        (alex_net.splittensor(ratio_split=2, id_split=1)(conv_2))
    conv_2 = Concatenate(axis=1, name='conv_22')([conv_2_1, conv_2_2])

    conv_3 = MaxPooling2D((3, 3), strides=(2, 2))(conv_2)
    conv_3 = alex_net.crosschannelnormalization()(conv_3)
    conv_3 = ZeroPadding2D((1, 1))(conv_3)
    conv_3 = Conv2D(384, (3, 3), activation='relu', name='conv_33')(conv_3)

    conv_4 = ZeroPadding2D((1, 1))(conv_3)
    conv_4_1 = Conv2D(192, (3, 3), activation='relu', name='conv_44_1') \
        (alex_net.splittensor(ratio_split=2, id_split=0)(conv_4))
    conv_4_2 = Conv2D(192, (3, 3), activation='relu', name='conv_44_2') \
        (alex_net.splittensor(ratio_split=2, id_split=1)(conv_4))
    conv_4 = Concatenate(axis=1, name='conv_44')([conv_4_1, conv_4_2])

    conv_5 = ZeroPadding2D((1, 1))(conv_4)
    conv_5_1 = Conv2D(128, (3, 3), activation='relu', name='conv_55_1') \
        (alex_net.splittensor(ratio_split=2, id_split=0)(conv_5))
    conv_5_2 = Conv2D(128, (3, 3), activation='relu', name='conv_55_2') \
        (alex_net.splittensor(ratio_split=2, id_split=1)(conv_5))
    conv_5 = Concatenate(axis=1, name='conv_55')([conv_5_1, conv_5_2])

    dense_1 = MaxPooling2D((3, 3), strides=(2, 2), name='convpool_5')(conv_5)
    dense_1 = Flatten(name='flatten')(dense_1)
    dense_1 = Dense(4096, activation='relu', name='dense_11')(dense_1)

    dense_2 = Dropout(0.5)(dense_1)
    dense_2 = Dense(4096, activation='relu', name='dense_22')(dense_2)

    dense_3 = Dropout(0.5)(dense_2)
    dense_3 = Dense(1000, name='dense_33')(dense_3)
    prediction = Activation('softmax', name='softmax')(dense_3)

    model = Model(inputs=input_layer, outputs=prediction)

    if weights_file:
        model.load_weights(weights_file)

    return model
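Per the docstring note, the returned model still has to be compiled before use. A minimal hedged example follows; the optimizer, loss and metrics are illustrative choices rather than values from the source project, and ContourIntegrationLayer3D / alex_net must be importable as in the original code base.

model = build_full_contour_integration_model(weights_file=None, rf_size=35)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])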
Example #26
               output_shape=output_shape_words)(I)
tags = Lambda(function=lambda x: x[:, 20:20 + 20],
              output_shape=output_shape_tags)(I)
labels = Lambda(function=lambda x: x[:, 40:40 + 41],
                output_shape=output_shape_labels)(I)

embedding_words = Reshape(target_shape=(20 * 64, ))(Embedding(
    input_dim=len(index_of_words),
    output_dim=64,
    input_length=20,
)(words))
embedding_tags = Reshape(target_shape=(32 * 20, ))(Embedding(
    input_dim=len(index_of_pos), output_dim=32, input_length=20)(tags))
embedding_labels = Reshape(target_shape=(32 * 12, ))(Embedding(
    input_dim=len(index_of_labels), output_dim=32, input_length=12)(labels))
embeddings = Concatenate(axis=1)(
    [embedding_words, embedding_tags, embedding_labels])

h1 = Dense(units=500, activation='relu')(embeddings)
h1_do = Dropout(0.3)(h1)
h2 = Dense(units=500, activation='relu')(h1_do)
h2_do = Dropout(0.3)(h2)
q = Dense(units=93, activation='softmax')(h2_do)
model = Model(inputs=[I], outputs=[q])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
print(model.summary())
model.fit(train_data, train_labels, epochs=15, batch_size=1000)
model.save(filepath='./models/model3')
Example #27
def make_megnet_model(nfeat_edge=None,
                      nfeat_global=None,
                      nfeat_node=None,
                      nblocks=3,
                      n1=64,
                      n2=32,
                      n3=16,
                      nvocal=95,
                      embedding_dim=16,
                      nbvocal=None,
                      bond_embedding_dim=None,
                      ngvocal=None,
                      global_embedding_dim=None,
                      npass=3,
                      ntarget=1,
                      act=softplus2,
                      is_classification=False,
                      l2_coef=None,
                      dropout=None,
                      dropout_on_predict=False):
    """Make a MEGNet Model

    Args:
        nfeat_edge: (int) number of bond features
        nfeat_global: (int) number of state features
        nfeat_node: (int) number of atom features
        nblocks: (int) number of MEGNetLayer blocks
        n1: (int) number of hidden units in layer 1 in MEGNetLayer
        n2: (int) number of hidden units in layer 2 in MEGNetLayer
        n3: (int) number of hidden units in layer 3 in MEGNetLayer
        nvocal: (int) number of total element
        embedding_dim: (int) number of embedding dimension
        nbvocal: (int) number of bond types if bond attributes are types
        bond_embedding_dim: (int) number of bond embedding dimension
        ngvocal: (int) number of global types if global attributes are types
        global_embedding_dim: (int) number of global embedding dimension
        npass: (int) number of recurrent steps in Set2Set layer
        ntarget: (int) number of output targets
        act: (object) activation function
        l2_coef: (float or None) l2 regularization parameter
        is_classification: (bool) whether it is a classification task
        dropout: (float) dropout rate
        dropout_on_predict (bool): Whether to use dropout during prediction and training
    Returns:
        (Model) Keras model, ready to run
    """

    # Get the setting for the training kwarg of Dropout
    dropout_training = True if dropout_on_predict else None

    # Create the input blocks
    int32 = 'int32'
    if nfeat_node is None:
        x1 = Input(shape=(None, ), dtype=int32)  # only z as feature
        x1_ = Embedding(nvocal, embedding_dim)(x1)
    else:
        x1 = Input(shape=(None, nfeat_node))
        x1_ = x1
    if nfeat_edge is None:
        x2 = Input(shape=(None, ), dtype=int32)
        x2_ = Embedding(nbvocal, bond_embedding_dim)(x2)
    else:
        x2 = Input(shape=(None, nfeat_edge))
        x2_ = x2
    if nfeat_global is None:
        x3 = Input(shape=(None, ), dtype=int32)
        x3_ = Embedding(ngvocal, global_embedding_dim)(x3)
    else:
        x3 = Input(shape=(None, nfeat_global))
        x3_ = x3
    x4 = Input(shape=(None, ), dtype=int32)
    x5 = Input(shape=(None, ), dtype=int32)
    x6 = Input(shape=(None, ), dtype=int32)
    x7 = Input(shape=(None, ), dtype=int32)
    if l2_coef is not None:
        reg = l2(l2_coef)
    else:
        reg = None

    # two feedforward layers
    def ff(x, n_hiddens=[n1, n2]):
        out = x
        for i in n_hiddens:
            out = Dense(i, activation=act, kernel_regularizer=reg)(out)
        return out

    # a block corresponds to two feedforward layers + one MEGNetLayer layer
    # Note the first block does not contain the feedforward layer since
    # it will be explicitly added before the block
    def one_block(a, b, c, has_ff=True):
        if has_ff:
            x1_ = ff(a)
            x2_ = ff(b)
            x3_ = ff(c)
        else:
            x1_ = a
            x2_ = b
            x3_ = c
        out = MEGNetLayer(
            [n1, n1, n2], [n1, n1, n2], [n1, n1, n2],
            pool_method='mean',
            activation=act,
            kernel_regularizer=reg)([x1_, x2_, x3_, x4, x5, x6, x7])

        x1_temp = out[0]
        x2_temp = out[1]
        x3_temp = out[2]
        if dropout:
            x1_temp = Dropout(dropout)(x1_temp, training=dropout_training)
            x2_temp = Dropout(dropout)(x2_temp, training=dropout_training)
            x3_temp = Dropout(dropout)(x3_temp, training=dropout_training)
        return x1_temp, x2_temp, x3_temp

    x1_ = ff(x1_)
    x2_ = ff(x2_)
    x3_ = ff(x3_)
    for i in range(nblocks):
        if i == 0:
            has_ff = False
        else:
            has_ff = True
        x1_1 = x1_
        x2_1 = x2_
        x3_1 = x3_
        x1_1, x2_1, x3_1 = one_block(x1_1, x2_1, x3_1, has_ff)
        # skip connection
        x1_ = Add()([x1_, x1_1])
        x2_ = Add()([x2_, x2_1])
        x3_ = Add()([x3_, x3_1])
    # set2set for both the atom and bond
    node_vec = Set2Set(T=npass, n_hidden=n3, kernel_regularizer=reg)([x1_, x6])
    edge_vec = Set2Set(T=npass, n_hidden=n3, kernel_regularizer=reg)([x2_, x7])
    # concatenate atom, bond, and global
    final_vec = Concatenate(axis=-1)([node_vec, edge_vec, x3_])
    if dropout:
        final_vec = Dropout(dropout)(final_vec, training=dropout_training)
    # final dense layers
    final_vec = Dense(n2, activation=act, kernel_regularizer=reg)(final_vec)
    final_vec = Dense(n3, activation=act, kernel_regularizer=reg)(final_vec)
    if is_classification:
        final_act = 'sigmoid'
    else:
        final_act = None
    out = Dense(ntarget, activation=final_act)(final_vec)
    model = Model(inputs=[x1, x2, x3, x4, x5, x6, x7], outputs=out)
    return model
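A hedged call sketch based only on the signature and docstring above; the feature sizes are placeholders, and MEGNetLayer, Set2Set and softplus2 come from the original MEGNet code base.

model = make_megnet_model(nfeat_edge=100, nfeat_global=2, nfeat_node=None,
                          nblocks=3, npass=3, ntarget=1)
model.compile(optimizer='adam', loss='mse')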
Example #28
def create_model(
    inshape: Tuple[int, int, int],
    n_classes: int,
    output_activation: str,
    block_structure: Tuple[int, ...] = config_sections.DEFAULT_BLOCK_STRUCTURE,
    filters: int = config_sections.DEFAULT_FILTERS,
    internal_activation: str = config_sections.DEFAULT_INTERNAL_ACTIVATION,
    kernel_size: Tuple[int, int] = config_sections.DEFAULT_KERNEL_SIZE,
    padding: str = config_sections.DEFAULT_PADDING,
    pool_size: Tuple[int, int] = config_sections.DEFAULT_POOL_SIZE,
    use_batch_norm: bool = config_sections.DEFAULT_USE_BATCH_NORM,
    use_growth: bool = config_sections.DEFAULT_USE_GROWTH,
    use_initial_colorspace_transformation_layer: bool = config_sections.
    DEFAULT_USE_INITIAL_COLORSPACE_TRANSFORMATION_LAYER,
) -> keras.models.Model:

    conv2d_options = {
        "filters": filters,
        "kernel_size": kernel_size,
        "padding": padding,
        "activation": internal_activation,
        "use_batch_norm": use_batch_norm,
    }

    transition_options = conv2d_options.copy()
    transition_options["kernel_size"] = (1, 1)

    # Initial convolution
    inlayer = keras.layers.Input(shape=inshape)
    encoder = inlayer

    # Optional colorspace transformation (not in block format)
    if use_initial_colorspace_transformation_layer:
        encoder = network_sections.colorspace_transformation(
            inshape, encoder, use_batch_norm)

    # Encoding, block-wise
    passthrough_layers = list()
    for num_layers in block_structure:

        # Create a dense block
        encoder = network_sections.dense_2d_block(encoder, conv2d_options,
                                                  num_layers)
        passthrough_layers.append(encoder)

        # Add a transition block
        encoder = network_sections.Conv2D_Options(encoder, transition_options)

        # Pool
        encoder = MaxPooling2D(pool_size=pool_size)(encoder)

        if use_growth:
            conv2d_options["filters"] *= 2
            transition_options["filters"] *= 2

    # Encoder/Decoder Transition Block
    transition = network_sections.dense_2d_block(encoder, conv2d_options,
                                                 block_structure[-1])

    decoder = transition
    # Decoding, block-wise
    for num_layers, layer_passed_through in zip(reversed(block_structure),
                                                reversed(passthrough_layers)):

        if use_growth:
            conv2d_options["filters"] = int(conv2d_options["filters"] / 2)
            transition_options["filters"] = int(transition_options["filters"] /
                                                2)

        # Upsample
        decoder = UpSampling2D(size=pool_size,
                               interpolation="bilinear")(decoder)

        # Create dense block and concatenate
        decoder = network_sections.Conv2D_Options(decoder, conv2d_options)
        decoder = Concatenate()([layer_passed_through, decoder])

        # Add a transition block
        decoder = network_sections.dense_2d_block(decoder, transition_options,
                                                  num_layers)

    # Output convolutions
    output_layer = decoder
    output_layer = network_sections.Conv2D_Options(output_layer,
                                                   conv2d_options)
    output_layer = Conv2D(filters=n_classes,
                          kernel_size=(1, 1),
                          padding="same",
                          activation=output_activation)(output_layer)
    return keras.models.Model(inputs=[inlayer], outputs=[output_layer])
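A hedged call sketch for the dense U-Net factory above; the input shape and class count are placeholders, and the config_sections defaults and network_sections helpers belong to the original project.

model = create_model(inshape=(256, 256, 4), n_classes=2, output_activation='softmax')
model.compile(optimizer='adam', loss='categorical_crossentropy')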
Example #29
n_samples = 2
dx = 2
dy = 3
dout = 7
mask_value = -1

X = np.random.randint(5, size=(n_samples, dx, dy))
X[1, 0, :] = mask_value

inp = Input(shape=(dx, dy))
x = Masking(mask_value=-1.0)(inp)
lstm_fw = LSTM(dout, return_sequences=True, go_backwards=False)(x)
lstm_bw = LSTM(dout, return_sequences=True, go_backwards=True)(x)
concat = Concatenate(axis=-1)([lstm_fw, lstm_bw])
model_3 = Model(inputs=inp, outputs=concat)
model_3.summary()
model_3.set_weights(
    [np.ones(l.shape) * i for i, l in enumerate(model_3.get_weights(), 2)])
model_3.compile(optimizer="rmsprop", loss="mae")
y_true = np.ones((n_samples, dx, model_3.layers[-1].output_shape[-1]))
y_pred_3 = model_3.predict(X)
print(y_pred_3)
unmasked_loss = np.abs(1 - y_pred_3).mean()
masked_loss = np.abs(1 - y_pred_3[y_pred_3 != 0.0]).mean()
keras_loss = model_3.evaluate(X, y_true, verbose=0)
print(f"unmasked loss: {unmasked_loss}")
print(f"masked loss: {masked_loss}")
print(f"evaluate with Keras: {keras_loss}")
def neural_extractor1(data, categories, aspects, text_to_predict):

    for i in range(0, len(data)):
        flag = 0
        temp_sent = []
        text = data[i]

        words = nltk.word_tokenize(text)
        pos = []
        for word in nltk.pos_tag(words):
            parts_of_speech[word[1]] = 1
            pos.append(word[1])

        tags = ['O' for ff in range(0, len(words))]
        for aspect in aspects[i]:
            asp_words = nltk.word_tokenize(aspect.lower())

            j = 0
            k = 0
            # flag=0
            while (k < len(asp_words)):
                while (j < len(words)):
                    if (asp_words[k] == words[j] and tags[j] == 'O'):

                        if (k == 0):
                            tags[j] = 'B'
                        else:
                            tags[j] = 'I'
                        # if(flag==0):
                        # 	tags[j]='B'
                        # 	flag=1
                        # else:
                        # 	tags[j]='I'
                        k += 1
                        if (k >= len(asp_words)):
                            break
                    j += 1
                k += 1

        for ii in range(0, len(words)):
            temp_sent.append((words[ii], pos[ii], tags[ii]))
        sentences.append(temp_sent)
    print(len(sentences))

    for i in range(0, len(data)):
        tokens = nltk.word_tokenize(data[i])
        string = ' '.join(tokens)
        data[i] = string
    #data.append(' '.join(words_to_predict))
    #lll=len(data)-1
    data.append("ENDPAD")
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(data)
    sequences = tokenizer.texts_to_sequences(data)
    word_index = tokenizer.word_index

    X = pad_sequences(sequences[:-1],
                      maxlen=50,
                      padding="post",
                      value=word_index["endpad"])

    validation_size = int(0.2 * X.shape[0])
    #print(X_to_predict)

    n_words = len(word_index)

    tag_list = ['B', 'I', 'O', 'P']
    n_tags = len(tag_list)

    embedding_matrix = np.zeros((n_words, 300))

    for word, i in word_index.items():
        if (i >= len(word_index)):
            continue
        if word in glove_emb:
            embedding_matrix[i] = glove_emb[word]

    max_len = 50
    tag2idx = {t: i for i, t in enumerate(tag_list)}
    idx2word = {t: i for i, t in word_index.items()}
    pos2idx = {t: i for i, t in enumerate(parts_of_speech.keys())}

    y = [[tag2idx[w[2]] for w in s] for s in sentences]
    y = pad_sequences(maxlen=max_len,
                      sequences=y,
                      padding="post",
                      value=tag2idx["P"])
    y = [to_categorical(i, num_classes=n_tags) for i in y]

    pos = [[pos2idx[w[1]] for w in s] for s in sentences]
    pos1 = pad_sequences(maxlen=max_len,
                         sequences=pos,
                         padding="post",
                         value=len(parts_of_speech.keys()) + 1)

    pos = np.asarray([np.reshape(i, (max_len, 1)) for i in pos1])

    # indices=np.arange(X.shape[0])
    # np.random.shuffle(indices)
    # X=X[indices]
    # y=y[indices]
    #validation_size=int(0.2*X.shape[0])

    X_tr = X[:-validation_size]
    tr_pos = pos[:-validation_size]
    y_tr = y[:-validation_size]
    X_te = X[-validation_size:]
    te_pos = pos[-validation_size:]
    y_te = y[-validation_size:]
    X_to_predict = X[-validation_size:]

    pos_to_predict = pos[-validation_size:]
    # X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.1)

    vocab_size = len(word_index)
    e = Input(shape=(max_len, ))
    emb = Embedding(vocab_size,
                    300,
                    weights=[embedding_matrix],
                    input_length=50,
                    mask_zero=True,
                    trainable=False)(e)
    ad_pos = Input(shape=(max_len, 1))
    co_tm = Concatenate()([emb] + [ad_pos])
    bi_gru = Bidirectional(GRU(50, return_sequences=True))(emb)
    out = Dense(25, activation='relu')(bi_gru)
    # out=Dropout(0.1)(out)
    out = TimeDistributed(Dense(n_tags, activation='softmax'))(out)
    model = Model(inputs=[e, ad_pos], outputs=[out])
    model.compile(loss='categorical_crossentropy',
                  optimizer="rmsprop",
                  metrics=['accuracy'])

    model.fit([X, pos],
              np.array(y),
              batch_size=25,
              epochs=20,
              validation_data=([X_te, te_pos], np.array(y_te)),
              verbose=1)
    pos_tp = np.asarray([np.asarray([1 for i in range(0, 50)]).reshape(50, 1)])
    #model=load_model('aspect_extractor.h5')
    #with open('aspect.json') as ff:
    #	model_json=ff.read()
    #	model=keras.models.model_from_json(model_json)
    #model.compile(loss='categorical_crossentropy',optimizer="rmsprop",metrics=['accuracy'])
    #model.load_weights('aspect_weights.h5')
    #model.fit([X], np.array(y), batch_size=25, epochs=15, validation_data=([X_te],np.array(y_te)), verbose=0)
    #print(X_to_predict,X_to_predict.shape)
    p1 = model.predict([X_to_predict, pos_to_predict])
    #p1=model.predict([X_to_predict])
    #print(p1)
    pred_aspects = []
    for i in range(0, len(p1)):
        p = np.argmax(p1[i], axis=-1)
        temp1 = []
        flag = 0
        string1 = ""
        for j in range(0, len(p)):
            #print(idx2word[X_to_predict[i][j]],tag_list[p[j]])
            if (idx2word[X_to_predict[i][j]] == "endpad"):
                break
            if (tag_list[p[j]] == 'B'):
                string1 += idx2word[X_to_predict[i][j]] + " "
                if (flag == 0):
                    flag = 1
            elif (tag_list[p[j]] == 'I'):
                string1 += idx2word[X_to_predict[i][j]] + " "
            elif (tag_list[p[j]] == 'O'):
                if (string1 != ""):
                    temp1.append(string1)
                string1 = ""
                flag = 0
        pred_aspects.append(temp1)

    #print(pred_aspects)
    return pred_aspects

    # print(aspects[:-validation_size][69])

    # for i in range(0,20):
    # 	print(aspects[i],pred_aspects[i])

    # p=np.argmax(p,axis=-1)
    # true_p=np.argmax(y_tr[69],axis=-1)

    # for i in range(0,len(p)):
    # 	print(true_p[i],p[i])

    #for w, pred in zip(X_to_predict[0], p1[0]):
    #	print(idx2word[w], tag_list[pred])
    dataset_preprocessor = datasets.flickr.PreProcessing(cfg)

    MAX_LEN = 40
    EMBEDDING_DIM = 300
    IMAGE_ENC_DIM = 300
    vocab_size = get_line_count(os.path.join(cfg["workspace"]["directory"], "word_dictionary.txt"))

    img_input = Input(shape=(2048,))
    img_enc = Dense(300, activation="relu")(img_input)
    images = RepeatVector(MAX_LEN)(img_enc)

    # Text input
    text_input = Input(shape=(MAX_LEN,))
    embedding = Embedding(vocab_size, EMBEDDING_DIM, input_length=MAX_LEN)(text_input)
    x = Concatenate()([images, embedding])
    y = Bidirectional(LSTM(256, return_sequences=False))(x)
    pred = Dense(vocab_size, activation='softmax')(y)
    model = Model(inputs=[img_input, text_input], outputs=pred)
    model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])

    model.summary()

    training_generator, validation_generator, test_generator = dataset_preprocessor.get_keras_generators("inception")

    model.fit_generator(generator=training_generator, validation_data=validation_generator, epochs=1)

    model.save_weights(os.path.join(cfg["workspace"]["directory"], cfg["model"]["arch"]+"_model.h5"))
    print("Saved model to disk")
    
    model.load_weights(os.path.join(cfg["workspace"]["directory"], cfg["model"]["arch"]+"_model.h5"))
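    # A hedged greedy-decoding sketch (not shown in the original snippet) for producing a
    # caption from one encoded image. The start/end token indices, the idx2word lookup built
    # from word_dictionary.txt, and the left-aligned zero-padding are assumptions about how
    # the vocabulary and generators are laid out.
    import numpy as np

    def greedy_caption(img_features, idx2word, start_idx, end_idx):
        # img_features: array of shape (1, 2048) from the Inception encoder
        caption = [start_idx]
        for _ in range(MAX_LEN - 1):
            seq = np.zeros((1, MAX_LEN))
            seq[0, :len(caption)] = caption           # left-aligned, zero-padded partial caption
            probs = model.predict([img_features, seq])
            next_idx = int(np.argmax(probs[0]))       # most probable next word
            if next_idx == end_idx:
                break
            caption.append(next_idx)
        return ' '.join(idx2word.get(i, '') for i in caption[1:])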
    def __init__(self, axis, **kwargs):
        kwargs.pop(str('config'))
        Concatenate.__init__(self, axis, **kwargs)
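# A hedged reconstruction of the class around the __init__ above (only the constructor
# appears in the original snippet): a thin Concatenate subclass, given the hypothetical
# name LegacyConcatenate here, that silently drops a stray 'config' keyword so that
# models serialized with an older layer signature can still be rebuilt.
from keras.layers import Concatenate

class LegacyConcatenate(Concatenate):
    def __init__(self, axis, **kwargs):
        kwargs.pop('config', None)                  # discard the legacy key if present
        Concatenate.__init__(self, axis, **kwargs)

# e.g. keras.models.load_model('old_model.h5', custom_objects={'Concatenate': LegacyConcatenate})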
    def train(self):
        #######################################################
        ### DESIGN MODEL FOR TRAINING
        #######################################################
        print('> Design Model for Training')
        ###########
        ### Encoder
        embedding_layer = Embedding(
            params['LEN_WORD2IDX_INPUTS'] + 1,
            params['EMBEDDING_DIM'],
            #weights=[embedding_matrix],
            input_length=params['MAX_LEN_INPUT'],
            # trainable=True
        )
        encoder_inputs_placeholder = Input(shape=(params['MAX_LEN_INPUT'], ))
        x = embedding_layer(encoder_inputs_placeholder)
        encoder = Bidirectional(
            LSTM(
                params['LATENT_DIM'],
                return_sequences=True,
                # dropout=0.5 # dropout not available on gpu
            ))
        encoder_outputs = encoder(x)

        ###########
        ### Decoder
        # this word embedding will not use pre-trained vectors, although you could
        decoder_embedding = Embedding(params['LEN_WORD2IDX_OUTPUTS'] + 1,
                                      params['EMBEDDING_DIM'])
        decoder_inputs_placeholder = Input(
            shape=(params['MAX_LEN_TARGET'], ))  # teacher forcing input
        decoder_inputs_x = decoder_embedding(decoder_inputs_placeholder)

        #############
        ### Attention
        # Attention layers need to be global because they will be reused Ty times at the decoder
        attn_repeat_layer = RepeatVector(params['MAX_LEN_INPUT'])
        attn_concat_layer = Concatenate(axis=-1)
        attn_dense1 = Dense(10, activation='tanh')
        attn_dense2 = Dense(1, activation=self._softmax_over_time)
        attn_dot = Dot(
            axes=1)  # to perform the weighted sum of alpha[t] * h[t]

        def _one_step_attention(h, st_1):
            # h = h(1), ..., h(Tx), shape = (Tx, LATENT_DIM * 2)
            # st_1 = s(t-1), shape = (LATENT_DIM_DECODER,)

            # copy s(t-1) Tx times
            # now shape = (Tx, LATENT_DIM_DECODER)
            st_1 = attn_repeat_layer(st_1)

            # Concatenate all h(t)'s with s(t-1)
            # Now of shape (Tx, LATENT_DIM_DECODER + LATENT_DIM * 2)
            x = attn_concat_layer([h, st_1])

            # Neural net first layer
            x = attn_dense1(x)

            # Neural net second layer with special softmax over time
            alphas = attn_dense2(x)

            # "Dot" the alphas and the h's
            # Remember a.dot(b) = sum over a[t] * b[t]
            context = attn_dot([alphas, h])

            return context

        # define the rest of the decoder (after attention)
        decoder_lstm = LSTM(params['LATENT_DIM_DECODER'], return_state=True)
        decoder_dense = Dense(params['LEN_WORD2IDX_OUTPUTS'] + 1,
                              activation='softmax')

        initial_s = Input(shape=(params['LATENT_DIM_DECODER'], ), name='s0')
        initial_c = Input(shape=(params['LATENT_DIM_DECODER'], ), name='c0')
        context_last_word_concat_layer = Concatenate(
            axis=2)  # for teacher forcing

        # Unlike previous seq2seq, we cannot get the output all in one step
        # Instead we need to do Ty steps, and in each of those steps we need to consider all Tx h's

        # s, c will be re-assigned in each iteration of the loop
        s = initial_s
        c = initial_c

        # collect outputs in a list at first
        outputs = []
        # An LSTM normally runs a loop like this internally; here we unroll it manually so we can compute the attention context at every step.
        for t in range(params['MAX_LEN_TARGET']):  # Ty times

            ######################################################
            ## `one_step_attention` function !
            # get the context using attention
            context = _one_step_attention(encoder_outputs, s)

            # we need a different layer for each time step
            selector = Lambda(
                lambda x: x[:, t:t + 1]
            )  # extract just this time step's vector; a Lambda is used because everything must be expressed as a layer
            xt = selector(decoder_inputs_x)

            # combine
            decoder_lstm_input = context_last_word_concat_layer([context, xt])

            # pass the combined [context, last word] into the LSTM
            # along with [s, c]
            # get the new [s, c] and output
            o, s, c = decoder_lstm(decoder_lstm_input, initial_state=[s, c])

            # final dense layer to get next word prediction
            decoder_outputs = decoder_dense(o)
            outputs.append(decoder_outputs)

        # make it a layer
        stacker = Lambda(self._stack_and_transpose)
        outputs = stacker(outputs)

        #########
        ### Model
        self.model = Model(inputs=[
            encoder_inputs_placeholder,
            decoder_inputs_placeholder,
            initial_s,
            initial_c,
        ],
                           outputs=outputs)

        # compile the model
        self.model.compile(optimizer='rmsprop',
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])

        #######################################################
        ### TRAINING
        #######################################################
        print('> Train Model Start...')

        # create targets, since we cannot use sparse
        # categorical cross entropy when we have sequences
        decoder_targets_one_hot = np.zeros(
            (params['LEN_INPUT_TEXTS'], params['MAX_LEN_TARGET'],
             params['LEN_WORD2IDX_OUTPUTS'] + 1),
            dtype='float32')

        # assign the values
        for i, d in enumerate(self.decoder_targets):
            for t, word in enumerate(d):
                decoder_targets_one_hot[i, t, word] = 1

        # train the model
        z = np.zeros((params['LEN_INPUT_TEXTS'],
                      params['LATENT_DIM_DECODER']))  # initial [s, c]
        r = self.model.fit(
            [self.encoder_inputs, self.decoder_inputs, z, z],
            decoder_targets_one_hot,
            batch_size=params['BATCH_SIZE'],
            epochs=params['EPOCHS'],
            validation_split=0.15,
            callbacks=[EarlyStopping(monitor='val_loss',
                                     patience=10)]  # early stopping
        )

        # save trained model's weights
        #model_json = model.to_json()
        #with open(cur_path + "/test_model.json", "w") as json_f:
        #    json_f.write(model_json)
        self.model.save_weights(CUR_PATH + '/resources/' + YEARMONTHDAY +
                                "_model.h5")
        log(">> Saved model's weight")
        # plot and save the loss curves
        plt.figure()
        plt.plot(r.history['loss'], label='loss')
        plt.plot(r.history['val_loss'], label='val_loss')
        plt.legend()
        plt.savefig(CUR_PATH + '/resources/' + 'loss_plot.png')
        # plot and save the accuracy curves
        plt.figure()
        plt.plot(r.history['acc'], label='acc')
        plt.plot(r.history['val_acc'], label='val_acc')
        plt.legend()
        plt.savefig(CUR_PATH + '/resources/' + 'acc_plot.png')

        #######################################################
        ### DESIGN MODEL FOR PREDICTION
        #######################################################
        log('> Design Model for Prediction')

        ##### Make predictions #####
        # As with the poetry example, we need to create another model
        # that can take in the RNN state and previous word as input
        # and accept a T=1 sequence.
        #encoder_inputs_placeholder = Input(shape=(params['MAX_LEN_INPUT'],))

        # The encoder will be stand-alone
        # From this we will get our initial decoder hidden state
        # i.e. h(1), ..., h(Tx)
        self.encoder_model = Model(encoder_inputs_placeholder, encoder_outputs)

        # next we define a T=1 decoder model
        encoder_outputs_as_input = Input(shape=(
            params['MAX_LEN_INPUT'],
            params['LATENT_DIM'] * 2,
        ))
        decoder_inputs_single = Input(shape=(1, ))
        decoder_inputs_single_x = decoder_embedding(decoder_inputs_single)

        # no need to loop over attention steps this time because there is only one step
        context = _one_step_attention(encoder_outputs_as_input, initial_s)

        # combine context with last word
        decoder_lstm_input = context_last_word_concat_layer(
            [context, decoder_inputs_single_x])

        # lstm and final dense
        o, s, c = decoder_lstm(decoder_lstm_input,
                               initial_state=[initial_s, initial_c])
        decoder_outputs = decoder_dense(o)

        # note: we don't really need the final stack and transpose
        # because there's only 1 output
        # it is already of size N x D
        # no need to make it 1 x N x D --> N x 1 x D
        # the time dimension is 1, so it is squeezed out automatically; hence _stack_and_transpose is not needed here

        # create the model object
        self.decoder_model = Model(inputs=[
            decoder_inputs_single, encoder_outputs_as_input, initial_s,
            initial_c
        ],
                                   outputs=[decoder_outputs, s, c])
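# The train() method above references two helpers, self._softmax_over_time and
# self._stack_and_transpose, that are defined elsewhere in the class; the sketches below
# show what they most likely compute, plus a hedged greedy-decoding loop that uses the
# encoder_model/decoder_model pair built at the end of train(). The token names
# ('<sos>', '<eos>') and the word2idx/idx2word dictionaries are assumptions.
import numpy as np
from keras import backend as K

def softmax_over_time(x):
    # x: (batch, Tx, 1) attention scores; normalize over the time axis (axis=1)
    e = K.exp(x - K.max(x, axis=1, keepdims=True))
    return e / K.sum(e, axis=1, keepdims=True)

def stack_and_transpose(x):
    # x: a Python list of Ty tensors of shape (batch, vocab) -> (batch, Ty, vocab)
    x = K.stack(x)
    return K.permute_dimensions(x, pattern=(1, 0, 2))

def decode_sequence(input_seq, encoder_model, decoder_model, params,
                    word2idx_outputs, idx2word_outputs):
    # input_seq: (1, MAX_LEN_INPUT) array of encoder token ids
    enc_out = encoder_model.predict(input_seq)
    target_seq = np.array([[word2idx_outputs['<sos>']]])
    s = np.zeros((1, params['LATENT_DIM_DECODER']))
    c = np.zeros((1, params['LATENT_DIM_DECODER']))
    output_words = []
    for _ in range(params['MAX_LEN_TARGET']):
        o, s, c = decoder_model.predict([target_seq, enc_out, s, c])
        idx = int(np.argmax(o.flatten()))
        if idx2word_outputs.get(idx) == '<eos>':
            break
        output_words.append(idx2word_outputs.get(idx, ''))
        target_seq = np.array([[idx]])   # feed the predicted word back in
    return ' '.join(output_words)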
Example #34
import numpy as np
from keras.models import Model
from keras.layers import Input, Dense, Embedding, Reshape, Concatenate
from sys import argv

num_users = 1000
num_rooms = 2000
user_attr = 5
room_attr = 3

# Two-input model built with the functional API: each ID is embedded into a small
# dense vector, the two vectors are concatenated, and an MLP scores the pair.
user_input = Input(shape=(1,))
user_vec = Reshape((user_attr,))(Embedding(num_users + 1, user_attr, input_length=1)(user_input))
room_input = Input(shape=(1,))
room_vec = Reshape((room_attr,))(Embedding(num_rooms, room_attr, input_length=1)(room_input))

x = Concatenate()([user_vec, room_vec])
x = Dense(user_attr + room_attr, activation='relu')(x)
x = Dense(user_attr + room_attr, activation='relu')(x)
out = Dense(1, activation='sigmoid')(x)
model = Model(inputs=[user_input, room_input], outputs=out)

model.load_weights('user' + str(argv[1]) + '_weights.h5')

res = model.predict([np.array([int(argv[1])]), np.array([int(argv[2])])])
def ssd_300(image_size,
            n_classes,
            mode='training',
            l2_regularization=0.0005,
            min_scale=None,
            max_scale=None,
            scales=None,
            aspect_ratios_global=None,
            aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
            two_boxes_for_ar1=True,
            steps=[8, 16, 32, 64, 100, 300],
            offsets=None,
            limit_boxes=False,
            variances=[0.1, 0.1, 0.2, 0.2],
            coords='centroids',
            normalize_coords=False,
            subtract_mean=[123, 117, 104],
            divide_by_stddev=None,
            swap_channels=True,
            confidence_thresh=0.01,
            iou_threshold=0.45,
            top_k=200,
            nms_max_output_size=400,
            return_predictor_sizes=False):
    '''
    Build a Keras model with SSD300 architecture, see references.

    The base network is a reduced atrous VGG-16, extended by the SSD architecture,
    as described in the paper.

    Most of the arguments that this function takes are only needed for the anchor
    box layers. In case you're training the network, the parameters passed here must
    be the same as the ones used to set up `SSDBoxEncoder`. In case you're loading
    trained weights, the parameters passed here must be the same as the ones used
    to produce the trained weights.

    Some of these arguments are explained in more detail in the documentation of the
    `SSDBoxEncoder` class.

    Note: Requires Keras v2.0 or later. Currently works only with the
    TensorFlow backend (v1.0 or later).

    Arguments:
        image_size (tuple): The input image size in the format `(height, width, channels)`.
        n_classes (int): The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
        mode (str, optional): One of 'training', 'inference' and 'inference_fast'. In 'training' mode,
            the model outputs the raw prediction tensor, while in 'inference' and 'inference_fast' modes,
            the raw predictions are decoded into absolute coordinates and filtered via confidence thresholding,
            non-maximum suppression, and top-k filtering. The difference between the latter two modes is that
            'inference' follows the exact procedure of the original Caffe implementation, while
            'inference_fast' uses a faster prediction decoding procedure.
        l2_regularization (float, optional): The L2-regularization rate. Applies to all convolutional layers.
            Set to zero to deactivate L2-regularization.
        min_scale (float, optional): The smallest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images.
        max_scale (float, optional): The largest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images. All scaling factors between the smallest and the
            largest will be linearly interpolated. Note that the second to last of the linearly interpolated
            scaling factors will actually be the scaling factor for the last predictor layer, while the last
            scaling factor is used for the second box for aspect ratio 1 in the last predictor layer
            if `two_boxes_for_ar1` is `True`.
        scales (list, optional): A list of floats containing scaling factors per convolutional predictor layer.
            This list must be one element longer than the number of predictor layers. The first `k` elements are the
            scaling factors for the `k` predictor layers, while the last element is used for the second box
            for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
            last scaling factor must be passed either way, even if it is not being used.
            Defaults to `None`. If a list is passed, this argument overrides `min_scale` and
            `max_scale`. All scaling factors must be greater than zero.
        aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be
            generated. This list is valid for all prediction layers. Defaults to None.
        aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each prediction layer.
            This allows you to set the aspect ratios for each predictor layer individually, which is the case for the
            original SSD300 implementation. If a list is passed, it overrides `aspect_ratios_global`.
            Defaults to the aspect ratios used in the original SSD300 architecture, i.e.:
                [[1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5]]
        two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise.
            If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
            using the scaling factor for the respective layer, the second one will be generated using the
            geometric mean of said scaling factor and the next bigger scaling factor. Defaults to `True`, following the original
            implementation.
        steps (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many
            pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over
            the image. If the list contains ints/floats, then that value will be used for both spatial dimensions.
            If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`.
            If no steps are provided, then they will be computed such that the anchor box center points will form an
            equidistant grid within the image dimensions.
        offsets (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either floats or tuples of two floats. These numbers represent for each predictor layer how many
            pixels from the top and left borders of the image the top-most and left-most anchor box center points should be
            as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions
            of the step size specified in the `steps` argument. If the list contains floats, then that value will
            be used for both spatial dimensions. If the list contains tuples of two floats, then they represent
            `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size.
        limit_boxes (bool, optional): If `True`, limits box coordinates to stay within image boundaries.
            This would normally be set to `True`, but here it defaults to `False`, following the original
            implementation.
        variances (list, optional): A list of 4 floats >0 with scaling factors (more precisely, divisors)
            for the encoded predicted box coordinates. A variance value of 1.0 would apply
            no scaling at all to the predictions, while values in (0,1) upscale the encoded predictions and values greater
            than 1.0 downscale the encoded predictions. Defaults to `[0.1, 0.1, 0.2, 0.2]`, following the original implementation.
            The coordinate format must be 'centroids'.
        coords (str, optional): The box coordinate format to be used. Can be either 'centroids' for the format
            `(cx, cy, w, h)` (box center coordinates, width, and height) or 'minmax' for the format
            `(xmin, xmax, ymin, ymax)`. Defaults to 'centroids', following the original implementation.
        normalize_coords (bool, optional): Set to `True` if the model is supposed to use relative instead of absolute coordinates,
            i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates. Defaults to `False`.
        subtract_mean (array-like, optional): `None` or an array-like object of integers or floating point values
            of any shape that is broadcast-compatible with the image shape. The elements of this array will be
            subtracted from the image pixel intensity values. For example, pass a list of three integers
            to perform per-channel mean normalization for color images.
        divide_by_stddev (array-like, optional): `None` or an array-like object of non-zero integers or
            floating point values of any shape that is broadcast-compatible with the image shape. The image pixel
            intensity values will be divided by the elements of this array. For example, pass a list
            of three integers to perform per-channel standard deviation normalization for color images.
        swap_channels (bool, optional): If `True`, the color channel order of the input images will be reversed,
            i.e. if the input color channel order is RGB, the color channels will be swapped to BGR.
        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
            positive class in order to be considered for the non-maximum suppression stage for the respective class.
            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
            stage, while a larger value will result in a larger part of the selection process happening in the confidence
            thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes that have a Jaccard similarity of greater than `iou_threshold`
            with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
            to the box's confidence score.
        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
            non-maximum suppression stage.
        nms_max_output_size (int, optional): The maximal number of predictions that will be left over after the NMS stage.
        return_predictor_sizes (bool, optional): If `True`, this function not only returns the model, but also
            a list containing the spatial dimensions of the predictor layers. This isn't strictly necessary since
            you can always get their sizes easily via the Keras API, but it's convenient and less error-prone
            to get them this way. They are only relevant for training anyway (SSDBoxEncoder needs to know the
            spatial dimensions of the predictor layers), for inference you don't need them.

    Returns:
        model: The Keras SSD300 model.
        predictor_sizes (optional): A Numpy array containing the `(height, width)` portion
            of the output tensor shape for each convolutional predictor layer. During
            training, the generator function needs this in order to transform
            the ground truth labels into tensors of identical structure as the
            output tensors of the model, which is in turn needed for the cost
            function.

    References:
        https://arxiv.org/abs/1512.02325v5
    '''

    n_predictor_layers = 6  # The number of predictor conv layers in the network is 6 for the original SSD300.
    n_classes += 1  # Account for the background class.
    l2_reg = l2_regularization  # Make the internal name shorter.
    img_height, img_width, img_channels = image_size[0], image_size[
        1], image_size[2]

    ############################################################################
    # Get a few exceptions out of the way.
    ############################################################################

    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
                .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "It must be either scales is None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:  # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)
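        # Worked example (an aside, not in the original): min_scale=0.1, max_scale=0.9 gives
        # scales == [0.1, 0.233, 0.367, 0.5, 0.633, 0.767, 0.9] for the six predictor layers.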

    if len(variances) != 4:
        raise ValueError(
            "4 variance values must be pased, but {} values were received.".
            format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError(
            "All variances must be >0, but the variances given are {}".format(
                variances))

    if (not (steps is None)) and (len(steps) != n_predictor_layers):
        raise ValueError(
            "You must provide exactly one step value per predictor layer.")

    if (not (offsets is None)) and (len(offsets) != n_predictor_layers):
        raise ValueError(
            "You must provide exactly one offset value per predictor layer.")

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    if aspect_ratios_per_layer:
        n_boxes = []
        for ar in aspect_ratios_per_layer:
            if (1 in ar) & two_boxes_for_ar1:
                n_boxes.append(len(ar) +
                               1)  # +1 for the second box for aspect ratio 1
            else:
                n_boxes.append(len(ar))
    else:  # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer
        if (1 in aspect_ratios_global) & two_boxes_for_ar1:
            n_boxes = len(aspect_ratios_global) + 1
        else:
            n_boxes = len(aspect_ratios_global)
        n_boxes = [n_boxes] * n_predictor_layers
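    # Worked example (an aside, not in the original): with the default
    # `aspect_ratios_per_layer` above and `two_boxes_for_ar1=True`, this yields
    # n_boxes == [4, 6, 6, 6, 4, 4] anchor boxes per spatial cell for the six layers.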

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    ############################################################################
    # Build the network.
    ############################################################################

    x = Input(shape=(img_height, img_width, img_channels))

    # The following identity layer is only needed so that the subsequent lambda layers can be optional.
    x1 = Lambda(lambda z: z,
                output_shape=(img_height, img_width, img_channels),
                name='identity_layer')(x)
    if not (subtract_mean is None):
        x1 = Lambda(lambda z: z - np.array(subtract_mean),
                    output_shape=(img_height, img_width, img_channels),
                    name='input_mean_normalization')(x1)
    if not (divide_by_stddev is None):
        x1 = Lambda(lambda z: z / np.array(divide_by_stddev),
                    output_shape=(img_height, img_width, img_channels),
                    name='input_stddev_normalization')(x1)
    if swap_channels and (img_channels == 3):
        x1 = Lambda(lambda z: z[..., ::-1],
                    output_shape=(img_height, img_width, img_channels),
                    name='input_channel_swap')(x1)

    conv1_1 = Conv2D(64, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv1_1')(x1)
    conv1_2 = Conv2D(64, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv1_2')(conv1_1)
    pool1 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool1')(conv1_2)

    conv2_1 = Conv2D(128, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv2_1')(pool1)
    conv2_2 = Conv2D(128, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv2_2')(conv2_1)
    pool2 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool2')(conv2_2)

    conv3_1 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_1')(pool2)
    conv3_2 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_2')(conv3_1)
    conv3_3 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_3')(conv3_2)
    pool3 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool3')(conv3_3)

    conv4_1 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_1')(pool3)
    conv4_2 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_2')(conv4_1)
    conv4_3 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_3')(conv4_2)
    pool4 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool4')(conv4_3)

    conv5_1 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_1')(pool4)
    conv5_2 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_2')(conv5_1)
    conv5_3 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_3')(conv5_2)
    pool5 = MaxPooling2D(pool_size=(3, 3),
                         strides=(1, 1),
                         padding='same',
                         name='pool5')(conv5_3)

    fc6 = Conv2D(1024, (3, 3),
                 dilation_rate=(6, 6),
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal',
                 kernel_regularizer=l2(l2_reg),
                 name='fc6')(pool5)

    fc7 = Conv2D(1024, (1, 1),
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal',
                 kernel_regularizer=l2(l2_reg),
                 name='fc7')(fc6)

    conv6_1 = Conv2D(256, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv6_1')(fc7)
    conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv6_padding')(conv6_1)
    conv6_2 = Conv2D(512, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv6_2')(conv6_1)

    conv7_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv7_1')(conv6_2)
    conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv7_padding')(conv7_1)
    conv7_2 = Conv2D(256, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv7_2')(conv7_1)

    conv8_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv8_1')(conv7_2)
    conv8_2 = Conv2D(256, (3, 3),
                     strides=(1, 1),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv8_2')(conv8_1)

    conv9_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv9_1')(conv8_2)
    conv9_2 = Conv2D(256, (3, 3),
                     strides=(1, 1),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv9_2')(conv9_1)

    # Feed conv4_3 into the L2 normalization layer
    conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3)

    ### Build the convolutional predictor layers on top of the base network

    # We predict `n_classes` confidence values for each box, hence the confidence predictors have depth `n_boxes * n_classes`
    # Output shape of the confidence layers: `(batch, height, width, n_boxes * n_classes)`
    conv4_3_norm_mbox_conf = Conv2D(
        n_boxes[0] * n_classes, (3, 3),
        padding='same',
        kernel_initializer='he_normal',
        kernel_regularizer=l2(l2_reg),
        name='conv4_3_norm_mbox_conf')(conv4_3_norm)
    fc7_mbox_conf = Conv2D(n_boxes[1] * n_classes, (3, 3),
                           padding='same',
                           kernel_initializer='he_normal',
                           kernel_regularizer=l2(l2_reg),
                           name='fc7_mbox_conf')(fc7)
    conv6_2_mbox_conf = Conv2D(n_boxes[2] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv6_2_mbox_conf')(conv6_2)
    conv7_2_mbox_conf = Conv2D(n_boxes[3] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv7_2_mbox_conf')(conv7_2)
    conv8_2_mbox_conf = Conv2D(n_boxes[4] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv8_2_mbox_conf')(conv8_2)
    conv9_2_mbox_conf = Conv2D(n_boxes[5] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv9_2_mbox_conf')(conv9_2)
    # We predict 4 box coordinates for each box, hence the localization predictors have depth `n_boxes * 4`
    # Output shape of the localization layers: `(batch, height, width, n_boxes * 4)`
    conv4_3_norm_mbox_loc = Conv2D(n_boxes[0] * 4, (3, 3),
                                   padding='same',
                                   kernel_initializer='he_normal',
                                   kernel_regularizer=l2(l2_reg),
                                   name='conv4_3_norm_mbox_loc')(conv4_3_norm)
    fc7_mbox_loc = Conv2D(n_boxes[1] * 4, (3, 3),
                          padding='same',
                          kernel_initializer='he_normal',
                          kernel_regularizer=l2(l2_reg),
                          name='fc7_mbox_loc')(fc7)
    conv6_2_mbox_loc = Conv2D(n_boxes[2] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv6_2_mbox_loc')(conv6_2)
    conv7_2_mbox_loc = Conv2D(n_boxes[3] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv7_2_mbox_loc')(conv7_2)
    conv8_2_mbox_loc = Conv2D(n_boxes[4] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv8_2_mbox_loc')(conv8_2)
    conv9_2_mbox_loc = Conv2D(n_boxes[5] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv9_2_mbox_loc')(conv9_2)

    ### Generate the anchor boxes (called "priors" in the original Caffe/C++ implementation, so I'll keep their layer names)

    # Output shape of anchors: `(batch, height, width, n_boxes, 8)`
    conv4_3_norm_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[0],
        next_scale=scales[1],
        aspect_ratios=aspect_ratios[0],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[0],
        this_offsets=offsets[0],
        limit_boxes=limit_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv4_3_norm_mbox_priorbox')(conv4_3_norm_mbox_loc)
    fc7_mbox_priorbox = AnchorBoxes(img_height,
                                    img_width,
                                    this_scale=scales[1],
                                    next_scale=scales[2],
                                    aspect_ratios=aspect_ratios[1],
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    this_steps=steps[1],
                                    this_offsets=offsets[1],
                                    limit_boxes=limit_boxes,
                                    variances=variances,
                                    coords=coords,
                                    normalize_coords=normalize_coords,
                                    name='fc7_mbox_priorbox')(fc7_mbox_loc)
    conv6_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[2],
        next_scale=scales[3],
        aspect_ratios=aspect_ratios[2],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[2],
        this_offsets=offsets[2],
        limit_boxes=limit_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv6_2_mbox_priorbox')(conv6_2_mbox_loc)
    conv7_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[3],
        next_scale=scales[4],
        aspect_ratios=aspect_ratios[3],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[3],
        this_offsets=offsets[3],
        limit_boxes=limit_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv7_2_mbox_priorbox')(conv7_2_mbox_loc)
    conv8_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[4],
        next_scale=scales[5],
        aspect_ratios=aspect_ratios[4],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[4],
        this_offsets=offsets[4],
        limit_boxes=limit_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv8_2_mbox_priorbox')(conv8_2_mbox_loc)
    conv9_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[5],
        next_scale=scales[6],
        aspect_ratios=aspect_ratios[5],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[5],
        this_offsets=offsets[5],
        limit_boxes=limit_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv9_2_mbox_priorbox')(conv9_2_mbox_loc)

    ### Reshape

    # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
    # We want the classes isolated in the last axis to perform softmax on them
    conv4_3_norm_mbox_conf_reshape = Reshape(
        (-1, n_classes),
        name='conv4_3_norm_mbox_conf_reshape')(conv4_3_norm_mbox_conf)
    fc7_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='fc7_mbox_conf_reshape')(fc7_mbox_conf)
    conv6_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv6_2_mbox_conf_reshape')(conv6_2_mbox_conf)
    conv7_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv7_2_mbox_conf_reshape')(conv7_2_mbox_conf)
    conv8_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv8_2_mbox_conf_reshape')(conv8_2_mbox_conf)
    conv9_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv9_2_mbox_conf_reshape')(conv9_2_mbox_conf)
    # Reshape the box predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`
    # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss
    conv4_3_norm_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv4_3_norm_mbox_loc_reshape')(conv4_3_norm_mbox_loc)
    fc7_mbox_loc_reshape = Reshape((-1, 4),
                                   name='fc7_mbox_loc_reshape')(fc7_mbox_loc)
    conv6_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv6_2_mbox_loc_reshape')(conv6_2_mbox_loc)
    conv7_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv7_2_mbox_loc_reshape')(conv7_2_mbox_loc)
    conv8_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv8_2_mbox_loc_reshape')(conv8_2_mbox_loc)
    conv9_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv9_2_mbox_loc_reshape')(conv9_2_mbox_loc)
    # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    conv4_3_norm_mbox_priorbox_reshape = Reshape(
        (-1, 8),
        name='conv4_3_norm_mbox_priorbox_reshape')(conv4_3_norm_mbox_priorbox)
    fc7_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='fc7_mbox_priorbox_reshape')(fc7_mbox_priorbox)
    conv6_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv6_2_mbox_priorbox_reshape')(conv6_2_mbox_priorbox)
    conv7_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv7_2_mbox_priorbox_reshape')(conv7_2_mbox_priorbox)
    conv8_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv8_2_mbox_priorbox_reshape')(conv8_2_mbox_priorbox)
    conv9_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv9_2_mbox_priorbox_reshape')(conv9_2_mbox_priorbox)

    ### Concatenate the predictions from the different layers

    # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions,
    # so we want to concatenate along axis 1, the number of boxes per layer
    # Output shape of `mbox_conf`: (batch, n_boxes_total, n_classes)
    mbox_conf = Concatenate(axis=1, name='mbox_conf')([
        conv4_3_norm_mbox_conf_reshape, fc7_mbox_conf_reshape,
        conv6_2_mbox_conf_reshape, conv7_2_mbox_conf_reshape,
        conv8_2_mbox_conf_reshape, conv9_2_mbox_conf_reshape
    ])

    # Output shape of `mbox_loc`: (batch, n_boxes_total, 4)
    mbox_loc = Concatenate(axis=1, name='mbox_loc')([
        conv4_3_norm_mbox_loc_reshape, fc7_mbox_loc_reshape,
        conv6_2_mbox_loc_reshape, conv7_2_mbox_loc_reshape,
        conv8_2_mbox_loc_reshape, conv9_2_mbox_loc_reshape
    ])

    # Output shape of `mbox_priorbox`: (batch, n_boxes_total, 8)
    mbox_priorbox = Concatenate(axis=1, name='mbox_priorbox')([
        conv4_3_norm_mbox_priorbox_reshape, fc7_mbox_priorbox_reshape,
        conv6_2_mbox_priorbox_reshape, conv7_2_mbox_priorbox_reshape,
        conv8_2_mbox_priorbox_reshape, conv9_2_mbox_priorbox_reshape
    ])

    # The box coordinate predictions will go into the loss function just the way they are,
    # but for the class predictions, we'll apply a softmax activation layer first
    mbox_conf_softmax = Activation('softmax',
                                   name='mbox_conf_softmax')(mbox_conf)

    # Concatenate the class and box predictions and the anchors to one large predictions vector
    # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
    predictions = Concatenate(axis=2, name='predictions')(
        [mbox_conf_softmax, mbox_loc, mbox_priorbox])

    if mode == 'training':
        model = Model(inputs=x, outputs=predictions)
    elif mode == 'inference':
        decoded_predictions = DecodeDetections(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    elif mode == 'inference_fast':
        decoded_predictions = DecodeDetections2(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    else:
        raise ValueError(
            "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'."
            .format(mode))

    if return_predictor_sizes:
        # Get the spatial dimensions (height, width) of the predictor conv layers, we need them to
        # be able to generate the default boxes for the matching process outside of the model during training.
        # Note that the original implementation performs anchor box matching inside the loss function. We don't do that.
        # Instead, we'll do it in the batch generator function.
        # The spatial dimensions are the same for the confidence and localization predictors, so we just take those of the conf layers.
        predictor_sizes = np.array([
            conv4_3_norm_mbox_conf._keras_shape[1:3],
            fc7_mbox_conf._keras_shape[1:3],
            conv6_2_mbox_conf._keras_shape[1:3],
            conv7_2_mbox_conf._keras_shape[1:3],
            conv8_2_mbox_conf._keras_shape[1:3],
            conv9_2_mbox_conf._keras_shape[1:3]
        ])
        return model, predictor_sizes
    else:
        return model
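# A hedged usage sketch (not from the original source): building an SSD300 for 20-class
# Pascal VOC training. The scale list below is the commonly used Pascal VOC setting and is
# an assumption here; it must match whatever the SSDBoxEncoder is configured with.
model, predictor_sizes = ssd_300(image_size=(300, 300, 3),
                                 n_classes=20,
                                 mode='training',
                                 scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
                                 steps=[8, 16, 32, 64, 100, 300],
                                 normalize_coords=True,
                                 return_predictor_sizes=True)
model.summary()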