def MobileNetV3(stack_fn,
                last_point_ch,
                input_shape=None,
                alpha=1.0,
                model_type='large',
                minimalistic=False,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                classes=1000,
                pooling=None,
                dropout_rate=0.2,
                **kwargs):
    """Instantiates the MobileNetV3 architecture.
    # Arguments
        stack_fn: a function that returns output tensor for the
            stacked residual blocks.
        last_point_ch: number of channels at the last layer (before the top)
        input_shape: optional shape tuple, to be specified if you would
            like to use a model with an input image resolution that is not
            (224, 224, 3).
            It should have exactly 3 input channels.
            You can also omit this option if you would like
            to infer input_shape from an input_tensor.
            If you choose to include both input_tensor and input_shape then
            input_shape will be used if they match; if the shapes
            do not match an error will be thrown.
            E.g. `(160, 160, 3)` would be one valid value.
        alpha: controls the width of the network. This is known as the
            depth multiplier in the MobileNetV3 paper, but the name is kept for
            consistency with MobileNetV1 in Keras.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                are used at each layer.
        model_type: MobileNetV3 is defined as two models: large and small.
            These models are targeted at high and low resource use cases
            respectively.
        minimalistic: in addition to the large and small models, this module also
            contains so-called minimalistic models. These models have the same
            per-layer dimensions as MobileNetV3; however, they don't utilize any
            of the advanced blocks (squeeze-and-excite units, hard-swish, and 5x5
            convolutions). While these models are less efficient on CPU, they are
            much more performant on GPU/DSP.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        dropout_rate: fraction of the input units to drop on the last layer
    # Returns
        A Keras model instance.
    # Raises
        ValueError: in case of invalid model type, argument for `weights`,
            or invalid input shape when weights='imagenet'
    """

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # If both input_shape and input_tensor are None, use a standard shape
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    if K.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]
    if rows and cols and (rows < 32 or cols < 32):
        raise ValueError(
            'Input size must be at least 32x32; got `input_shape=' +
            str(input_shape) + '`')
    if weights == 'imagenet':
        if minimalistic is False and alpha not in [0.75, 1.0] \
                or minimalistic is True and alpha != 1.0:
            raise ValueError(
                'If imagenet weights are being loaded, '
                'alpha can be one of `0.75`, `1.0` for non minimalistic'
                ' or `1.0` for minimalistic only.')

        if rows != cols or rows != 224:
            warnings.warn('`input_shape` is undefined or non-square, '
                          'or `rows` is not 224.'
                          ' Weights for input shape (224, 224) will be'
                          ' loaded as the default.')

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        #if not K.is_keras_tensor(input_tensor):
        #img_input = Input(tensor=input_tensor, shape=input_shape)
        #else:
        #img_input = input_tensor
        img_input = input_tensor

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if minimalistic:
        kernel = 3
        activation = relu
        se_ratio = None
    else:
        kernel = 5
        activation = hard_swish
        se_ratio = 0.25

    x = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                      name='Conv_pad')(img_input)
    x = Conv2D(16,
               kernel_size=3,
               strides=(2, 2),
               padding='valid',
               use_bias=False,
               name='Conv')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name='Conv/BatchNorm')(x)
    x = Activation(activation)(x)

    x, skip_feature = stack_fn(x, kernel, activation, se_ratio)
    # keep the end of the feature extractor as the final feature map
    final_feature = x

    last_conv_ch = _depth(K.int_shape(x)[channel_axis] * 6)

    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_point_ch = _depth(last_point_ch * alpha)

    x = Conv2D(last_conv_ch,
               kernel_size=1,
               padding='same',
               use_bias=False,
               name='Conv_1')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name='Conv_1/BatchNorm')(x)
    x = Activation(activation)(x)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        if channel_axis == 1:
            x = Reshape((last_conv_ch, 1, 1))(x)
        else:
            x = Reshape((1, 1, last_conv_ch))(x)
        x = Conv2D(last_point_ch, kernel_size=1, padding='same',
                   name='Conv_2')(x)
        x = Activation(activation)(x)
        if dropout_rate > 0:
            x = Dropout(dropout_rate)(x)
        x = Conv2D(classes, kernel_size=1, padding='same', name='Logits')(x)
        x = Flatten()(x)
        x = Softmax(name='Predictions/Softmax')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D(name='max_pool')(x)
    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='MobilenetV3' + model_type)

    # Load weights.
    if weights == 'imagenet':
        model_name = "{}{}_224_{}_float".format(
            model_type, '_minimalistic' if minimalistic else '', str(alpha))
        if include_top:
            file_name = 'weights_mobilenet_v3_' + model_name + '.h5'
            file_hash = WEIGHTS_HASHES[model_name][0]
        else:
            file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5'
            file_hash = WEIGHTS_HASHES[model_name][1]
        weights_path = get_file(file_name,
                                BASE_WEIGHT_PATH + file_name,
                                cache_subdir='models',
                                file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    #return model
    return final_feature, skip_feature, len(model.layers) - 3
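# A minimal sketch (not from this repo) of how MobileNetV3() might be driven by a
# concrete stack_fn; the real MobileNetV3Small/Large wrappers and their block
# stacks differ. The stub stack_fn below simply reuses the stem output as the
# skip feature, purely for illustration.
def _toy_stack_fn(x, kernel, activation, se_ratio):
    skip_feature = x  # a real stack_fn would return a mid-level feature map here
    return x, skip_feature

def MobileNetV3ToyBackbone(input_shape=(224, 224, 3), alpha=1.0, **kwargs):
    return MobileNetV3(_toy_stack_fn,
                       last_point_ch=1024,
                       input_shape=input_shape,
                       alpha=alpha,
                       model_type='small',
                       include_top=False,
                       weights=None,
                       **kwargs)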
def line_lstm_ctc(input_shape,
                  output_shape,
                  window_width=28,
                  window_stride=14):

    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    num_windows = int((image_width - window_width) / window_stride) + 1

    if num_windows < output_length:
        raise ValueError(
            f'Window width/stride need to generate at least {output_length} windows (currently {num_windows})'
        )

    image_input = Input(shape=input_shape, name='image')
    y_true = Input(shape=(output_length, ), name='y_true')
    input_length = Input(shape=(1, ), name='input_length')
    label_length = Input(shape=(1, ), name='label_length')

    gpu_present = len(device_lib.list_local_devices()) > 1
    lstm_fn = CuDNNLSTM if gpu_present else LSTM

    # Your code should use slide_window and extract image patches from image_input.
    # Pass a convolutional model over each image patch to generate a feature vector per window.
    # Pass these features through one or more LSTM layers.
    # Convert the LSTM outputs to softmax outputs.
    # Note that LSTMs expect an input of shape (num_batch_size, num_timesteps, feature_length).

    ##### Your code below (Lab 3)
    image_reshaped = Reshape((image_height, image_width, 1))(image_input)
    # (image_height, image_width, 1)

    image_patches = Lambda(slide_window,
                           arguments={
                               'window_width': window_width,
                               'window_stride': window_stride
                           })(image_reshaped)

    convnet = lenet((image_height, window_width, 1), (num_classes, ))
    convnet = KerasModel(inputs=convnet.inputs,
                         outputs=convnet.layers[-2].output)

    convnet_outputs = TimeDistributed(convnet)(image_patches)
    # (num_windows, 200)
    lstm_output = lstm_fn(200, return_sequences=True)(convnet_outputs)

    softmax_output = Dense(num_classes,
                           activation='softmax',
                           name='softmax_output')(lstm_output)

    ##### Your code above (Lab 3)

    input_length_processed = Lambda(
        lambda x, num_windows=None: x * num_windows,
        arguments={'num_windows': num_windows})(input_length)

    ctc_loss_output = Lambda(
        lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]), name='ctc_loss')(
            [y_true, softmax_output, input_length_processed, label_length])

    ctc_decoded_output = Lambda(
        lambda x: ctc_decode(x[0], x[1], output_length),
        name='ctc_decoded')([softmax_output, input_length_processed])

    model = KerasModel(
        inputs=[image_input, y_true, input_length, label_length],
        outputs=[ctc_loss_output, ctc_decoded_output])
    return model
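# Hypothetical sketch of the slide_window helper assumed by the Lambda layer above:
# it slices overlapping windows of width `window_width` every `window_stride`
# pixels along the image width, producing (batch, num_windows, height, window_width, 1)
# for TimeDistributed(convnet). The real helper in the lab code may differ
# (and in TF 1.x the underlying API is tf.extract_image_patches).
def slide_window(image, window_width=28, window_stride=14):
    import tensorflow as tf
    # image: (batch, height, width, 1); extract 1 x window_width patches per column step
    patches = tf.image.extract_patches(image,
                                       sizes=[1, 1, window_width, 1],
                                       strides=[1, 1, window_stride, 1],
                                       rates=[1, 1, 1, 1],
                                       padding='VALID')
    # patches: (batch, height, num_windows, window_width)
    # -> (batch, num_windows, height, window_width, 1)
    patches = tf.transpose(patches, perm=[0, 2, 1, 3])
    return tf.expand_dims(patches, -1)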
    def init(self, printSummary=True): # keep_negative = 0 on inputs, otherwise for weights keep default (=1)
        encoded_dim = self.pams['encoded_dim']

        CNN_layer_nodes = self.pams['CNN_layer_nodes']
        CNN_kernel_size = self.pams['CNN_kernel_size']
        CNN_pool = self.pams['CNN_pool']
        Dense_layer_nodes = self.pams['Dense_layer_nodes']  # does not include encoded layer
        channels_first = self.pams['channels_first']

        inputs = Input(shape=self.pams['shape'])  # adapt this if using `channels_first` image data format

        # load bits to quantize
        nBits_input  = self.pams['nBits_input']
        nBits_accum  = self.pams['nBits_accum']
        nBits_weight = self.pams['nBits_weight']
        nBits_encod  = self.pams['nBits_encod']
        nBits_dense  = self.pams['nBits_dense'] if 'nBits_dense' in self.pams else nBits_weight
        nBits_conv   = self.pams['nBits_conv' ] if 'nBits_conv'  in self.pams else nBits_weight

        input_Qbits  = self.GetQbits(nBits_input, nBits_input['keep_negative']) 
        accum_Qbits  = self.GetQbits(nBits_accum, nBits_accum['keep_negative'])
        dense_Qbits  = self.GetQbits(nBits_dense, nBits_dense['keep_negative'])
        conv_Qbits   = self.GetQbits(nBits_conv , nBits_conv ['keep_negative'])
        encod_Qbits  = self.GetQbits(nBits_encod, nBits_encod['keep_negative'])
        # keeping weights and bias same precision for now

        # define model
        x = inputs
        x = QActivation(input_Qbits, name='input_qa')(x)
        for i, n_nodes in enumerate(CNN_layer_nodes):
            if channels_first:
                x = QConv2D(n_nodes, CNN_kernel_size[i], activation='relu', padding='same',
                            data_format='channels_first', name="conv2d_"+str(i)+"_m",
                            kernel_quantizer=conv_Qbits, bias_quantizer=conv_Qbits)(x)
            else:
                x = QConv2D(n_nodes, CNN_kernel_size[i], activation='relu', padding='same', name="conv2d_"+str(i)+"_m",
                            kernel_quantizer=conv_Qbits, bias_quantizer=conv_Qbits)(x)
            if CNN_pool[i]:
                if channels_first:
                    x = MaxPooling2D((2, 2), padding='same', data_format='channels_first', name="mp_"+str(i))(x)
                else:
                    x = MaxPooling2D((2, 2), padding='same', name="mp_"+str(i))(x)

        shape = K.int_shape(x)
        x = QActivation(accum_Qbits, name='accum1_qa')(x)
        x = Flatten(name="flatten")(x)
        
        # extended inputs fed forward to the dense layer
        # if self.extend:
        #     inputs2 = Input(shape=(2,))  # maxQ, occupancy
            # input2_Qbits  = self.GetQbits(nBits_input, keep_negative=1) #oddly fails if keep_neg=0
            # input2_Qbits
            # x = inputs
            # x = QActivation(input_Qbits, name='input_qa')(x)
            

        # encoder dense nodes
        for i, n_nodes in enumerate(Dense_layer_nodes):
            x = QDense(n_nodes, activation='relu', name="en_dense_"+str(i),
                           kernel_quantizer=dense_Qbits, bias_quantizer=dense_Qbits)(x)


        #x = QDense(encoded_dim, activation='relu', name='encoded_vector',
        #                      kernel_quantizer=dense_Qbits, bias_quantizer=dense_Qbits)(x)
        x = QDense(encoded_dim, activation=self.pams['activation'], name='encoded_vector',
                              kernel_quantizer=dense_Qbits, bias_quantizer=dense_Qbits)(x)
        encodedLayer = QActivation(encod_Qbits, name='encod_qa')(x)

        # Instantiate Encoder Model
        self.encoder = Model(inputs, encodedLayer, name='encoder')
        if printSummary:
            self.encoder.summary()

        encoded_inputs = Input(shape=(encoded_dim,), name='decoder_input')
        x = encoded_inputs

        # decoder dense nodes
        for i, n_nodes in enumerate(Dense_layer_nodes):
            x = Dense(n_nodes, activation='relu', name="de_dense_"+str(i))(x)

        x = Dense(shape[1] * shape[2] * shape[3], activation='relu', name='de_dense_final')(x)
        x = Reshape((shape[1], shape[2], shape[3]),name="de_reshape")(x)

        for i, n_nodes in enumerate(CNN_layer_nodes):

            if CNN_pool[i]:
                if channels_first:
                    x = UpSampling2D((2, 2), data_format='channels_first', name="up_"+str(i))(x)
                else:
                    x = UpSampling2D((2, 2), name="up_"+str(i))(x)

            if channels_first:
                x = Conv2DTranspose(n_nodes, CNN_kernel_size[i], activation='relu', padding='same',
                                    data_format='channels_first', name="conv2D_t_"+str(i))(x)
            else:
                x = Conv2DTranspose(n_nodes, CNN_kernel_size[i], activation='relu', padding='same',
                                    name="conv2D_t_"+str(i))(x)

        if channels_first:
            # shape[0] will be # of channel
            x = Conv2DTranspose(filters=self.pams['shape'][0], kernel_size=CNN_kernel_size[0], padding='same',
                                data_format='channels_first', name="conv2d_t_final")(x)

        else:
            x = Conv2DTranspose(filters=self.pams['shape'][2], kernel_size=CNN_kernel_size[0], padding='same',
                                name="conv2d_t_final")(x)
        x = QActivation(input_Qbits, name='q_decoder_output')(x) #Verify this step needed?
        outputs = Activation('sigmoid', name='decoder_output')(x)

        self.decoder = Model(encoded_inputs, outputs, name='decoder')
        if printSummary:
            self.decoder.summary()

        self.autoencoder = Model(inputs, self.decoder(self.encoder(inputs)), name='autoencoder')
        if printSummary:
            self.autoencoder.summary()

        self.compileModels()

        CNN_layers = ''
        if len(CNN_layer_nodes) > 0:
            CNN_layers += '_Conv'
            for i, n in enumerate(CNN_layer_nodes):
                CNN_layers += f'_{n}x{CNN_kernel_size[i]}'
                if CNN_pool[i]:
                    CNN_layers += 'pooled'
        Dense_layers = ''
        if len(Dense_layer_nodes) > 0:
            Dense_layers += '_Dense'
            for n in Dense_layer_nodes:
                Dense_layers += f'_{n}'

        self.name = f'Autoencoded{CNN_layers}{Dense_layers}_Encoded_{encoded_dim}'

        if not self.weights_f == '':
            self.autoencoder.load_weights(self.weights_f)
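    # Hypothetical sketch of the GetQbits helper used above, assuming it wraps the
    # QKeras quantized_bits quantizer and that each nBits dict carries 'total',
    # 'integer' and 'keep_negative' entries; the real helper and dict keys may differ.
    def GetQbits(self, nBits, keep_negative=1):
        from qkeras import quantized_bits
        return quantized_bits(bits=nBits['total'],
                              integer=nBits['integer'],
                              keep_negative=keep_negative)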
    def build_discriminator(self):

        img = Input(shape=self.img_shape)
        '''    
        l1 = Input(shape=(64,))
        
        #label1 = Embedding(10, 10 )(l1)
        #label2 = Embedding(10, 10  )(l2)
        n_nodes = 128* 128
        label1 = Dense(n_nodes)(l1)
        label1 = Reshape((128, 128, 1))(label1)
        
        l2 = Input(shape=(64,))
        label2 = Dense(n_nodes)(l2)
        label2 = Reshape((128, 128, 1))(label2)

        l3 = Input(shape=(64,))
        #label3 = Embedding(10, 10 )(l3)
        label3 = Dense(n_nodes)(l3)
        label3 = Reshape((128, 128, 1))(label3)
        
        merge = Concatenate()([img, label1,label2,label3])
        '''
        l1 = Input(shape=(64,))
        l2 = Input(shape=(64,))
        l3 = Input(shape=(64,))
        label = Concatenate()([l1, l2, l3])
        n_nodes = 128 * 128
        label = Dense(n_nodes)(label)
        label = Reshape((128, 128, 1))(label)
        merge = Concatenate()([img, label])
        
        dis = Conv2D(16, kernel_size=3, strides=2, padding="same")(merge)
        dis = LeakyReLU(alpha=0.2)(dis)
        #dis = Dropout(0.25)(dis)

        dis = Conv2D(32, kernel_size=3, strides=2, padding="same")(dis)
        dis = LeakyReLU(alpha=0.2)(dis)
        #dis = Dropout(0.25)(dis)
        #dis = BatchNormalization(momentum=0.8)(dis)

        dis = Conv2D(64, kernel_size=3, strides=2, padding="same")(dis)
        dis = LeakyReLU(alpha=0.2)(dis)
        #dis = Dropout(0.25)(dis)
        #dis = BatchNormalization(momentum=0.8)(dis)

        dis = Conv2D(128, kernel_size=3, strides=2, padding="same")(dis)
        dis = LeakyReLU(alpha=0.2)(dis)
        #dis = Dropout(0.25)(dis)

        dis = Flatten()(dis)

        # Extract feature representation
        features = dis

        # Determine validity and label of the image
        validity = Dense(1, activation="sigmoid")(features)
        model = Model([img, l1,l2,l3], [validity])

        model.compile(loss=self.loss,
            optimizer=self.optimizer,
            metrics=['accuracy'])

        return model
# (60000, 28, 28, 1)
# (10000, 28, 28, 1)

# 2. Modeling
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.layers import Reshape
# A 4-D input shape can also be fed to Dense layers, but Flatten is required before the output.
model = Sequential()
model.add(Dense(64, input_shape=(28, 28, 1)))
model.add(Flatten())
model.add(Dense(16))
model.add(Dense(16))
model.add(Dense(784, activation='relu')
          )  # the number of units must equal the product of the Reshape target dims below: 784 = 28 * 28 * 1
model.add(Reshape((28, 28, 1)))  # Reshape is not a computation layer; it only reshapes the output of the layer above
# double parentheses (()) because the target shape is passed as a single tuple argument; covered in more detail later
model.add(Dense(1))  # output of size 1
model.summary()

# 3. Compile and train
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

early_stopping = EarlyStopping(monitor='loss', patience=6, mode='auto')
modelpath = '../data/modelCheckpoint/k45_mnist_{epoch:02d}-{val_loss:.4f}.hdf5'
# k45_mnist_37_0100(0.0100).hdf5
cp = ModelCheckpoint(filepath=modelpath,
                     monitor='val_loss',
                     save_best_only=True,
                     mode='auto')
# filepath='(path)': the location where the weight checkpoints are saved
def color_delta_unet_model(img_shape,
                           n_output_chans,
                           model_name='color_delta_unet',
                           enc_params=None,
                           include_aux_input=False,
                           aux_input_shape=None,
                           do_warp_to_target_space=False):
    x_src = Input(img_shape, name='input_src')
    x_tgt = Input(img_shape, name='input_tgt')
    inputs = [x_src, x_tgt]

    if aux_input_shape is None:
        aux_input_shape = img_shape

    x_seg = Input(aux_input_shape, name='input_src_aux')
    inputs += [x_seg]

    if do_warp_to_target_space:  # warp transformed vol to target space in the end
        n_dims = len(img_shape) - 1
        flow_srctotgt = Input(img_shape[:-1] + (n_dims, ), name='input_flow')
        inputs += [flow_srctotgt]

    if include_aux_input:
        unet_inputs = [x_src, x_tgt, x_seg]
        unet_input_shape = img_shape[:-1] + (img_shape[-1] * 2 +
                                             aux_input_shape[-1], )
    else:
        unet_inputs = [x_src, x_tgt]
        unet_input_shape = img_shape[:-1] + (img_shape[-1] * 2, )
    x_stacked = Concatenate(axis=-1)(unet_inputs)

    n_dims = len(img_shape) - 1

    if n_dims == 2:
        color_delta = unet2D(
            x_stacked,
            unet_input_shape,
            n_output_chans,
            nf_enc=enc_params['nf_enc'],
            nf_dec=enc_params['nf_dec'],
            n_convs_per_stage=enc_params['n_convs_per_stage'],
        )
        conv_fn = Conv2D
    else:
        color_delta = unet3D(
            x_stacked,
            unet_input_shape,
            n_output_chans,
            nf_enc=enc_params['nf_enc'],
            nf_dec=enc_params['nf_dec'],
            n_convs_per_stage=enc_params['n_convs_per_stage'],
        )
        conv_fn = Conv3D

    # last conv to get the output shape that we want
    color_delta = conv_fn(n_output_chans,
                          kernel_size=3,
                          padding='same',
                          name='color_delta')(color_delta)

    transformed_out = Add(name='add_color_delta')([x_src, color_delta])
    if do_warp_to_target_space:
        transformed_out = SpatialTransformer(indexing='xy')(
            [transformed_out, flow_srctotgt])

    # hacky, but do a reshape so keras doesn't complain about returning an input
    x_seg = Reshape(aux_input_shape, name='aux')(x_seg)

    return Model(inputs=inputs,
                 outputs=[transformed_out, color_delta, x_seg],
                 name=model_name)
def get_test_model_exhaustive():
    """Returns a exhaustive test model."""
    input_shapes = [
        (2, 3, 4, 5, 6),
        (2, 3, 4, 5, 6),
        (7, 8, 9, 10),
        (7, 8, 9, 10),
        (11, 12, 13),
        (11, 12, 13),
        (14, 15),
        (14, 15),
        (16,),
        (16,),
        (2,),
        (1,),
        (2,),
        (1,),
        (1, 3),
        (1, 4),
        (1, 1, 3),
        (1, 1, 4),
        (1, 1, 1, 3),
        (1, 1, 1, 4),
        (1, 1, 1, 1, 3),
        (1, 1, 1, 1, 4),
        (26, 28, 3),
        (4, 4, 3),
        (4, 4, 3),
        (4,),
        (2, 3),
        (1,),
        (1,),
        (1,),
        (2, 3),
        (9, 16, 1),
        (1, 9, 16)
    ]

    inputs = [Input(shape=s) for s in input_shapes]

    outputs = []

    outputs.append(Conv1D(1, 3, padding='valid')(inputs[6]))
    outputs.append(Conv1D(2, 1, padding='same')(inputs[6]))
    outputs.append(Conv1D(3, 4, padding='causal', dilation_rate=2)(inputs[6]))
    outputs.append(ZeroPadding1D(2)(inputs[6]))
    outputs.append(Cropping1D((2, 3))(inputs[6]))
    outputs.append(MaxPooling1D(2)(inputs[6]))
    outputs.append(MaxPooling1D(2, strides=2, padding='same')(inputs[6]))
    outputs.append(MaxPooling1D(2, data_format="channels_first")(inputs[6]))
    outputs.append(AveragePooling1D(2)(inputs[6]))
    outputs.append(AveragePooling1D(2, strides=2, padding='same')(inputs[6]))
    outputs.append(AveragePooling1D(2, data_format="channels_first")(inputs[6]))
    outputs.append(GlobalMaxPooling1D()(inputs[6]))
    outputs.append(GlobalMaxPooling1D(data_format="channels_first")(inputs[6]))
    outputs.append(GlobalAveragePooling1D()(inputs[6]))
    outputs.append(GlobalAveragePooling1D(data_format="channels_first")(inputs[6]))

    outputs.append(Conv2D(4, (3, 3))(inputs[4]))
    outputs.append(Conv2D(4, (3, 3), use_bias=False)(inputs[4]))
    outputs.append(Conv2D(4, (2, 4), strides=(2, 3), padding='same')(inputs[4]))
    outputs.append(Conv2D(4, (2, 4), padding='same', dilation_rate=(2, 3))(inputs[4]))

    outputs.append(SeparableConv2D(3, (3, 3))(inputs[4]))
    outputs.append(DepthwiseConv2D((3, 3))(inputs[4]))
    outputs.append(DepthwiseConv2D((1, 2))(inputs[4]))

    outputs.append(MaxPooling2D((2, 2))(inputs[4]))
    # todo: check if TensorFlow >= 2.1 supports this
    #outputs.append(MaxPooling2D((2, 2), data_format="channels_first")(inputs[4])) # Default MaxPoolingOp only supports NHWC on device type CPU
    outputs.append(MaxPooling2D((1, 3), strides=(2, 3), padding='same')(inputs[4]))
    outputs.append(AveragePooling2D((2, 2))(inputs[4]))
    # todo: check if TensorFlow >= 2.1 supports this
    #outputs.append(AveragePooling2D((2, 2), data_format="channels_first")(inputs[4])) # Default AvgPoolingOp only supports NHWC on device type CPU
    outputs.append(AveragePooling2D((1, 3), strides=(2, 3), padding='same')(inputs[4]))

    outputs.append(GlobalAveragePooling2D()(inputs[4]))
    outputs.append(GlobalAveragePooling2D(data_format="channels_first")(inputs[4]))
    outputs.append(GlobalMaxPooling2D()(inputs[4]))
    outputs.append(GlobalMaxPooling2D(data_format="channels_first")(inputs[4]))

    outputs.append(Permute((3, 4, 1, 5, 2))(inputs[0]))
    outputs.append(Permute((1, 5, 3, 2, 4))(inputs[0]))
    outputs.append(Permute((3, 4, 1, 2))(inputs[2]))
    outputs.append(Permute((2, 1, 3))(inputs[4]))
    outputs.append(Permute((2, 1))(inputs[6]))
    outputs.append(Permute((1,))(inputs[8]))

    outputs.append(Permute((3, 1, 2))(inputs[31]))
    outputs.append(Permute((3, 1, 2))(inputs[32]))
    outputs.append(BatchNormalization()(Permute((3, 1, 2))(inputs[31])))
    outputs.append(BatchNormalization()(Permute((3, 1, 2))(inputs[32])))

    outputs.append(BatchNormalization()(inputs[0]))
    outputs.append(BatchNormalization(axis=1)(inputs[0]))
    outputs.append(BatchNormalization(axis=2)(inputs[0]))
    outputs.append(BatchNormalization(axis=3)(inputs[0]))
    outputs.append(BatchNormalization(axis=4)(inputs[0]))
    outputs.append(BatchNormalization(axis=5)(inputs[0]))
    outputs.append(BatchNormalization()(inputs[2]))
    outputs.append(BatchNormalization(axis=1)(inputs[2]))
    outputs.append(BatchNormalization(axis=2)(inputs[2]))
    outputs.append(BatchNormalization(axis=3)(inputs[2]))
    outputs.append(BatchNormalization(axis=4)(inputs[2]))
    outputs.append(BatchNormalization()(inputs[4]))
    # todo: check if TensorFlow >= 2.1 supports this
    #outputs.append(BatchNormalization(axis=1)(inputs[4])) # tensorflow.python.framework.errors_impl.InternalError:  The CPU implementation of FusedBatchNorm only supports NHWC tensor format for now.
    outputs.append(BatchNormalization(axis=2)(inputs[4]))
    outputs.append(BatchNormalization(axis=3)(inputs[4]))
    outputs.append(BatchNormalization()(inputs[6]))
    outputs.append(BatchNormalization(axis=1)(inputs[6]))
    outputs.append(BatchNormalization(axis=2)(inputs[6]))
    outputs.append(BatchNormalization()(inputs[8]))
    outputs.append(BatchNormalization(axis=1)(inputs[8]))
    outputs.append(BatchNormalization()(inputs[27]))
    outputs.append(BatchNormalization(axis=1)(inputs[27]))
    outputs.append(BatchNormalization()(inputs[14]))
    outputs.append(BatchNormalization(axis=1)(inputs[14]))
    outputs.append(BatchNormalization(axis=2)(inputs[14]))
    outputs.append(BatchNormalization()(inputs[16]))
    # todo: check if TensorFlow >= 2.1 supports this
    #outputs.append(BatchNormalization(axis=1)(inputs[16])) # tensorflow.python.framework.errors_impl.InternalError:  The CPU implementation of FusedBatchNorm only supports NHWC tensor format for now.
    outputs.append(BatchNormalization(axis=2)(inputs[16]))
    outputs.append(BatchNormalization(axis=3)(inputs[16]))
    outputs.append(BatchNormalization()(inputs[18]))
    outputs.append(BatchNormalization(axis=1)(inputs[18]))
    outputs.append(BatchNormalization(axis=2)(inputs[18]))
    outputs.append(BatchNormalization(axis=3)(inputs[18]))
    outputs.append(BatchNormalization(axis=4)(inputs[18]))
    outputs.append(BatchNormalization()(inputs[20]))
    outputs.append(BatchNormalization(axis=1)(inputs[20]))
    outputs.append(BatchNormalization(axis=2)(inputs[20]))
    outputs.append(BatchNormalization(axis=3)(inputs[20]))
    outputs.append(BatchNormalization(axis=4)(inputs[20]))
    outputs.append(BatchNormalization(axis=5)(inputs[20]))

    outputs.append(Dropout(0.5)(inputs[4]))

    outputs.append(ZeroPadding2D(2)(inputs[4]))
    outputs.append(ZeroPadding2D((2, 3))(inputs[4]))
    outputs.append(ZeroPadding2D(((1, 2), (3, 4)))(inputs[4]))
    outputs.append(Cropping2D(2)(inputs[4]))
    outputs.append(Cropping2D((2, 3))(inputs[4]))
    outputs.append(Cropping2D(((1, 2), (3, 4)))(inputs[4]))

    outputs.append(Dense(3, use_bias=True)(inputs[13]))
    outputs.append(Dense(3, use_bias=True)(inputs[14]))
    outputs.append(Dense(4, use_bias=False)(inputs[16]))
    outputs.append(Dense(4, use_bias=False, activation='tanh')(inputs[18]))
    outputs.append(Dense(4, use_bias=False)(inputs[20]))

    outputs.append(Reshape(((2 * 3 * 4 * 5 * 6),))(inputs[0]))
    outputs.append(Reshape((2, 3 * 4 * 5 * 6))(inputs[0]))
    outputs.append(Reshape((2, 3, 4 * 5 * 6))(inputs[0]))
    outputs.append(Reshape((2, 3, 4, 5 * 6))(inputs[0]))
    outputs.append(Reshape((2, 3, 4, 5, 6))(inputs[0]))

    outputs.append(Reshape((16,))(inputs[8]))
    outputs.append(Reshape((2, 8))(inputs[8]))
    outputs.append(Reshape((2, 2, 4))(inputs[8]))
    outputs.append(Reshape((2, 2, 2, 2))(inputs[8]))
    outputs.append(Reshape((2, 2, 1, 2, 2))(inputs[8]))

    outputs.append(UpSampling2D(size=(1, 2), interpolation='nearest')(inputs[4]))
    outputs.append(UpSampling2D(size=(5, 3), interpolation='nearest')(inputs[4]))
    outputs.append(UpSampling2D(size=(1, 2), interpolation='bilinear')(inputs[4]))
    outputs.append(UpSampling2D(size=(5, 3), interpolation='bilinear')(inputs[4]))

    for axis in [-5, -4, -3, -2, -1, 1, 2, 3, 4, 5]:
        outputs.append(Concatenate(axis=axis)([inputs[0], inputs[1]]))
    for axis in [-4, -3, -2, -1, 1, 2, 3, 4]:
        outputs.append(Concatenate(axis=axis)([inputs[2], inputs[3]]))
    for axis in [-3, -2, -1, 1, 2, 3]:
        outputs.append(Concatenate(axis=axis)([inputs[4], inputs[5]]))
    for axis in [-2, -1, 1, 2]:
        outputs.append(Concatenate(axis=axis)([inputs[6], inputs[7]]))
    for axis in [-1, 1]:
        outputs.append(Concatenate(axis=axis)([inputs[8], inputs[9]]))
    for axis in [-1, 2]:
        outputs.append(Concatenate(axis=axis)([inputs[14], inputs[15]]))
    for axis in [-1, 3]:
        outputs.append(Concatenate(axis=axis)([inputs[16], inputs[17]]))
    for axis in [-1, 4]:
        outputs.append(Concatenate(axis=axis)([inputs[18], inputs[19]]))
    for axis in [-1, 5]:
        outputs.append(Concatenate(axis=axis)([inputs[20], inputs[21]]))

    outputs.append(UpSampling1D(size=2)(inputs[6]))
    # outputs.append(UpSampling1D(size=2)(inputs[8])) # ValueError: Input 0 of layer up_sampling1d_1 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [None, 16]

    outputs.append(Multiply()([inputs[10], inputs[11]]))
    outputs.append(Multiply()([inputs[11], inputs[10]]))
    outputs.append(Multiply()([inputs[11], inputs[13]]))
    outputs.append(Multiply()([inputs[10], inputs[11], inputs[12]]))
    outputs.append(Multiply()([inputs[11], inputs[12], inputs[13]]))

    shared_conv = Conv2D(1, (1, 1),
                         padding='valid', name='shared_conv', activation='relu')

    up_scale_2 = UpSampling2D((2, 2))
    x1 = shared_conv(up_scale_2(inputs[23]))  # (1, 8, 8)
    x2 = shared_conv(up_scale_2(inputs[24]))  # (1, 8, 8)
    x3 = Conv2D(1, (1, 1), padding='valid')(up_scale_2(inputs[24]))  # (1, 8, 8)
    x = Concatenate()([x1, x2, x3])  # (3, 8, 8)
    outputs.append(x)

    x = Conv2D(3, (1, 1), padding='same', use_bias=False)(x)  # (3, 8, 8)
    outputs.append(x)
    x = Dropout(0.5)(x)
    outputs.append(x)
    x = Concatenate()([
        MaxPooling2D((2, 2))(x),
        AveragePooling2D((2, 2))(x)])  # (6, 4, 4)
    outputs.append(x)

    x = Flatten()(x)  # (1, 1, 96)
    x = Dense(4, use_bias=False)(x)
    outputs.append(x)
    x = Dense(3)(x)  # (1, 1, 3)
    outputs.append(x)

    outputs.append(Add()([inputs[26], inputs[30], inputs[30]]))
    outputs.append(Subtract()([inputs[26], inputs[30]]))
    outputs.append(Multiply()([inputs[26], inputs[30], inputs[30]]))
    outputs.append(Average()([inputs[26], inputs[30], inputs[30]]))
    outputs.append(Maximum()([inputs[26], inputs[30], inputs[30]]))
    outputs.append(Concatenate()([inputs[26], inputs[30], inputs[30]]))

    intermediate_input_shape = (3,)
    intermediate_in = Input(intermediate_input_shape)
    intermediate_x = intermediate_in
    intermediate_x = Dense(8)(intermediate_x)
    intermediate_x = Dense(5, name='duplicate_layer_name')(intermediate_x)
    intermediate_model = Model(
        inputs=[intermediate_in], outputs=[intermediate_x],
        name='intermediate_model')
    intermediate_model.compile(loss='mse', optimizer='nadam')

    x = intermediate_model(x)  # (1, 1, 5)

    intermediate_model_2 = Sequential()
    intermediate_model_2.add(Dense(7, input_shape=(5,)))
    intermediate_model_2.add(Dense(5, name='duplicate_layer_name'))
    intermediate_model_2.compile(optimizer='rmsprop',
                                 loss='categorical_crossentropy')

    x = intermediate_model_2(x)  # (1, 1, 5)

    x = Dense(3)(x)  # (1, 1, 3)

    shared_activation = Activation('tanh')

    outputs = outputs + [
        Activation('tanh')(inputs[25]),
        Activation('hard_sigmoid')(inputs[25]),
        Activation('selu')(inputs[25]),
        Activation('sigmoid')(inputs[25]),
        Activation('softplus')(inputs[25]),
        Activation('softmax')(inputs[25]),
        Activation('softmax')(inputs[25]),
        Activation('relu')(inputs[25]),
        LeakyReLU()(inputs[25]),
        ELU()(inputs[25]),
        PReLU()(inputs[24]),
        PReLU()(inputs[25]),
        PReLU()(inputs[26]),
        shared_activation(inputs[25]),
        Activation('linear')(inputs[26]),
        Activation('linear')(inputs[23]),
        x,
        shared_activation(x),
    ]

    model = Model(inputs=inputs, outputs=outputs, name='test_model_exhaustive')
    model.compile(loss='mse', optimizer='nadam')

    # fit to dummy data
    training_data_size = 2
    data_in = generate_input_data(training_data_size, input_shapes)
    initial_data_out = model.predict(data_in)
    data_out = generate_output_data(training_data_size, initial_data_out)
    model.fit(data_in, data_out, epochs=10)
    return model
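# Hypothetical sketch of the data helpers used above (generate_input_data /
# generate_output_data): they simply create random arrays matching the model's
# input and output shapes. The actual helpers in the source repo may differ.
import numpy as np

def generate_input_data(num_samples, input_shapes):
    return [np.random.random(size=(num_samples,) + tuple(shape)) for shape in input_shapes]

def generate_output_data(num_samples, outputs):
    return [np.random.random(size=(num_samples,) + out.shape[1:]) for out in outputs]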
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, LSTM, Reshape, Dense, Dropout
from load_data import *

batch_size = 1
inputs = Input(shape = (sampled_data.shape[1],sampled_data.shape[2],sampled_data.shape[3]), batch_size = batch_size)

conv2d_1 = Conv2D(filters = 32, kernel_size = (3,3), strides = 1, padding = "same", activation='relu')(inputs)
conv2d_1 = MaxPool2D(pool_size = (2,2), padding = "same")(conv2d_1)
conv2d_1 = Dropout(.3)(conv2d_1)

conv2d_2 = Conv2D(filters = 64, kernel_size = (3,3), strides = 1, padding = "same", activation='relu')(conv2d_1)
conv2d_2 = MaxPool2D(pool_size = (2,2), padding = "same")(conv2d_2)
conv2d_2 = Dropout(.3)(conv2d_2)

reshape = Reshape((222, -1))(conv2d_2)

lstm = LSTM(200, return_sequences = False)(reshape)

dense_1 = Dense(64, activation = "relu")(lstm)
dense_2 = Dense(32, activation = "relu")(dense_1)

outputs = Dense(len(set(labels)), activation = "softmax")(dense_2)

rcnn = Model(inputs, outputs)
rcnn.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
rcnn.summary()

rcnn.fit(X_train, y_train, epochs=10, batch_size=1, validation_data=(X_val, y_val))
def get_model_v10(num_classes, preprocessing=None, bypass_type=None):
    """Create SqueezeNet architecture as described in [1], version 1.0.

    This is v1.0 implementation, it corresponds to original paper
    description. Check [2] for more implementation information.

    Parameters
    ----------
    preprocessing : model
        Input preprocessing module that adapts input shape to what
        SqueezeNet expects. If None, SqueezeNet uses Conv2D and MaxPool as
        described in Table 1 [1] and input shape is fixed to 227x227x3.
        Preprocessing module must output data with size (55x55x96) which
        is input to first fire module (fire2 in Table 1 [1]).
        Example: input = Input(shape=(110, 110, 3))
                 output = Conv2D(96, (3, 3), (2,2))(input)
                 preprocessing = Model(input, output)

    num_classes : int
        Number of classes to detect.

    bypass_type : {None, 'simple', 'complex'}
        Bypass type to be applied, see Fig. 2 [1] for more detail.

    Returns
    -------
    model : model
        Keras model of SqueezeNet architecture.

    """
    bypass = set_bypass(bypass_type=bypass_type)
    if preprocessing is None:
        inputs = Input(shape=(227, 227, 3))
        net = default_preprocessor(inputs, version='1.0')
    else:
        inputs = preprocessing.input
        net = preprocessing.output
        # the first fire module (fire2 in Table 1 [1]) expects input of size (55, 55, 96)
        assert net.get_shape()[1:] == (55, 55, 96)
    net = fire_module(net, 'fire2', 16, bypass['fire1'])
    net = fire_module(net, 'fire3', 16, bypass['fire2'])
    net = fire_module(net, 'fire4', 32, bypass['fire3'])
    net = MaxPool2D(pool_size=(3, 3), strides=(2, 2))(net)

    net = fire_module(net, 'fire5', 32, bypass['fire5'])
    net = fire_module(net, 'fire6', 48, bypass['fire6'])
    net = fire_module(net, 'fire7', 48, bypass['fire7'])
    net = fire_module(net, 'fire8', 64, bypass['fire8'])
    net = MaxPool2D(pool_size=(3, 3), strides=(2, 2))(net)

    net = fire_module(net, 'fire9', 64, bypass=bypass['fire9'])
    net = Conv2D(
        filters=num_classes,
        kernel_size=(1, 1),
        strides=(1, 1),
        activation='relu',
        name='conv10',
    )(net)
    net = GlobalAveragePooling2D()(net)
    net = Reshape((num_classes, ))(net)
    net = Softmax()(net)
    model = Model(inputs, net)
    return model
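# A minimal usage sketch of the `preprocessing` hook described in the docstring
# above: a small model mapping a custom input size to the (55, 55, 96) tensor the
# first fire module expects. The 113x113 input and 5x5/stride-2 conv here are
# illustrative choices, not from the original paper.
def example_model_with_custom_preprocessing(num_classes=10):
    pre_in = Input(shape=(113, 113, 3))
    pre_out = Conv2D(96, (5, 5), strides=(2, 2), activation='relu')(pre_in)  # -> (55, 55, 96)
    preprocessing = Model(pre_in, pre_out)
    return get_model_v10(num_classes, preprocessing=preprocessing, bypass_type='simple')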
    def build_model(self):

        # Input layer
        inputs = Input(name='inputs',
                       shape=self.input_shape_hwc,
                       dtype='float32')

        # Convolution layer (VGG)
        y = Conv2D(16, (3, 3), padding='same',
                   kernel_initializer='he_normal')(inputs)
        y = BatchNormalization()(y)
        y = Activation('relu')(y)
        y = MaxPooling2D(pool_size=(2, 2))(y)

        y = Conv2D(32, (3, 3), padding='same',
                   kernel_initializer='he_normal')(y)
        y = BatchNormalization()(y)
        y = Activation('relu')(y)
        y = MaxPooling2D(pool_size=(2, 2))(y)

        y = Conv2D(32, (3, 3), padding='same',
                   kernel_initializer='he_normal')(y)
        y = BatchNormalization()(y)
        y = Activation('relu')(y)
        y = MaxPooling2D(pool_size=(1, 2))(y)

        y = Conv2D(64, (3, 3), padding='same',
                   kernel_initializer='he_normal')(y)
        y = BatchNormalization()(y)
        y = Activation('relu')(y)
        y = MaxPooling2D(pool_size=(1, 2))(y)

        y = Conv2D(64, (3, 3), padding='same',
                   kernel_initializer='he_normal')(y)
        y = BatchNormalization()(y)
        y = Activation('relu')(y)
        y = MaxPooling2D(pool_size=(1, 2))(y)

        y = Conv2D(128, (3, 3), padding='same',
                   kernel_initializer='he_normal')(y)
        y = BatchNormalization()(y)
        y = Activation('relu')(y)
        y = MaxPooling2D(pool_size=(1, 2))(y)

        # CNN to RNN
        y = Reshape(target_shape=((32, -1)))(y)
        y = Dense(128, activation='relu', kernel_initializer='he_normal')(y)

        # RNN layer
        lstm_1 = LSTM(128,
                      return_sequences=True,
                      kernel_initializer='he_normal')(y)
        lstm_1b = LSTM(128,
                       return_sequences=True,
                       go_backwards=True,
                       kernel_initializer='he_normal')(y)
        lstm1_merged = add([lstm_1, lstm_1b])

        lstm_2 = LSTM(128,
                      return_sequences=True,
                      kernel_initializer='he_normal')(lstm1_merged)
        lstm_2b = LSTM(128,
                       return_sequences=True,
                       go_backwards=True,
                       kernel_initializer='he_normal')(lstm1_merged)
        lstm2_merged = concatenate([lstm_2, lstm_2b])

        # transforms RNN output to character activations:
        y = Dense(self.class_num, kernel_initializer='he_normal')(lstm2_merged)
        y_pred = Activation('softmax', name='softmax')(y)

        labels = Input(name='labels', shape=[self.max_text_len], dtype='int64')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')

        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer
        loss_out = Lambda(self.ctc_func, output_shape=(1, ), name='ctc')(
            [y_pred, labels, input_length, label_length])  #(None, 1)

        train_model = Model(
            inputs=[inputs, labels, input_length, label_length],
            outputs=loss_out)

        predict_model = Model(inputs=[inputs], outputs=y_pred)

        return train_model, predict_model
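    # Hypothetical sketch of the self.ctc_func referenced in build_model above,
    # wrapping Keras' built-in CTC batch cost inside the lambda layer; the real
    # method in this class may differ.
    @staticmethod
    def ctc_func(args):
        y_pred, labels, input_length, label_length = args
        # K.ctc_batch_cost expects (y_true, y_pred, input_length, label_length)
        return K.ctc_batch_cost(labels, y_pred, input_length, label_length)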
def SegNet():
    model = Sequential()
    #encoder
    model.add(
        Conv2D(64, (3, 3),
               strides=(1, 1),
               input_shape=(3, img_w, img_h),
               padding='same',
               activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(64, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    #(128,128)
    model.add(
        Conv2D(128, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(128, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    #(64,64)
    model.add(
        Conv2D(256, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(256, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(256, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    #(32,32)
    model.add(
        Conv2D(512, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(512, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(512, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    #(16,16)
    model.add(
        Conv2D(512, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(512, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(512, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    #(8,8)
    #decoder
    model.add(UpSampling2D(size=(2, 2)))
    #(16,16)
    model.add(
        Conv2D(512, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(512, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(512, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(UpSampling2D(size=(2, 2)))
    #(32,32)
    model.add(
        Conv2D(512, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(512, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(512, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(UpSampling2D(size=(2, 2)))
    #(64,64)
    model.add(
        Conv2D(256, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(256, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(256, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(UpSampling2D(size=(2, 2)))
    #(128,128)
    model.add(
        Conv2D(128, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(128, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(UpSampling2D(size=(2, 2)))
    #(256,256)
    model.add(
        Conv2D(64, (3, 3),
               strides=(1, 1),
               input_shape=(3, img_w, img_h),
               padding='same',
               activation='relu'))
    model.add(BatchNormalization())
    model.add(
        Conv2D(64, (3, 3), strides=(1, 1), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(n_label, (1, 1), strides=(1, 1), padding='same'))
    model.add(Reshape((n_label, img_w * img_h)))
    # swap axis 1 and axis 2, equivalent to np.swapaxes(layer, 1, 2)
    model.add(Permute((2, 1)))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])
    model.summary()
    return model
# Shape info needed to build Decoder Model
shape = K.int_shape(x)

# Generate the latent vector
x = Flatten()(x)
latent = Dense(latent_dim, name='latent_vector')(x)

# Instantiate Encoder Model
encoder = Model(inputs, latent, name='encoder')
encoder.summary()

# Build the Decoder Model
latent_inputs = Input(shape=(latent_dim,), name='decoder_input')
x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs)
x = Reshape((shape[1], shape[2], shape[3]))(x)

# Stack of Transposed Conv2D blocks
# Notes:
# 1) Use Batch Normalization before ReLU on deep networks
# 2) Use UpSampling2D as alternative to strides>1
# - faster but not as good as strides>1
for filters in layer_filters[::-1]:
    x = Conv2DTranspose(filters=filters,
                        kernel_size=kernel_size,
                        strides=2,
                        activation='relu',
                        padding='same')(x)

x = Conv2DTranspose(filters=1,
                    kernel_size=kernel_size,
# embedding size
embedding_dim = 100
hidden_dim = 50

# Filter parameters
filter_sizes = [5, 6, 7]
num_filters = 30

vocabulary_size = len(dl.word_to_index_dict)
print(vocabulary_size)
#input
inputs = Input(shape=(seq_len, ), dtype='int32')
embedding = Embedding(input_dim=vocabulary_size,
                      output_dim=embedding_dim,
                      input_length=seq_len)(inputs)
reshape = Reshape((seq_len, embedding_dim, 1))(embedding)

conv_0 = Conv2D(num_filters,
                kernel_size=(filter_sizes[0], embedding_dim),
                padding='valid',
                kernel_initializer='normal',
                activation='relu',
                name='conv_0')(reshape)
conv_1 = Conv2D(num_filters,
                kernel_size=(filter_sizes[1], embedding_dim),
                padding='valid',
                kernel_initializer='normal',
                activation='relu',
                name='conv_1')(reshape)
conv_2 = Conv2D(num_filters,
                kernel_size=(filter_sizes[2], embedding_dim),
def Deeplabv3pMobileNetV3Small(input_shape=(512, 512, 3),
                               alpha=1.0,
                               weights=None,
                               input_tensor=None,
                               classes=21,
                               OS=8,
                               **kwargs):
    """ Instantiates the Deeplabv3+ MobileNetV3Small architecture
    # Arguments
        input_shape: shape of input image. format HxWxC
            PASCAL VOC model was trained on (512,512,3) images
        alpha: controls the width of the MobileNetV3Small network. This is known as the
            width multiplier in the MobileNetV2 paper.
                - If `alpha` < 1.0, proportionally decreases the number
                    of filters in each layer.
                - If `alpha` > 1.0, proportionally increases the number
                    of filters in each layer.
                - If `alpha` = 1, default number of filters from the paper
                    are used at each layer.
        weights: one of 'pascal_voc' (pre-trained on pascal voc)
            or None (random initialization)
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        classes: number of desired classes. If classes != 21,
            last layer is initialized randomly
        OS: determines input_shape/feature_extractor_output ratio. One of {8,16}.
            Used only for xception backbone.

    # Returns
        A Keras model instance.
    """
    if not (weights in {'pascal_voc', None}):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    if input_tensor is None:
        img_input = Input(shape=input_shape, name='image_input')
    else:
        img_input = input_tensor

    # normalize input image
    img_norm = Lambda(normalize, name='input_normalize')(img_input)

    # backbone body for feature extract
    x, skip_feature, backbone_len = MobileNetV3Small(include_top=False,
                                                     input_tensor=img_norm,
                                                     weights='imagenet',
                                                     OS=OS,
                                                     alpha=1.0)

    # ASPP block
    x = ASPP_block(x, OS)

    # Deeplabv3+ decoder for feature projection
    x = Decoder_block(x, skip_feature)

    # Final prediction conv block
    x = DeeplabConv2D(classes, (1, 1), padding='same',
                      name='logits_semantic')(x)
    x = Lambda(img_resize,
               arguments={
                   'size': (input_shape[0], input_shape[1]),
                   'mode': 'bilinear'
               },
               name='pred_resize')(x)
    x = Reshape((input_shape[0] * input_shape[1], classes))(x)
    x = Softmax(name='Predictions/Softmax')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    #if input_tensor is not None:
    #inputs = get_source_inputs(input_tensor)
    #else:
    #inputs = img_input
    model = Model(img_input, x, name='deeplabv3p_mobilenetv3small')

    return model, backbone_len
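# A minimal usage sketch of the builder above; one plausible use of the returned
# backbone_len is to freeze the MobileNetV3Small backbone for fine-tuning. The
# class count and training details here are placeholders.
if __name__ == '__main__':
    deeplab_model, backbone_len = Deeplabv3pMobileNetV3Small(input_shape=(512, 512, 3),
                                                             classes=21, OS=8)
    for layer in deeplab_model.layers[:backbone_len]:
        layer.trainable = False  # keep ImageNet-pretrained backbone weights fixed
    deeplab_model.summary()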
def Deeplabv3pXception(input_shape=(512, 512, 3),
                       weights='pascal_voc',
                       input_tensor=None,
                       classes=21,
                       OS=16,
                       **kwargs):
    """ Instantiates the Deeplabv3+ architecture
    Optionally loads weights pre-trained
    on PASCAL VOC. This model is available for TensorFlow only,
    and can only be used with inputs following the TensorFlow
    data format `(width, height, channels)`.
    # Arguments
        input_shape: shape of input image. format HxWxC
            PASCAL VOC model was trained on (512,512,3) images
        weights: one of 'pascal_voc' (pre-trained on pascal voc)
            or None (random initialization)
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        classes: number of desired classes. If classes != 21,
            last layer is initialized randomly
        OS: determines input_shape/feature_extractor_output ratio. One of {8,16}.
            Used only for xception backbone.
    # Returns
        A Keras model instance.
    # Raises
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
        ValueError: in case of invalid argument for `weights` or `backbone`
    """

    if not (weights in {'pascal_voc', None}):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    if input_tensor is None:
        img_input = Input(shape=input_shape, name='image_input')
    else:
        img_input = input_tensor

    # normalize input image
    img_norm = Lambda(normalize, name='input_normalize')(img_input)

    # backbone body for feature extract
    x, skip_feature, backbone_len = Xception_body(img_norm, OS)

    # ASPP block
    x = ASPP_block(x, OS)

    # Deeplabv3+ decoder for feature projection
    x = Decoder_block(x, skip_feature)

    # Final prediction conv block
    x = DeeplabConv2D(classes, (1, 1), padding='same',
                      name='logits_semantic')(x)
    x = Lambda(img_resize,
               arguments={
                   'size': (input_shape[0], input_shape[1]),
                   'mode': 'bilinear'
               },
               name='pred_resize')(x)
    x = Reshape((input_shape[0] * input_shape[1], classes))(x)
    x = Softmax(name='Predictions/Softmax')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    #if input_tensor is not None:
    #inputs = get_source_inputs(input_tensor)
    #else:
    #inputs = img_input

    model = Model(img_input, x, name='deeplabv3p_xception')

    # load weights
    if weights == 'pascal_voc':
        weights_path = get_file(
            'deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
            WEIGHTS_PATH_X,
            cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
    return model, backbone_len
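A minimal usage sketch for the builder above; building with `weights=None` skips the PASCAL VOC download, and the second return value is the backbone length (the argument values are only illustrative):

# Illustrative call of Deeplabv3pXception() defined above (randomly initialized, 21 classes).
deeplab_model, deeplab_backbone_len = Deeplabv3pXception(input_shape=(512, 512, 3),
                                                         weights=None,
                                                         classes=21,
                                                         OS=16)
deeplab_model.summary()
print('backbone length:', deeplab_backbone_len)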
Example No. 16
def gru_model():
    emb_n = 64
    category_num = {
         'adidmd5': (780369, emb_n),
         'idfamd5': (360, emb_n),
         'imeimd5': (1021836, emb_n),
         'macmd5': (329184, emb_n),
         'openudidmd5': (85051, emb_n),
         'ip': (813719, emb_n),
         'reqrealip': (9748, emb_n),
        'adunitshowid': (800, emb_n),
        'apptype': (91, emb_n),
        'carrier': (4, emb_n),
        'city': (331, emb_n),
        'dvctype': (3, emb_n),
        'model': (5923, emb_n),  # 7957 7958  5922
        'make': (1704, emb_n),
        'mediashowid': (313, emb_n),
        'ntt': (7, emb_n),
        'orientation': (2, emb_n),
        'osv': (185, emb_n),
        'pkgname': (2368, emb_n),
        'ppi': (119, emb_n),
        'ver': (3268, emb_n),
        'screen_area': (1396, emb_n),
        'creative_dpi': (1763, emb_n),
        'hour': (24, emb_n),
        'lan': (33, emb_n),
        'h': (985, emb_n),
        'w': (449, emb_n),

    }
    # Categorical feature inputs
    category_inp = Input(shape=(len(category),), name='category_inp')
    cat_embeds = []
    for idx, col in enumerate(category):
        x = Lambda(lambda x: x[:, idx, None])(category_inp)
        x = Embedding(category_num[col][0], category_num[col][1], input_length=1)(x)
        cat_embeds.append(x)
    embeds = concatenate(cat_embeds, axis=2)
    embeds = GaussianDropout(0.5)(embeds)
    # Numerical feature inputs
    numerical_inp = Input(shape=(len(numerical),), name='continous_inp')
    print('numerical dense units:', len(numerical) // 8 + 8)
    x2 = Dense(len(numerical) // 8 + 8, activation='relu', kernel_initializer='random_uniform',
               bias_initializer='zeros')(
        numerical_inp)
    x2 = Dropout(0.5)(x2)
    x2 = BatchNormalization()(x2)
    x2 = Reshape([1, int(x2.shape[1])])(x2)
    x = concatenate([embeds, x2], axis=2)
    # Backbone network
    x = CuDNNGRU(128)(x)
    x = BatchNormalization()(x)
    x = Dropout(0.50)(x)
    x = Dense(64, activation='relu', kernel_initializer='random_uniform')(x)
    x = PReLU()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.50)(x)
    x = Dense(32, activation='relu', kernel_initializer='random_uniform')(x)
    x = PReLU()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.50)(x)
    out_p = Dense(1, activation='sigmoid')(x)
    return Model(inputs=[category_inp, numerical_inp], outputs=out_p)
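gru_model() above reads module-level `category` and `numerical` column lists that are defined elsewhere; a hedged sketch of how they might look and how the model could be compiled (the lists below are placeholders, not the original feature configuration):

# Assumed globals for gru_model(); the categorical list mirrors the keys of category_num above,
# while the numerical column names are placeholders.
category = ['adidmd5', 'idfamd5', 'imeimd5', 'macmd5', 'openudidmd5', 'ip', 'reqrealip',
            'adunitshowid', 'apptype', 'carrier', 'city', 'dvctype', 'model', 'make',
            'mediashowid', 'ntt', 'orientation', 'osv', 'pkgname', 'ppi', 'ver',
            'screen_area', 'creative_dpi', 'hour', 'lan', 'h', 'w']
numerical = ['num_feat_%d' % i for i in range(40)]  # placeholder numeric feature names

model = gru_model()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()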
    def G_block(self, res):
        block_name = '%dx%d' % (2**res, 2**res)
        # res = 2 ... resolution_log2
        if res == 2:  # 4x4
            # Linear block
            # Gain is overridden to match the original implementation
            # sqrt(2) / 4 was used with He init
            projecting_layer = self.dense(
                units=np.prod(self.projecting_target_shape),
                gain=self.gain / self.projecting_gain_correction)
            linear_layers = [
                projecting_layer,
                Reshape(target_shape=self.projecting_target_shape,
                        dtype=self.policy),
                self.act()
            ]
            if self.use_bias: linear_layers = self.apply_bias(linear_layers)
            if self.use_pixelnorm: linear_layers = self.PN(linear_layers)
            linear_block = tf.keras.Sequential(linear_layers,
                                               name='Projecting')

            # Conv block
            conv_layers = [
                self.conv2d(fmaps=self.G_n_filters(res - 1)),
                self.act()
            ]
            if self.use_bias: conv_layers = self.apply_bias(conv_layers)
            if self.use_pixelnorm: conv_layers = self.PN(conv_layers)
            conv_block = tf.keras.Sequential(conv_layers, name='Conv')

            # Full block
            block_model = tf.keras.Sequential([linear_block, conv_block],
                                              name=block_name)
        else:  # 8x8 and up
            # 1st conv block
            if self.G_fused_scale:
                conv0_layers = [
                    self.conv2d(fmaps=self.G_n_filters(res - 1),
                                fused_up=True),
                    self.act()
                ]
                if self.use_bias: conv0_layers = self.apply_bias(conv0_layers)
                if self.use_pixelnorm: conv0_layers = self.PN(conv0_layers)
                conv0_block = tf.keras.Sequential(conv0_layers,
                                                  name='Conv0_up')

                block_layers = [conv0_block]
            else:
                conv0_layers = [
                    self.conv2d(fmaps=self.G_n_filters(res - 1)),
                    self.act()
                ]
                if self.use_bias: conv0_layers = self.apply_bias(conv0_layers)
                if self.use_pixelnorm: conv0_layers = self.PN(conv0_layers)
                conv0_block = tf.keras.Sequential(conv0_layers, name='Conv0')

                block_layers = [self.up_layers[res], conv0_block]

            # 2nd conv block
            conv1_layers = [
                self.conv2d(fmaps=self.G_n_filters(res - 1)),
                self.act()
            ]
            if self.use_bias: conv1_layers = self.apply_bias(conv1_layers)
            if self.use_pixelnorm: conv1_layers = self.PN(conv1_layers)
            conv1_block = tf.keras.Sequential(conv1_layers, name='Conv1')

            # Full block
            block_layers += [conv1_block]
            block_model = tf.keras.Sequential(block_layers, name=block_name)

        return block_model
Example No. 18
def func(labels):
    labels = RepeatVector(size * size)(labels)
    labels = Reshape((size, size, 1))(labels)
    return labels
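The helper above tiles one scalar label per sample into a spatial feature map, a common way to condition convolutional networks; a self-contained sketch of the same idea with `size` assumed to be 4:

# Standalone sketch of the label-broadcasting helper above (size assumed to be 4).
import numpy as np
from tensorflow.keras.layers import Input, RepeatVector, Reshape
from tensorflow.keras.models import Model

size = 4
label_in = Input(shape=(1,))                 # one scalar label per sample
tiled = RepeatVector(size * size)(label_in)  # -> (batch, size * size, 1)
tiled = Reshape((size, size, 1))(tiled)      # -> (batch, size, size, 1) feature map

broadcast_model = Model(label_in, tiled)
print(broadcast_model.predict(np.array([[1.0]])).shape)  # (1, 4, 4, 1)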
# In[4]:

# Load the pre-trained ResNet50 model
resnet50 = ResNet50(weights='imagenet',
                    include_top=False,
                    input_shape=(height, width, 3))

# In[5]:

# Set up the input
image_input = Input((height, width, 3), name='image_input')
# Use ResNet50 for feature extraction
x = resnet50(image_input)
# Build the RNN network
x = Reshape((10, 2048))(x)
x = Bidirectional(GRU(RNN_cell, return_sequences=True))(x)
x = Bidirectional(GRU(RNN_cell, return_sequences=True))(x)
x = Dense(num_classes, activation='softmax')(x)
# Define the model
model = Model(image_input, x)

# In[6]:

# Define the label input
labels = Input(shape=(max_len,), name='max_len')
# Input (prediction sequence) length
input_len = Input(shape=(1,), name='input_len')
# Label length
label_len = Input(shape=(1,), name='label_len')
# Lambda wraps a custom function into the network as a layer, for custom data computation and processing
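The cell stops right where that Lambda layer would appear; a hedged sketch of the usual continuation, wiring a CTC loss Lambda over the softmax output and the three auxiliary inputs defined above (an assumed completion written against tf.keras, not the original notebook code):

# Assumed continuation: CTC loss wrapped in a Lambda layer over the inputs defined above.
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Lambda
from tensorflow.keras.models import Model


def ctc_lambda_func(args):
    y_pred, y_true, pred_len, true_len = args
    return K.ctc_batch_cost(y_true, y_pred, pred_len, true_len)


ctc_loss = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
    [model.output, labels, input_len, label_len])
train_model = Model(inputs=[image_input, labels, input_len, label_len],
                    outputs=ctc_loss)
# The loss is computed inside the graph, so compile() just passes the Lambda output through.
train_model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)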
Example No. 20
    def _build(self):

        ### THE ENCODER
        encoder_input = Input(shape=self.input_dim, name='encoder_input')

        x = encoder_input

        for i in range(self.n_layers_encoder):
            conv_layer = Conv2D(filters=self.encoder_conv_filters[i],
                                kernel_size=self.encoder_conv_kernel_size[i],
                                strides=self.encoder_conv_strides[i],
                                padding='same',
                                name='encoder_conv_' + str(i))

            x = conv_layer(x)

            if self.use_batch_norm:
                x = BatchNormalization()(x)

            x = LeakyReLU()(x)

            if self.use_dropout:
                x = Dropout(rate=0.25)(x)

        shape_before_flattening = K.int_shape(x)[1:]

        x = Flatten()(x)
        self.mu = Dense(self.z_dim, name='mu')(x)
        self.log_var = Dense(self.z_dim, name='log_var')(x)

        self.z = Sampling(name='encoder_output')([self.mu, self.log_var])

        self.encoder = Model(encoder_input, [self.mu, self.log_var, self.z],
                             name='encoder')

        ### THE DECODER

        decoder_input = Input(shape=(self.z_dim, ), name='decoder_input')

        x = Dense(np.prod(shape_before_flattening))(decoder_input)
        x = Reshape(shape_before_flattening)(x)

        for i in range(self.n_layers_decoder):
            conv_t_layer = Conv2DTranspose(
                filters=self.decoder_conv_t_filters[i],
                kernel_size=self.decoder_conv_t_kernel_size[i],
                strides=self.decoder_conv_t_strides[i],
                padding='same',
                name='decoder_conv_t_' + str(i))

            x = conv_t_layer(x)

            if i < self.n_layers_decoder - 1:
                if self.use_batch_norm:
                    x = BatchNormalization()(x)
                x = LeakyReLU()(x)
                if self.use_dropout:
                    x = Dropout(rate=0.25)(x)
            else:
                x = Activation('sigmoid')(x)

        decoder_output = x

        self.decoder = Model(decoder_input, decoder_output, name='decoder')

        ### THE FULL VAE

        self.model = VAEModel(self.encoder, self.decoder, self.r_loss_factor)
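`Sampling` and `VAEModel` come from elsewhere in the original project; a minimal sketch of the reparameterization-trick layer that the `Sampling` call above usually stands for (an assumption about its implementation, not the author's code):

# Sketch of a reparameterization-trick layer matching the Sampling(...) call in _build() above.
import tensorflow as tf
from tensorflow.keras import layers


class Sampling(layers.Layer):
    """Draws z = mu + exp(0.5 * log_var) * eps with eps ~ N(0, I)."""

    def call(self, inputs):
        mu, log_var = inputs
        eps = tf.random.normal(shape=tf.shape(mu))
        return mu + tf.exp(0.5 * log_var) * eps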
Example No. 21
    def CreateModel(self):
        '''
        Defines the CNN/LSTM/CTC model using the Keras functional API.
        Input layer: a sequence of 200-dimensional feature vectors; the maximum length of one utterance is set to 1600 (about 16 s).
        Hidden layer 1: 3x3 convolutional layer
        Hidden layer 2: pooling layer with a pool size of 2
        Hidden layer 3: Dropout layer dropping 20% of the units to prevent overfitting
        Hidden layer 4: recurrent layer (LSTM/GRU)
        Hidden layer 5: Dropout layer dropping 20% of the units to prevent overfitting
        Hidden layer 6: fully connected layer with self.MS_OUTPUT_SIZE units and softmax activation
        Output layer: a custom CTC layer that uses the CTC loss as the loss function for connectionist temporal multi-output
        '''
        # Each frame is represented by 13-dim MFCC features plus their 13-dim first-order and 13-dim second-order deltas; the maximum signal sequence length is 1500
        input_data = Input(name='the_input',
                           shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH,
                                  1))

        layer_h1 = Conv2D(32, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(input_data)  # Convolutional layer
        layer_h1 = Dropout(0.1)(layer_h1)
        layer_h2 = Conv2D(32, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h1)  # Convolutional layer
        layer_h3 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h2)  # Pooling layer
        #layer_h3 = Dropout(0.2)(layer_h2) # Randomly drop some connections to prevent overfitting
        layer_h3 = Dropout(0.2)(layer_h3)
        layer_h4 = Conv2D(64, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h3)  # Convolutional layer
        layer_h4 = Dropout(0.2)(layer_h4)
        layer_h5 = Conv2D(64, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h4)  # Convolutional layer
        layer_h6 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h5)  # Pooling layer

        layer_h6 = Dropout(0.3)(layer_h6)
        layer_h7 = Conv2D(128, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h6)  # Convolutional layer
        layer_h7 = Dropout(0.3)(layer_h7)
        layer_h8 = Conv2D(128, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h7)  # Convolutional layer
        layer_h9 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h8)  # Pooling layer

        layer_h9 = Dropout(0.3)(layer_h9)
        layer_h10 = Conv2D(128, (3, 3),
                           use_bias=True,
                           activation='relu',
                           padding='same',
                           kernel_initializer='he_normal')(layer_h9)  # Convolutional layer
        layer_h10 = Dropout(0.4)(layer_h10)
        layer_h11 = Conv2D(128, (3, 3),
                           use_bias=True,
                           activation='relu',
                           padding='same',
                           kernel_initializer='he_normal')(layer_h10)  # Convolutional layer
        layer_h12 = MaxPooling2D(pool_size=1, strides=None,
                                 padding="valid")(layer_h11)  # Pooling layer

        #test=Model(inputs = input_data, outputs = layer_h6)
        #test.summary()

        layer_h13 = Reshape((200, 3200))(layer_h12)  # Reshape layer

        layer_h13 = Dropout(0.4)(layer_h13)
        layer_h14 = Dense(128,
                          activation="relu",
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h13)  # Fully connected layer
        layer_h14 = Dropout(0.4)(layer_h14)
        inner = layer_h14
        #layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM层

        rnn_size = 128
        gru_1 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru1')(inner)
        gru_1b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru1_b')(inner)
        gru1_merged = add([gru_1, gru_1b])
        gru_2 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru2')(gru1_merged)
        gru_2b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru2_b')(gru1_merged)

        gru2 = concatenate([gru_2, gru_2b])
        #layer_h12 = GRU(128,activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='he_normal', recurrent_initializer='orthogonal', bias_initializer='zeros', return_sequences=True)(layer_h11)

        layer_h15 = Dropout(0.4)(gru2)
        layer_h16 = Dense(128,
                          activation="relu",
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h15)  # Fully connected layer

        layer_h16 = Dropout(0.5)(layer_h16)  # Randomly drop some connections to prevent overfitting
        layer_h17 = Dense(self.MS_OUTPUT_SIZE,
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h16)  # Fully connected layer

        y_pred = Activation('softmax', name='Activation0')(layer_h17)
        model_data = Model(inputs=input_data, outputs=y_pred)
        #model_data.summary()

        labels = Input(name='the_labels',
                       shape=[self.label_max_string_length],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer

        #layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC
        loss_out = Lambda(self.ctc_lambda_func, output_shape=(1, ),
                          name='ctc')(
                              [y_pred, labels, input_length, label_length])

        model = Model(inputs=[input_data, labels, input_length, label_length],
                      outputs=loss_out)

        model.summary()

        # clipnorm seems to speed up convergence
        #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
        ada_d = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06)

        #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
        model.compile(loss={
            'ctc': lambda y_true, y_pred: y_pred
        },
                      optimizer=ada_d)

        # captures output of softmax so we can decode the output during visualization
        test_func = K.function([input_data], [y_pred])

        print('[*Info] Model created successfully, model compiled successfully')
        return model, model_data
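`test_func` above exposes the softmax output for decoding during visualization; a hedged sketch of greedy CTC decoding on top of it (an assumed usage, not a method of the original class):

# Assumed usage of test_func from CreateModel(): greedy CTC decoding of a feature batch.
import numpy as np
from tensorflow.keras import backend as K


def decode_batch(test_func, audio_batch):
    """audio_batch: array of shape (batch, AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, 1)."""
    y_pred = test_func([audio_batch])[0]                    # (batch, time_steps, MS_OUTPUT_SIZE)
    seq_len = np.full((y_pred.shape[0],), y_pred.shape[1])  # use every time step for each sample
    decoded, _ = K.ctc_decode(y_pred, seq_len, greedy=True)
    return K.get_value(decoded[0])                          # (batch, max_decoded_len) label ids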
Example No. 22
def build_model(image_size,
                n_classes,
                mode='training',
                l2_regularization=0.0,
                min_scale=0.1,
                max_scale=0.9,
                scales=None,
                aspect_ratios_global=[0.5, 1.0, 2.0],
                aspect_ratios_per_layer=None,
                two_boxes_for_ar1=True,
                steps=None,
                offsets=None,
                clip_boxes=False,
                variances=[1.0, 1.0, 1.0, 1.0],
                coords='centroids',
                normalize_coords=False,
                subtract_mean=None,
                divide_by_stddev=None,
                swap_channels=False,
                confidence_thresh=0.01,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400,
                return_predictor_sizes=False):
    '''
    Build a Keras model with SSD architecture, see references.

    The model consists of convolutional feature layers and a number of convolutional
    predictor layers that take their input from different feature layers.
    The model is fully convolutional.

    The implementation found here is a smaller version of the original architecture
    used in the paper (where the base network consists of a modified VGG-16 extended
    by a few convolutional feature layers), but of course it could easily be changed to
    an arbitrarily large SSD architecture by following the general design pattern used here.
    This implementation has 7 convolutional layers and 4 convolutional predictor
    layers that take their input from layers 4, 5, 6, and 7, respectively.

    Most of the arguments that this function takes are only needed for the anchor
    box layers. In case you're training the network, the parameters passed here must
    be the same as the ones used to set up `SSDBoxEncoder`. In case you're loading
    trained weights, the parameters passed here must be the same as the ones used
    to produce the trained weights.

    Some of these arguments are explained in more detail in the documentation of the
    `SSDBoxEncoder` class.

    Note: Requires Keras v2.0 or later. Training currently works only with the
    TensorFlow backend (v1.0 or later).

    Arguments:
        image_size (tuple): The input image size in the format `(height, width, channels)`.
        n_classes (int): The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
        mode (str, optional): One of 'training', 'inference' and 'inference_fast'. In 'training' mode,
            the model outputs the raw prediction tensor, while in 'inference' and 'inference_fast' modes,
            the raw predictions are decoded into absolute coordinates and filtered via confidence thresholding,
            non-maximum suppression, and top-k filtering. The difference between the latter two modes is that
            'inference' follows the exact procedure of the original Caffe implementation, while
            'inference_fast' uses a faster prediction decoding procedure.
        l2_regularization (float, optional): The L2-regularization rate. Applies to all convolutional layers.
        min_scale (float, optional): The smallest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images.
        max_scale (float, optional): The largest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images. All scaling factors between the smallest and the
            largest will be linearly interpolated. Note that the second to last of the linearly interpolated
            scaling factors will actually be the scaling factor for the last predictor layer, while the last
            scaling factor is used for the second box for aspect ratio 1 in the last predictor layer
            if `two_boxes_for_ar1` is `True`.
        scales (list, optional): A list of floats containing scaling factors per convolutional predictor layer.
            This list must be one element longer than the number of predictor layers. The first `k` elements are the
            scaling factors for the `k` predictor layers, while the last element is used for the second box
            for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
            last scaling factor must be passed either way, even if it is not being used. If a list is passed,
            this argument overrides `min_scale` and `max_scale`. All scaling factors must be greater than zero.
        aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be
            generated. This list is valid for all predictor layers. The original implementation uses more aspect ratios
            for some predictor layers and fewer for others. If you want to do that, too, then use the next argument instead.
        aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each predictor layer.
            This allows you to set the aspect ratios for each predictor layer individually. If a list is passed,
            it overrides `aspect_ratios_global`.
        two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise.
            If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
            using the scaling factor for the respective layer, the second one will be generated using the
            geometric mean of said scaling factor and the next bigger scaling factor.
        steps (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many
            pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over
            the image. If the list contains ints/floats, then that value will be used for both spatial dimensions.
            If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`.
            If no steps are provided, then they will be computed such that the anchor box center points will form an
            equidistant grid within the image dimensions.
        offsets (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either floats or tuples of two floats. These numbers represent for each predictor layer how many
            pixels from the top and left borders of the image the top-most and left-most anchor box center points should be
            as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions
            of the step size specified in the `steps` argument. If the list contains floats, then that value will
            be used for both spatial dimensions. If the list contains tuples of two floats, then they represent
            `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size,
            which is also the recommended setting.
        clip_boxes (bool, optional): If `True`, clips the anchor box coordinates to stay within image boundaries.
        variances (list, optional): A list of 4 floats >0. The anchor box offset for each coordinate will be divided by
            its respective variance value.
        coords (str, optional): The box coordinate format to be used internally by the model (i.e. this is not the input format
            of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width,
            and height), 'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model is supposed to use relative instead of absolute coordinates,
            i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates.
        subtract_mean (array-like, optional): `None` or an array-like object of integers or floating point values
            of any shape that is broadcast-compatible with the image shape. The elements of this array will be
            subtracted from the image pixel intensity values. For example, pass a list of three integers
            to perform per-channel mean normalization for color images.
        divide_by_stddev (array-like, optional): `None` or an array-like object of non-zero integers or
            floating point values of any shape that is broadcast-compatible with the image shape. The image pixel
            intensity values will be divided by the elements of this array. For example, pass a list
            of three integers to perform per-channel standard deviation normalization for color images.
        swap_channels (list, optional): Either `False` or a list of integers representing the desired order in which the input
            image channels should be swapped.
        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
            positive class in order to be considered for the non-maximum suppression stage for the respective class.
            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
            stage, while a larger value will result in a larger part of the selection process happening in the confidence
            thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes that have a Jaccard similarity of greater than `iou_threshold`
            with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
            to the box's confidence score.
        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
            non-maximum suppression stage.
        nms_max_output_size (int, optional): The maximal number of predictions that will be left over after the NMS stage.
        return_predictor_sizes (bool, optional): If `True`, this function not only returns the model, but also
            a list containing the spatial dimensions of the predictor layers. This isn't strictly necessary since
            you can always get their sizes easily via the Keras API, but it's convenient and less error-prone
            to get them this way. They are only relevant for training anyway (SSDBoxEncoder needs to know the
            spatial dimensions of the predictor layers), for inference you don't need them.

    Returns:
        model: The Keras SSD model.
        predictor_sizes (optional): A Numpy array containing the `(height, width)` portion
            of the output tensor shape for each convolutional predictor layer. During
            training, the generator function needs this in order to transform
            the ground truth labels into tensors of identical structure as the
            output tensors of the model, which is in turn needed for the cost
            function.

    References:
        https://arxiv.org/abs/1512.02325v5
    '''

    n_predictor_layers = 4  # The number of predictor conv layers in the network
    n_classes += 1  # Account for the background class.
    l2_reg = l2_regularization  # Make the internal name shorter.
    img_height, img_width, img_channels = image_size[0], image_size[
        1], image_size[2]

    ############################################################################
    # Get a few exceptions out of the way.
    ############################################################################

    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
                .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "It must be either scales is None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:  # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

    if len(
            variances
    ) != 4:  # We need one variance value for each of the four box coordinates
        raise ValueError(
            "4 variance values must be pased, but {} values were received.".
            format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError(
            "All variances must be >0, but the variances given are {}".format(
                variances))

    if (not (steps is None)) and (len(steps) != n_predictor_layers):
        raise ValueError(
            "You must provide at least one step value per predictor layer.")

    if (not (offsets is None)) and (len(offsets) != n_predictor_layers):
        raise ValueError(
            "You must provide at least one offset value per predictor layer.")

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    if aspect_ratios_per_layer:
        n_boxes = []
        for ar in aspect_ratios_per_layer:
            if (1 in ar) & two_boxes_for_ar1:
                n_boxes.append(len(ar) +
                               1)  # +1 for the second box for aspect ratio 1
            else:
                n_boxes.append(len(ar))
    else:  # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer
        if (1 in aspect_ratios_global) & two_boxes_for_ar1:
            n_boxes = len(aspect_ratios_global) + 1
        else:
            n_boxes = len(aspect_ratios_global)
        n_boxes = [n_boxes] * n_predictor_layers

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    ############################################################################
    # Define functions for the Lambda layers below.
    ############################################################################

    def identity_layer(tensor):
        return tensor

    def input_mean_normalization(tensor):
        return tensor - np.array(subtract_mean)

    def input_stddev_normalization(tensor):
        return tensor / np.array(divide_by_stddev)

    def input_channel_swap(tensor):
        if len(swap_channels) == 3:
            return K.stack([
                tensor[..., swap_channels[0]], tensor[..., swap_channels[1]],
                tensor[..., swap_channels[2]]
            ],
                           axis=-1)
        elif len(swap_channels) == 4:
            return K.stack([
                tensor[..., swap_channels[0]], tensor[..., swap_channels[1]],
                tensor[..., swap_channels[2]], tensor[..., swap_channels[3]]
            ],
                           axis=-1)

    ############################################################################
    # Build the network.
    ############################################################################

    x = Input(shape=(img_height, img_width, img_channels))

    # The following identity layer is only needed so that the subsequent lambda layers can be optional.
    x1 = Lambda(identity_layer,
                output_shape=(img_height, img_width, img_channels),
                name='identity_layer')(x)
    if not (subtract_mean is None):
        x1 = Lambda(input_mean_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_mean_normalization')(x1)
    if not (divide_by_stddev is None):
        x1 = Lambda(input_stddev_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_stddev_normalization')(x1)
    if swap_channels:
        x1 = Lambda(input_channel_swap,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_channel_swap')(x1)

    conv1 = Conv2D(32, (5, 5),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv1')(x1)
    conv1 = BatchNormalization(axis=3, momentum=0.99, name='bn1')(
        conv1
    )  # TensorFlow uses the channels-last data format (batch, height, width, channels), hence axis = 3
    conv1 = ELU(name='elu1')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2), name='pool1')(conv1)

    conv2 = Conv2D(48, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv2')(pool1)
    conv2 = BatchNormalization(axis=3, momentum=0.99, name='bn2')(conv2)
    conv2 = ELU(name='elu2')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2), name='pool2')(conv2)

    conv3 = Conv2D(64, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv3')(pool2)
    conv3 = BatchNormalization(axis=3, momentum=0.99, name='bn3')(conv3)
    conv3 = ELU(name='elu3')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2), name='pool3')(conv3)

    conv4 = Conv2D(64, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv4')(pool3)
    conv4 = BatchNormalization(axis=3, momentum=0.99, name='bn4')(conv4)
    conv4 = ELU(name='elu4')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2), name='pool4')(conv4)

    conv5 = Conv2D(48, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv5')(pool4)
    conv5 = BatchNormalization(axis=3, momentum=0.99, name='bn5')(conv5)
    conv5 = ELU(name='elu5')(conv5)
    pool5 = MaxPooling2D(pool_size=(2, 2), name='pool5')(conv5)

    conv6 = Conv2D(48, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv6')(pool5)
    conv6 = BatchNormalization(axis=3, momentum=0.99, name='bn6')(conv6)
    conv6 = ELU(name='elu6')(conv6)
    pool6 = MaxPooling2D(pool_size=(2, 2), name='pool6')(conv6)

    conv7 = Conv2D(32, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv7')(pool6)
    conv7 = BatchNormalization(axis=3, momentum=0.99, name='bn7')(conv7)
    conv7 = ELU(name='elu7')(conv7)

    # The next part is to add the convolutional predictor layers on top of the base network
    # that we defined above. Note that I use the term "base network" differently than the paper does.
    # To me, the base network is everything that is not convolutional predictor layers or anchor
    # box layers. In this case we'll have four predictor layers, but of course you could
    # easily rewrite this into an arbitrarily deep base network and add an arbitrary number of
    # predictor layers on top of the base network by simply following the pattern shown here.

    # Build the convolutional predictor layers on top of conv layers 4, 5, 6, and 7.
    # We build two predictor layers on top of each of these layers: One for class prediction (classification), one for box coordinate prediction (localization)
    # We predict `n_classes` confidence values for each box, hence the `classes` predictors have depth `n_boxes * n_classes`
    # We predict 4 box coordinates for each box, hence the `boxes` predictors have depth `n_boxes * 4`
    # Output shape of `classes`: `(batch, height, width, n_boxes * n_classes)`
    classes4 = Conv2D(n_boxes[0] * n_classes, (3, 3),
                      strides=(1, 1),
                      padding="same",
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name='classes4')(conv4)
    classes5 = Conv2D(n_boxes[1] * n_classes, (3, 3),
                      strides=(1, 1),
                      padding="same",
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name='classes5')(conv5)
    classes6 = Conv2D(n_boxes[2] * n_classes, (3, 3),
                      strides=(1, 1),
                      padding="same",
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name='classes6')(conv6)
    classes7 = Conv2D(n_boxes[3] * n_classes, (3, 3),
                      strides=(1, 1),
                      padding="same",
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name='classes7')(conv7)
    # Output shape of `boxes`: `(batch, height, width, n_boxes * 4)`
    boxes4 = Conv2D(n_boxes[0] * 4, (3, 3),
                    strides=(1, 1),
                    padding="same",
                    kernel_initializer='he_normal',
                    kernel_regularizer=l2(l2_reg),
                    name='boxes4')(conv4)
    boxes5 = Conv2D(n_boxes[1] * 4, (3, 3),
                    strides=(1, 1),
                    padding="same",
                    kernel_initializer='he_normal',
                    kernel_regularizer=l2(l2_reg),
                    name='boxes5')(conv5)
    boxes6 = Conv2D(n_boxes[2] * 4, (3, 3),
                    strides=(1, 1),
                    padding="same",
                    kernel_initializer='he_normal',
                    kernel_regularizer=l2(l2_reg),
                    name='boxes6')(conv6)
    boxes7 = Conv2D(n_boxes[3] * 4, (3, 3),
                    strides=(1, 1),
                    padding="same",
                    kernel_initializer='he_normal',
                    kernel_regularizer=l2(l2_reg),
                    name='boxes7')(conv7)

    # Generate the anchor boxes
    # Output shape of `anchors`: `(batch, height, width, n_boxes, 8)`
    anchors4 = AnchorBoxes(img_height,
                           img_width,
                           this_scale=scales[0],
                           next_scale=scales[1],
                           aspect_ratios=aspect_ratios[0],
                           two_boxes_for_ar1=two_boxes_for_ar1,
                           this_steps=steps[0],
                           this_offsets=offsets[0],
                           clip_boxes=clip_boxes,
                           variances=variances,
                           coords=coords,
                           normalize_coords=normalize_coords,
                           name='anchors4')(boxes4)
    anchors5 = AnchorBoxes(img_height,
                           img_width,
                           this_scale=scales[1],
                           next_scale=scales[2],
                           aspect_ratios=aspect_ratios[1],
                           two_boxes_for_ar1=two_boxes_for_ar1,
                           this_steps=steps[1],
                           this_offsets=offsets[1],
                           clip_boxes=clip_boxes,
                           variances=variances,
                           coords=coords,
                           normalize_coords=normalize_coords,
                           name='anchors5')(boxes5)
    anchors6 = AnchorBoxes(img_height,
                           img_width,
                           this_scale=scales[2],
                           next_scale=scales[3],
                           aspect_ratios=aspect_ratios[2],
                           two_boxes_for_ar1=two_boxes_for_ar1,
                           this_steps=steps[2],
                           this_offsets=offsets[2],
                           clip_boxes=clip_boxes,
                           variances=variances,
                           coords=coords,
                           normalize_coords=normalize_coords,
                           name='anchors6')(boxes6)
    anchors7 = AnchorBoxes(img_height,
                           img_width,
                           this_scale=scales[3],
                           next_scale=scales[4],
                           aspect_ratios=aspect_ratios[3],
                           two_boxes_for_ar1=two_boxes_for_ar1,
                           this_steps=steps[3],
                           this_offsets=offsets[3],
                           clip_boxes=clip_boxes,
                           variances=variances,
                           coords=coords,
                           normalize_coords=normalize_coords,
                           name='anchors7')(boxes7)

    # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
    # We want the classes isolated in the last axis to perform softmax on them
    classes4_reshaped = Reshape((-1, n_classes),
                                name='classes4_reshape')(classes4)
    classes5_reshaped = Reshape((-1, n_classes),
                                name='classes5_reshape')(classes5)
    classes6_reshaped = Reshape((-1, n_classes),
                                name='classes6_reshape')(classes6)
    classes7_reshaped = Reshape((-1, n_classes),
                                name='classes7_reshape')(classes7)
    # Reshape the box coordinate predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`
    # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss
    boxes4_reshaped = Reshape((-1, 4), name='boxes4_reshape')(boxes4)
    boxes5_reshaped = Reshape((-1, 4), name='boxes5_reshape')(boxes5)
    boxes6_reshaped = Reshape((-1, 4), name='boxes6_reshape')(boxes6)
    boxes7_reshaped = Reshape((-1, 4), name='boxes7_reshape')(boxes7)
    # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    anchors4_reshaped = Reshape((-1, 8), name='anchors4_reshape')(anchors4)
    anchors5_reshaped = Reshape((-1, 8), name='anchors5_reshape')(anchors5)
    anchors6_reshaped = Reshape((-1, 8), name='anchors6_reshape')(anchors6)
    anchors7_reshaped = Reshape((-1, 8), name='anchors7_reshape')(anchors7)

    # Concatenate the predictions from the different layers and the associated anchor box tensors
    # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions,
    # so we want to concatenate along axis 1
    # Output shape of `classes_concat`: (batch, n_boxes_total, n_classes)
    classes_concat = Concatenate(axis=1, name='classes_concat')([
        classes4_reshaped, classes5_reshaped, classes6_reshaped,
        classes7_reshaped
    ])

    # Output shape of `boxes_concat`: (batch, n_boxes_total, 4)
    boxes_concat = Concatenate(axis=1, name='boxes_concat')(
        [boxes4_reshaped, boxes5_reshaped, boxes6_reshaped, boxes7_reshaped])

    # Output shape of `anchors_concat`: (batch, n_boxes_total, 8)
    anchors_concat = Concatenate(axis=1, name='anchors_concat')([
        anchors4_reshaped, anchors5_reshaped, anchors6_reshaped,
        anchors7_reshaped
    ])

    # The box coordinate predictions will go into the loss function just the way they are,
    # but for the class predictions, we'll apply a softmax activation layer first
    classes_softmax = Activation('softmax',
                                 name='classes_softmax')(classes_concat)

    # Concatenate the class and box coordinate predictions and the anchors to one large predictions tensor
    # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
    predictions = Concatenate(axis=2, name='predictions')(
        [classes_softmax, boxes_concat, anchors_concat])

    if mode == 'training':
        model = Model(inputs=x, outputs=predictions)
    elif mode == 'inference':
        decoded_predictions = DecodeDetections(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    elif mode == 'inference_fast':
        decoded_predictions = DecodeDetectionsFast(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    else:
        raise ValueError(
            "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'."
            .format(mode))

    if return_predictor_sizes:
        # The spatial dimensions are the same for the `classes` and `boxes` predictor layers.
        predictor_sizes = np.array([
            classes4._keras_shape[1:3], classes5._keras_shape[1:3],
            classes6._keras_shape[1:3], classes7._keras_shape[1:3]
        ])
        return model, predictor_sizes
    else:
        return model
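A hedged usage sketch for build_model() above; the values are only illustrative, chosen to satisfy the 4-predictor-layer checks (5 scales, 4 aspect-ratio lists):

# Illustrative call of build_model(): 4 predictor layers require 5 scales and 4 aspect-ratio lists.
ssd_model, predictor_sizes = build_model(
    image_size=(300, 480, 3),
    n_classes=5,                     # positive classes; the background class is added internally
    mode='training',
    l2_regularization=0.0005,
    scales=[0.08, 0.16, 0.32, 0.64, 0.96],
    aspect_ratios_per_layer=[[0.5, 1.0, 2.0]] * 4,
    two_boxes_for_ar1=True,
    variances=[1.0, 1.0, 1.0, 1.0],
    normalize_coords=True,
    return_predictor_sizes=True)
ssd_model.summary()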
def build_cifar10_generator(ngf=64, z_dim=128):
    """ Builds CIFAR10 DCGAN Generator Model
    PARAMS
    ------
    ngf: number of generator filters
    z_dim: number of dimensions in latent vector

    RETURN
    ------
    G: keras sequential
    """
    init = initializers.RandomNormal(stddev=0.02)

    G = Sequential()

    # Dense 1: 2x2x512
    G.add(
        Dense(2 * 2 * ngf * 8,
              input_shape=(z_dim, ),
              use_bias=True,
              kernel_initializer=init))
    G.add(Reshape((2, 2, ngf * 8)))
    G.add(BatchNormalization())
    G.add(LeakyReLU(0.2))

    # Conv 1: 4x4x256
    G.add(
        Conv2DTranspose(ngf * 4,
                        kernel_size=5,
                        strides=2,
                        padding='same',
                        use_bias=True,
                        kernel_initializer=init))
    G.add(BatchNormalization())
    G.add(LeakyReLU(0.2))

    # Conv 2: 8x8x128
    G.add(
        Conv2DTranspose(ngf * 2,
                        kernel_size=5,
                        strides=2,
                        padding='same',
                        use_bias=True,
                        kernel_initializer=init))
    G.add(BatchNormalization())
    G.add(LeakyReLU(0.2))

    # Conv 3: 16x16x64
    G.add(
        Conv2DTranspose(ngf,
                        kernel_size=5,
                        strides=2,
                        padding='same',
                        use_bias=True,
                        kernel_initializer=init))
    G.add(BatchNormalization())
    G.add(LeakyReLU(0.2))

    # Conv 4: 32x32x3
    G.add(
        Conv2DTranspose(3,
                        kernel_size=5,
                        strides=2,
                        padding='same',
                        use_bias=True,
                        kernel_initializer=init))
    G.add(Activation('tanh'))

    print("\nGenerator")
    G.summary()

    return G
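A short sketch of sampling from the generator built above (assumed usage):

# Assumed usage: sample latent vectors and generate a batch of 32x32x3 images in [-1, 1].
import numpy as np

G = build_cifar10_generator(ngf=64, z_dim=128)
z = np.random.normal(0.0, 1.0, size=(16, 128)).astype('float32')
fake_images = G.predict(z)
print(fake_images.shape)  # (16, 32, 32, 3)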
Example No. 24
batch_tr = return_batch(0, a, b, c, d, e)

# initialize outside generators for fit_model
gen_ts = gen_test(number)
gen_tr = gen_train(number)

#model

inp = Input(shape=(513, 30, 1), batch_size=32)
layer_conv1 = Conv2D(filters=50, kernel_size=(513, 1), padding='valid')(inp)
layer_conv2 = Conv2D(filters=50, kernel_size=(1, 15), padding='valid')(layer_conv1)
layer_flat = Flatten()(layer_conv2)
layer_dense = Dense(units=128, activation='relu')(layer_flat)

b1 = Dense(units=int(layer_flat.shape[1]), activation='relu')(layer_dense)
b1 = Reshape(target_shape=(int(layer_conv2.shape[1]), int(layer_conv2.shape[2]), int(layer_conv2.shape[3])))(b1)
b1 = Conv2DTranspose(filters=50, kernel_size=(1, 15), padding='valid')(b1)
b1 = Conv2DTranspose(filters=1, kernel_size=(513, 1), padding='valid')(b1)

b2 = Dense(units=int(layer_flat.shape[1]), activation='relu')(layer_dense)
b2 = Reshape(target_shape=(int(layer_conv2.shape[1]), int(layer_conv2.shape[2]), int(layer_conv2.shape[3])))(b2)
b2 = Conv2DTranspose(filters=50, kernel_size=(1, 15), padding='valid')(b2)
b2 = Conv2DTranspose(filters=1, kernel_size=(513, 1), padding='valid')(b2)

b3 = Dense(units=int(layer_flat.shape[1]), activation='relu')(layer_dense)
b3 = Reshape(target_shape=(int(layer_conv2.shape[1]), int(layer_conv2.shape[2]), int(layer_conv2.shape[3])))(b3)
b3 = Conv2DTranspose(filters=50, kernel_size=(1, 15), padding='valid')(b3)
b3 = Conv2DTranspose(filters=1, kernel_size=(513, 1), padding='valid')(b3)
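The snippet ends after the three decoder branches; a hedged sketch of how they might be collected into one three-output model and trained with the generators initialized above (an assumed continuation, not the original code):

# Assumed continuation: one shared encoder feeding three decoder branches as separate outputs.
from tensorflow.keras.models import Model  # assuming tf.keras provides the layers used above

model = Model(inputs=inp, outputs=[b1, b2, b3])
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()
# model.fit(gen_tr, validation_data=gen_ts, steps_per_epoch=100, epochs=10)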

Example No. 25
def get_model(args):
    model_name = args.model_architecture

    label_count = 12
    model_settings = prepare_model_settings(label_count, args)

    if model_name == "fc4":
        model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(
                input_shape=(model_settings['spectrogram_length'],
                             model_settings['dct_coefficient_count'])),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dense(model_settings['label_count'],
                                  activation="softmax")
        ])

    elif model_name == 'ds_cnn':
        print("DS CNN model invoked")
        input_shape = [
            model_settings['spectrogram_length'],
            model_settings['dct_coefficient_count'], 1
        ]
        filters = 64
        weight_decay = 1e-4
        regularizer = l2(weight_decay)
        final_pool_size = (int(input_shape[0] / 2), int(input_shape[1] / 2))

        # Model layers
        # Input pure conv2d
        inputs = Input(shape=input_shape)
        x = Conv2D(filters, (10, 4),
                   strides=(2, 2),
                   padding='same',
                   kernel_regularizer=regularizer)(inputs)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Dropout(rate=0.2)(x)

        # First layer of separable depthwise conv2d
        # Separable consists of depthwise conv2d followed by conv2d with 1x1 kernels
        x = DepthwiseConv2D(depth_multiplier=1,
                            kernel_size=(3, 3),
                            padding='same',
                            kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(filters, (1, 1),
                   padding='same',
                   kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        # Second layer of separable depthwise conv2d
        x = DepthwiseConv2D(depth_multiplier=1,
                            kernel_size=(3, 3),
                            padding='same',
                            kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(filters, (1, 1),
                   padding='same',
                   kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        # Third layer of separable depthwise conv2d
        x = DepthwiseConv2D(depth_multiplier=1,
                            kernel_size=(3, 3),
                            padding='same',
                            kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(filters, (1, 1),
                   padding='same',
                   kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        # Fourth layer of separable depthwise conv2d
        x = DepthwiseConv2D(depth_multiplier=1,
                            kernel_size=(3, 3),
                            padding='same',
                            kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(filters, (1, 1),
                   padding='same',
                   kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        # Reduce size and apply final softmax
        x = Dropout(rate=0.4)(x)

        x = AveragePooling2D(pool_size=final_pool_size)(x)
        x = Flatten()(x)
        outputs = Dense(model_settings['label_count'], activation='softmax')(x)

        # Instantiate model.
        model = Model(inputs=inputs, outputs=outputs)

    elif model_name == 'td_cnn':
        print("TD CNN model invoked")
        input_shape = [
            model_settings['spectrogram_length'],
            model_settings['dct_coefficient_count'], 1
        ]
        print(f"Input shape = {input_shape}")
        filters = 64
        weight_decay = 1e-4
        regularizer = l2(weight_decay)

        # Model layers
        # Input time-domain conv
        inputs = Input(shape=input_shape)
        x = Conv2D(filters, (512, 1),
                   strides=(384, 1),
                   padding='valid',
                   kernel_regularizer=regularizer)(inputs)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Dropout(rate=0.2)(x)
        x = Reshape((41, 64, 1))(x)

        # True conv
        x = Conv2D(filters, (10, 4),
                   strides=(2, 2),
                   padding='same',
                   kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Dropout(rate=0.2)(x)

        # First layer of separable depthwise conv2d
        # Separable consists of depthwise conv2d followed by conv2d with 1x1 kernels
        x = DepthwiseConv2D(depth_multiplier=1,
                            kernel_size=(3, 3),
                            padding='same',
                            kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(filters, (1, 1),
                   padding='same',
                   kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        # Second layer of separable depthwise conv2d
        x = DepthwiseConv2D(depth_multiplier=1,
                            kernel_size=(3, 3),
                            padding='same',
                            kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(filters, (1, 1),
                   padding='same',
                   kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        # Third layer of separable depthwise conv2d
        x = DepthwiseConv2D(depth_multiplier=1,
                            kernel_size=(3, 3),
                            padding='same',
                            kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(filters, (1, 1),
                   padding='same',
                   kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        # Fourth layer of separable depthwise conv2d
        x = DepthwiseConv2D(depth_multiplier=1,
                            kernel_size=(3, 3),
                            padding='same',
                            kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(filters, (1, 1),
                   padding='same',
                   kernel_regularizer=regularizer)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        # Reduce size and apply final softmax
        x = Dropout(rate=0.4)(x)

        # x = AveragePooling2D(pool_size=(25,5))(x)
        x = GlobalAveragePooling2D()(x)

        x = Flatten()(x)
        outputs = Dense(model_settings['label_count'], activation='softmax')(x)

        # Instantiate model.
        model = Model(inputs=inputs, outputs=outputs)

    else:
        raise ValueError("Model name {:} not supported".format(model_name))

    model.compile(
        #optimizer=keras.optimizers.RMSprop(learning_rate=args.learning_rate),  # Optimizer
        optimizer=keras.optimizers.Adam(
            learning_rate=args.learning_rate),  # Optimizer
        # Loss function to minimize
        loss=keras.losses.SparseCategoricalCrossentropy(),
        # List of metrics to monitor
        metrics=[keras.metrics.SparseCategoricalAccuracy()],
    )

    return model
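Each depthwise 3x3 + pointwise 1x1 pair above is a depthwise-separable convolution written out by hand so that BatchNormalization and ReLU can follow both halves. A minimal sketch of the same pattern as a reusable helper (separable_block is hypothetical and not part of the original code; tf.keras is assumed):

from tensorflow.keras import layers, regularizers


def separable_block(x, filters, weight_decay=1e-4):
    """Depthwise 3x3 conv followed by a pointwise 1x1 conv, each with BN + ReLU.

    Note: DepthwiseConv2D takes its regularizer via `depthwise_regularizer`.
    """
    reg = regularizers.l2(weight_decay)
    x = layers.DepthwiseConv2D((3, 3), padding='same',
                               depthwise_regularizer=reg)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(filters, (1, 1), padding='same',
                      kernel_regularizer=reg)(x)
    x = layers.BatchNormalization()(x)
    return layers.Activation('relu')(x)

In the td_cnn branch, the (512, 1) kernel with stride (384, 1) acts as a learned time-domain filterbank: with 'valid' padding, T input samples yield floor((T - 512) / 384) + 1 frames, i.e. 41 frames for a 16000-sample clip (1 s at 16 kHz, an assumption about the audio front-end), which matches the hard-coded Reshape((41, 64, 1)).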
def ResNet_autoencoder(height, width, depth, latentDim=16):
    X_input = Input(shape=(height, width, depth))

    X = X_input
    # encoder Stage 1
    X = Conv2D(32, (3, 3),
               strides=(2, 2),
               name='conv1-1',
               padding='same',
               kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_conv1-1')(X)
    X = Activation('relu')(X)
    X = Conv2D(32, (1, 1),
               strides=(1, 1),
               name='conv1-2',
               padding='same',
               kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_conv1-2')(X)

    skip_connect_1 = X
    X = Activation('relu')(X)

    # encoder Stage 2
    X = Conv2D(64, (3, 3),
               strides=(2, 2),
               name='conv2-1',
               padding='same',
               kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_conv2-1')(X)
    X = Activation('relu')(X)
    X = Conv2D(64, (1, 1),
               strides=(1, 1),
               name='conv2-2',
               padding='same',
               kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_conv2-2')(X)

    skip_connect_2 = X
    X = Activation('relu')(X)

    # encoder Stage 3
    X, _ = convolutional_block(X,
                               f=3,
                               filters=[64, 64, 128],
                               stage=3,
                               block='a',
                               s=2)
    X, skip_connect_3 = identity_block(X, 3, [64, 64, 128], stage=3, block='b')

    # encoder Stage 4
    X, _ = convolutional_block(X,
                               f=3,
                               filters=[128, 128, 256],
                               stage=4,
                               block='a',
                               s=2)
    X, skip_connect_4 = identity_block(X,
                                       3, [128, 128, 256],
                                       stage=4,
                                       block='b')

    # latent-space representation
    volumeSize = K.int_shape(X)
    X = Flatten()(X)
    latent = Dense(latentDim)(X)

    # encoder = Model(X_input, latent, name="encoder")

    # latentInputs = Input(shape=(latentDim,))
    X = Dense(np.prod(volumeSize[1:]))(latent)
    X = Reshape((volumeSize[1], volumeSize[2], volumeSize[3]))(X)

    # # decoder Stage 1
    X = Concatenate()([X, skip_connect_4])

    X, _ = identity_block_transpose(X, 3, [512, 256, 512], stage=5, block='b')
    X = Conv2DTranspose(256, (1, 1),
                        strides=(1, 1),
                        padding='same',
                        kernel_initializer=glorot_uniform(seed=0))(X)
    X, _ = convolutional_block_transpose(X,
                                         f=3,
                                         filters=[256, 128, 128],
                                         stage=5,
                                         block='a',
                                         s=2)

    # X = Cropping2D(cropping=((1, 0), (0, 0)), data_format=None)(X)

    # decoder Stage 2

    X = Concatenate()([X, skip_connect_3])

    X, _ = identity_block_transpose(X, 3, [256, 128, 256], stage=6, block='b')
    X = Conv2DTranspose(256, (1, 1),
                        strides=(1, 1),
                        padding='same',
                        kernel_initializer=glorot_uniform(seed=0))(X)
    X, _ = convolutional_block_transpose(X,
                                         f=3,
                                         filters=[128, 64, 64],
                                         stage=6,
                                         block='a',
                                         s=2)
    X = Cropping2D(cropping=((1, 0), (0, 0)), data_format=None)(X)

    # # # decoder Stage 3
    X = Concatenate()([X, skip_connect_2])
    X = Conv2DTranspose(128, (1, 1),
                        strides=(1, 1),
                        name='conv7-1',
                        padding='same',
                        kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_conv7-1')(X)
    X = Activation('relu')(X)
    X = Conv2DTranspose(64, (3, 3),
                        strides=(2, 2),
                        name='conv7-2',
                        padding='same',
                        kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_conv7-2')(X)
    X = Activation('relu')(X)
    X = Cropping2D(cropping=((1, 0), (0, 0)), data_format=None)(X)

    # # decoder Stage 4
    X = Concatenate()([X, skip_connect_1])
    X = Conv2DTranspose(64, (1, 1),
                        strides=(1, 1),
                        name='conv8-1',
                        kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_conv8-1')(X)
    X = Activation('relu')(X)
    X = Conv2DTranspose(32, (1, 1),
                        strides=(1, 1),
                        name='conv8-2',
                        kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_conv8-2')(X)
    X = Conv2DTranspose(1, (3, 3), strides=(2, 2), padding="same")(X)

    outputs = Activation("sigmoid")(X)

    autoencoder = Model(inputs=X_input,
                        outputs=outputs,
                        name='ResNet_autoencoder')
    # print(autoencoder.summary())
    return autoencoder
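A usage sketch (an assumption, not from the original source; IMG_H, IMG_W and the training arrays are placeholders, and convolutional_block / identity_block and their transposed variants must be defined elsewhere in the project). The network reconstructs its own input, so a pixel-wise loss is the natural choice:

# Hypothetical usage; the input geometry must be compatible with the four
# stride-2 stages and the Cropping2D layers above.
ae = ResNet_autoencoder(height=IMG_H, width=IMG_W, depth=1, latentDim=16)
ae.compile(optimizer='adam', loss='mse')
ae.fit(x_train, x_train, validation_data=(x_val, x_val),
       epochs=50, batch_size=32)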
Example #27
File: WGAN.py  Project: Zachdr1/WGAN
    def generator(self):
        # Input size = 100
        inputs = Input(shape=(100, ))
        x = Dense(4 * 4 * 1024)(inputs)
        x = Reshape(target_shape=(4, 4, 1024))(x)
        x = BatchNormalization()(x)
        x = LeakyReLU(0.02)(x)
        # Output size = 4x4x1024

        # Input size = 4x4x1024
        x = Conv2D(filters=512, kernel_size=5, padding='same',
                   use_bias=False)(x)
        x = BatchNormalization()(x)
        x = LeakyReLU(0.02)(x)
        x = UpSampling2D()(x)
        # Output size = 8x8x512

        # Input size = 8x8x512
        x = Conv2D(filters=256, kernel_size=5, padding='same',
                   use_bias=False)(x)
        x = BatchNormalization()(x)
        x = LeakyReLU(0.02)(x)
        x = UpSampling2D()(x)
        # Output size = 16x16x256

        # Input size = 16x16x256
        x = Conv2D(filters=256, kernel_size=5, padding='same',
                   use_bias=False)(x)
        x = BatchNormalization()(x)
        x = LeakyReLU(0.02)(x)
        # Output size = 16x16x256

        # Input size = 16x16x256
        x = Conv2D(filters=128, kernel_size=5, padding='same',
                   use_bias=False)(x)
        x = BatchNormalization()(x)
        x = LeakyReLU(0.02)(x)
        x = UpSampling2D()(x)

        # Output size = 32x32x128

        # Input size = 32x32x128
        x = Conv2D(filters=128, kernel_size=5, padding='same',
                   use_bias=False)(x)
        x = BatchNormalization()(x)
        x = LeakyReLU(0.02)(x)
        x = UpSampling2D()(x)
        # Output size = 64x64x128

        # Input size = 64x64x128
        x = Conv2D(filters=128, kernel_size=5, padding='same',
                   use_bias=False)(x)
        x = BatchNormalization()(x)
        x = LeakyReLU(0.02)(x)
        x = UpSampling2D()(x)
        # Output size = 128x128x128

        # Input size = 128x128x128
        x = Conv2D(filters=128, kernel_size=5, padding='same',
                   use_bias=False)(x)
        x = BatchNormalization()(x)
        x = LeakyReLU(0.02)(x)
        # Output size = 128x128x128

        # Input size = 128x128x128
        x = Conv2D(filters=3, kernel_size=5, padding='same', use_bias=False)(x)
        out = Activation('tanh')(x)
        # Output size = 128x128x3

        net = Model(inputs=inputs, outputs=out)

        return net
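Starting from the 4x4 feature map, the five UpSampling2D steps give 4 * 2^5 = 128, so the final tanh produces 128x128x3 images in [-1, 1]. A sampling sketch (names are assumptions, not from WGAN.py):

import numpy as np

# Hypothetical usage: feed 100-dimensional noise to the built generator.
noise = np.random.normal(0.0, 1.0, size=(16, 100)).astype('float32')
# generator_net = wgan.generator()              # `wgan` instance assumed
# fake_images = generator_net.predict(noise)    # -> (16, 128, 128, 3), in [-1, 1]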
Example #28
def pointnet_cls(include_top=True,
                 weights=None,
                 input_tensor=None,
                 input_shape=(2048, 3),
                 pooling=None,
                 classes=40,
                 activation=None,
                 use_tnet=True):
    """
    PointNet model for object classification
    :param include_top: whether to include the stack of fully connected layers
    :param weights: one of `None` (random initialization),
                    'modelnet' (pre-training on ModelNet),
                    or the path to the weights file to be loaded.
    :param input_tensor: optional tensor of size BxNxK
    :param input_shape: shape of the input point clouds (NxK)
    :param pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 2D tensor output of the last convolutional block (Nx1024).
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a 1D tensor of size 1024.
            - `max` means that global max pooling will
                be applied.
    :param classes: number of classes in the classification problem; if dict, construct multiple disjoint top layers
    :param activation: activation of the last layer (default None).
    :param use_tnet: whether to use the transformation subnets or not.
    :return: Keras model of the classification network
    """

    assert K.image_data_format() == 'channels_last'

    # Generate input tensor and get base network
    if input_tensor is None:
        input_tensor = Input(input_shape, name='Input_cloud')
    num_point = input_tensor.shape[-2]
    net = pointnet_base(input_tensor, use_tnet)

    # Top layers
    if include_top:
        # Symmetric function: max pooling
        # Done in 2D since 1D is painfully slow
        net = MaxPooling2D(pool_size=(num_point, 1),
                           padding='valid',
                           name='maxpool')(Lambda(K.expand_dims)(net))
        net = Reshape((1024, ))(net)
        if isinstance(classes, dict):
            # Disjoint stacks of fc layers, one per value in dict
            net = [
                dense_bn(net, units=512, scope=r + '_fc1', activation='relu')
                for r in classes
            ]
            net = [
                Dropout(0.3, name=r + '_dp1')(n) for r, n in zip(classes, net)
            ]
            net = [
                dense_bn(n, units=256, scope=r + '_fc2', activation='relu')
                for r, n in zip(classes, net)
            ]
            net = [
                Dropout(0.3, name=r + '_dp2')(n) for r, n in zip(classes, net)
            ]
            net = [
                Dense(units=classes[r], activation=activation, name=r)(n)
                for r, n in zip(classes, net)
            ]
        else:
            # Fully connected layers for a single classification task
            net = dense_bn(net, units=512, scope='fc1', activation='relu')
            net = Dropout(0.3, name='dp1')(net)
            net = dense_bn(net, units=256, scope='fc2', activation='relu')
            net = Dropout(0.3, name='dp2')(net)
            net = Dense(units=classes, name='fc3', activation=activation)(net)
    else:
        if pooling == 'avg':
            net = AveragePooling2D(pool_size=(num_point, 1),
                                   padding='valid',
                                   name='avgpool')(Lambda(K.expand_dims)(net))
            net = Reshape((net.shape[-2], ))(net)
        elif pooling == 'max':
            net = MaxPooling2D(pool_size=(num_point, 1),
                               padding='valid',
                               name='maxpool')(Lambda(K.expand_dims)(net))
            net = Reshape((net.shape[-2], ))(net)

    model = Model(input_tensor, net, name='pointnet_cls')

    # Load weights.
    if weights == 'modelnet':
        weights_path = keras_utils.get_file(
            'pointnet_modelnet_weights_tf_dim_ordering_tf_kernels.h5',
            WEIGHTS_PATH,
            cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
        if K.backend() == 'theano':
            keras_utils.convert_all_kernels_in_model(model)
    elif weights is not None:
        model.load_weights(weights, by_name=True)

    return model
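A usage sketch (an assumption; pointnet_base, dense_bn, keras_utils and WEIGHTS_PATH are assumed to come from the rest of the module). The common single-task case classifies 2048-point ModelNet40 clouds; passing a dict as `classes` instead builds one disjoint fully connected head per key:

# Hypothetical usage: single-task ModelNet40 classifier.
model = pointnet_cls(include_top=True,
                     weights=None,
                     input_shape=(2048, 3),
                     classes=40,
                     activation='softmax')

# Multi-task variant with disjoint heads (keys and sizes are placeholders):
# multi = pointnet_cls(classes={'category': 40, 'material': 10},
#                      activation='softmax')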
Example #29
sSAE_encoder.layers[5].set_weights(
    autoencoder_3.layers[2].get_weights())  # third Dense
sSAE_encoder.layers[6].set_weights(
    autoencoder_3.layers[3].get_weights())  # third BN

encoded_train = sSAE_encoder.predict(train)
encoded_test = sSAE_encoder.predict(test)

np.save('data/encoded_train.npy', encoded_train)
np.save('data/train_label.npy', train_label)
np.save('data/encoded_test.npy', encoded_test)
np.save('data/test_label.npy', test_label)

# Stack Dense and LSTM layers on top of the encoder, ending with a sigmoid classifier
mlp0 = Dense(units=32, activation='relu')(sSAE_encoder.output)
lstm_reshape = Reshape((1, 32))(mlp0)

lstm = LSTM(units=16, activation='tanh', return_sequences=False)(lstm_reshape)
lstm_drop = Dropout(0.3)(lstm)

mlp = Dense(units=10, activation='relu')(lstm_drop)
mlp2 = Dense(units=1, activation='sigmoid')(mlp)

classifier = Model(sSAE_encoder.input, mlp2)
optimize = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
classifier.compile(optimizer=optimize,
                   loss='binary_crossentropy',
                   metrics=['accuracy'])

save_dir = os.path.join(os.getcwd(), 'saved_models_temp')
filepath = "best_model.hdf5"
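The snippet stops after defining the checkpoint directory and filename. A minimal continuation sketch (an assumption, not part of the original; epochs and batch size are placeholders) that wires filepath into a ModelCheckpoint callback and trains the classifier:

from keras.callbacks import ModelCheckpoint

# Hypothetical continuation: keep only the best model seen during training.
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
checkpoint = ModelCheckpoint(os.path.join(save_dir, filepath),
                             monitor='val_accuracy',  # 'val_acc' on older Keras
                             save_best_only=True,
                             verbose=1)
classifier.fit(train, train_label,
               validation_data=(test, test_label),
               epochs=100, batch_size=64,
               callbacks=[checkpoint])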
Example #30
    def init_model(self):
        """
        Build the UNet model with the specified input image shape.
        """
        inputs = Input(shape=self.img_shape)

        # Apply regularization if not None or 0
        kr = regularizers.l2(self.l2_reg) if self.l2_reg else None

        """
        Encoding path
        """
        filters = 64
        in_ = inputs
        residual_connections = []
        for i in range(self.depth):
            conv = Conv3D(int(filters*self.cf), self.kernel_size,
                          activation=self.activation, padding=self.padding,
                          kernel_regularizer=kr)(in_)
            conv = Conv3D(int(filters * self.cf), self.kernel_size,
                          activation=self.activation, padding=self.padding,
                          kernel_regularizer=kr)(conv)
            bn = BatchNormalization()(conv)
            in_ = MaxPooling3D(pool_size=(2, 2, 2))(bn)

            # Update filter count and add bn layer to list for residual conn.
            filters *= 2
            residual_connections.append(bn)

        """
        Bottom (no max-pool)
        """
        conv = Conv3D(int(filters * self.cf), self.kernel_size,
                      activation=self.activation, padding=self.padding,
                      kernel_regularizer=kr)(in_)
        conv = Conv3D(int(filters * self.cf), self.kernel_size,
                      activation=self.activation, padding=self.padding,
                      kernel_regularizer=kr)(conv)
        bn = BatchNormalization()(conv)

        """
        Up-sampling
        """
        residual_connections = residual_connections[::-1]
        for i in range(self.depth):
            # Reduce filter count
            filters /= 2

            # Up-sampling block
            # Note: a 2x2 kernel is kept for backwards compatibility, but you
            # probably want to use 3x3 here instead.
            up = UpSampling3D(size=(2, 2, 2))(bn)
            conv = Conv3D(int(filters * self.cf), 2, activation=self.activation,
                          padding=self.padding, kernel_regularizer=kr)(up)
            bn = BatchNormalization()(conv)

            # Crop and concatenate
            cropped_res = self.crop_nodes_to_match(residual_connections[i], bn)
            merge = Concatenate(axis=-1)([cropped_res, bn])

            conv = Conv3D(int(filters * self.cf), self.kernel_size,
                          activation=self.activation, padding=self.padding,
                          kernel_regularizer=kr)(merge)
            conv = Conv3D(int(filters * self.cf), self.kernel_size,
                          activation=self.activation, padding=self.padding,
                          kernel_regularizer=kr)(conv)
            bn = BatchNormalization()(conv)

        """
        Output modeling layer
        """
        out = Conv3D(self.n_classes, 1, activation=self.out_activation)(bn)
        if self.flatten_output:
            out = Reshape([np.prod(self.img_shape[:3]),
                           self.n_classes], name='flatten_output')(out)

        return [inputs], [out]
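init_model returns the input and output tensors as lists rather than a compiled network. A sketch of how the surrounding class might wrap them (an assumption: the `build` helper and the loss choice are not from the original; crop_nodes_to_match is assumed to be defined on the class):

    def build(self):
        """Hypothetical helper: wrap init_model's tensors into a compiled Model."""
        inputs, outputs = self.init_model()
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam',
                      loss='categorical_crossentropy',  # assumes softmax out_activation
                      metrics=['accuracy'])
        return model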