Example #1
0
    def build(self):
        """Assemble the Keras graph and store it in ``self.model``.

        Layer sequence, as wired below:

        * three blocks of ZeroPadding3D -> Conv3D -> BatchNormalization ->
          ReLU -> SpatialDropout3D -> MaxPooling3D (32, 64 and 96 filters);
        * ``TimeDistributed(Flatten())``;
        * two bidirectional GRUs (256 units each, outputs concatenated);
        * a Dense projection to ``self.output_size`` followed by softmax;
        * the ``CTC`` helper, fed the predictions plus the label and
          length inputs.

        Every intermediate tensor is kept as an attribute on ``self``.
        The resulting model takes four inputs (``the_input``,
        ``the_labels``, ``input_length``, ``label_length``) and outputs
        whatever ``CTC`` returns.

        Reads ``img_c``, ``frames_n``, ``img_w``, ``img_h``,
        ``output_size`` and ``absolute_max_string_len`` from ``self``.

        NOTE(review): BatchNormalization is created without an explicit
        ``axis`` argument in both data-format branches -- confirm this is
        intended when the backend reports 'channels_first'.
        """
        # Input layout follows the backend's image data format.
        if K.image_data_format() != 'channels_first':
            video_shape = (self.frames_n, self.img_w, self.img_h, self.img_c)
        else:
            video_shape = (self.img_c, self.frames_n, self.img_w, self.img_h)

        # Settings shared by several layers below.
        he_init = 'he_normal'
        drop_rate = 0.5
        pool_cfg = dict(pool_size=(1, 2, 2), strides=(1, 2, 2))
        gru_cfg = dict(return_sequences=True, kernel_initializer='Orthogonal')

        # `net` always holds the most recent tensor; each step also
        # publishes it on `self` under the attribute name callers expect.
        net = self.input_data = Input(
            name='the_input', shape=video_shape, dtype='float32')

        # Convolutional block 1: 32 filters, (3, 5, 5) kernel.
        net = self.zero1 = ZeroPadding3D(
            padding=(1, 2, 2), name='zero1')(net)
        net = self.conv1 = Conv3D(
            32, (3, 5, 5), strides=(1, 2, 2),
            kernel_initializer=he_init, name='conv1')(net)
        net = self.batc1 = BatchNormalization(name='batc1')(net)
        net = self.actv1 = Activation('relu', name='actv1')(net)
        net = self.drop1 = SpatialDropout3D(drop_rate)(net)
        net = self.maxp1 = MaxPooling3D(name='max1', **pool_cfg)(net)

        # Convolutional block 2: 64 filters, (3, 5, 5) kernel.
        net = self.zero2 = ZeroPadding3D(
            padding=(1, 2, 2), name='zero2')(net)
        net = self.conv2 = Conv3D(
            64, (3, 5, 5), strides=(1, 1, 1),
            kernel_initializer=he_init, name='conv2')(net)
        net = self.batc2 = BatchNormalization(name='batc2')(net)
        net = self.actv2 = Activation('relu', name='actv2')(net)
        net = self.drop2 = SpatialDropout3D(drop_rate)(net)
        net = self.maxp2 = MaxPooling3D(name='max2', **pool_cfg)(net)

        # Convolutional block 3: 96 filters, (3, 3, 3) kernel.
        net = self.zero3 = ZeroPadding3D(
            padding=(1, 1, 1), name='zero3')(net)
        net = self.conv3 = Conv3D(
            96, (3, 3, 3), strides=(1, 1, 1),
            kernel_initializer=he_init, name='conv3')(net)
        net = self.batc3 = BatchNormalization(name='batc3')(net)
        net = self.actv3 = Activation('relu', name='actv3')(net)
        net = self.drop3 = SpatialDropout3D(drop_rate)(net)
        net = self.maxp3 = MaxPooling3D(name='max3', **pool_cfg)(net)

        # Flatten everything after axis 1 so the recurrent layers receive
        # one feature vector per step along that axis.
        net = self.resh1 = TimeDistributed(Flatten())(net)

        # Two stacked bidirectional GRUs; forward and backward outputs
        # are concatenated.
        net = self.gru_1 = Bidirectional(
            GRU(256, name='gru1', **gru_cfg), merge_mode='concat')(net)
        net = self.gru_2 = Bidirectional(
            GRU(256, name='gru2', **gru_cfg), merge_mode='concat')(net)

        # Map the recurrent output to character activations.
        net = self.dense1 = Dense(
            self.output_size, kernel_initializer=he_init,
            name='dense1')(net)
        self.y_pred = Activation('softmax', name='softmax')(net)

        # Extra inputs consumed only by the CTC helper.
        self.labels = Input(
            name='the_labels',
            shape=[self.absolute_max_string_len],
            dtype='float32')
        self.input_length = Input(
            name='input_length', shape=[1], dtype='int64')
        self.label_length = Input(
            name='label_length', shape=[1], dtype='int64')

        ctc_args = [
            self.y_pred, self.labels, self.input_length, self.label_length,
        ]
        self.loss_out = CTC('ctc', ctc_args)

        model_inputs = [
            self.input_data, self.labels, self.input_length, self.label_length,
        ]
        self.model = Model(inputs=model_inputs, outputs=self.loss_out)
Example #2
0
    def build(self):
        """Assemble the Keras graph and store it in ``self.model``.

        Layer sequence, as wired below:

        * five ZeroPadding3D -> Conv3D -> GroupNormalization -> ReLU
          stages with 64, 128, 256, 512 and 512 filters; stages 1-3 and 5
          end in a MaxPooling3D (stage 4 has none, and the stage-5 pool
          uses unit strides);
        * ``TimeDistributed(Flatten())``;
        * three bidirectional LSTMs (768 units each, outputs
          concatenated), each followed by GroupNormalization;
        * Dense(768) -> GroupNormalization -> ReLU -> Dense to
          ``self.output_size`` -> softmax;
        * the ``CTC`` helper, fed the predictions plus the label and
          length inputs.

        Every intermediate tensor is kept as an attribute on ``self``.
        The resulting model takes four inputs (``the_input``,
        ``the_labels``, ``input_length``, ``label_length``) and outputs
        whatever ``CTC`` returns.

        Reads ``img_c``, ``frames_n``, ``img_w``, ``img_h``,
        ``output_size`` and ``absolute_max_string_len`` from ``self``.

        NOTE(review): GroupNormalization is defined outside this block
        and is always called with ``groups=32`` and no explicit axis --
        confirm its default axis suits the 'channels_first' branch.
        """
        # Input layout follows the backend's image data format.
        if K.image_data_format() != 'channels_first':
            video_shape = (self.frames_n, self.img_w, self.img_h, self.img_c)
        else:
            video_shape = (self.img_c, self.frames_n, self.img_w, self.img_h)

        # Settings shared by several layers below.
        he_init = 'he_normal'
        n_groups = 32
        first_axis_pad = (1, 0, 0)  # pads only the first of the three axes
        kernel = (3, 3, 3)
        unit_strides = (1, 1, 1)
        pool_cfg = dict(pool_size=(1, 2, 2), strides=(1, 2, 2))
        lstm_cfg = dict(return_sequences=True, kernel_initializer='Orthogonal')

        # `net` always holds the most recent tensor; each step also
        # publishes it on `self` under the attribute name callers expect.
        net = self.input_data = Input(
            name='the_input', shape=video_shape, dtype='float32')

        # Stage 1: 64 filters, strides (1, 2, 2), then pooling.
        net = self.zero1 = ZeroPadding3D(
            padding=first_axis_pad, name='zero1')(net)
        net = self.conv1 = Conv3D(
            64, kernel, strides=(1, 2, 2),
            kernel_initializer=he_init, name='conv1')(net)
        net = self.gn1 = GroupNormalization(
            name='gn1', groups=n_groups)(net)
        net = self.actv1 = Activation('relu', name='actv1')(net)
        net = self.maxp1 = MaxPooling3D(name='max1', **pool_cfg)(net)

        # Stage 2: 128 filters, then pooling.
        net = self.zero2 = ZeroPadding3D(
            padding=first_axis_pad, name='zero2')(net)
        net = self.conv2 = Conv3D(
            128, kernel, strides=unit_strides,
            kernel_initializer=he_init, name='conv2')(net)
        net = self.gn2 = GroupNormalization(
            name='gn2', groups=n_groups)(net)
        net = self.actv2 = Activation('relu', name='actv2')(net)
        net = self.maxp2 = MaxPooling3D(name='max2', **pool_cfg)(net)

        # Stage 3: 256 filters, then pooling.
        net = self.zero3 = ZeroPadding3D(
            padding=first_axis_pad, name='zero3')(net)
        net = self.conv3 = Conv3D(
            256, kernel, strides=unit_strides,
            kernel_initializer=he_init, name='conv3')(net)
        net = self.gn3 = GroupNormalization(
            name='gn3', groups=n_groups)(net)
        net = self.actv3 = Activation('relu', name='actv3')(net)
        net = self.maxp3 = MaxPooling3D(name='max3', **pool_cfg)(net)

        # Stage 4: 512 filters, no pooling.
        net = self.zero4 = ZeroPadding3D(
            padding=first_axis_pad, name='zero4')(net)
        net = self.conv4 = Conv3D(
            512, kernel, strides=unit_strides,
            kernel_initializer=he_init, name='conv4')(net)
        net = self.gn4 = GroupNormalization(
            name='gn4', groups=n_groups)(net)
        net = self.actv4 = Activation('relu', name='actv4')(net)

        # Stage 5: 512 filters; the pool here uses unit strides, unlike
        # the earlier ones.
        net = self.zero5 = ZeroPadding3D(
            padding=first_axis_pad, name='zero5')(net)
        net = self.conv5 = Conv3D(
            512, kernel, strides=unit_strides,
            kernel_initializer=he_init, name='conv5')(net)
        net = self.gn5 = GroupNormalization(
            name='gn5', groups=n_groups)(net)
        net = self.actv5 = Activation('relu', name='actv5')(net)
        net = self.maxp5 = MaxPooling3D(
            pool_size=(1, 2, 2), strides=unit_strides, name='max5')(net)

        # Flatten everything after axis 1 so the recurrent layers receive
        # one feature vector per step along that axis.
        net = self.resh1 = TimeDistributed(Flatten())(net)

        # Three stacked bidirectional LSTMs, each normalised afterwards.
        net = self.lstm_1 = Bidirectional(
            LSTM(768, name='lstm1', **lstm_cfg), merge_mode='concat')(net)
        net = self.lstm_1_gn = GroupNormalization(
            name='lstm_1_gn', groups=n_groups)(net)
        net = self.lstm_2 = Bidirectional(
            LSTM(768, name='lstm2', **lstm_cfg), merge_mode='concat')(net)
        net = self.lstm_2_gn = GroupNormalization(
            name='lstm_2_gn', groups=n_groups)(net)
        net = self.lstm_3 = Bidirectional(
            LSTM(768, name='lstm3', **lstm_cfg), merge_mode='concat')(net)
        net = self.lstm_3_gn = GroupNormalization(
            name='lstm_3_gn', groups=n_groups)(net)

        # Map the recurrent output to character activations.
        net = self.dense1 = Dense(
            768, kernel_initializer=he_init, name='dense1')(net)
        net = self.gn6 = GroupNormalization(
            name='gn6', groups=n_groups)(net)
        net = self.actv6 = Activation('relu', name='actv6')(net)
        net = self.dense2 = Dense(
            self.output_size, kernel_initializer=he_init,
            name='dense2')(net)
        self.y_pred = Activation('softmax', name='softmax')(net)

        # Extra inputs consumed only by the CTC helper.
        self.labels = Input(
            name='the_labels',
            shape=[self.absolute_max_string_len],
            dtype='float32')
        self.input_length = Input(
            name='input_length', shape=[1], dtype='int64')
        self.label_length = Input(
            name='label_length', shape=[1], dtype='int64')

        ctc_args = [
            self.y_pred, self.labels, self.input_length, self.label_length,
        ]
        self.loss_out = CTC('ctc', ctc_args)

        model_inputs = [
            self.input_data, self.labels, self.input_length, self.label_length,
        ]
        self.model = Model(inputs=model_inputs, outputs=self.loss_out)