# Esempio n. 1 (Italian: "Example no. 1" — scraped-snippet separator)
# 0
    def forward(self, input_tensor, input_meta, is_training):
        """Build the forward graph for clip-level audio tagging.

        Two conv blocks and two residual blocks with average pooling,
        frequency-axis mean pooling, per-frame metadata fusion, two
        frame-wise Dense layers, and AutoPool1D aggregation over time.

        Parameters
        ----------
        input_tensor : tf.Tensor
            Input features; presumably (batch, frames, freq_bins, channels)
            — TODO confirm against the caller.
        input_meta : tf.Tensor
            Per-clip metadata vector; tiled across time frames below.
        is_training : bool or tf.Tensor
            Training flag forwarded to the conv/res blocks.

        Returns
        -------
        tf.Tensor
            Clip-level scores of shape (batch, classes_num).
        """
        self.input_tensor = input_tensor
        self.is_training = is_training
        self.input_meta = input_meta
        conv1 = self.conv_block1.forward(self.input_tensor, self.is_training)
        pool1 = tf.layers.average_pooling2d(conv1,
                                            pool_size=2,
                                            strides=2,
                                            padding='VALID')
        conv2 = self.conv_block2.forward(pool1, self.is_training)
        pool2 = tf.layers.average_pooling2d(conv2,
                                            pool_size=2,
                                            strides=2,
                                            padding='VALID')
        conv3 = self.res_block3.forward(pool2, self.is_training)
        pool3 = tf.layers.average_pooling2d(conv3,
                                            pool_size=2,
                                            strides=2,
                                            padding='VALID')
        conv4 = self.res_block4.forward(pool3, self.is_training)
        # pool_size=1 / strides=1 is an identity pooling, apparently kept
        # for symmetry with the other stages.
        pool4 = tf.layers.average_pooling2d(conv4,
                                            pool_size=1,
                                            strides=1,
                                            padding='VALID')
        #        conv5 = self.conv_block4.forward(pool4,self.is_training)
        #        pool5 = tf.layers.average_pooling2d(conv5,pool_size=2,strides=2,padding='VALID')

        # Collapse axis 2 by averaging (presumably the frequency axis,
        # giving (batch, frames, channels) — TODO confirm layout).
        pool4 = tf.reduce_mean(pool4, axis=2)
        ###########
        # Tile the per-clip metadata across every time frame and append it
        # to each frame's feature vector.
        fea_frames = tf.shape(pool4)[1]
        self.input_meta = tf.expand_dims(self.input_meta, 1)
        self.input_meta = tf.tile(self.input_meta, [1, fea_frames, 1])
        pool4 = tf.concat([pool4, self.input_meta], axis=-1)
        repr_size = tf.shape(pool4)[2]

        # NOTE(review): fea_frames/repr_size are dynamic tf.Tensor values from
        # tf.shape(); Keras' input_shape normally expects Python ints — confirm
        # this works in the targeted TF/Keras version.
        pool4 = TimeDistributed(Dense(
            self.hidden_layer_size,
            activation='relu',
            kernel_regularizer=regularizers.l2(0.0001)),
                                input_shape=(fea_frames, repr_size))(pool4)
        repr_size = self.hidden_layer_size

        # Output layer (frame-level class scores, no activation here)
        pool4 = TimeDistributed(Dense(
            self.classes_num, kernel_regularizer=regularizers.l2(0.0001)),
                                name='output_t',
                                input_shape=(fea_frames, repr_size))(pool4)

        # Apply autopool over time dimension
        # y = AutoPool1D(kernel_constraint=keras.constraints.non_neg(),
        #                axis=1, name='output')(y)
        output = AutoPool1D(axis=1,
                            name='output')(pool4)  ###(batch,num_classes)

        return output
def construct_mlp(num_frames,
                  input_size,
                  num_classes,
                  hidden_layer_size=128,
                  num_hidden_layers=1,
                  l2_reg=1e-5):
    """
    Construct a MLP model for urban sound tagging.

    Stacks TimeDistributed ReLU Dense layers, a TimeDistributed sigmoid
    output layer, then AutoPool1D to aggregate frame-level predictions
    into a single clip-level prediction.

    Parameters
    ----------
    num_frames : int
        Number of time frames per input clip.
    input_size : int
        Feature dimensionality of each frame.
    num_classes : int
        Number of output classes (multi-label, sigmoid).
    hidden_layer_size : int
        Width of each hidden Dense layer.
    num_hidden_layers : int
        Number of hidden layers to stack.
    l2_reg : float
        L2 weight-regularization coefficient.

    Returns
    -------
    model : keras.Model
    """
    # Input layer
    inp = Input(shape=(num_frames, input_size), dtype='float32', name='input')
    y = inp

    # Add hidden layers (frame-wise Dense + ReLU)
    repr_size = input_size
    for idx in range(num_hidden_layers):
        y = TimeDistributed(Dense(hidden_layer_size,
                                  activation='relu',
                                  kernel_regularizer=regularizers.l2(l2_reg)),
                            name='dense_{}'.format(idx + 1),
                            input_shape=(num_frames, repr_size))(y)
        repr_size = hidden_layer_size

    # Output layer (frame-level class probabilities)
    y = TimeDistributed(Dense(num_classes,
                              activation='sigmoid',
                              kernel_regularizer=regularizers.l2(l2_reg)),
                        name='output_t',
                        input_shape=(num_frames, repr_size))(y)

    # Apply autopool over time dimension
    # y = AutoPool1D(kernel_constraint=keras.constraints.non_neg(),
    #                axis=1, name='output')(y)
    y = AutoPool1D(axis=1, name='output')(y)

    # Pass the name to the constructor: Model.name is a read-only property
    # in recent Keras versions, so assigning `m.name = ...` would raise.
    m = Model(inputs=inp, outputs=y, name='urban_sound_classifier')
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only adds a stray "None" line.
    m.summary()

    return m
# Esempio n. 3 (scraped-snippet separator)
# 0
def construct_mlp(input_size, num_classes, num_frames,
                  dropout_size=0.5, ef_mode=4, l2_reg=1e-5):
    """
    Construct an EfficientNet-based model for urban sound tagging.

    A TimeDistributed EfficientNet backbone embeds each frame, the
    embedding is concatenated with 85-dim per-frame metadata, a sigmoid
    Dense layer produces frame-level predictions, and AutoPool1D
    aggregates them into a clip-level prediction.

    Parameters
    ----------
    num_frames : int
        Number of time frames; the second input carries 85 features per frame.
    input_size
        Shape of the first input, passed straight to ``Input(shape=...)``.
    num_classes : int
        Number of output classes (multi-label, sigmoid).
    dropout_size : float
        Dropout rate applied to the backbone embeddings.
    ef_mode : int
        EfficientNet variant, 0..7 (B0..B7).
    l2_reg : float
        L2 weight-regularization coefficient.

    Returns
    -------
    model : keras.Model

    Raises
    ------
    ValueError
        If ``ef_mode`` is not in 0..7.
    """
    # Heavy dependencies imported lazily, so they are only required when
    # this constructor is actually used.
    from keras.layers import Flatten, Conv1D, Conv2D, GlobalMaxPooling1D, GlobalAveragePooling1D, LSTM, Concatenate, GlobalAveragePooling2D, LeakyReLU

    import efficientnet.keras as efn

    # Dispatch table instead of an if/elif chain. Previously an unsupported
    # ef_mode fell through silently and later crashed with a NameError on
    # base_model; now it fails fast with a clear message.
    backbones = {
        0: efn.EfficientNetB0,
        1: efn.EfficientNetB1,
        2: efn.EfficientNetB2,
        3: efn.EfficientNetB3,
        4: efn.EfficientNetB4,
        5: efn.EfficientNetB5,
        6: efn.EfficientNetB6,
        7: efn.EfficientNetB7,
    }
    try:
        backbone_cls = backbones[ef_mode]
    except KeyError:
        raise ValueError('ef_mode must be an int in 0..7, got {!r}'.format(ef_mode))
    # 'noisy-student' pre-trained weights; 'imagenet' is the alternative.
    base_model = backbone_cls(weights='noisy-student', include_top=False, pooling='avg')

    input1 = Input(shape=input_size, dtype='float32', name='input')
    input2 = Input(shape=(num_frames,85), dtype='float32', name='input2') #1621
    y = TimeDistributed(base_model)(input1)
    y = TimeDistributed(Dropout(dropout_size))(y)
    y = Concatenate()([y, input2])
    y = TimeDistributed(Dense(num_classes, activation='sigmoid', kernel_regularizer=regularizers.l2(l2_reg)))(y)
    y = AutoPool1D(axis=1, name='output')(y)

    # Pass the name to the constructor: Model.name is a read-only property
    # in recent Keras versions, so assigning `m.name = ...` would raise.
    m = Model(inputs=[input1, input2], outputs=y, name='urban_sound_classifier')
    m.summary()

    return m
# Esempio n. 4 (scraped-snippet separator)
# 0
    def build(self):
        """Build the MLP Keras model and store it in ``self.model``.

        Stacks Dense hidden layers (wrapped in TimeDistributed when the
        input has a time axis), applies per-layer dropout, adds the output
        layer, and — in the time-distributed case — integrates over time
        by mean, sum, or AutoPool according to ``self.temporal_integration``.
        """
        # Input: (frames, freqs) when time-distributed, else a flat (freqs,).
        if self.use_time_distributed:
            input_shape = (self.n_frames, self.n_freqs)
        else:
            input_shape = (self.n_freqs,)

        inputs = Input(shape=input_shape, dtype='float32', name='input')
        y = inputs
        # Hidden layers
        for idx in range(len(self.hidden_layers_size)):
            dense_layer = Dense(self.hidden_layers_size[idx],
                                activation=self.hidden_activation,
                                kernel_regularizer=l2(self.l2_reg),
                                name='dense_{}'.format(idx+1), **self.kwargs)
            if self.use_time_distributed:
                y = TimeDistributed(dense_layer)(y)
            else:
                y = dense_layer(y)

            # Dropout (skipped when the configured rate is 0)
            if self.dropout_rates[idx] > 0:
                y = Dropout(self.dropout_rates[idx])(y)
        # Output layer
        dense_layer = Dense(self.n_classes, activation=self.final_activation,
                            kernel_regularizer=l2(self.l2_reg),
                            name='output', **self.kwargs)

        if self.use_time_distributed:
            y = TimeDistributed(dense_layer)(y)
        else:
            y = dense_layer(y)

        # Temporal integration (frame-level -> clip-level output).
        # NOTE(review): an unrecognized temporal_integration value silently
        # leaves the frame-level output — confirm that is intended.
        if self.use_time_distributed:
            if self.temporal_integration == 'mean':
                y = Lambda(lambda x: K.mean(x, 1), name='temporal_integration')(y)
            elif self.temporal_integration == 'sum':
                y = Lambda(lambda x: K.sum(x, 1), name='temporal_integration')(y)
            elif self.temporal_integration == 'autopool':
                # Catch only ImportError (the bare `except:` also swallowed
                # KeyboardInterrupt/SystemExit) and chain the original error
                # so the real failure is not hidden.
                try:
                    from autopool import AutoPool1D
                except ImportError as err:
                    raise ImportError("Autopool is not installed") from err
                y = AutoPool1D(axis=1, name='output')(y)

        # Create model
        self.model = Model(inputs=inputs, outputs=y, name='model')

        super().build()
# Esempio n. 5 (scraped-snippet separator)
# 0
def vggish_time_dist_1():
    """Sequence tagger over VGGish embeddings.

    A shared 128-unit Dense layer is applied to every time step, AutoPool1D
    aggregates over time, and a 397-way sigmoid layer produces the output.
    Accepts variable-length sequences of 128-dim embeddings.
    """
    from autopool import AutoPool1D

    # (time, embedding); time axis left unspecified for variable lengths
    embeddings = K.Input(shape=(None, 128))

    shared_dense = K.layers.Dense(128)
    hidden = K.layers.TimeDistributed(shared_dense)(embeddings)

    # Alternative temporal pooling, kept for reference:
    # x = tf.keras.layers.GlobalAveragePooling1D()(x)
    pooled = AutoPool1D(axis=1)(hidden)

    scores = K.layers.Dense(397, activation='sigmoid', name='output')(pooled)

    return K.Model(embeddings, scores)
# Esempio n. 6 (scraped-snippet separator)
# 0
    def forward(self, input_tensor, input_meta, is_training):
        """Build the forward graph for clip-level audio tagging (GRU variant).

        Four conv blocks each followed by 2x2 average pooling, a stacked
        GRU over the flattened frame features, frequency-axis mean pooling,
        per-frame metadata fusion, two frame-wise Dense layers, and
        AutoPool1D aggregation over time.

        Parameters
        ----------
        input_tensor : tf.Tensor
            Input features; presumably (batch, frames, freq_bins, channels)
            — TODO confirm against the caller.
        input_meta : tf.Tensor
            Per-clip metadata vector; tiled across time frames below.
        is_training : bool or tf.Tensor
            Training flag forwarded to the conv blocks.

        Returns
        -------
        tf.Tensor
            Clip-level scores of shape (batch, classes_num).
        """
        self.input_tensor = input_tensor
        self.is_training = is_training
        self.input_meta = input_meta
        conv1 = self.conv_block1.forward(self.input_tensor, self.is_training)
        pool1 = tf.layers.average_pooling2d(conv1,
                                            pool_size=2,
                                            strides=2,
                                            padding='VALID')
        conv2 = self.conv_block2.forward(pool1, self.is_training)
        pool2 = tf.layers.average_pooling2d(conv2,
                                            pool_size=2,
                                            strides=2,
                                            padding='VALID')
        conv3 = self.conv_block3.forward(pool2, self.is_training)
        pool3 = tf.layers.average_pooling2d(conv3,
                                            pool_size=2,
                                            strides=2,
                                            padding='VALID')
        conv4 = self.conv_block4.forward(pool3, self.is_training)
        pool4 = tf.layers.average_pooling2d(conv4,
                                            pool_size=2,
                                            strides=2,
                                            padding='VALID')

        #####################

        # Static (graph-build-time) frame/bin counts after the four poolings.
        fea_frames = pool4.get_shape().as_list()[1]
        fea_bins = pool4.get_shape().as_list()[2]
        # Flatten (freq_bins x channels) into one feature axis for the RNN;
        # self.layer_depth[3] is presumably the channel count of conv_block4.
        reshaped = tf.reshape(pool4,
                              [-1, fea_frames, fea_bins * self.layer_depth[3]])
        num_units = [128]
        basic_cells = [tf.nn.rnn_cell.GRUCell(num_units=n) for n in num_units]
        cells = tf.nn.rnn_cell.MultiRNNCell(basic_cells, state_is_tuple=True)
        (outputs, state) = tf.nn.dynamic_rnn(cells,
                                             reshaped,
                                             sequence_length=None,
                                             dtype=tf.float32,
                                             time_major=False)
        # NOTE(review): outputs has feature size 128; this reshape only works
        # if fea_bins * 32 == 128 (i.e. fea_bins == 4) — confirm the input
        # geometry guarantees that.
        pool4 = tf.reshape(outputs, [-1, fea_frames, fea_bins, 32])

        ###########
        # Collapse the (reconstructed) frequency axis by averaging.
        pool4 = tf.reduce_mean(pool4, axis=2)
        ###########

        # Tile the per-clip metadata across every time frame and append it
        # to each frame's feature vector.
        fea_frames = tf.shape(pool4)[1]
        self.input_meta = tf.expand_dims(self.input_meta, 1)
        self.input_meta = tf.tile(self.input_meta, [1, fea_frames, 1])
        pool4 = tf.concat([pool4, self.input_meta], axis=-1)
        repr_size = tf.shape(pool4)[2]

        # NOTE(review): fea_frames/repr_size are dynamic tf.Tensor values from
        # tf.shape(); Keras' input_shape normally expects Python ints — confirm
        # this works in the targeted TF/Keras version.
        pool4 = TimeDistributed(Dense(
            self.hidden_layer_size,
            activation='relu',
            kernel_regularizer=regularizers.l2(0.0001)),
                                input_shape=(fea_frames, repr_size))(pool4)
        repr_size = self.hidden_layer_size

        # Output layer (frame-level class scores, no activation here)
        pool4 = TimeDistributed(Dense(
            self.classes_num, kernel_regularizer=regularizers.l2(0.0001)),
                                name='output_t',
                                input_shape=(fea_frames, repr_size))(pool4)

        # Apply autopool over time dimension
        # y = AutoPool1D(kernel_constraint=keras.constraints.non_neg(),
        #                axis=1, name='output')(y)
        output = AutoPool1D(axis=1,
                            name='output')(pool4)  ###(batch,num_classes)
        ###########

        #        reshaped = tf.reduce_mean(reshaped,axis=2)
        #        reshaped = tf.reduce_max(reshaped,axis=1)
        #        flatten = tf.layers.flatten(reshaped)
        #        output = tf.layers.dense(flatten,units=self.classes_num)

        return output