def forward(self, input_tensor, input_meta, is_training):
    self.input_tensor = input_tensor
    self.is_training = is_training
    self.input_meta = input_meta

    conv1 = self.conv_block1.forward(self.input_tensor, self.is_training)
    pool1 = tf.layers.average_pooling2d(conv1, pool_size=2, strides=2, padding='VALID')
    conv2 = self.conv_block2.forward(pool1, self.is_training)
    pool2 = tf.layers.average_pooling2d(conv2, pool_size=2, strides=2, padding='VALID')
    conv3 = self.res_block3.forward(pool2, self.is_training)
    pool3 = tf.layers.average_pooling2d(conv3, pool_size=2, strides=2, padding='VALID')
    conv4 = self.res_block4.forward(pool3, self.is_training)
    pool4 = tf.layers.average_pooling2d(conv4, pool_size=1, strides=1, padding='VALID')
    # conv5 = self.conv_block4.forward(pool4, self.is_training)
    # pool5 = tf.layers.average_pooling2d(conv5, pool_size=2, strides=2, padding='VALID')

    # Average over the frequency axis: (batch, frames, bins, channels) -> (batch, frames, channels)
    pool4 = tf.reduce_mean(pool4, axis=2)

    # Tile the clip-level metadata over the time axis and concatenate with the frame features
    fea_frames = tf.shape(pool4)[1]
    self.input_meta = tf.expand_dims(self.input_meta, 1)
    self.input_meta = tf.tile(self.input_meta, [1, fea_frames, 1])
    pool4 = tf.concat([pool4, self.input_meta], axis=-1)

    repr_size = tf.shape(pool4)[2]
    pool4 = TimeDistributed(Dense(self.hidden_layer_size, activation='relu',
                                  kernel_regularizer=regularizers.l2(0.0001)),
                            input_shape=(fea_frames, repr_size))(pool4)
    repr_size = self.hidden_layer_size

    # Output layer
    pool4 = TimeDistributed(Dense(self.classes_num,
                                  kernel_regularizer=regularizers.l2(0.0001)),
                            name='output_t',
                            input_shape=(fea_frames, repr_size))(pool4)

    # Apply autopool over the time dimension
    # y = AutoPool1D(kernel_constraint=keras.constraints.non_neg(),
    #                axis=1, name='output')(y)
    output = AutoPool1D(axis=1, name='output')(pool4)  # (batch, num_classes)
    return output
def construct_mlp(num_frames, input_size, num_classes,
                  hidden_layer_size=128, num_hidden_layers=1, l2_reg=1e-5):
    """
    Construct an MLP model for urban sound tagging.

    Parameters
    ----------
    num_frames
    input_size
    num_classes
    hidden_layer_size
    num_hidden_layers
    l2_reg

    Returns
    -------
    model
    """
    # Input layer
    inp = Input(shape=(num_frames, input_size), dtype='float32', name='input')
    y = inp

    # Add hidden layers
    repr_size = input_size
    for idx in range(num_hidden_layers):
        y = TimeDistributed(Dense(hidden_layer_size, activation='relu',
                                  kernel_regularizer=regularizers.l2(l2_reg)),
                            name='dense_{}'.format(idx + 1),
                            input_shape=(num_frames, repr_size))(y)
        repr_size = hidden_layer_size

    # Output layer
    y = TimeDistributed(Dense(num_classes, activation='sigmoid',
                              kernel_regularizer=regularizers.l2(l2_reg)),
                        name='output_t',
                        input_shape=(num_frames, repr_size))(y)

    # Apply autopool over the time dimension
    # y = AutoPool1D(kernel_constraint=keras.constraints.non_neg(),
    #                axis=1, name='output')(y)
    y = AutoPool1D(axis=1, name='output')(y)

    m = Model(inputs=inp, outputs=y)
    m.name = 'urban_sound_classifier'
    m.summary()
    return m
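A minimal usage sketch for the frame-level MLP above, assuming the surrounding module already imports Input, Dense, TimeDistributed, Model, and regularizers from the (standalone) Keras API these snippets target, plus AutoPool1D from autopool; the feature dimensions, optimizer, and dummy data below are illustrative assumptions, not values from the original code.

# Hypothetical usage sketch (shapes and training settings are assumptions).
import numpy as np

model = construct_mlp(num_frames=51, input_size=128, num_classes=8)
model.compile(optimizer='adam', loss='binary_crossentropy')

# Dummy batch: 4 clips, 51 frames of 128-dim embeddings, 8 multi-label targets.
x = np.random.rand(4, 51, 128).astype('float32')
y = np.random.randint(0, 2, size=(4, 8)).astype('float32')
model.fit(x, y, epochs=1, batch_size=4)  # AutoPool1D reduces frame scores to (batch, 8)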
def construct_mlp(input_size, num_classes, num_frames,
                  dropout_size=0.5, ef_mode=4, l2_reg=1e-5):
    """
    Construct an EfficientNet-based model for urban sound tagging.

    Parameters
    ----------
    num_frames
    input_size
    num_classes
    dropout_size
    ef_mode
    l2_reg

    Returns
    -------
    model
    """
    from keras.layers import Concatenate
    import efficientnet.keras as efn

    # Select the EfficientNet backbone B0-B7 via ef_mode;
    # weights can be 'noisy-student' or 'imagenet'.
    backbone = getattr(efn, 'EfficientNetB{}'.format(ef_mode))
    base_model = backbone(weights='noisy-student', include_top=False, pooling='avg')

    input1 = Input(shape=input_size, dtype='float32', name='input')
    input2 = Input(shape=(num_frames, 85), dtype='float32', name='input2')  # 1621

    # Frame-wise embeddings from the backbone, dropout, then concatenate the metadata
    y = TimeDistributed(base_model)(input1)
    y = TimeDistributed(Dropout(dropout_size))(y)
    y = Concatenate()([y, input2])
    y = TimeDistributed(Dense(num_classes, activation='sigmoid',
                              kernel_regularizer=regularizers.l2(l2_reg)))(y)
    y = AutoPool1D(axis=1, name='output')(y)

    m = Model(inputs=[input1, input2], outputs=y)
    m.summary()
    m.name = 'urban_sound_classifier'
    return m
def build(self):
    """
    Build the Keras model defined by the instance parameters and store it in self.model.
    """
    # Input
    if self.use_time_distributed:
        input_shape = (self.n_frames, self.n_freqs)
    else:
        input_shape = (self.n_freqs,)

    inputs = Input(shape=input_shape, dtype='float32', name='input')
    y = inputs

    # Hidden layers
    for idx in range(len(self.hidden_layers_size)):
        dense_layer = Dense(self.hidden_layers_size[idx],
                            activation=self.hidden_activation,
                            kernel_regularizer=l2(self.l2_reg),
                            name='dense_{}'.format(idx + 1),
                            **self.kwargs)
        if self.use_time_distributed:
            y = TimeDistributed(dense_layer)(y)
        else:
            y = dense_layer(y)

        # Dropout
        if self.dropout_rates[idx] > 0:
            y = Dropout(self.dropout_rates[idx])(y)

    # Output layer
    dense_layer = Dense(self.n_classes,
                        activation=self.final_activation,
                        kernel_regularizer=l2(self.l2_reg),
                        name='output',
                        **self.kwargs)
    if self.use_time_distributed:
        y = TimeDistributed(dense_layer)(y)
    else:
        y = dense_layer(y)

    # Temporal integration
    if self.use_time_distributed:
        if self.temporal_integration == 'mean':
            y = Lambda(lambda x: K.mean(x, 1), name='temporal_integration')(y)
        elif self.temporal_integration == 'sum':
            y = Lambda(lambda x: K.sum(x, 1), name='temporal_integration')(y)
        elif self.temporal_integration == 'autopool':
            try:
                from autopool import AutoPool1D
            except ImportError:
                raise ImportError("Autopool is not installed")
            y = AutoPool1D(axis=1, name='output')(y)

    # Create model
    self.model = Model(inputs=inputs, outputs=y, name='model')

    super().build()
def vggish_time_dist_1():
    from autopool import AutoPool1D

    inputs = K.Input(shape=(None, 128))  # (time, embedding)
    dense = K.layers.Dense(128)
    x = K.layers.TimeDistributed(dense)(inputs)
    # x = tf.keras.layers.GlobalAveragePooling1D()(x)
    x = AutoPool1D(axis=1)(x)
    x = K.layers.Dense(397, activation='sigmoid', name='output')(x)
    model = K.Model(inputs, x)
    return model
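A minimal sketch of how vggish_time_dist_1 might be exercised, assuming K is the module-level alias for the Keras flavour that the installed autopool package supports (e.g. import keras as K or import tensorflow.keras as K), and that 397 is the number of output tags; the dummy batch shape is an illustrative assumption.

# Hypothetical usage sketch; `K` is assumed to be bound at module level as noted above.
import numpy as np

model = vggish_time_dist_1()
model.compile(optimizer='adam', loss='binary_crossentropy')
model.summary()

# Dummy batch: 2 clips, 10 VGGish frames of 128-dim embeddings -> (2, 397) clip-level scores.
emb = np.random.rand(2, 10, 128).astype('float32')
print(model.predict(emb).shape)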
def forward(self, input_tensor, input_meta, is_training):
    self.input_tensor = input_tensor
    self.is_training = is_training
    self.input_meta = input_meta

    conv1 = self.conv_block1.forward(self.input_tensor, self.is_training)
    pool1 = tf.layers.average_pooling2d(conv1, pool_size=2, strides=2, padding='VALID')
    conv2 = self.conv_block2.forward(pool1, self.is_training)
    pool2 = tf.layers.average_pooling2d(conv2, pool_size=2, strides=2, padding='VALID')
    conv3 = self.conv_block3.forward(pool2, self.is_training)
    pool3 = tf.layers.average_pooling2d(conv3, pool_size=2, strides=2, padding='VALID')
    conv4 = self.conv_block4.forward(pool3, self.is_training)
    pool4 = tf.layers.average_pooling2d(conv4, pool_size=2, strides=2, padding='VALID')

    # GRU over the time axis on the flattened (bins x channels) features
    fea_frames = pool4.get_shape().as_list()[1]
    fea_bins = pool4.get_shape().as_list()[2]
    reshaped = tf.reshape(pool4, [-1, fea_frames, fea_bins * self.layer_depth[3]])
    num_units = [128]
    basic_cells = [tf.nn.rnn_cell.GRUCell(num_units=n) for n in num_units]
    cells = tf.nn.rnn_cell.MultiRNNCell(basic_cells, state_is_tuple=True)
    (outputs, state) = tf.nn.dynamic_rnn(cells, reshaped, sequence_length=None,
                                         dtype=tf.float32, time_major=False)
    pool4 = tf.reshape(outputs, [-1, fea_frames, fea_bins, 32])

    # Average over the frequency axis: (batch, frames, bins, channels) -> (batch, frames, channels)
    pool4 = tf.reduce_mean(pool4, axis=2)

    # Tile the clip-level metadata over the time axis and concatenate with the frame features
    fea_frames = tf.shape(pool4)[1]
    self.input_meta = tf.expand_dims(self.input_meta, 1)
    self.input_meta = tf.tile(self.input_meta, [1, fea_frames, 1])
    pool4 = tf.concat([pool4, self.input_meta], axis=-1)

    repr_size = tf.shape(pool4)[2]
    pool4 = TimeDistributed(Dense(self.hidden_layer_size, activation='relu',
                                  kernel_regularizer=regularizers.l2(0.0001)),
                            input_shape=(fea_frames, repr_size))(pool4)
    repr_size = self.hidden_layer_size

    # Output layer
    pool4 = TimeDistributed(Dense(self.classes_num,
                                  kernel_regularizer=regularizers.l2(0.0001)),
                            name='output_t',
                            input_shape=(fea_frames, repr_size))(pool4)

    # Apply autopool over the time dimension
    # y = AutoPool1D(kernel_constraint=keras.constraints.non_neg(),
    #                axis=1, name='output')(y)
    output = AutoPool1D(axis=1, name='output')(pool4)  # (batch, num_classes)

    # reshaped = tf.reduce_mean(reshaped, axis=2)
    # reshaped = tf.reduce_max(reshaped, axis=1)
    # flatten = tf.layers.flatten(reshaped)
    # output = tf.layers.dense(flatten, units=self.classes_num)
    return output