def deep_conv(mark):
    """Build a deep CNN classifier: 5 conv blocks followed by a 2-layer MLP head.

    Each conv block is a 5x5 same-padded Conv2D with L2 weight regularization,
    optional batch norm, and ReLU. Output width is `config.y_dim`.

    Args:
        mark: identifier forwarded to the Classifier constructor.

    Returns:
        The built Classifier model (Adam, lr=1e-4).
    """
    model = Classifier(mark=mark)
    model.add(Input(sample_shape=config.sample_shape))

    weight_decay = 1e-5

    def conv_block(filters, strength=1.0, bn=True):
        # 5x5 conv -> (optional) BN -> ReLU, all appended to `model`.
        model.add(Conv2D(filters=filters, kernel_size=5, padding='same',
                         kernel_regularizer=regularizers.L2(strength=strength)))
        if bn:
            model.add(BatchNorm())
        model.add(Activation('relu'))

    # Convolutional stage
    conv_block(32, weight_decay)
    model.add(Dropout(0.5))
    conv_block(32, weight_decay)
    model.add(MaxPool2D(2, 2, 'same'))
    conv_block(64, weight_decay)
    model.add(Dropout(0.5))
    conv_block(64, weight_decay)
    model.add(MaxPool2D(2, 2, 'same'))
    conv_block(128, weight_decay)

    # Fully-connected stage: two identical 256-unit ReLU layers
    model.add(Flatten())
    for _ in range(2):
        model.add(Linear(256))
        # model.add(BatchNorm())  # disabled in the original
        model.add(Activation('relu'))
    model.add(Linear(config.y_dim))

    # Build model
    model.build(optimizer=tf.train.AdamOptimizer(learning_rate=1e-4))
    return model
def ka_convnet(mark):
    """Build a compact CNN classifier: three conv layers + one 128-unit FC layer.

    Convs are 5x5, same-padded, L2-regularized; only the third uses batch norm.
    The output layer has 10 units (fixed, unlike deep_conv's config.y_dim).

    Args:
        mark: identifier forwarded to the Classifier constructor.

    Returns:
        The built Classifier model (Adam, lr=1e-4).
    """
    model = Classifier(mark=mark)
    model.add(Input(sample_shape=config.sample_shape))
    strength = 1e-5

    def add_conv(filters, bn=False):
        # 5x5 conv -> (optional) BN -> ReLU appended to `model`.
        model.add(Conv2D(filters=filters, kernel_size=5, padding='same',
                         kernel_regularizer=regularizers.L2(strength=strength)))
        if bn:
            model.add(BatchNorm())
        model.add(Activation.ReLU())

    # Define structure
    add_conv(32)
    model.add(Dropout(0.5))
    add_conv(32, False)
    model.add(Dropout(0.5))
    model.add(MaxPool2D(2, 2, 'same'))
    add_conv(64, True)
    model.add(Dropout(0.5))
    model.add(MaxPool2D(2, 2, 'same'))

    model.add(Flatten())
    model.add(Linear(128))
    model.add(Activation.ReLU())
    # model.add(Dropout(0.5))  # disabled in the original
    model.add(Linear(10))

    # Build model
    model.build(optimizer=tf.train.AdamOptimizer(learning_rate=1e-4))
    return model
def maxpool_drop(pool_size, strides, twod=True, drop=True):
    """Append a max-pooling layer (2-D or 1-D) and an optional dropout.

    NOTE(review): this top-level copy reads the free names `subsubnet` and
    `th`, which are not defined at module scope in the visible code — it
    appears to duplicate the nested helper inside multinput_ver_only and may
    be dead code; confirm before relying on it.

    Args:
        pool_size: pool window (int or tuple, matching the chosen pool type).
        strides: pool strides.
        twod: if True use MaxPool2D, otherwise MaxPool1D.
        drop: if True append Dropout(th.raw_keep_prob) after the pool.
    """
    pool_cls = MaxPool2D if twod else MaxPool1D
    subsubnet.add(pool_cls(pool_size=pool_size, strides=strides))
    if drop:
        subsubnet.add(Dropout(th.raw_keep_prob))
def multinput_ver_only(th): assert isinstance(th, Config) # model = Classifier(mark=th.mark) model = Classifier_Gpat(mark=th.mark) def data_dim(sample_rate=16000, duration=2, n_mfcc=50): audio_length = sample_rate * duration dim = (n_mfcc, 1 + int(np.floor(audio_length / 512)), 1) return dim dim = data_dim() # Add hidden layers subnet = model.add(inter_type=model.CONCAT) subsubnet = subnet.add() # the net to process raw data subsubnet.add(Input(sample_shape=[32000, 1])) def conv_bn_relu(filters, twod=True, bn=True): if twod: subsubnet.add( Conv2D(filters=filters, kernel_size=(4, 10), padding='same')) else: subsubnet.add( Conv1D(filters=filters, kernel_size=9, padding='valid')) if bn: subsubnet.add(BatchNorm()) subsubnet.add(Activation('relu')) def maxpool_drop(pool_size, strides, twod=True, drop=True): if twod: subsubnet.add(MaxPool2D(pool_size=pool_size, strides=strides)) else: subsubnet.add(MaxPool1D(pool_size=pool_size, strides=strides)) if drop: subsubnet.add(Dropout(th.raw_keep_prob)) for _ in range(th.raw_std_blocks): conv_bn_relu(32, twod=False, bn=True) maxpool_drop(pool_size=16, strides=16, drop=True, twod=False) conv_bn_relu(filters=32, twod=False, bn=True) subsubnet.add(Dropout(th.raw_keep_prob)) subsubnet.add(GlobalMaxPooling1D()) # the net to process mfcc features subsubnet = subnet.add() subsubnet.add(Input(sample_shape=[dim[0], dim[1], 1], name='mfcc')) for _ in range(th.mfcc_std_blocks): conv_bn_relu(filters=th.mfcc_cnn_filters, bn=True) maxpool_drop(pool_size=(2, 2), strides=(2, 2)) subsubnet.add(Flatten()) model.add(Dropout(th.concat_keep_prob)) def linear_bn_relu(units, bn=True): model.add(Linear(output_dim=units)) if bn: model.add(BatchNorm()) model.add(Activation('relu')) for _ in range(th.concat_std_blocks): linear_bn_relu(th.concat_part_units) # Add output layer model.add(Linear(output_dim=41)) model.add(Activation('softmax')) # Build model optimizer = tf.train.AdamOptimizer(learning_rate=th.learning_rate) model.build(optimizer=optimizer) 
return model
def res_00(th):
    """Two-branch classifier: 1-D convs on raw audio + residual 2-D convs on MFCCs.

    Branch outputs are concatenated, passed through FC layers, and projected
    to 41 softmax classes.

    Args:
        th: Config carrying keep probs and learning rate.

    Returns:
        The built Classifier model.
    """
    assert isinstance(th, Config)
    model = Classifier(mark=th.mark)

    def data_dim(sample_rate=16000, duration=2, n_mfcc=50):
        # (n_mfcc, frame_count, channels); presumably assumes MFCC hop
        # length 512 — TODO confirm against the feature extractor.
        audio_length = sample_rate * duration
        return (n_mfcc, 1 + int(np.floor(audio_length / 512)), 1)

    dim = data_dim()

    # Hidden layers: two parallel sub-nets merged by concatenation
    subnet = model.add(inter_type=model.CONCAT)

    # --- Branch 1: raw waveform ---
    subsubnet = subnet.add()
    # subsubnet.add(Input(sample_shape=[32000, 1], name='raw_data'))
    subsubnet.add(Input(sample_shape=[32000, 1]))

    def conv1d_relu(filters, kernel_size, bn):
        # valid-padded 1-D conv -> (optional) BN -> ReLU
        subsubnet.add(Conv1D(filters=filters, kernel_size=kernel_size,
                             padding='valid'))
        if bn:
            subsubnet.add(BatchNorm())
        subsubnet.add(Activation('relu'))

    conv1d_relu(16, 9, bn=True)
    conv1d_relu(16, 9, bn=True)
    subsubnet.add(MaxPool1D(pool_size=16, strides=16))
    subsubnet.add(Dropout(th.raw_keep_prob))
    conv1d_relu(32, 3, bn=False)
    conv1d_relu(32, 3, bn=False)
    subsubnet.add(MaxPool1D(pool_size=4, strides=4))
    subsubnet.add(Dropout(th.raw_keep_prob))
    conv1d_relu(32, 3, bn=False)
    conv1d_relu(32, 3, bn=False)
    subsubnet.add(MaxPool1D(pool_size=4, strides=4))
    conv1d_relu(256, 3, bn=True)
    conv1d_relu(256, 3, bn=True)
    subsubnet.add(GlobalMaxPooling1D())

    # --- Branch 2: MFCC features ---
    subsubnet = subnet.add()
    subsubnet.add(Input(sample_shape=[dim[0], dim[1], 1], name='mfcc'))
    subsubnet.add(Conv2D(32, (4, 10), padding='same'))
    subsubnet.add(BatchNorm())
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    subsubnet.add(Dropout(th.mfcc_keep_prob))

    # Residual block 1
    net = subsubnet.add(ResidualNet())
    net.add(Conv2D(32, (4, 10), padding='same'))
    net.add(BatchNorm())
    net.add_shortcut()
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    subsubnet.add(Dropout(th.mfcc_keep_prob))

    # NOTE(review): the ResidualNet creation below is commented out in the
    # original, so the following net.add/add_shortcut calls extend the FIRST
    # residual net (adding a second shortcut to the same net). This may be a
    # deliberate experiment or an oversight — confirm before "fixing".
    # net = subsubnet.add(ResidualNet())
    net.add(Conv2D(32, (4, 10), padding='same'))
    net.add(BatchNorm())
    net.add_shortcut()
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    subsubnet.add(Dropout(th.mfcc_keep_prob))

    # Residual block 3
    net = subsubnet.add(ResidualNet())
    net.add(Conv2D(32, (4, 10), padding='same'))
    net.add(BatchNorm())
    net.add_shortcut()
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    subsubnet.add(Dropout(th.mfcc_keep_prob))

    # Residual block 4
    net = subsubnet.add(ResidualNet())
    net.add(Conv2D(32, (4, 10), padding='same'))
    net.add(BatchNorm())
    net.add_shortcut()
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    subsubnet.add(Dropout(th.mfcc_keep_prob))

    subsubnet.add(Flatten())
    subsubnet.add(Dropout(th.concat_keep_prob))

    # --- Merged head ---
    model.add(Linear(output_dim=128))
    model.add(BatchNorm())
    model.add(Activation('relu'))
    # NOTE(review): Linear(64) is commented out but its BatchNorm/ReLU remain,
    # yielding back-to-back BN+ReLU with no layer between — verify intent.
    # model.add(Linear(output_dim=64))
    model.add(BatchNorm())
    model.add(Activation('relu'))

    # Add output layer
    model.add(Linear(output_dim=41))
    model.add(Activation('softmax'))

    # Build model
    optimizer = tf.train.AdamOptimizer(learning_rate=th.learning_rate)
    model.build(optimizer=optimizer)
    return model
def multinput_mlp(th):
    """Two-branch classifier: MLP on raw audio + small CNN on MFCC features.

    Branch outputs are concatenated, passed through three Linear+BN+ReLU
    stacks, and projected to 41 softmax classes.

    Args:
        th: Config carrying mark and learning rate.

    Returns:
        The built Classifier model.
    """
    assert isinstance(th, Config)
    model = Classifier(mark=th.mark)

    def data_dim(sample_rate=16000, duration=2, n_mfcc=50):
        # (n_mfcc, frame_count, channels); presumably assumes MFCC hop
        # length 512 — TODO confirm against the feature extractor.
        audio_length = sample_rate * duration
        return (n_mfcc, 1 + int(np.floor(audio_length / 512)), 1)

    dim = data_dim()

    # Hidden layers: two parallel sub-nets merged by concatenation
    subnet = model.add(inter_type=model.CONCAT)

    # --- Branch 1: raw waveform through a small MLP ---
    branch = subnet.add()
    branch.add(Input(sample_shape=[32000, 1]))
    branch.add(Linear(output_dim=512))
    branch.add(Activation('relu'))
    branch.add(Linear(output_dim=256))
    branch.add(Activation('relu'))

    # --- Branch 2: MFCC features through two conv blocks ---
    branch = subnet.add()
    branch.add(Input(sample_shape=[dim[0], dim[1], 1], name='mfcc'))
    branch.add(Conv2D(32, (4, 10), padding='same'))
    branch.add(BatchNorm())
    branch.add(Activation('relu'))
    branch.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    branch.add(Dropout(0.8))
    # A middle conv block (Conv2D/BN/ReLU/MaxPool/Dropout(0.8)) is disabled
    # in the original.
    branch.add(Conv2D(32, (4, 10), padding='same'))
    branch.add(BatchNorm())
    branch.add(Activation('relu'))
    branch.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    branch.add(Dropout(0.7))
    branch.add(Flatten())

    # --- Merged head: three Linear+BN+ReLU stacks ---
    for units in (128, 64, 64):
        model.add(Linear(output_dim=units))
        model.add(BatchNorm())
        model.add(Activation('relu'))

    # Add output layer
    model.add(Linear(output_dim=41))
    model.add(Activation('softmax'))

    # Build model
    optimizer = tf.train.AdamOptimizer(learning_rate=th.learning_rate)
    model.build(optimizer=optimizer)
    return model