Beispiel #1
0
def conv_2d_test(th):
    assert isinstance(th, Config)
    # Initiate model
    th.mark = 'cnn_2d' + th.mark

    def data_dim(sample_rate=44100, duration=2, n_mfcc=40):
        audio_length = sample_rate * duration
        dim = (n_mfcc, 1 + int(np.floor(audio_length / 512)), 1)
        return dim

    dim = data_dim()

    model = Classifier(mark=th.mark)

    # Add input layer
    model.add(Input(sample_shape=[dim[0], dim[1], 1]))
    # Add hidden layers
    model.add(Conv2D(32, (4, 10), padding='same'))
    model.add(BatchNorm())
    model.add(Activation('relu'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    # model.add(Dropout(0.7))

    model.add(Conv2D(32, (4, 10), padding='same'))
    model.add(BatchNorm())
    model.add(Activation('relu'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    # model.add(Dropout(0.7))

    model.add(Conv2D(32, (4, 10), padding='same'))
    model.add(BatchNorm())
    model.add(Activation('relu'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    # model.add(Dropout(0.7))

    model.add(Conv2D(32, (4, 10), padding='same'))
    model.add(BatchNorm())
    model.add(Activation('relu'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    # model.add(Dropout(0.7))

    model.add(Flatten())
    model.add(Linear(output_dim=64))
    model.add(BatchNorm())
    model.add(Activation('relu'))

    # Add output layer
    model.add(Linear(output_dim=41))
    model.add(Activation('softmax'))

    # Build model
    optimizer = tf.train.AdamOptimizer(learning_rate=th.learning_rate)
    model.build(optimizer=optimizer)

    return model
Beispiel #2
0
    def _get_layers(self):
        """The architecture follows a common pattern used in related literatures,
    that is, having a stem followed by stacks of cells. The image-like
    tensors will be down sampled after each stack. Finally a global average
    pooling layer is added."""
        layers = []
        # Add stem
        layers.extend(self.conv_bn_relu(self.stem_channels, 3))
        # Add stacks
        output_channels = self.stem_channels
        for i in range(self.num_stacks):
            input_channels = output_channels
            # Down sample except in first cell
            if i > 0:
                layers.append(MaxPool2D(2, 2))
                # Double channel number
                output_channels *= 2

            for _ in range(self.cells_per_stack):
                layers.append(self._get_cell(input_channels, output_channels))
                input_channels = output_channels

        # Add global average pooling layer
        layers.append(GlobalAveragePooling2D())

        return layers
Beispiel #3
0
def deep_conv(mark):
    # Initiate predictor
    model = Classifier(mark=mark)
    model.add(Input(sample_shape=config.sample_shape))

    def ConvBNReLU(filters, strength=1.0, bn=True):
        model.add(
            Conv2D(filters=filters,
                   kernel_size=5,
                   padding='same',
                   kernel_regularizer=regularizers.L2(strength=strength)))

        if bn:
            model.add(BatchNorm())

        model.add(Activation('relu'))

    # Conv layers
    reg = 1e-5
    ConvBNReLU(32, reg)
    model.add(Dropout(0.5))
    ConvBNReLU(32, reg)
    model.add(MaxPool2D(2, 2, 'same'))

    ConvBNReLU(64, reg)
    model.add(Dropout(0.5))
    ConvBNReLU(64, reg)
    model.add(MaxPool2D(2, 2, 'same'))

    ConvBNReLU(128, reg)

    # FC layers
    model.add(Flatten())
    model.add(Linear(256))
    # model.add(BatchNorm())
    model.add(Activation('relu'))

    model.add(Linear(256))
    # model.add(BatchNorm())
    model.add(Activation('relu'))

    model.add(Linear(config.y_dim))

    # Build model
    model.build(optimizer=tf.train.AdamOptimizer(learning_rate=1e-4))

    return model
Beispiel #4
0
def ka_convnet(mark):
    model = Classifier(mark=mark)
    model.add(Input(sample_shape=config.sample_shape))

    strength = 1e-5

    def ConvLayer(filters, bn=False):
        model.add(
            Conv2D(filters=filters,
                   kernel_size=5,
                   padding='same',
                   kernel_regularizer=regularizers.L2(strength=strength)))
        if bn:
            model.add(BatchNorm())
        model.add(Activation.ReLU())

    # Define structure
    ConvLayer(32)
    model.add(Dropout(0.5))
    ConvLayer(32, False)
    model.add(Dropout(0.5))
    model.add(MaxPool2D(2, 2, 'same'))
    ConvLayer(64, True)
    model.add(Dropout(0.5))
    model.add(MaxPool2D(2, 2, 'same'))

    model.add(Flatten())
    model.add(Linear(128))
    model.add(Activation.ReLU())
    # model.add(Dropout(0.5))
    model.add(Linear(10))

    # Build model
    model.build(optimizer=tf.train.AdamOptimizer(learning_rate=1e-4))

    return model
Beispiel #5
0
 def maxpool_drop(pool_size, strides, twod=True, drop=True):
     if twod:
         subsubnet.add(MaxPool2D(pool_size=pool_size, strides=strides))
     else:
         subsubnet.add(MaxPool1D(pool_size=pool_size, strides=strides))
     if drop: subsubnet.add(Dropout(th.raw_keep_prob))
Beispiel #6
0
def res_00(th):
    assert isinstance(th, Config)
    model = Classifier(mark=th.mark)

    def data_dim(sample_rate=16000, duration=2, n_mfcc=50):
        audio_length = sample_rate * duration
        dim = (n_mfcc, 1 + int(np.floor(audio_length / 512)), 1)
        return dim

    dim = data_dim()

    # Add hidden layers
    subnet = model.add(inter_type=model.CONCAT)
    # the net to process raw data
    subsubnet = subnet.add()
    # subsubnet.add(Input(sample_shape=[32000, 1], name='raw_data'))
    subsubnet.add(Input(sample_shape=[32000, 1]))
    subsubnet.add(Conv1D(filters=16, kernel_size=9, padding='valid'))
    subsubnet.add(BatchNorm())
    subsubnet.add(Activation('relu'))
    subsubnet.add(Conv1D(filters=16, kernel_size=9, padding='valid'))
    subsubnet.add(BatchNorm())
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool1D(pool_size=16, strides=16))
    subsubnet.add(Dropout(th.raw_keep_prob))

    subsubnet.add(Conv1D(filters=32, kernel_size=3, padding='valid'))
    subsubnet.add(Activation('relu'))
    subsubnet.add(Conv1D(filters=32, kernel_size=3, padding='valid'))
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool1D(pool_size=4, strides=4))
    subsubnet.add(Dropout(th.raw_keep_prob))

    subsubnet.add(Conv1D(filters=32, kernel_size=3, padding='valid'))
    subsubnet.add(Activation('relu'))
    subsubnet.add(Conv1D(filters=32, kernel_size=3, padding='valid'))
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool1D(pool_size=4, strides=4))

    subsubnet.add(Conv1D(filters=256, kernel_size=3, padding='valid'))
    subsubnet.add(BatchNorm())
    subsubnet.add(Activation('relu'))
    subsubnet.add(Conv1D(filters=256, kernel_size=3, padding='valid'))
    subsubnet.add(BatchNorm())
    subsubnet.add(Activation('relu'))
    subsubnet.add(GlobalMaxPooling1D())

    # the net to process mfcc features
    subsubnet = subnet.add()
    subsubnet.add(Input(sample_shape=[dim[0], dim[1], 1], name='mfcc'))
    subsubnet.add(Conv2D(32, (4, 10), padding='same'))
    subsubnet.add(BatchNorm())
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    subsubnet.add(Dropout(th.mfcc_keep_prob))

    net = subsubnet.add(ResidualNet())
    net.add(Conv2D(32, (4, 10), padding='same'))
    net.add(BatchNorm())
    net.add_shortcut()
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    subsubnet.add(Dropout(th.mfcc_keep_prob))
    #
    net = subsubnet.add(ResidualNet())
    net.add(Conv2D(32, (4, 10), padding='same'))
    net.add(BatchNorm())
    net.add_shortcut()
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    subsubnet.add(Dropout(th.mfcc_keep_prob))

    net = subsubnet.add(ResidualNet())
    net.add(Conv2D(32, (4, 10), padding='same'))
    net.add(BatchNorm())
    net.add_shortcut()
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    subsubnet.add(Dropout(th.mfcc_keep_prob))

    net = subsubnet.add(ResidualNet())
    net.add(Conv2D(32, (4, 10), padding='same'))
    net.add(BatchNorm())
    net.add_shortcut()
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    subsubnet.add(Dropout(th.mfcc_keep_prob))
    subsubnet.add(Flatten())

    subsubnet.add(Dropout(th.concat_keep_prob))
    model.add(Linear(output_dim=128))
    model.add(BatchNorm())
    model.add(Activation('relu'))
    #
    model.add(Linear(output_dim=64))
    model.add(BatchNorm())
    model.add(Activation('relu'))

    # Add output layer
    model.add(Linear(output_dim=41))
    model.add(Activation('softmax'))

    # Build model
    optimizer = tf.train.AdamOptimizer(learning_rate=th.learning_rate)
    model.build(optimizer=optimizer)

    return model
Beispiel #7
0
def multinput_mlp(th):
    assert isinstance(th, Config)
    model = Classifier(mark=th.mark)

    def data_dim(sample_rate=16000, duration=2, n_mfcc=50):
        audio_length = sample_rate * duration
        dim = (n_mfcc, 1 + int(np.floor(audio_length / 512)), 1)
        return dim

    dim = data_dim()

    # Add hidden layers
    subnet = model.add(inter_type=model.CONCAT)

    subsubnet = subnet.add()
    subsubnet.add(Input(sample_shape=[32000, 1]))
    subsubnet.add(Linear(output_dim=512))
    subsubnet.add(Activation('relu'))
    subsubnet.add(Linear(output_dim=256))
    subsubnet.add(Activation('relu'))

    subsubnet = subnet.add()
    subsubnet.add(Input(sample_shape=[dim[0], dim[1], 1], name='mfcc'))
    subsubnet.add(Conv2D(32, (4, 10), padding='same'))
    subsubnet.add(BatchNorm())
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    subsubnet.add(Dropout(0.8))

    # subsubnet.add(Conv2D(32, (4, 10), padding='same'))
    # subsubnet.add(BatchNorm())
    # subsubnet.add(Activation('relu'))
    # subsubnet.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    # subsubnet.add(Dropout(0.8))

    subsubnet.add(Conv2D(32, (4, 10), padding='same'))
    subsubnet.add(BatchNorm())
    subsubnet.add(Activation('relu'))
    subsubnet.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    subsubnet.add(Dropout(0.7))

    subsubnet.add(Flatten())

    model.add(Linear(output_dim=128))
    model.add(BatchNorm())
    model.add(Activation('relu'))

    model.add(Linear(output_dim=64))
    model.add(BatchNorm())
    model.add(Activation('relu'))

    model.add(Linear(output_dim=64))
    model.add(BatchNorm())
    model.add(Activation('relu'))

    # Add output layer
    model.add(Linear(output_dim=41))
    model.add(Activation('softmax'))

    # Build model
    optimizer = tf.train.AdamOptimizer(learning_rate=th.learning_rate)
    model.build(optimizer=optimizer)

    return model