Code example #1
def train_basic_audio_classifier(num_classes, model_savepath, training_data_folder, training_epoch, batch_size=128, amount_limit=5000, forceLoad=True):
    """Train a basic 1-D convolutional audio classifier and save it to disk.

    Args:
        num_classes: number of target classes (size of the softmax output).
        model_savepath: path the trained Keras model is saved to.
        training_data_folder: folder passed to ``load_audio`` to read clips from.
        training_epoch: number of training epochs.
        batch_size: minibatch size used by ``model.fit``.
        amount_limit: maximum number of samples ``load_audio`` should load.
        forceLoad: forwarded to ``load_audio`` (presumably forces re-reading
            raw audio instead of a cache — confirm against its definition).
    """
    # NOTE: the original placed this docstring at column 0, which is an
    # IndentationError inside a function body; it is now properly indented.
    epochs = training_epoch
    kernel_size = 5
    framerate = 16384  # sample rate requested from load_audio

    (x_train, y_train), (x_test, y_test) = load_audio(
        training_data_folder, num_classes, forceLoad=forceLoad,
        framerate=framerate, amount_limit=amount_limit)

    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)

    # One sample per row; a trailing channel axis of 1 for Conv1D.
    input_shape = (x_train.shape[1], 1)
    # count_convolutions is a project helper; presumably returns how many
    # stride-2 convolutions fit before the temporal axis gets too small.
    convolution_layers = count_convolutions(input_shape, kernel_size)

    model = keras.models.Sequential()
    model.add(Conv1D(16, kernel_size=kernel_size, activation="selu", strides=2,
                     input_shape=input_shape, padding="same"))
    for _ in range(convolution_layers):
        model.add(Conv1D(32, kernel_size=kernel_size, activation="selu",
                         strides=2, padding="same"))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(128, activation="selu"))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(0.0005),
                  metrics=['accuracy'])
    model.summary()

    # assumes x_train/x_test already carry the channel axis expected by
    # Conv1D — TODO confirm load_audio's output shape.
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test))

    score = model.evaluate(x_test, y_test, verbose=0)

    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    model.save(model_savepath)
    print('model saved to ', model_savepath)
Code example #2
    def build_critic(self):
        """Build the critic: a strided Conv1D stack ending in a single
        unbounded score (no activation — WGAN-style critic output).

        Returns:
            A Keras ``Model`` mapping an audio clip of ``self.audio_shape``
            to a single validity score.
        """
        # count_convolutions is a project helper; presumably returns how many
        # stride-2 convolutions fit before the temporal axis gets too small.
        convolution_layers = count_convolutions(self.audio_shape,
                                                self.kernel_size)

        # NOTE: the original built a throwaway `Sequential()` that was
        # immediately shadowed, and computed an unused `input_shape`;
        # both removed.
        model = keras.models.Sequential()

        model.add(
            Conv1D(16,
                   kernel_size=self.kernel_size,
                   activation='selu',
                   strides=2,
                   input_shape=self.audio_shape,
                   padding="same"))
        for _ in range(convolution_layers):
            model.add(
                Conv1D(32,
                       kernel_size=self.kernel_size,
                       activation='selu',
                       strides=2,
                       padding="same"))
        model.add(Flatten())
        model.add(Dropout(0.5))
        model.add(Dense(32, activation='selu'))
        model.add(Dropout(0.5))
        # Single linear unit: critic score, deliberately unbounded.
        model.add(Dense(1))

        model.summary()

        clip = Input(shape=self.audio_shape)
        validity = model(clip)

        return Model(clip, validity)
Code example #3
import os

# Pin TensorFlow to the first GPU only.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

num_classes = 10

# load_audio is a project helper; presumably returns train/test splits of the
# "speech_commands" dataset — verify against its definition.
(x_train, y_train), (x_test, y_test) = load_audio("speech_commands",
                                                  num_classes)

batch_size = 30
epochs = 50
kernel_size = 5

# One sample per row; trailing channel axis of 1.
input_shape = (x_train.shape[1], 1)
# NOTE(review): convolution_layers is computed but never used below — the
# model here is dense-only; confirm whether this line is leftover.
convolution_layers = count_convolutions(input_shape, kernel_size)

# Simple MLP baseline: flatten the waveform, two LeakyReLU dense layers,
# softmax classification head.
model = keras.models.Sequential()
model.add(Flatten(input_shape=input_shape))
model.add(Dense(512))
model.add(LeakyReLU(alpha=0.2))
model.add(Dense(256))
model.add(LeakyReLU(alpha=0.2))

model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])
model.summary()
model.fit(x_train,
          y_train,
Code example #4
    # Tail of a batch-sampling helper (its `def` line is above this chunk):
    # presumably draws `num` random (data, label) pairs without replacement
    # — confirm against the full definition.
    idx = np.arange(0 , len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    # Index via list comprehensions so this works for plain lists as well as
    # arrays; converted to ndarrays on return.
    data_shuffle = [data[ i] for i in idx]
    labels_shuffle = [labels[ i] for i in idx]

    return np.asarray(data_shuffle), np.asarray(labels_shuffle)

nClasses=10

# load_audio is a project helper; presumably returns train/test splits of the
# "speech_commands" dataset — verify against its definition.
(x_train, y_train), (x_test, y_test) = load_audio("speech_commands", nClasses)

kernel_size = 5
# TF 1.x graph-mode session; InteractiveSession installs itself as default.
sess=tf.InteractiveSession()

# NOTE(review): the other examples in this file pass (x_train.shape[1], 1)
# to count_convolutions; here the full shape tuple is passed — confirm
# which form the helper expects.
convolution_layers = count_convolutions(x_train.shape, kernel_size)
print("{} convolution layers".format(convolution_layers))

#declare input placeholders to which to upload data
# tfX: (batch, time, 1) float waveforms; tfY: one-hot labels.
tfX=tf.placeholder(dtype=tf.float32,shape=[None,x_train.shape[1],1])
tfY=tf.placeholder(dtype=tf.float32,shape=[None,nClasses])

#build model
# Strided Conv1D stack mirroring the Keras examples above; each stride-2
# layer roughly halves the temporal dimension.
layer=tf.layers.conv1d(tfX,16,kernel_size,strides=2,activation=tf.nn.selu)
for i in range(convolution_layers):
    layer=tf.layers.conv1d(layer,32,kernel_size,strides=2,activation=tf.nn.selu)
layer=tf.layers.flatten(layer)
layer=tf.layers.dropout(layer,0.5)
layer=tf.layers.dense(layer,32,activation=tf.nn.selu)
layer=tf.layers.dropout(layer,0.5)