import keras
from keras.layers import Conv1D, Dense, Dropout, Flatten


def train_basic_audio_classifier(num_classes, model_savepath, training_data_folder,
                                 training_epoch, batch_size=128, amount_limit=5000,
                                 forceLoad=True):
    """Train a basic audio classification model."""
    epochs = training_epoch
    kernel_size = 5
    framerate = 16384
    (x_train, y_train), (x_test, y_test) = load_audio(
        training_data_folder, num_classes, forceLoad=forceLoad,
        framerate=framerate, amount_limit=amount_limit)
    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)
    #save_sound(x_train, "classification", "xtrain", upscale=False)
    #save_sound(x_test, "classification", "xtest", upscale=False)

    # One fixed stride-2 convolution up front, then as many more as the clip
    # length allows.
    input_shape = (x_train.shape[1], 1)
    convolution_layers = count_convolutions(input_shape, kernel_size)

    model = keras.models.Sequential()
    model.add(Conv1D(16, kernel_size=kernel_size, activation="selu", strides=2,
                     input_shape=input_shape, padding="same"))
    for i in range(convolution_layers):
        model.add(Conv1D(32, kernel_size=kernel_size, activation="selu",
                         strides=2, padding="same"))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(128, activation="selu"))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(0.0005),
                  metrics=['accuracy'])
    model.summary()

    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test))
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    model.save(model_savepath)
    print('model saved to', model_savepath)
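# `count_convolutions` and `load_audio` are helpers defined elsewhere in this
# project. A minimal sketch of what `count_convolutions` plausibly computes,
# assuming it counts how many extra stride-2 convolutions fit after the first
# one before the clip gets shorter than the kernel (hypothetical; the real
# helper may differ):
def count_convolutions(input_shape, kernel_size):
    length = input_shape[0] // 2  # the first stride-2 Conv1D is added outside the loop
    layers = 0
    while length // 2 > kernel_size:
        length //= 2
        layers += 1
    return layers


# Hypothetical invocation, assuming a local "speech_commands" data folder:
# train_basic_audio_classifier(num_classes=10,
#                              model_savepath="audio_classifier.h5",
#                              training_data_folder="speech_commands",
#                              training_epoch=50)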
def build_critic(self):
    convolution_layers = count_convolutions(self.audio_shape, self.kernel_size)

    model = keras.models.Sequential()
    model.add(Conv1D(16, kernel_size=self.kernel_size, activation='selu',
                     strides=2, input_shape=self.audio_shape, padding="same"))
    for i in range(convolution_layers):
        model.add(Conv1D(32, kernel_size=self.kernel_size, activation='selu',
                         strides=2, padding="same"))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='selu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))  # single unbounded score, not a probability
    model.summary()

    clip = Input(shape=self.audio_shape)
    validity = model(clip)
    return Model(clip, validity)
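# A critic ending in a single unbounded linear unit like the one above is the
# usual setup for a WGAN. A minimal sketch of how such a critic is typically
# compiled, assuming the standard Keras WGAN recipe (the Wasserstein loss and
# RMSprop learning rate here are assumptions, not taken from this source):
import keras.backend as K

def wasserstein_loss(y_true, y_pred):
    # Labels are +1 for real and -1 for fake clips, so the critic learns to
    # push real scores up and fake scores down.
    return K.mean(y_true * y_pred)

# critic = self.build_critic()
# critic.compile(loss=wasserstein_loss,
#                optimizer=keras.optimizers.RMSprop(lr=0.00005))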
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import keras
from keras.layers import Dense, Flatten, LeakyReLU

num_classes = 10
(x_train, y_train), (x_test, y_test) = load_audio("speech_commands", num_classes)
batch_size = 30
epochs = 50
kernel_size = 5

input_shape = (x_train.shape[1], 1)
convolution_layers = count_convolutions(input_shape, kernel_size)

# A dense-only baseline: flatten the raw waveform and classify it directly.
model = keras.models.Sequential()
model.add(Flatten(input_shape=input_shape))
model.add(Dense(512))
model.add(LeakyReLU(alpha=0.2))
model.add(Dense(256))
model.add(LeakyReLU(alpha=0.2))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])
model.summary()

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
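# A plausible continuation for this baseline, mirroring the evaluation step
# in train_basic_audio_classifier above (assumed, not part of this snippet):
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])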
import numpy as np
import tensorflow as tf


def next_batch(num, data, labels):
    """Sample a random batch of `num` examples with matching labels."""
    idx = np.arange(0, len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]
    return np.asarray(data_shuffle), np.asarray(labels_shuffle)


nClasses = 10
(x_train, y_train), (x_test, y_test) = load_audio("speech_commands", nClasses)
kernel_size = 5

sess = tf.InteractiveSession()
convolution_layers = count_convolutions(x_train.shape[1:], kernel_size)
print("{} convolution layers".format(convolution_layers))

# declare input placeholders to which to upload data
tfX = tf.placeholder(dtype=tf.float32, shape=[None, x_train.shape[1], 1])
tfY = tf.placeholder(dtype=tf.float32, shape=[None, nClasses])

# build model
layer = tf.layers.conv1d(tfX, 16, kernel_size, strides=2, activation=tf.nn.selu)
for i in range(convolution_layers):
    layer = tf.layers.conv1d(layer, 32, kernel_size, strides=2, activation=tf.nn.selu)
layer = tf.layers.flatten(layer)
layer = tf.layers.dropout(layer, 0.5)
layer = tf.layers.dense(layer, 32, activation=tf.nn.selu)
layer = tf.layers.dropout(layer, 0.5)
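# The graph above stops before an output layer. A minimal sketch of one way to
# finish and train it with TF1-style ops; the layer size, optimizer, step
# count, and batch size below are assumptions, not taken from this source:
logits = tf.layers.dense(layer, nClasses)
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=tfY, logits=logits))
train_step = tf.train.AdamOptimizer(0.0005).minimize(loss)
correct = tf.equal(tf.argmax(logits, 1), tf.argmax(tfY, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

sess.run(tf.global_variables_initializer())
for step in range(1000):
    batch_x, batch_y = next_batch(128, x_train, y_train)
    sess.run(train_step, feed_dict={tfX: batch_x, tfY: batch_y})
    if step % 100 == 0:
        acc = sess.run(accuracy, feed_dict={tfX: x_test, tfY: y_test})
        print("step {}: test accuracy {:.3f}".format(step, acc))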