def build_tensorboard(tmp_generator, tb_folder):
        """Create a TensorBoard callback primed with one validation batch.

        Clears stale event files from ``tb_folder``, then manually assembles
        the ``validation_data`` list that Keras' TensorBoard callback needs
        when ``histogram_freq`` is enabled.

        Args:
            tmp_generator: generator yielding one ``(x, y)`` batch; ``x`` is
                expected to be a list of input arrays — TODO confirm.
            tb_folder: log directory; existing files in it are deleted.

        Returns:
            A configured ``TensorBoard`` callback with ``validation_data`` set.
        """
        # Best-effort cleanup of previous event files so runs don't overlap.
        for a_file in os.listdir(tb_folder):
            file_path = join(tb_folder, a_file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except Exception as e:
                # Non-fatal: report the failed deletion and keep going.
                print(e, file=sys.stderr)

        tb = TensorBoard(tb_folder,
                         write_graph=False,
                         histogram_freq=1,
                         write_grads=True,
                         write_images=False)
        x, y = next(tmp_generator)

        # Keras expects callback.validation_data to be the flat list
        # [inputs..., targets..., sample_weights..., learning_phase].
        tb.validation_data = x
        # NOTE(review): this expands the rank of the SECOND input array,
        # presumably to match the model's expected input shape — verify
        # against the generator's output.
        tb.validation_data[1] = np.expand_dims(tb.validation_data[1], axis=-1)
        if isinstance(y, list):
            num_targets = len(y)
            tb.validation_data += [y[0]] + y[1:]  # equivalent to ``+= y``
        else:
            tb.validation_data += [y]
            num_targets = 1

        # One all-ones sample-weight vector per target, then the Keras
        # learning-phase flag (0.0 = test mode) as the final element.
        tb.validation_data += [np.ones(x[0].shape[0])] * num_targets + [0.0]
        return tb
# Beispiel #2 (scraped example separator — commented out so the file parses)
# 0
def train(model_name):
    """Incrementally train ``model_name`` on batches from ``train_generator``.

    Loads the model from disk, then repeatedly pulls ``(inputs, targets)``
    batches and fits ``epochs`` epochs per batch until the module-level
    ``running`` flag is cleared or memory is exhausted.

    Args:
        model_name: base name used to load the model and to derive the
            checkpoint path (``models/<name>.hdf5``) and TensorBoard log
            directory (``models/<name>/``).
    """
    model = load_model(model_name)
    train_gen = train_generator(model.output_shape[3], (['wnp'], ['fill']),
                                (height, width), 64)
    check_pointer = ModelCheckpoint(filepath='models/' + model_name + '.hdf5',
                                    verbose=1,
                                    save_best_only=True,
                                    monitor='loss')
    # Renamed from ``reduce`` (shadowed the builtin) and now actually wired
    # into the fit callbacks — previously it was created but never used.
    reduce_lr = ReduceLROnPlateau(monitor='loss',
                                  factor=0.3,
                                  verbose=1,
                                  cooldown=10)
    tensor_board = TensorBoard(log_dir='models/' + model_name + '/')
    # Module-level sample batch — TODO confirm it matches Keras'
    # expected validation_data layout for this callback configuration.
    tensor_board.validation_data = input_samples
    epoch = 0
    epochs = 20
    try:
        for i, o in train_gen:
            if not running:  # module-level stop flag — checked per batch
                break
            check_memory()
            calc_sample_results(model)
            model.fit(i,
                      o,
                      epochs=epoch + epochs,
                      initial_epoch=epoch,
                      callbacks=[check_pointer, reduce_lr, tensor_board],
                      batch_size=6)
            epoch += epochs
    except MemoryError:
        # Out of memory: stop training gracefully instead of crashing.
        return
# Beispiel #3 (scraped example separator — commented out so the file parses)
# 0
def main():
    """Train the autoregressive generative model on the TIMIT corpus.

    Builds the model, wires up TensorBoard/checkpoint/LR-schedule/early-stop
    callbacks, runs ``fit_generator`` for up to 200 epochs, and saves the
    weights to ``models/gm.h5``. A KeyboardInterrupt stops training early
    but still saves the current weights.
    """
    corpus = TIMIT(dirpath=TIMIT_DATA_DIR)
    quantizer = MuLawQuantizer(k=256)
    gm = AutoregressiveGenerativeModel(quantizer,
                                       nbChannels=corpus.nbChannels,
                                       nbFilters=64,
                                       name='gm')

    modelPath = os.path.join(CDIR, 'models')
    if not os.path.exists(modelPath):
        os.makedirs(modelPath)
    modelFilename = os.path.join(modelPath, 'gm.h5')

    # One timestamped log directory per run so TensorBoard runs don't mix.
    logPath = os.path.join(
        CDIR, 'logs', 'gm',
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    if not os.path.exists(logPath):
        os.makedirs(logPath)
    gm.compile(optimizer=Adam(1e-2),
               loss='categorical_crossentropy',
               metrics=[
                   'categorical_accuracy',
               ])
    print(gm.summary())

    # NOTE: memory requirement for processing speech on the raw waveform is too high to have a large batch size
    batchSize = 8
    trainData = corpus.trainData()
    trainGenerator = AutoregressiveGenerativeModelDataGenerator(
        gm, trainData, corpus.fs, batchSize)
    logger.info('Number of audio samples in training set: %d' %
                (len(trainData)))

    testData = corpus.testData()
    testGenerator = AutoregressiveGenerativeModelDataGenerator(
        gm, testData, corpus.fs, batchSize)
    logger.info('Number of audio samples in test set: %d' % (len(testData)))

    callbacks = []

    # histogram_freq=1 requires validation_data, attached manually below.
    tensorboard = TensorBoard(logPath,
                              histogram_freq=1,
                              write_graph=False,
                              batch_size=batchSize,
                              write_grads=True)
    # A small fixed subset (10 clips) keeps histogram logging cheap.
    tensorboardGenerator = AutoregressiveGenerativeModelDataGenerator(
        gm, testData[:10], corpus.fs, batchSize)
    x, y = tensorboardGenerator[0]
    tensorboard.validation_data = [
        x,  # X
        y,  # y
        np.ones(len(x)),  # sample weights
    ]
    callbacks.append(tensorboard)

    checkpointer = ModelCheckpoint(modelFilename,
                                   monitor='val_loss',
                                   save_best_only=True,
                                   save_weights_only=True)
    callbacks.append(checkpointer)

    callbacks.append(
        ReduceLROnPlateau(monitor='loss', factor=0.2, patience=5, min_lr=1e-5))

    callbacks.append(TerminateOnNaN())
    callbacks.append(
        EarlyStopping(monitor='val_loss',
                      min_delta=1e-6,
                      patience=50,
                      mode='auto'))

    try:
        gm.fit_generator(trainGenerator,
                         epochs=200,
                         validation_data=testGenerator,
                         use_multiprocessing=False,
                         shuffle=True,
                         verbose=1,
                         callbacks=callbacks)
    except KeyboardInterrupt:
        logger.info("Training interrupted by the user")
        # Fall through: the unconditional save below persists the weights.
        # (Previously the handler also saved here, writing the file twice.)

    gm.save_weights(modelFilename)

    logger.info('All done.')