def load_models(speakers):
    if len(file_listing(MODEL_PERSIST_PATH, 'gmm')) > 0:
        print('Loading saved GMM models from file')
        return {speaker: load_gmm(speaker) for speaker in speakers}
    else:
        print('GMMs need to be trained first')
        exit(1)
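
All of the snippets on this page revolve around a small file_listing helper (later examples also use dir_listing and last_component) imported from the project's utils module. Its implementation is not shown anywhere on this page; a minimal sketch that is consistent with the call sites (a positional or extension= keyword filter, full paths returned) might look like this:

import os

def file_listing(dir_path, extension=None):
    # Full paths of the files in dir_path, optionally filtered by extension.
    # Sketch only: the real helper may sort or match case differently.
    paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
    files = [p for p in paths if os.path.isfile(p)]
    if extension is not None:
        files = [p for p in files if p.lower().endswith('.' + extension.lower())]
    return sorted(files)

def dir_listing(base_dir):
    # Full paths of the immediate subdirectories of base_dir
    return sorted(os.path.join(base_dir, name) for name in os.listdir(base_dir)
                  if os.path.isdir(os.path.join(base_dir, name)))

def last_component(path):
    # Final path component, e.g. '../data/dev/alice' -> 'alice'
    return os.path.basename(os.path.normpath(path))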
Example #2
def train(model, args, experiment=None):
    train_seq = TrainDatasetSequence(args.train_dataset,
                                     batch_size=args.batch_size,
                                     img_size=(args.img_w, args.img_h))
    test_seq = TestDatasetSequence(args.test_dataset,
                                   batch_size=args.batch_size,
                                   img_size=(args.img_w, args.img_h))
    model.compile(optimizer=Adam(lr=0.0002),
                  loss=perceptual_loss(input_shape=(args.img_h, args.img_w, 3)))
    model.summary()

    if args.weights:
        model.load_weights(args.weights)

    callbacks = [
        ModelCheckpoint(
            args.model_save_path + 'fusion_unet_{epoch:02d}_{val_loss:.3f}.h5',
            save_weights_only=True,
            verbose=1),
        TerminateOnNaN()
    ]
    if experiment is not None:
        callbacks.append(LogImages(experiment,
                                   paths=file_listing(args.validation_path),
                                   img_size=(args.img_w, args.img_h)))

    model.fit_generator(
        train_seq,
        epochs=args.epochs,
        validation_data=test_seq,
        use_multiprocessing=True,
        workers=8,
        callbacks=callbacks)
Example #3
def train(model, args, experiment=None):
    train_seq = TrainDatasetSequence(args.train_dataset,
                                     batch_size=args.batch_size,
                                     img_size=(args.img_w, args.img_h),
                                     use_lab=True)
    model.compile(optimizer=Adam(lr=0.001), loss='mse')
    model.summary()

    if args.weights:
        model.load_weights(args.weights)

    callbacks = [
        ModelCheckpoint(args.model_save_path +
                        'encoder_decoder_mse_{epoch:02d}.h5',
                        save_weights_only=True,
                        verbose=1),
        TerminateOnNaN()
    ]
    if experiment is not None:
        callbacks.append(
            LogImages(experiment,
                      paths=file_listing(args.validation_path),
                      img_size=(args.img_w, args.img_h),
                      log_iters=500,
                      use_lab=True))

    model.fit_generator(train_seq,
                        epochs=args.epochs,
                        use_multiprocessing=True,
                        workers=8,
                        callbacks=callbacks)
Example #4
def read_pair(line):
    line_stripped = line.strip().split()

    if len(line_stripped) == 3:  # Same person line - <name> <img_l_id> <img_r_id>
        name  = line_stripped[0]
        img_l = int(line_stripped[1]) - 1
        img_r = int(line_stripped[2]) - 1
        imgs  = file_listing(LFW_PATH + '/' + name, 'jpg')
        return (imgs[img_l], imgs[img_r], True)
    elif len(line_stripped) == 4:  # Different people line - <name_l> <img_l_id> <name_r> <img_r_id>
        name_l = line_stripped[0]
        img_l  = int(line_stripped[1]) - 1
        name_r = line_stripped[2]
        img_r  = int(line_stripped[3]) - 1
        imgs_l = file_listing(LFW_PATH + '/' + name_l, 'jpg')[img_l]
        imgs_r = file_listing(LFW_PATH + '/' + name_r, 'jpg')[img_r]
        return (imgs_l, imgs_r, name_l == name_r)
Example #5
def __init__(self,
             base_test_path,
             batch_size=32,
             input_size=(32, 32),
             scale=3):
    self.batch_size = batch_size
    self.paths      = file_listing(base_test_path, extension='JPEG')
    self.input_size = input_size
    self.scale      = scale
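
These __init__ fragments belong to keras.utils.Sequence subclasses, which fit_generator consumes batch by batch. The rest of each class is not shown; a minimal, self-contained sketch of the protocol (the class name and batch assembly here are placeholders, not the repository's code) looks like:

import math
import numpy as np
from keras.utils import Sequence

class PathsSequence(Sequence):
    # Hypothetical stand-in for the dataset sequences used on this page
    def __init__(self, paths, batch_size=32):
        self.paths = paths
        self.batch_size = batch_size

    def __len__(self):
        # Keras calls this to learn how many batches make up one epoch
        return math.ceil(len(self.paths) / self.batch_size)

    def __getitem__(self, idx):
        # Keras calls this with idx in [0, len(self)) to fetch one batch;
        # the real classes decode and resize images here instead of zeros
        batch = self.paths[idx * self.batch_size:(idx + 1) * self.batch_size]
        return np.zeros((len(batch), 32, 32, 3)), np.zeros((len(batch), 96, 96, 3))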
Example #6
def read_dataset_dir(base_dir):
    dirs = dir_listing(base_dir)
    # Start each speaker with a (0, OUTPUT_DIM) array so vstack only appends
    # real frames (np.empty(OUTPUT_DIM) would inject an uninitialised row)
    X = {last_component(d): np.empty((0, OUTPUT_DIM)) for d in dirs}

    for dir_path in dirs:
        for file in file_listing(dir_path, 'wav'):
            speaker = last_component(dir_path)
            audio_np = read_wav(file)
            X[speaker] = np.vstack((X[speaker], audio_np))

    return X
Example #7
def train(args, experiment=None):
    LOG_PERIOD = 1000
    # Dataset
    train_seq = TrainDatasetSequence(args.train_dataset,
                                     batch_size=args.batch_size // 2,
                                     img_size=(args.img_w, args.img_h))

    # Build GAN
    gan, generator, discriminator = build_gan(
        (args.img_w, args.img_h), args.generator_weights,
        args.discriminator_weights, args.gan_weights)

    # Log images callback
    log_images_callback = LogImages(experiment,
                                    paths=file_listing(args.validation_path),
                                    model=generator,
                                    img_size=(args.img_w, args.img_h),
                                    log_iters=LOG_PERIOD)
    on_batch_end = log_images_callback.on_batch_end
    on_epoch_end = log_images_callback.on_epoch_end

    # Create parallel queue to load images asynchronously
    batch_queue = create_parallel_queue(train_seq)

    # Train GAN
    for epoch in range(args.epochs):
        print(f'Epoch {epoch + 1}')

        for batch_idx in tqdm(range(len(train_seq))):
            gray_imgs, rgb_imgs = next(batch_queue)

            disc_loss = train_discriminator(gan, generator, discriminator,
                                            gray_imgs, rgb_imgs,
                                            args.batch_size)
            _, perceptual_loss, gen_loss = train_generator(
                gan, discriminator, gray_imgs, rgb_imgs, args.batch_size)
            # CometML logs
            metrics = {
                'discriminator_loss': disc_loss,
                'generator_loss': gen_loss,
                'perceptual_loss': perceptual_loss
            }
            if experiment is not None:
                experiment.log_metrics(metrics, step=batch_idx, epoch=epoch)
            on_batch_end(log_images_callback.iter, None)
            # Save model
            if batch_idx % LOG_PERIOD == 0:
                path_suffix = f'{args.img_w}x{args.img_h}_epoch-{epoch + 1}_{int(batch_idx / 1000)}K.h5'
                gan.save_weights(f'{args.model_save_path}/gan_{path_suffix}')
                generator.save_weights(
                    f'{args.model_save_path}/generator_{path_suffix}')

        on_epoch_end(epoch, None)
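
The loop above pulls batches from create_parallel_queue, which is not shown on this page. A plausible sketch built on Keras's OrderedEnqueuer (an assumption about how the original helper works) would be:

from keras.utils import OrderedEnqueuer

def create_parallel_queue(sequence, workers=8, max_queue_size=16):
    # Hypothetical reconstruction: prepare batches in background workers
    # and hand back a generator the training loop can call next() on
    enqueuer = OrderedEnqueuer(sequence, use_multiprocessing=True)
    enqueuer.start(workers=workers, max_queue_size=max_queue_size)
    return enqueuer.get()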
Example #8
def train(model, args, experiment):
    output_shape = (args.input_h * args.scale, args.input_w * args.scale, 3)
    train_seq = TrainDatasetSequence(args.train_dataset,
                                     batch_size=args.batch_size,
                                     input_size=(args.input_w, args.input_h),
                                     scale=args.scale)
    test_seq = TestDatasetSequence(args.test_dataset,
                                   batch_size=args.batch_size,
                                   input_size=(args.input_w, args.input_h),
                                   scale=args.scale)
    model.compile(optimizer=Adam(lr=3e-4),
                  loss=perceptual_loss(output_shape),
                  metrics=['mse', psnr_metric()])
    model.summary()

    if args.weights:
        model.load_weights(args.weights)

    callbacks = [
        ModelCheckpoint(args.model_save_path +
                        'sr_{epoch:02d}_{val_loss:.3f}.h5',
                        save_weights_only=True,
                        verbose=1),
        TerminateOnNaN(),
        LogImages(experiment,
                  paths=file_listing(args.validation_path),
                  input_size=(args.input_w, args.input_h),
                  scale=args.scale,
                  log_iters=500)
    ]

    model.fit_generator(train_seq,
                        epochs=args.epochs,
                        validation_data=test_seq,
                        use_multiprocessing=True,
                        workers=8,
                        callbacks=callbacks)
Example #9
def _get_image_paths(self, base_path):
    image_paths = []
    for dirpath in dir_listing(base_path):
        image_paths += file_listing(dirpath, extension='jpg')
    return image_paths

Example #10
if __name__ == '__main__':

    speaker_recordings, speakers = load_local_dataset()
    gmm_models = {speaker: init_gmm() for speaker in set(speakers)}
    train_gmms(gmm_models)
    save_gmms(gmm_models)

    # Calculate accuracy
    total = 0
    correct = 0
    wrong_files = {}

    for dir_path in dir_listing(relative_path('../data/dev/')):
        for file in file_listing(dir_path, 'wav'):
            print('Predicting file %s (%s/%s)' % (file, correct, total + 1))
            speaker = predict(gmm_models, file)
            if speaker == last_component(dir_path):
                correct += 1
            else:
                wrong_files[file] = (speaker, last_component(dir_path))
            total += 1

    print('Incorrectly classified')
    for file, val in wrong_files.items():
        print('%s is %s but was classified as %s' % (file, val[1], val[0]))

    print('Got %s correct out of %s' % (correct, total))
    print('-> %s percent accuracy' % ((correct / total) * 100))
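
The GMM scripts on this page call predict(gmm_models, file) without showing it. A plausible sketch, assuming sklearn-style mixtures (GaussianMixture.score returns the mean per-sample log-likelihood) and the same read_wav feature extraction used in read_dataset_dir, is:

def predict(gmm_models, wav_path):
    # Hypothetical reconstruction: score the file's frames under every
    # speaker's GMM and return the speaker with the highest mean
    # log-likelihood; read_wav(...) is assumed to yield per-frame features
    features = read_wav(wav_path)
    scores = {speaker: gmm.score(features)
              for speaker, gmm in gmm_models.items()}
    return max(scores, key=scores.get)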
Example #11
import numpy as np
import face_recognition
from utils import file_listing

base_path = '../data/ids/'
img_paths = file_listing(base_path)


def get_face_emb(filename):
    img_np = face_recognition.load_image_file(base_path + filename)
    embs = face_recognition.face_encodings(img_np)
    return embs[0] if len(embs) > 0 else None


anchor_embs = list(map(get_face_emb, img_paths))


def get_face_identity(img_np):
    '''
    Get face identity against the anchor embeddings.

    Args:
        img_np (np.ndarray): Input image (w, h, c)

    Returns:
        (str) filename of the matching anchor image, without extension
    '''
    target_embs = face_recognition.face_encodings(img_np)

    if len(target_embs) > 0:
        hits = face_recognition.compare_faces(anchor_embs, target_embs[0])
        # NOTE: the original snippet is truncated here; the lines below are a
        # plausible completion that returns the first matching anchor's
        # filename without its extension
        for path, hit in zip(img_paths, hits):
            if hit:
                return path.rsplit('.', 1)[0]
    return None
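
A usage sketch for get_face_identity (the image path below is purely illustrative):

frame = face_recognition.load_image_file('../data/test/example.jpg')  # hypothetical path
print(get_face_identity(frame))  # matching anchor's filename stem, or None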
Example #12
import os
from gmm_train import load_models, get_gmm_path, predict
from dataset import get_speakers
from utils import file_listing, dir_listing, relative_path, get_file_name

EVAL_DIR_PATH = relative_path('../data/eval/')

if __name__ == '__main__':
    speakers = get_speakers()
    gmm_models = load_models(speakers)

    for file in file_listing(EVAL_DIR_PATH, 'wav'):
        speaker = predict(gmm_models, file)
        print('%s -> %s' % (get_file_name(file), speaker))
Example #13
def __init__(self, base_test_path, batch_size=128, img_size=(256, 256)):
    self.batch_size = batch_size
    self.paths = file_listing(base_test_path, extension='JPEG')
    self.img_size = img_size