def build_keras_inputs(working_dir, counts_per_speaker):
    """Generate the Keras inputs for ``working_dir`` and write them to disk.

    ``counts_per_speaker`` is a comma-separated string of integers. For
    example ``--counts_per_speaker 600,100`` means that, for each speaker,
    600 samples are generated for training and 100 for testing. One sample
    is 160 frames by default (roughly 1.6 seconds).
    """
    per_speaker_counts = list(map(int, counts_per_speaker.split(',')))
    converter = KerasFormatConverter(working_dir)
    converter.generate(
        max_length=NUM_FRAMES,
        counts_per_speaker=per_speaker_counts,
    )
    converter.persist_to_disk()
# Example no. 2
# 0
def main(args):
    """Run the pipeline stages selected by the flags on ``args``.

    Stages run in this order, each only when its flag is truthy:
    ``preprocess``, ``build_keras_inputs``, ``train_embedding`` and
    ``train_classifier``. The working directory is ensured first.

    Args:
        args: Namespace-like object exposing ``working_dir``, ``audio_dir``,
            ``sample_rate``, ``counts_per_speaker``, ``pre_training_phase``,
            ``epochs_pretrain``, ``epochs_triplet``, ``epochs_classifier``
            and the four stage flags listed above.

    Returns:
        None.
    """
    ensures_dir(args.working_dir)

    if args.preprocess:
        # Build the audio cache whenever preprocessing is requested. The
        # previous code only did so when ``audio_dir`` was None and then
        # returned, which skipped preprocessing for an explicit audio
        # directory and aborted every later stage otherwise.
        # NOTE(review): ``audio_dir`` may still be None here, exactly as in
        # the old call -- confirm how Audio treats a missing directory.
        Audio(
            cache_dir=args.working_dir,
            audio_dir=args.audio_dir,
            sample_rate=args.sample_rate,
        )

    if args.build_keras_inputs:
        # "600,100" -> [600, 100]
        counts_per_speaker = [int(b) for b in args.counts_per_speaker.split(',')]
        kc = KerasFormatConverter(args.working_dir)
        kc.generate(max_length=NUM_FRAMES, counts_per_speaker=counts_per_speaker)
        kc.persist_to_disk()

    if args.train_embedding:
        # Optional pre-training pass first, then the run with
        # pre_training_phase=False using the triplet epoch budget.
        if args.pre_training_phase:
            start_training(
                args.working_dir,
                pre_training_phase=args.pre_training_phase,
                epochs=args.epochs_pretrain,
            )
        start_training(args.working_dir, pre_training_phase=False, epochs=args.epochs_triplet)

    if args.train_classifier:
        start_training(
            args.working_dir,
            pre_training_phase=False,
            classify=True,
            epochs=args.epochs_classifier,
        )
# Example no. 3
# 0
def build_keras_inputs(working_dir, counts_per_speaker):
    """Generate the Keras inputs for ``working_dir`` and write them to disk.

    ``counts_per_speaker`` is a comma-separated string; every field is
    converted with ``int`` and the resulting list is handed to the
    converter's ``generate`` call together with ``NUM_FRAMES``.
    """
    parsed_counts = []
    for field in counts_per_speaker.split(','):
        parsed_counts.append(int(field))

    converter = KerasFormatConverter(working_dir)
    converter.generate(max_length=NUM_FRAMES, counts_per_speaker=parsed_counts)
    converter.persist_to_disk()