# Imports assumed to follow the deep-speaker project layout; adjust the module
# paths if your checkout differs.
from audio import Audio
from batcher import KerasFormatConverter
from constants import NUM_FRAMES
from train import start_training
from utils import ensures_dir


def build_keras_inputs(working_dir, counts_per_speaker):
    # counts_per_speaker: passing --counts_per_speaker 600,100 means that for each
    # speaker, 600 samples are generated for training and 100 for testing. One
    # sample is 160 frames by default (roughly 1.6 seconds).
    counts_per_speaker = [int(b) for b in counts_per_speaker.split(',')]
    kc = KerasFormatConverter(working_dir)
    kc.generate(max_length=NUM_FRAMES, counts_per_speaker=counts_per_speaker)
    kc.persist_to_disk()
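
# A minimal usage sketch (not from the source): the working directory and the
# counts string below are illustrative values only.
build_keras_inputs(working_dir='/tmp/deep-speaker-wd', counts_per_speaker='600,100')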
def main(args):
    ensures_dir(args.working_dir)
    if args.preprocess:
        # Preprocessing requires the raw audio directory.
        if args.audio_dir is None:
            return
        Audio(cache_dir=args.working_dir, audio_dir=args.audio_dir, sample_rate=args.sample_rate)
    if args.build_keras_inputs:
        counts_per_speaker = [int(b) for b in args.counts_per_speaker.split(',')]
        kc = KerasFormatConverter(args.working_dir)
        kc.generate(max_length=NUM_FRAMES, counts_per_speaker=counts_per_speaker)
        kc.persist_to_disk()
    if args.train_embedding:
        # Optional pre-training phase, followed by triplet-loss training.
        if args.pre_training_phase:
            start_training(args.working_dir, pre_training_phase=args.pre_training_phase, epochs=args.epochs_pretrain)
        start_training(args.working_dir, pre_training_phase=False, epochs=args.epochs_triplet)
    if args.train_classifier:
        start_training(args.working_dir, pre_training_phase=False, classify=True, epochs=args.epochs_classifier)
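
# A minimal sketch of how the flags consumed by main() could be wired up with
# argparse. The flag names match the attributes read above; the defaults and
# the entry-point name are assumptions, not taken from the source.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Speaker embedding training pipeline.')
    parser.add_argument('--working_dir', required=True)
    parser.add_argument('--audio_dir', default=None)
    parser.add_argument('--sample_rate', type=int, default=16000)
    parser.add_argument('--counts_per_speaker', default='600,100')
    parser.add_argument('--preprocess', action='store_true')
    parser.add_argument('--build_keras_inputs', action='store_true')
    parser.add_argument('--train_embedding', action='store_true')
    parser.add_argument('--pre_training_phase', action='store_true')
    parser.add_argument('--train_classifier', action='store_true')
    parser.add_argument('--epochs_pretrain', type=int, default=1000)
    parser.add_argument('--epochs_triplet', type=int, default=1000)
    parser.add_argument('--epochs_classifier', type=int, default=10)
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())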