def main(args):
    """Align a single audio file with its transcript and write demo artifacts.

    Loads optional LM/vocabulary, preprocesses the audio/transcript pair,
    detects voiced segments via VAD, runs the alignment pipeline and saves
    stats plus demo files to the target directory.

    Args:
        args: parsed CLI namespace; consumed by setup() and forwarded to
              the pipeline (force_realignment, align_endings).
    """
    print(create_args_str(args))
    lang, audio, trans, keras, ds, ds_alpha, ds_trie, lm, vocab, target_dir, normalize, gpu = setup(args)
    print(f'all artifacts will be saved to {target_dir}')

    # LM and vocabulary are optional; `lm`/`vocab` arrive as paths and are
    # rebound to the loaded objects (or None) here.
    lm = load_lm(lm) if lm else None
    vocab = load_vocab(vocab) if vocab else None

    audio_bytes, sample_rate, transcript, language = preprocess(audio, trans, lang, norm_transcript=normalize)
    voiced_segments = vad(audio_bytes, sample_rate)

    # FIX: pass the language determined by preprocess() instead of the
    # hard-coded 'en' — the CLI accepts a language argument, so non-English
    # input was previously aligned with English models regardless.
    df_alignments = pipeline(voiced_segments=voiced_segments,
                             sample_rate=sample_rate,
                             transcript=transcript,
                             language=language,
                             ds_path=ds,
                             ds_alpha_path=ds_alpha,
                             ds_trie_path=ds_trie,
                             keras_path=keras,
                             lm=lm,
                             vocab=vocab,
                             force_realignment=args.force_realignment,
                             align_endings=args.align_endings,
                             target_dir=target_dir)

    df_stats = calculate_stats(df_alignments, ds, transcript)
    create_demo_files(target_dir, audio, transcript, df_alignments, df_stats)

    print()
    print_dataframe(df_stats)
    print()

    stats_csv = join(target_dir, 'stats.csv')
    print(f'Saving stats to {stats_csv}')
    # NOTE(review): this writes the *alignments* frame to 'stats.csv' although
    # the message says "stats" — possibly df_stats was intended; kept as-is
    # to avoid breaking downstream consumers. TODO confirm with the authors.
    df_alignments.to_csv(stats_csv)
def main():
    """Build a corpus from the source directory and report where it was saved.

    NOTE(review): reads the module-level `args` namespace (source, target,
    limit) rather than taking parameters — presumably populated by an
    argparse call at import/script level; verify against the surrounding file.
    """
    print(create_args_str(args))
    print(
        f'Processing files from {args.source} and saving them in {args.target}'
    )
    corpus, corpus_file = create_corpus(args.source, args.target, args.limit)
    print(f'Done! Corpus with {len(corpus)} entries saved to {corpus_file}')
def main(date_time):
    """Create an optimizer and model, then train it.

    Args:
        date_time: timestamp-like value handed to setup() to derive the
                   output directory for this training run.

    NOTE(review): also reads the module-level `args` namespace (optimizer,
    learning_rate, dropouts, language, minutes) — confirm it is initialized
    before this is called.
    """
    print(create_args_str(args))
    target_dir = setup(date_time)
    print()
    print(f'all output will be written to {target_dir}')
    print()

    print(f'creating {args.optimizer.upper()} optimizer for model')
    if args.optimizer == 'adam':
        optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=0.01, epsilon=1e-8)
    else:
        # Any non-'adam' choice falls through to SGD with gradient clipping.
        optimizer = SGD(lr=args.learning_rate,
                        decay=1e-6,
                        momentum=0.9,
                        nesterov=True,
                        clipnorm=5)

    model = create_model(target_dir, optimizer, args.dropouts, args.language)
    train_model(model, args.language, target_dir, args.minutes)
def main():
    """Load a trained Keras model (plus optional LM/vocab) and evaluate it.

    NOTE(review): reads the module-level `args` namespace (model_dir, lm,
    lm_vocab, test_files, test_batches, batch_size, language) — presumably
    parsed at script level; verify before reuse.
    """
    print(create_args_str(args))
    target_dir = setup(args)
    print()
    print(f'all output will be written to {target_dir}')
    print()

    # SGD settings only matter for compiling the restored model; no training
    # happens here.
    optimizer = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    model = load_keras_model(args.model_dir, optimizer)
    model.summary()

    lm, vocab = None, None
    if args.lm:
        lm = load_lm(args.lm)
        vocab = load_vocab(args.lm_vocab)

    test_model(model, args.test_files, args.test_batches, args.batch_size,
               args.language, lm, vocab, target_dir)
def main(args):
    """Split a source corpus into train/dev/test segments for DeepSpeech.

    Extracts segments into CSV splits, builds a DeepSpeechCorpus over them
    and optionally precomputes features.

    Args:
        args: parsed CLI namespace; consumed by setup() and used for
              source_dir / language.
    """
    print(create_args_str(args))
    target_dir, corpus_id, force, synthesize, min_dur, max_dur, precompute_features = setup(
        args)

    source_corpus = get_corpus(args.source_dir, args.language)
    source_corpus.summary()

    print(
        f'processing {source_corpus.name} corpus and saving split segments in {target_dir}'
    )
    csv_train, csv_dev, csv_test = extract_segments(target_dir, corpus_id,
                                                    source_corpus, synthesize,
                                                    min_dur, max_dur, force)
    print(f'done! All files are in {target_dir}')

    split_corpus = DeepSpeechCorpus(args.language, csv_train, csv_dev, csv_test)
    split_corpus.summary()

    if precompute_features:
        print(f'pre-computing features')
        compute_features(csv_train, csv_dev, csv_test, target_dir, force)
def main(args):
    """Evaluate the Keras alignment pipeline on German test entries.

    Collects the test set of the 'rl' corpus plus six hand-picked PodClub
    entries, aligns each with the Keras pipeline, compares the result to the
    original alignments via Levenshtein similarity, writes per-entry demo
    files and a summary CSV, then updates the demo index page.

    Args:
        args: parsed CLI namespace; consumed by setup() and forwarded to the
              pipeline (norm_transcript, force_realignment, align_endings).
    """
    print(create_args_str(args))
    target_dir, keras_path, lm_path, vocab_path, gpu = setup(args)
    print(f'all results will be written to {target_dir}')

    lm = load_lm(lm_path) if lm_path else None
    vocab = load_vocab(vocab_path) if vocab_path else None

    # Deduplicate entries: several test segments may belong to one entry.
    rl_corpus = get_corpus('rl', 'de')
    rl_corpus.summary()
    test_entries = list({segment.entry for segment in rl_corpus.test_set()})

    # add 6 entries from PodClub corpus
    pc_corpus = get_corpus('pc', 'de')
    pc_corpus.summary()
    test_entries += [
        pc_corpus['record1058'], pc_corpus['record1063'],
        pc_corpus['record1076'], pc_corpus['record1523'],
        pc_corpus['record1548'], pc_corpus['record1556']
    ]

    stats = []
    for i, entry in enumerate(test_entries):
        print(f'entry {i + 1}/{len(test_entries)}')
        audio_file = entry.audio_path
        sample_rate = entry.rate

        with open(entry.transcript_path, encoding='utf-8') as f:
            transcript = f.read()
        if args.norm_transcript:
            transcript = normalize(transcript, 'de')

        demo_id = splitext(basename(audio_file))[0]
        target_dir_entry = join(target_dir, demo_id)
        if not exists(target_dir_entry):
            makedirs(target_dir_entry)

        voiced_segments = [
            Voice(s.audio, s.rate, s.start_frame, s.end_frame) for s in entry
        ]
        df_alignments = pipeline(voiced_segments=voiced_segments,
                                 sample_rate=sample_rate,
                                 transcript=transcript,
                                 language='de',
                                 keras_path=keras_path,
                                 lm=lm,
                                 vocab=vocab,
                                 force_realignment=args.force_realignment,
                                 align_endings=args.align_endings,
                                 target_dir=target_dir_entry)

        df_stats = calculate_stats(df_alignments, keras_path, transcript)

        # calculate average similarity between Keras-alignment and original aligments
        original_alignments = [s.transcript for s in entry.segments]
        av_similarity = np.mean([
            levenshtein_similarity(ka, oa)
            for (ka, oa) in zip(df_alignments['alignment'], original_alignments)
        ])
        df_stats['similarity'] = av_similarity

        create_demo_files(target_dir_entry, audio_file, transcript,
                          df_alignments, df_stats)
        stats.append(df_stats)

    df_keras = pd.concat(stats)
    csv_keras = join(target_dir, 'performance.csv')
    df_keras.to_csv(csv_keras)
    print(f'summary saved to {csv_keras}')

    visualize_pipeline_performance(csv_keras, csv_ds=None, silent=True)
    update_index(target_dir,
                 lang='de',
                 num_aligned=len(test_entries),
                 df_keras=df_keras,
                 keras_path=keras_path,
                 lm_path=lm_path,
                 vocab_path=vocab_path)
    K.clear_session()
def main(args):
    """Benchmark DeepSpeech vs. Keras alignment pipelines on demo samples.

    For each audio/transcript pair, runs both pipelines on the same voiced
    segments, computes per-pipeline stats plus a cross-pipeline Levenshtein
    similarity, writes demo files and two summary CSVs, and updates the
    demo index page.

    Args:
        args: parsed CLI namespace; consumed by setup() and forwarded to the
              pipelines (force_realignment, align_endings).
    """
    print(create_args_str(args))
    demo_files, target_dir, keras_path, ds_path, ds_alpha, ds_trie, lm_path, vocab_path, normalize, gpu = setup(
        args)
    num_files = len(demo_files)
    print(
        f'Processing {num_files} audio/transcript samples. All results will be written to {target_dir}'
    )

    lm = load_lm(lm_path) if lm_path else None
    vocab = load_vocab(vocab_path) if vocab_path else None

    stats_keras, stats_ds = [], []
    for i, (audio, transcript) in enumerate(demo_files):
        print(
            '-----------------------------------------------------------------'
        )
        print(f'{i + 1}/{num_files}: Evaluating pipeline on {audio}')
        print(
            '-----------------------------------------------------------------'
        )
        demo_id = splitext(basename(audio))[0]
        target_dir_ds = join(target_dir, demo_id + '_ds')
        target_dir_keras = join(target_dir, demo_id + '_keras')

        audio_bytes, sample_rate, transcript, language = preprocess(
            audio, transcript, 'en', norm_transcript=normalize)
        voiced_segments = vad(audio_bytes, sample_rate)

        # FIX: the original passed the *loaded* LM object as `lm_path=` and
        # omitted the vocabulary; pass `lm=lm, vocab=vocab` as the equivalent
        # DeepSpeech pipeline call elsewhere in this project does.
        df_alignments_ds = pipeline(voiced_segments=voiced_segments,
                                    sample_rate=sample_rate,
                                    transcript=transcript,
                                    language='en',
                                    ds_path=ds_path,
                                    ds_alpha_path=ds_alpha,
                                    ds_trie_path=ds_trie,
                                    lm=lm,
                                    vocab=vocab,
                                    force_realignment=args.force_realignment,
                                    align_endings=args.align_endings,
                                    target_dir=target_dir_ds)
        df_stats_ds = calculate_stats(df_alignments_ds, ds_path, transcript)

        df_alignments_keras = pipeline(
            voiced_segments=voiced_segments,
            sample_rate=sample_rate,
            transcript=transcript,
            language='en',
            keras_path=keras_path,
            lm=lm,
            vocab=vocab,
            force_realignment=args.force_realignment,
            align_endings=args.align_endings,
            target_dir=target_dir_keras)
        df_stats_keras = calculate_stats(df_alignments_keras, keras_path,
                                         transcript)

        # average similarity between Keras and DeepSpeech alignments
        av_similarity = np.mean([
            levenshtein_similarity(al_keras, al_ds)
            for (al_keras, al_ds) in zip(df_alignments_keras['alignment'],
                                         df_alignments_ds['alignment'])
        ])

        df_stats_ds['similarity'] = av_similarity
        df_stats_keras['similarity'] = av_similarity
        stats_ds.append(df_stats_ds)
        stats_keras.append(df_stats_keras)

        create_demo_files(target_dir_ds, audio, transcript, df_alignments_ds,
                          df_stats_ds)
        create_demo_files(target_dir_keras, audio, transcript,
                          df_alignments_keras, df_stats_keras)

    df_keras = pd.concat(stats_keras)
    csv_keras = join(target_dir, 'performance_keras.csv')
    df_keras.to_csv(csv_keras)

    df_ds = pd.concat(stats_ds)
    csv_ds = join(target_dir, 'performance_ds.csv')
    df_ds.to_csv(csv_ds)
    print(f'summary saved to {csv_keras}')

    visualize_pipeline_performance(csv_keras, csv_ds, silent=True)
    update_index(target_dir,
                 lang='en',
                 num_aligned=len(demo_files),
                 df_keras=df_keras,
                 keras_path=keras_path,
                 df_ds=df_ds,
                 ds_path=ds_path,
                 lm_path=lm_path,
                 vocab_path=vocab_path)

    print(f'Done! Demos have been saved to {target_dir}')
def main(args):
    """Render pipeline-performance plots from previously saved CSV summaries.

    Args:
        args: parsed CLI namespace; setup() extracts the DeepSpeech CSV,
              the Keras CSV and the silent flag from it.
    """
    print(create_args_str(args))
    csv_ds, csv_keras, silent = setup(args)
    visualize_pipeline_performance(csv_keras, csv_ds, silent)