def main(argv):
    """Extract one speech feature from an audio file, plot it and save the plot.

    Args:
        argv: Command-line style sequence. ``argv[1]`` is the audio file to
            read, ``argv[2]`` is the value placed under ``"feature_type"`` in
            the featurizer config (also used as the plot title) and
            ``argv[3]`` is the path the figure is saved to.
    """
    audio_path = argv[1]
    feature_type = argv[2]

    # Augmentation is left unconfigured. An "after" section could look like:
    #   "after": {
    #       "time_masking": {"num_masks": 10, "mask_factor": 100, "p_upperbound": 0.05},
    #       "freq_masking": {"mask_factor": 27},
    #   }
    augmentation = UserAugmentation(dict())

    featurizer_settings = dict(
        sample_rate=16000,
        frame_ms=25,
        stride_ms=10,
        feature_type=feature_type,
        preemphasis=0.97,
        normalize_signal=True,
        normalize_feature=True,
        normalize_per_feature=False,
        num_feature_bins=80,
    )

    waveform = read_raw_audio(audio_path, featurizer_settings["sample_rate"])
    featurizer = NumpySpeechFeaturizer(featurizer_settings)
    features = featurizer.extract(waveform)

    # Run the "after" augmentation stage, then keep index 0 of the last axis
    # (presumably the single channel -- confirm against the featurizer).
    augmented = au_after = augmentation["after"].augment(features)
    spectrogram = au_after[:, :, 0]
    del augmented

    plt.figure(figsize=(16, 2.5))
    axes = plt.gca()
    axes.set_title(f"{feature_type}", fontweight="bold")
    librosa.display.specshow(spectrogram.T, cmap="magma")

    # Eight evenly spaced colorbar ticks spanning the full value range.
    lowest = spectrogram.min()
    highest = spectrogram.max()
    tick_values = np.linspace(lowest, highest, 8, endpoint=True)
    plt.colorbar(pad=0.01, fraction=0.02, ax=axes, format="%.2f", ticks=tick_values)

    plt.tight_layout()
    plt.savefig(argv[3])
    plt.show()
from tiramisu_asr.featurizers.speech_featurizers import TFSpeechFeaturizer
from tiramisu_asr.featurizers.speech_featurizers import NumpySpeechFeaturizer
from tiramisu_asr.featurizers.text_featurizers import SubwordFeaturizer
from tiramisu_asr.optimizers.schedules import TransformerSchedule
from multiconformers_trainer import MultiConformersTrainer
from multiconformers_dataset import MultiConformersTFRecordDataset, MultiConformersSliceDataset

config = UserConfig(DEFAULT_YAML, args.config, learning=True)

# Build one speech config per feature type.
# Both names used to be bound to the very same config["speech_config"] object,
# so the second "feature_type" assignment overwrote the first and both
# featurizers were created from a config saying "log_gammatone_spectrogram".
# The log-mel config now gets its own shallow copy (only a top-level key is
# changed, so a shallow copy is enough; assumes the entry is a mapping).
lms_config = dict(config["speech_config"])
lms_config["feature_type"] = "log_mel_spectrogram"
# The gammatone config stays the live entry of `config`, so the final state of
# config["speech_config"] is the same as before this fix.
lgs_config = config["speech_config"]
lgs_config["feature_type"] = "log_gammatone_spectrogram"

# --nfx selects the NumPy featurizer implementation instead of the TF one.
if args.nfx:
    speech_featurizer_lms = NumpySpeechFeaturizer(lms_config)
    speech_featurizer_lgs = NumpySpeechFeaturizer(lgs_config)
else:
    speech_featurizer_lms = TFSpeechFeaturizer(lms_config)
    speech_featurizer_lgs = TFSpeechFeaturizer(lgs_config)

# Reuse a previously saved subword vocabulary when "<prefix>.subwords" exists;
# otherwise build one from the corpus files.
if args.subwords_prefix and os.path.exists(f"{args.subwords_prefix}.subwords"):
    print("Loading subwords ...")
    text_featurizer = SubwordFeaturizer.load_from_file(
        config["decoder_config"], args.subwords_prefix)
else:
    print("Generating subwords ...")
    text_featurizer = SubwordFeaturizer.build_from_corpus(
        config["decoder_config"], corpus_files=args.subwords_corpus)
    # The branch condition above allows a missing prefix, so only persist the
    # generated vocabulary when there is a prefix to save it under.
    # NOTE(review): presumably save_to_file fails on a None prefix -- confirm.
    if args.subwords_prefix is not None:
        text_featurizer.subwords.save_to_file(args.subwords_prefix)