def prepare_featurizers(
    config: Config,
    subwords: bool = True,
    sentence_piece: bool = False,
):
    """Create the speech featurizer and the text featurizer for a config.

    The text featurizer is chosen by flag precedence: ``sentence_piece``
    is checked first, then ``subwords``; when neither is set the
    character-level featurizer is used.

    Args:
        config: Configuration object; its ``speech_config`` is passed to
            the speech featurizer and its ``decoder_config`` to the text
            featurizer.
        subwords: Select ``SubwordFeaturizer`` (only consulted when
            ``sentence_piece`` is false).
        sentence_piece: Select ``SentencePieceFeaturizer``; wins over
            ``subwords`` when both are true.

    Returns:
        A ``(speech_featurizer, text_featurizer)`` tuple.
    """
    # The speech featurizer is always built first, independent of the flags.
    audio_featurizer = speech_featurizers.TFSpeechFeaturizer(
        config.speech_config
    )

    # Guard clauses, ordered by precedence: SentencePiece > subwords > chars.
    if sentence_piece:
        logger.info("Loading SentencePiece model ...")
        return audio_featurizer, text_featurizers.SentencePieceFeaturizer(
            config.decoder_config
        )

    if subwords:
        logger.info("Loading subwords ...")
        return audio_featurizer, text_featurizers.SubwordFeaturizer(
            config.decoder_config
        )

    # Neither flag set: fall back to the character-level featurizer.
    logger.info("Use characters ...")
    return audio_featurizer, text_featurizers.CharFeaturizer(
        config.decoder_config
    )
args = parser.parse_args() tf.config.optimizer.set_experimental_options( {"auto_mixed_precision": args.mxp}) strategy = env_util.setup_strategy(args.devices) from tensorflow_asr.configs.config import Config from tensorflow_asr.datasets import asr_dataset from tensorflow_asr.featurizers import speech_featurizers, text_featurizers from tensorflow_asr.models.transducer.conformer import Conformer from tensorflow_asr.optimizers.schedules import TransformerSchedule config = Config(args.config) speech_featurizer = speech_featurizers.TFSpeechFeaturizer(config.speech_config) if args.sentence_piece: logger.info("Loading SentencePiece model ...") text_featurizer = text_featurizers.SentencePieceFeaturizer( config.decoder_config) elif args.subwords: logger.info("Loading subwords ...") text_featurizer = text_featurizers.SubwordFeaturizer(config.decoder_config) else: logger.info("Use characters ...") text_featurizer = text_featurizers.CharFeaturizer(config.decoder_config) if args.tfrecords: train_dataset = asr_dataset.ASRTFRecordDataset( speech_featurizer=speech_featurizer,