def prepare_featurizers(
    config: Config,
    subwords: bool = True,
    sentence_piece: bool = False,
):
    """Build the speech featurizer and the matching text featurizer.

    The speech featurizer is always a ``TFSpeechFeaturizer`` constructed
    from ``config.speech_config``. The text featurizer is constructed from
    ``config.decoder_config`` and its class is picked from the flags, with
    ``sentence_piece`` taking precedence over ``subwords``.

    Args:
        config: Configuration object exposing ``speech_config`` and
            ``decoder_config`` attributes.
        subwords: When true (and ``sentence_piece`` is false), use
            ``SubwordFeaturizer``.
        sentence_piece: When true, use ``SentencePieceFeaturizer``
            regardless of ``subwords``.

    Returns:
        A ``(speech_featurizer, text_featurizer)`` tuple. When both flags
        are false the text featurizer is a ``CharFeaturizer``.
    """
    audio_featurizer = speech_featurizers.TFSpeechFeaturizer(
        config.speech_config)

    # SentencePiece wins over subwords when both flags are set.
    if sentence_piece:
        logger.info("Loading SentencePiece model ...")
        return audio_featurizer, text_featurizers.SentencePieceFeaturizer(
            config.decoder_config)

    if subwords:
        logger.info("Loading subwords ...")
        return audio_featurizer, text_featurizers.SubwordFeaturizer(
            config.decoder_config)

    # Neither flag set: fall back to character-level featurization.
    logger.info("Use characters ...")
    return audio_featurizer, text_featurizers.CharFeaturizer(
        config.decoder_config)
# Example #2
# 0
from tensorflow_asr.configs.config import Config
from tensorflow_asr.datasets import asr_dataset
from tensorflow_asr.featurizers import speech_featurizers, text_featurizers
from tensorflow_asr.models.transducer.conformer import Conformer
from tensorflow_asr.optimizers.schedules import TransformerSchedule

# Load the run configuration from the path given on the command line and
# build the speech featurizer from its speech section.
config = Config(args.config)
speech_featurizer = speech_featurizers.TFSpeechFeaturizer(
    config.speech_config)

# Pick the text featurizer from the CLI flags. SentencePiece takes
# precedence over subwords; with neither flag set, characters are used.
if not (args.sentence_piece or args.subwords):
    logger.info("Use characters ...")
    text_featurizer = text_featurizers.CharFeaturizer(
        config.decoder_config)
elif args.sentence_piece:
    logger.info("Loading SentencePiece model ...")
    text_featurizer = text_featurizers.SentencePieceFeaturizer(
        config.decoder_config)
else:
    logger.info("Loading subwords ...")
    text_featurizer = text_featurizers.SubwordFeaturizer(
        config.decoder_config)

# When the TFRecord flag is set, build the training and evaluation datasets
# as ASRTFRecordDataset instances. Both share the same speech and text
# featurizers; the remaining keyword arguments are the attributes of the
# corresponding dataset config object, expanded via vars().
if args.tfrecords:
    train_dataset = asr_dataset.ASRTFRecordDataset(
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        **vars(config.learning_config.train_dataset_config),
        indefinite=True)
    # NOTE(review): indefinite=True is passed for the eval dataset as well;
    # presumably it makes the dataset repeat without end -- confirm this is
    # intended for evaluation and check against ASRTFRecordDataset.
    eval_dataset = asr_dataset.ASRTFRecordDataset(
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        **vars(config.learning_config.eval_dataset_config),
        indefinite=True)