def get_data(featurizer: TextFeaturizer):
    """Return one training batch built with *featurizer* for text encoding.

    NOTE(review): relies on module-level ``data_path`` and
    ``speech_featurizer`` being defined elsewhere in the file.
    """
    dataset = ASRSliceDataset(
        data_paths=[data_path],
        speech_featurizer=speech_featurizer,
        text_featurizer=featurizer,
        stage="train",
        shuffle=False,
    )
    # Batch size 1, then pull the first (and only requested) element.
    batched = dataset.create(1)
    return next(iter(batched))
# Esempio n. 2
# 0
# Character-level text featurizer; decoder options are given inline
# (no external vocabulary file, blank token at index 0, beam width 5).
text_featurizer = CharFeaturizer({
    "vocabulary": None,
    "blank_at_zero": True,
    "beam_width": 5,
    "norm_score": True
})

# Log-mel spectrogram speech featurizer: 16 kHz input, 25 ms frames with a
# 10 ms stride, 80 mel bins, pre-emphasis and per-utterance normalization.
speech_featurizer = TFSpeechFeaturizer({
    "sample_rate": 16000,
    "frame_ms": 25,
    "stride_ms": 10,
    "num_feature_bins": 80,
    "feature_type": "log_mel_spectrogram",
    "preemphasis": 0.97,
    "normalize_signal": True,
    "normalize_feature": True,
    "normalize_per_feature": False
})

# Training pipeline over the raw data files; batch by 4 and cap the stream
# at 100 batches per pass.
dataset = ASRSliceDataset(
    data_paths=[data],
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    augmentations=augments,
    stage="train",
    shuffle=True,
)
dataset = dataset.create(4).take(100)

# Endless re-iteration over the dataset, printing the process RSS after each
# batch -- looks like a memory-leak probe.
# NOTE(review): `process` is not defined in this snippet; presumably a
# psutil.Process created elsewhere -- confirm. Loop has no exit condition.
while True:
    print("--------------------------------------------")
    for i, batch in enumerate(dataset):
        print(process.memory_info().rss)
# Esempio n. 3
# 0
    # NOTE(review): truncated snippet -- the `if` header that pairs with the
    # `else` below was lost during extraction. The two branches choose
    # between subword- and character-level text featurizers.
    print("Use subwords ...")
    text_featurizer = SubwordFeaturizer(config.decoder_config)
else:
    print("Use characters ...")
    text_featurizer = CharFeaturizer(config.decoder_config)

# Fixed seed for reproducible evaluation.
tf.random.set_seed(0)

# Select the test-dataset implementation from the CLI flag; both variants
# share the featurizers and the config-driven keyword arguments.
dataset_cls = ASRTFRecordDataset if args.tfrecords else ASRSliceDataset
test_dataset = dataset_cls(
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    **vars(config.learning_config.test_dataset_config))

# build model: Jasper sized from the model config, output dimension taken
# from the text featurizer's class count; then restore the saved weights.
jasper = Jasper(**config.model_config,
                vocabulary_size=text_featurizer.num_classes)
jasper.make(speech_featurizer.shape)
jasper.load_weights(args.saved)
jasper.summary(line_length=100)
jasper.add_featurizers(speech_featurizer, text_featurizer)

# --bs overrides the configured batch size when given (falsy values fall
# through to the config default).
batch_size = args.bs or config.learning_config.running_config.batch_size
test_data_loader = test_dataset.create(batch_size)

# NOTE(review): `filepath` is never used and only `overwrite` is assigned --
# the body of this `with` block appears truncated by the snippet extraction.
with file_util.save_file(file_util.preprocess_paths(args.output)) as filepath:
    overwrite = True
# Fixed seed for reproducible evaluation; a saved-weights path is required.
tf.random.set_seed(0)
assert args.saved

# Constructor arguments shared by both test-dataset flavours.
dataset_kwargs = dict(
    data_paths=config.learning_config.dataset_config.test_paths,
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    stage="test",
    shuffle=False,
)
if args.tfrecords:
    # TFRecord-backed dataset additionally needs the records directory.
    test_dataset = ASRTFRecordDataset(
        tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir,
        **dataset_kwargs)
else:
    test_dataset = ASRSliceDataset(**dataset_kwargs)

# build model: Conformer sized from the model config, output dimension taken
# from the text featurizer's class count.
conformer = Conformer(**config.model_config,
                      vocabulary_size=text_featurizer.num_classes)
# NOTE(review): `_build` is a private method -- other snippets in this file
# call `.make()` instead; likely an older API version. Confirm.
conformer._build(speech_featurizer.shape)
conformer.load_weights(args.saved, by_name=True)
conformer.summary(line_length=120)
conformer.add_featurizers(speech_featurizer, text_featurizer)

# Compile the tester with the restored model and run it over the test split.
conformer_tester = BaseTester(config=config.learning_config.running_config,
                              output_name=args.output_name)
conformer_tester.compile(conformer)
conformer_tester.run(test_dataset)
# Esempio n. 5
# 0
        # NOTE(review): truncated snippet -- the opening of this
        # ASRTFRecordDataset(...) train-set call (and presumably an
        # `if args.tfrecords:` header, as in other snippets here) was lost
        # in extraction; the keyword args below complete that call.
        stage="train",
        cache=args.cache,
        shuffle=True)
    # TFRecord-backed eval split sharing the featurizers with the train set.
    eval_dataset = ASRTFRecordDataset(
        data_paths=config.learning_config.dataset_config.eval_paths,
        tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        stage="eval",
        cache=args.cache,
        shuffle=True)
else:
    # Raw-file (slice) datasets when TFRecords are not used; only the train
    # split receives augmentations.
    train_dataset = ASRSliceDataset(
        data_paths=config.learning_config.dataset_config.train_paths,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        augmentations=config.learning_config.augmentations,
        stage="train",
        cache=args.cache,
        shuffle=True)
    eval_dataset = ASRSliceDataset(
        data_paths=config.learning_config.dataset_config.eval_paths,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        stage="eval",
        cache=args.cache,
        shuffle=True)

# Transducer trainer driven by the running config and distribution strategy.
# NOTE(review): "GA" presumably means gradient accumulation -- confirm against
# the TransducerTrainerGA implementation.
conformer_trainer = TransducerTrainerGA(
    config=config.learning_config.running_config,
    text_featurizer=text_featurizer,
    strategy=strategy)
# Esempio n. 6
# 0
speech_featurizer = TFSpeechFeaturizer(config.speech_config)

# Resolve the text-featurizer implementation from the CLI flags, with
# precedence sentence-piece > subwords > characters.
if args.sentence_piece:
    logger.info("Use SentencePiece ...")
    featurizer_cls = SentencePieceFeaturizer
elif args.subwords:
    logger.info("Use subwords ...")
    featurizer_cls = SubwordFeaturizer
else:
    logger.info("Use characters ...")
    featurizer_cls = CharFeaturizer
text_featurizer = featurizer_cls(config.decoder_config)

# Fixed seed for reproducible evaluation.
tf.random.set_seed(0)

# Test split built from the config's test_dataset_config keyword arguments.
test_dataset = ASRSliceDataset(speech_featurizer=speech_featurizer,
                               text_featurizer=text_featurizer,
                               **vars(
                                   config.learning_config.test_dataset_config))

# build model: RNN transducer sized from the model config, output dimension
# taken from the text featurizer's class count; restore saved weights.
rnn_transducer = RnnTransducer(**config.model_config,
                               vocabulary_size=text_featurizer.num_classes)
rnn_transducer.make(speech_featurizer.shape)
rnn_transducer.load_weights(args.saved, by_name=True)
rnn_transducer.summary(line_length=100)
rnn_transducer.add_featurizers(speech_featurizer, text_featurizer)

# --bs overrides the configured batch size when given.
batch_size = args.bs or config.learning_config.running_config.batch_size
test_data_loader = test_dataset.create(batch_size)

# NOTE(review): `filepath` is never used and only `overwrite` is assigned --
# the body of this `with` block appears truncated by the snippet extraction.
with file_util.save_file(file_util.preprocess_paths(args.output)) as filepath:
    overwrite = True
# Esempio n. 7
# 0
        # NOTE(review): truncated snippet -- the opening of this
        # ASRTFRecordDataset(...) train-set call (and its `if` header) was
        # lost in extraction; the keyword args below complete that call.
        # This snippet reads config as a plain dict, unlike the attribute
        # access used elsewhere in the file.
        text_featurizer=text_featurizer,
        augmentations=config["learning_config"]["augmentations"],
        stage="train", cache=args.cache, shuffle=True
    )
    # TFRecord-backed eval split sharing the featurizers with the train set.
    eval_dataset = ASRTFRecordDataset(
        data_paths=config["learning_config"]["dataset_config"]["eval_paths"],
        tfrecords_dir=config["learning_config"]["dataset_config"]["tfrecords_dir"],
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        stage="eval", cache=args.cache, shuffle=True
    )
else:
    # Raw-file (slice) datasets; only the train split receives augmentations.
    train_dataset = ASRSliceDataset(
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        data_paths=config["learning_config"]["dataset_config"]["train_paths"],
        augmentations=config["learning_config"]["augmentations"],
        stage="train", cache=args.cache, shuffle=True
    )
    eval_dataset = ASRSliceDataset(
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        data_paths=config["learning_config"]["dataset_config"]["eval_paths"],
        stage="eval", cache=args.cache, shuffle=True
    )

# CTC trainer over the running config; the model is constructed under the
# trainer's distribution-strategy scope so its variables are placed per the
# strategy.
ctc_trainer = CTCTrainer(text_featurizer, config["learning_config"]["running_config"])
# Build DS2 model
with ctc_trainer.strategy.scope():
    jasper = Jasper(**config["model_config"], vocabulary_size=text_featurizer.num_classes)
    # NOTE(review): `_build` is a private method -- other snippets in this
    # file call `.make()` instead; likely an older API version. Confirm.
    jasper._build(speech_featurizer.shape)
# Commented-out scratch code kept from the original snippet:
#txf = CharFeaturizer(None)
#b = txf.extract("fkaff aksfbfnak kcjhoiu")
#print (b)

# Load the AISHELL test configuration and build the default featurizers
# (TF log-mel speech features + character-level text) from it.
config_dir = "tests/config_aishell.yml"
config = Config(config_dir, learning=True)

speech_featurizer = TFSpeechFeaturizer(config.speech_config)
text_featurizer = CharFeaturizer(config.decoder_config)

# Training split over the configured paths: augmented, shuffled, no caching,
# no length-sorting; batched by 2.
train_dataset_kwargs = dict(
    data_paths=config.learning_config.dataset_config.train_paths,
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    augmentations=config.learning_config.augmentations,
    stage="train",
    cache=False,
    shuffle=True,
    sort=False,
)
train_dataset = ASRSliceDataset(**train_dataset_kwargs)

train_data = train_dataset.create(2)

# Distribute the batched dataset across the strategy's replicas and pull
# batches manually through an explicit iterator.
train_data_loader = strategy.experimental_distribute_dataset(train_data)

train_iterator = iter(train_data_loader)
# NOTE(review): no break/StopIteration handling -- this debug loop ends with
# an exception once the iterator is exhausted. Each batch unpacks into six
# tensors (features, lengths, labels, label lengths, predictions, their
# lengths) per the dataset's output signature.
while True:
    batch = next(train_iterator)
    features, input_length, labels, label_length, prediction, prediction_length = batch
    #print ("features")
    #print (features)