def get_data(featurizer: TextFeaturizer):
    """Return the first batch of the training split, featurized with *featurizer*.

    Relies on the module-level ``data_path`` and ``speech_featurizer``; builds an
    un-shuffled "train"-stage slice dataset, batches it with batch size 1, and
    pulls a single batch off the resulting pipeline.
    """
    dataset = ASRSliceDataset(
        data_paths=[data_path],
        speech_featurizer=speech_featurizer,
        text_featurizer=featurizer,
        stage="train",
        shuffle=False,
    )
    batched = dataset.create(1)
    return next(iter(batched))
# Character-level text featurizer; decoding settings (beam width, score norm)
# ride along in the same config dict.
text_featurizer = CharFeaturizer({
    "vocabulary": None,
    "blank_at_zero": True,
    "beam_width": 5,
    "norm_score": True,
})

# 80-bin log-mel spectrogram front end: 16 kHz audio, 25 ms window, 10 ms hop.
speech_featurizer = TFSpeechFeaturizer({
    "sample_rate": 16000,
    "frame_ms": 25,
    "stride_ms": 10,
    "num_feature_bins": 80,
    "feature_type": "log_mel_spectrogram",
    "preemphasis": 0.97,
    "normalize_signal": True,
    "normalize_feature": True,
    "normalize_per_feature": False,
})

# Batch size 4, capped at 100 batches per pass for this memory probe.
dataset = ASRSliceDataset(
    stage="train",
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    data_paths=[data],
    augmentations=augments,
    shuffle=True,
).create(4).take(100)

# Memory-leak probe: iterate the pipeline forever, printing the process RSS
# after every batch.  Fix: the original bound `i, batch` via enumerate() but
# never used either variable — iterate anonymously instead.
while True:
    print("--------------------------------------------")
    for _ in dataset:  # batch contents are irrelevant; we only watch memory
        print(process.memory_info().rss)
    # (tail of an if/elif chain whose header is above this chunk)
    print("Use subwords ...")
    text_featurizer = SubwordFeaturizer(config.decoder_config)
else:
    print("Use characters ...")
    text_featurizer = CharFeaturizer(config.decoder_config)

# Fixed seed so test runs are reproducible.
tf.random.set_seed(0)

# TFRecord-backed dataset when --tfrecords is set, otherwise read source files
# directly; both take the same test-split config.
if args.tfrecords:
    test_dataset = ASRTFRecordDataset(
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        **vars(config.learning_config.test_dataset_config))
else:
    test_dataset = ASRSliceDataset(
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        **vars(config.learning_config.test_dataset_config))

# build model
jasper = Jasper(**config.model_config, vocabulary_size=text_featurizer.num_classes)
jasper.make(speech_featurizer.shape)
jasper.load_weights(args.saved)
jasper.summary(line_length=100)
jasper.add_featurizers(speech_featurizer, text_featurizer)

# CLI batch size (--bs) overrides the config's running batch size when given.
batch_size = args.bs or config.learning_config.running_config.batch_size
test_data_loader = test_dataset.create(batch_size)

# NOTE(review): the with-block body continues beyond this chunk.
with file_util.save_file(file_util.preprocess_paths(args.output)) as filepath:
    overwrite = True
# Fixed seed so evaluation is reproducible across runs.
tf.random.set_seed(0)

# A saved checkpoint is mandatory for testing.  Fix: the original used a bare
# `assert args.saved`, which is silently stripped under `python -O`; raise an
# explicit error instead.
if not args.saved:
    raise ValueError("a saved checkpoint path (--saved) is required for testing")

# TFRecord-backed test split when --tfrecords is set, otherwise read the source
# files directly.  Both branches build an un-shuffled "test"-stage dataset.
if args.tfrecords:
    test_dataset = ASRTFRecordDataset(
        data_paths=config.learning_config.dataset_config.test_paths,
        tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        stage="test",
        shuffle=False)
else:
    test_dataset = ASRSliceDataset(
        data_paths=config.learning_config.dataset_config.test_paths,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        stage="test",
        shuffle=False)

# build model
conformer = Conformer(**config.model_config, vocabulary_size=text_featurizer.num_classes)
conformer._build(speech_featurizer.shape)
conformer.load_weights(args.saved, by_name=True)
conformer.summary(line_length=120)
conformer.add_featurizers(speech_featurizer, text_featurizer)

# Run the test loop over the whole test split.
conformer_tester = BaseTester(
    config=config.learning_config.running_config,
    output_name=args.output_name)
conformer_tester.compile(conformer)
conformer_tester.run(test_dataset)
        # (tail of an ASRTFRecordDataset(...) call whose opening is above this chunk)
        stage="train",
        cache=args.cache,
        shuffle=True)
    eval_dataset = ASRTFRecordDataset(
        data_paths=config.learning_config.dataset_config.eval_paths,
        tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        stage="eval",
        cache=args.cache,
        shuffle=True)
else:
    # No TFRecords: stream examples straight from the source files.
    train_dataset = ASRSliceDataset(
        data_paths=config.learning_config.dataset_config.train_paths,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        augmentations=config.learning_config.augmentations,  # augment train split only
        stage="train",
        cache=args.cache,
        shuffle=True)
    eval_dataset = ASRSliceDataset(
        data_paths=config.learning_config.dataset_config.eval_paths,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        stage="eval",
        cache=args.cache,
        shuffle=True)

# Gradient-accumulation transducer trainer; `strategy` (defined above this
# chunk) handles device placement/distribution.
conformer_trainer = TransducerTrainerGA(
    config=config.learning_config.running_config,
    text_featurizer=text_featurizer,
    strategy=strategy)
speech_featurizer = TFSpeechFeaturizer(config.speech_config)

# Pick the text featurizer from mutually exclusive CLI flags:
# --sentence_piece > --subwords > default characters.
if args.sentence_piece:
    logger.info("Use SentencePiece ...")
    text_featurizer = SentencePieceFeaturizer(config.decoder_config)
elif args.subwords:
    logger.info("Use subwords ...")
    text_featurizer = SubwordFeaturizer(config.decoder_config)
else:
    logger.info("Use characters ...")
    text_featurizer = CharFeaturizer(config.decoder_config)

# Fixed seed so test runs are reproducible.
tf.random.set_seed(0)

test_dataset = ASRSliceDataset(
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    **vars(config.learning_config.test_dataset_config))

# build model
rnn_transducer = RnnTransducer(**config.model_config, vocabulary_size=text_featurizer.num_classes)
rnn_transducer.make(speech_featurizer.shape)
rnn_transducer.load_weights(args.saved, by_name=True)
rnn_transducer.summary(line_length=100)
rnn_transducer.add_featurizers(speech_featurizer, text_featurizer)

# CLI batch size (--bs) overrides the config's running batch size when given.
batch_size = args.bs or config.learning_config.running_config.batch_size
test_data_loader = test_dataset.create(batch_size)

# NOTE(review): the with-block body continues beyond this chunk.
with file_util.save_file(file_util.preprocess_paths(args.output)) as filepath:
    overwrite = True
        # (tail of an ASRTFRecordDataset(...) call whose opening is above this chunk)
        text_featurizer=text_featurizer,
        augmentations=config["learning_config"]["augmentations"],
        stage="train",
        cache=args.cache,
        shuffle=True
    )
    eval_dataset = ASRTFRecordDataset(
        data_paths=config["learning_config"]["dataset_config"]["eval_paths"],
        tfrecords_dir=config["learning_config"]["dataset_config"]["tfrecords_dir"],
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        stage="eval",
        cache=args.cache,
        shuffle=True
    )
else:
    # No TFRecords: stream examples straight from the source files.
    train_dataset = ASRSliceDataset(
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        data_paths=config["learning_config"]["dataset_config"]["train_paths"],
        augmentations=config["learning_config"]["augmentations"],  # augment train split only
        stage="train",
        cache=args.cache,
        shuffle=True
    )
    eval_dataset = ASRSliceDataset(
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        data_paths=config["learning_config"]["dataset_config"]["eval_paths"],
        stage="eval",
        cache=args.cache,
        shuffle=True
    )

ctc_trainer = CTCTrainer(text_featurizer, config["learning_config"]["running_config"])

# Build DS2 model
# NOTE(review): comment says DS2 but a Jasper model is constructed — confirm.
# Model must be built inside the strategy scope so variables are placed on the
# distributed devices; the scope body continues beyond this chunk.
with ctc_trainer.strategy.scope():
    jasper = Jasper(**config["model_config"], vocabulary_size=text_featurizer.num_classes)
    jasper._build(speech_featurizer.shape)
#txf = CharFeaturizer(None) #b = txf.extract("fkaff aksfbfnak kcjhoiu") #print (b) config_dir = "tests/config_aishell.yml" config = Config(config_dir, learning=True) speech_featurizer = TFSpeechFeaturizer(config.speech_config) text_featurizer = CharFeaturizer(config.decoder_config) train_dataset = ASRSliceDataset( data_paths=config.learning_config.dataset_config.train_paths, speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, augmentations=config.learning_config.augmentations, stage="train", cache=False, shuffle=True, sort=False) train_data = train_dataset.create(2) train_data_loader = strategy.experimental_distribute_dataset(train_data) train_iterator = iter(train_data_loader) while True: batch = next(train_iterator) features, input_length, labels, label_length, prediction, prediction_length = batch #print ("features") #print (features)