Beispiel #1
0
def test_streaming_transducer():
    """Smoke-test TFLite conversion of a StreamingTransducer.

    Builds the model from ``DEFAULT_YAML``, attaches the speech and text
    featurizers, then converts the function returned by
    ``make_tflite_function`` twice: first with ``timestamp=False`` and then
    with ``timestamp=True``. Nothing is asserted; the test passes when
    neither conversion raises.
    """
    config = Config(DEFAULT_YAML, learning=False)
    text_featurizer = CharFeaturizer(config.decoder_config)
    speech_featurizer = TFSpeechFeaturizer(config.speech_config)

    model = StreamingTransducer(
        vocabulary_size=text_featurizer.num_classes, **config.model_config
    )
    model._build(speech_featurizer.shape)
    model.summary(line_length=150)
    model.add_featurizers(
        speech_featurizer=speech_featurizer, text_featurizer=text_featurizer
    )

    # Each entry pairs the `timestamp` flag with the message printed once
    # that variant has been converted.
    variants = (
        (False, "Converted successfully with no timestamp"),
        (True, "Converted successfully with timestamp"),
    )
    for use_timestamp, done_message in variants:
        tflite_fn = model.make_tflite_function(timestamp=use_timestamp)
        converter = tf.lite.TFLiteConverter.from_concrete_functions(
            [tflite_fn.get_concrete_function()]
        )
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.experimental_new_converter = True
        # Allow select TensorFlow ops in addition to the TFLite builtins.
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS,
            tf.lite.OpsSet.SELECT_TF_OPS,
        ]
        converter.convert()

        print(done_message)
# A saved-weights path is mandatory: it is passed to `load_weights` below.
# Raise explicitly instead of using `assert`, which is stripped when Python
# runs with `-O` and would let a missing path slip through to `load_weights`.
if not args.saved:
    raise ValueError("args.saved must be set to the path of the saved model weights")

# Choose the test dataset implementation according to `args.tfrecords`.
if args.tfrecords:
    test_dataset = ASRTFRecordTestDataset(
        data_paths=config.learning_config.dataset_config.test_paths,
        tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        stage="test",
        shuffle=False)
else:
    test_dataset = ASRSliceTestDataset(
        data_paths=config.learning_config.dataset_config.test_paths,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        stage="test",
        shuffle=False)

# build model
streaming_transducer = StreamingTransducer(
    vocabulary_size=text_featurizer.num_classes, **config.model_config)
streaming_transducer._build(speech_featurizer.shape)
# Weights are loaded after `_build`, matched by name.
streaming_transducer.load_weights(args.saved, by_name=True)
streaming_transducer.summary(line_length=150)
streaming_transducer.add_featurizers(speech_featurizer, text_featurizer)

# Run the tester over the test dataset using the configured running options.
streaming_transducer_tester = BaseTester(
    config=config.learning_config.running_config, output_name=args.output_name)
streaming_transducer_tester.compile(streaming_transducer)
streaming_transducer_tester.run(test_dataset)
Beispiel #3
0
    "frame_ms": 25,
    "stride_ms": 10,
    "num_feature_bins": 80,
    "feature_type": "log_mel_spectrogram",
    "preemphasis": 0.97,
    "normalize_signal": True,
    "normalize_feature": True,
    "normalize_per_feature": False
})

# Build the model and print its summary.
model = StreamingTransducer(
    vocabulary_size=text_featurizer.num_classes,
    encoder_dmodel=320,
    encoder_nlayers=3,
)
model._build(speech_featurizer.shape)
model.summary(line_length=150)

# Weights are written before the featurizers are attached.
model.save_weights("/tmp/transducer.h5")

model.add_featurizers(
    speech_featurizer=speech_featurizer, text_featurizer=text_featurizer
)

# All-zero dummy input; presumably (batch, time, feature bins, channels) —
# TODO confirm against the model's expected input layout.
features = tf.zeros(shape=[5, 50, 80, 1], dtype=tf.float32)

# Decode the same features with both recognition methods and print each result.
for decode in (model.recognize, model.recognize_beam):
    pred = decode(features)
    print(pred)

# stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# logdir = '/tmp/logs/func/%s' % stamp
# writer = tf.summary.create_file_writer(logdir)
Beispiel #4
0
def test_streaming_transducer():
    """Convert a StreamingTransducer to TFLite and invoke the result once.

    The model is built from ``DEFAULT_YAML`` and converted twice, with
    ``timestamp=False`` and with ``timestamp=True``. The model produced by
    the ``timestamp=False`` conversion is then loaded into a
    ``tf.lite.Interpreter``, fed a random signal, a blank token and
    all-zero states, and invoked; its first output is printed. Nothing is
    asserted, so the test passes as long as no step raises.
    """
    config = Config(DEFAULT_YAML, learning=False)
    text_featurizer = CharFeaturizer(config.decoder_config)
    speech_featurizer = TFSpeechFeaturizer(config.speech_config)

    model = StreamingTransducer(vocabulary_size=text_featurizer.num_classes, **config.model_config)
    model._build(speech_featurizer.shape)
    model.summary(line_length=150)
    model.add_featurizers(speech_featurizer=speech_featurizer, text_featurizer=text_featurizer)

    def convert_to_tflite(timestamp):
        """Convert the model's TFLite function and return the converter output."""
        tflite_fn = model.make_tflite_function(timestamp=timestamp)
        converter = tf.lite.TFLiteConverter.from_concrete_functions(
            [tflite_fn.get_concrete_function()]
        )
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.experimental_new_converter = True
        # Allow select TensorFlow ops in addition to the TFLite builtins.
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS,
            tf.lite.OpsSet.SELECT_TF_OPS,
        ]
        return converter.convert()

    # Only the no-timestamp model is kept for the inference run below.
    tflite_model = convert_to_tflite(timestamp=False)
    print("Converted successfully with no timestamp")

    convert_to_tflite(timestamp=True)
    print("Converted successfully with timestamp")

    interpreter = tf.lite.Interpreter(model_content=tflite_model)
    signal = tf.random.normal([4000])

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # The signal input is resized to the actual signal length before
    # tensors are allocated.
    interpreter.resize_tensor_input(input_details[0]["index"], signal.shape)
    interpreter.allocate_tensors()

    model_config = config.model_config

    # Input 0: the signal itself.
    interpreter.set_tensor(input_details[0]["index"], signal)

    # Input 1: the blank token as a scalar int32.
    blank_token = tf.constant(text_featurizer.blank, dtype=tf.int32)
    interpreter.set_tensor(input_details[1]["index"], blank_token)

    # Inputs 2 and 3: all-zero states sized from the encoder and prediction
    # settings in the model config. The fixed (2, 1) middle axes are
    # presumably (state pair, batch) — TODO confirm against the model.
    encoder_states = tf.zeros(
        [model_config["encoder_nlayers"], 2, 1, model_config["encoder_rnn_units"]],
        dtype=tf.float32,
    )
    interpreter.set_tensor(input_details[2]["index"], encoder_states)

    prediction_states = tf.zeros(
        [model_config["prediction_num_rnns"], 2, 1, model_config["prediction_rnn_units"]],
        dtype=tf.float32,
    )
    interpreter.set_tensor(input_details[3]["index"], prediction_states)

    interpreter.invoke()
    hyp = interpreter.get_tensor(output_details[0]["index"])

    print(hyp)