Example #1
def main(
    config: str = DEFAULT_YAML,
    h5: str = None,
    subwords: bool = False,
    sentence_piece: bool = False,
    output: str = None,
):
    assert h5 and output
    tf.keras.backend.clear_session()
    tf.compat.v1.enable_control_flow_v2()  # TFLite conversion of the recurrent decoder needs v2 control flow

    config = Config(config)
    speech_featurizer, text_featurizer = featurizer_helpers.prepare_featurizers(
        config=config,
        subwords=subwords,
        sentence_piece=sentence_piece,
    )

    conformer = Conformer(**config.model_config,
                          vocabulary_size=text_featurizer.num_classes)
    conformer.make(speech_featurizer.shape)
    conformer.load_weights(h5, by_name=True)
    conformer.summary(line_length=100)
    conformer.add_featurizers(speech_featurizer, text_featurizer)

    exec_helpers.convert_tflite(model=conformer, output=output)
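
None of these snippets show how main is actually invoked; in projects like this the entry points are usually exposed on the command line with python-fire, so the wiring below is a hedged sketch (the script name and flag values are illustrative, not taken from the example):

# Hypothetical CLI wiring -- an assumption, not shown in the example above.
import fire

if __name__ == "__main__":
    fire.Fire(main)
    # e.g. python convert_tflite.py --h5 /path/to/model.h5 --output /path/to/model.tflite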
Example #2
def main(
    config: str = DEFAULT_YAML,
    h5: str = None,
    sentence_piece: bool = False,
    subwords: bool = False,
    output_dir: str = None,
):
    assert h5 and output_dir
    config = Config(config)
    tf.random.set_seed(0)
    tf.keras.backend.clear_session()

    speech_featurizer, text_featurizer = featurizer_helpers.prepare_featurizers(
        config=config,
        subwords=subwords,
        sentence_piece=sentence_piece,
    )

    # build model
    conformer = Conformer(**config.model_config,
                          vocabulary_size=text_featurizer.num_classes)
    conformer.make(speech_featurizer.shape)
    conformer.load_weights(h5, by_name=True)
    conformer.summary(line_length=100)
    conformer.add_featurizers(speech_featurizer, text_featurizer)

    class ConformerModule(tf.Module):
        def __init__(self, model: Conformer, name=None):
            super().__init__(name=name)
            self.model = model
            self.num_rnns = config.model_config["prediction_num_rnns"]
            self.rnn_units = config.model_config["prediction_rnn_units"]
            self.rnn_nstates = 2 if config.model_config[
                "prediction_rnn_type"] == "lstm" else 1

        @tf.function(
            input_signature=[tf.TensorSpec(shape=[None], dtype=tf.float32)])
        def pred(self, signal):
            # start decoding from a zero token with zero-initialized prediction-network RNN states
            predicted = tf.constant(0, dtype=tf.int32)
            states = tf.zeros(
                [self.num_rnns, self.rnn_nstates, 1, self.rnn_units],
                dtype=tf.float32)
            features = self.model.speech_featurizer.tf_extract(signal)
            encoded = self.model.encoder_inference(features)
            hypothesis = self.model._perform_greedy(encoded,
                                                    tf.shape(encoded)[0],
                                                    predicted,
                                                    states,
                                                    tflite=False)
            transcript = self.model.text_featurizer.indices2upoints(
                hypothesis.prediction)
            return transcript

    module = ConformerModule(model=conformer)
    tf.saved_model.save(module,
                        export_dir=output_dir,
                        signatures=module.pred.get_concrete_function())
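
To sanity-check the export, the module can be restored with tf.saved_model.load. A minimal reload sketch follows; the waveform is a placeholder (a 16 kHz mono signal is assumed), and the signature key and keyword name are assumptions about the default export:

# Reload sketch -- output_dir is the directory written above.
import tensorflow as tf

loaded = tf.saved_model.load(output_dir)
dummy_signal = tf.zeros([16000], dtype=tf.float32)  # placeholder: 1 s of silence at an assumed 16 kHz
transcript = loaded.pred(dummy_signal)  # the tf.function attribute survives the export
# alternatively via the exported signature (key and kwarg name are assumptions):
# transcript = loaded.signatures["serving_default"](signal=dummy_signal)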
Example #3
def main(
    config: str = DEFAULT_YAML,
    saved: str = None,
    mxp: bool = False,
    bs: int = None,
    sentence_piece: bool = False,
    subwords: bool = False,
    device: int = 0,
    cpu: bool = False,
    output: str = "test.tsv",
):
    assert saved and output
    tf.random.set_seed(0)
    tf.keras.backend.clear_session()
    tf.config.optimizer.set_experimental_options({"auto_mixed_precision": mxp})
    env_util.setup_devices([device], cpu=cpu)

    config = Config(config)

    speech_featurizer, text_featurizer = featurizer_helpers.prepare_featurizers(
        config=config,
        subwords=subwords,
        sentence_piece=sentence_piece,
    )

    conformer = Conformer(**config.model_config,
                          vocabulary_size=text_featurizer.num_classes)
    conformer.make(speech_featurizer.shape)
    conformer.load_weights(saved, by_name=True)
    conformer.summary(line_length=100)
    conformer.add_featurizers(speech_featurizer, text_featurizer)

    test_dataset = dataset_helpers.prepare_testing_datasets(
        config=config,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer)
    batch_size = bs or config.learning_config.running_config.batch_size
    test_data_loader = test_dataset.create(batch_size)

    exec_helpers.run_testing(model=conformer,
                             test_dataset=test_dataset,
                             test_data_loader=test_data_loader,
                             output=output)
Example #4
global_batch_size = args.bs or config.learning_config.running_config.batch_size
global_batch_size *= strategy.num_replicas_in_sync

train_data_loader = train_dataset.create(global_batch_size)
eval_data_loader = eval_dataset.create(global_batch_size)

with strategy.scope():
    # build model
    conformer = Conformer(**config.model_config,
                          vocabulary_size=text_featurizer.num_classes)
    conformer.make(speech_featurizer.shape,
                   prediction_shape=text_featurizer.prepand_shape,
                   batch_size=global_batch_size)
    if args.pretrained:
        conformer.load_weights(args.pretrained,
                               by_name=True,
                               skip_mismatch=True)
    conformer.summary(line_length=100)
    optimizer = tf.keras.optimizers.Adam(
        TransformerSchedule(
            d_model=conformer.dmodel,
            warmup_steps=config.learning_config.optimizer_config.pop(
                "warmup_steps", 10000),
            max_lr=(0.05 / math.sqrt(conformer.dmodel))),
        **config.learning_config.optimizer_config)
    conformer.compile(optimizer=optimizer,
                      experimental_steps_per_execution=args.spx,
                      global_batch_size=global_batch_size,
                      blank=text_featurizer.blank)

callbacks = [
Example #5
    print("Loading SentencePiece model ...")
    text_featurizer = SentencePieceFeaturizer.load_from_file(
        config.decoder_config, args.subwords)
elif args.subwords and os.path.exists(args.subwords):
    print("Loading subwords ...")
    text_featurizer = SubwordFeaturizer.load_from_file(config.decoder_config,
                                                       args.subwords)
else:
    text_featurizer = CharFeaturizer(config.decoder_config)
text_featurizer.decoder_config.beam_width = args.beam_width

# build model
conformer = Conformer(**config.model_config,
                      vocabulary_size=text_featurizer.num_classes)
conformer.make(speech_featurizer.shape)
conformer.load_weights(args.saved, by_name=True, skip_mismatch=True)
conformer.summary(line_length=120)
conformer.add_featurizers(speech_featurizer, text_featurizer)

signal = read_raw_audio(args.filename)
features = speech_featurizer.tf_extract(signal)
input_length = math_util.get_reduced_length(
    tf.shape(features)[0], conformer.time_reduction_factor)

if args.beam_width:
    transcript = conformer.recognize_beam(features[None, ...],
                                          input_length[None, ...])
    print("Transcript:", transcript[0].numpy().decode("UTF-8"))
elif args.timestamp:
    transcript, stime, etime, _, _ = conformer.recognize_tflite_with_timestamp(
        signal, tf.constant(text_featurizer.blank, dtype=tf.int32),
Example #6
    print("Loading SentencePiece model ...")
    text_featurizer = SentencePieceFeaturizer.load_from_file(
        config.decoder_config, args.subwords)
elif args.subwords and os.path.exists(args.subwords):
    print("Loading subwords ...")
    text_featurizer = SubwordFeaturizer.load_from_file(config.decoder_config,
                                                       args.subwords)
else:
    text_featurizer = CharFeaturizer(config.decoder_config)
text_featurizer.decoder_config.beam_width = args.beam_width

# build model
conformer = Conformer(**config.model_config,
                      vocabulary_size=text_featurizer.num_classes)
conformer.make(speech_featurizer.shape)
conformer.load_weights(args.saved)
conformer.summary(line_length=120)
conformer.add_featurizers(speech_featurizer, text_featurizer)

signal = read_raw_audio(args.filename)
features = speech_featurizer.tf_extract(signal)
input_length = math_util.get_reduced_length(
    tf.shape(features)[0], conformer.time_reduction_factor)

if args.beam_width:
    transcript = conformer.recognize_beam(features[None, ...],
                                          input_length[None, ...])
    print("Transcript:", transcript[0].numpy().decode("UTF-8"))
elif args.timestamp:
    transcript, stime, etime, _, _ = conformer.recognize_tflite_with_timestamp(
        signal, tf.constant(text_featurizer.blank, dtype=tf.int32),
Example #7
assert args.saved and args.output

config = Config(args.config)
speech_featurizer = TFSpeechFeaturizer(config.speech_config)

if args.subwords:
    text_featurizer = SubwordFeaturizer(config.decoder_config)
else:
    text_featurizer = CharFeaturizer(config.decoder_config)

# build model
conformer = Conformer(**config.model_config,
                      vocabulary_size=text_featurizer.num_classes)
conformer.make(speech_featurizer.shape)
conformer.load_weights(args.saved, by_name=True)
conformer.summary(line_length=100)
conformer.add_featurizers(speech_featurizer, text_featurizer)

concrete_func = conformer.make_tflite_function().get_concrete_function()
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
converter.experimental_new_converter = True
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,  # fall back to TF kernels for ops without TFLite builtins
]
tflite_model = converter.convert()

args.output = file_util.preprocess_paths(args.output)
with open(args.output, "wb") as tflite_out:
    tflite_out.write(tflite_model)
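
A quick way to verify the written file is to open it with the TFLite Interpreter and inspect its tensors; only the generic Interpreter calls are assumed here, since the exact input/output layout depends on make_tflite_function:

# Sketch: inspect the converted model (shapes and names depend on make_tflite_function).
interpreter = tf.lite.Interpreter(model_path=args.output)
interpreter.allocate_tensors()
print(interpreter.get_input_details())
print(interpreter.get_output_details())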
Example #8
def main(
    config: str = DEFAULT_YAML,
    tfrecords: bool = False,
    sentence_piece: bool = False,
    subwords: bool = True,
    bs: int = None,
    spx: int = 1,
    metadata: str = None,
    static_length: bool = False,
    devices: list = [0],
    mxp: bool = False,
    pretrained: str = None,
):
    tf.keras.backend.clear_session()
    tf.config.optimizer.set_experimental_options({"auto_mixed_precision": mxp})
    strategy = env_util.setup_strategy(devices)

    config = Config(config)

    speech_featurizer, text_featurizer = featurizer_helpers.prepare_featurizers(
        config=config,
        subwords=subwords,
        sentence_piece=sentence_piece,
    )

    train_dataset, eval_dataset = dataset_helpers.prepare_training_datasets(
        config=config,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        tfrecords=tfrecords,
        metadata=metadata,
    )

    if not static_length:
        speech_featurizer.reset_length()
        text_featurizer.reset_length()

    train_data_loader, eval_data_loader, global_batch_size = dataset_helpers.prepare_training_data_loaders(
        config=config,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        strategy=strategy,
        batch_size=bs,
    )

    with strategy.scope():
        conformer = Conformer(**config.model_config,
                              vocabulary_size=text_featurizer.num_classes)
        conformer.make(speech_featurizer.shape,
                       prediction_shape=text_featurizer.prepand_shape,
                       batch_size=global_batch_size)
        if pretrained:
            conformer.load_weights(pretrained,
                                   by_name=True,
                                   skip_mismatch=True)
        conformer.summary(line_length=100)
        optimizer = tf.keras.optimizers.Adam(
            TransformerSchedule(
                d_model=conformer.dmodel,
                warmup_steps=config.learning_config.optimizer_config.pop(
                    "warmup_steps", 10000),
                max_lr=(0.05 / math.sqrt(conformer.dmodel)),
            ), **config.learning_config.optimizer_config)
        conformer.compile(
            optimizer=optimizer,
            experimental_steps_per_execution=spx,
            global_batch_size=global_batch_size,
            blank=text_featurizer.blank,
        )

    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            **config.learning_config.running_config.checkpoint),
        tf.keras.callbacks.experimental.BackupAndRestore(
            config.learning_config.running_config.states_dir),
        tf.keras.callbacks.TensorBoard(
            **config.learning_config.running_config.tensorboard),
    ]

    conformer.fit(
        train_data_loader,
        epochs=config.learning_config.running_config.num_epochs,
        validation_data=eval_data_loader,
        callbacks=callbacks,
        steps_per_epoch=train_dataset.total_steps,
        validation_steps=eval_dataset.total_steps
        if eval_data_loader else None,
    )
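
TransformerSchedule itself is not shown in these examples. Assuming it follows the usual Noam-style transformer schedule with an optional max_lr cap (an assumption, not confirmed by the snippets), a minimal stand-in would look like:

# Hypothetical Noam-style schedule -- a stand-in for TransformerSchedule, not its actual source.
import tensorflow as tf


class NoamLikeSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=10000, max_lr=None):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = tf.cast(warmup_steps, tf.float32)
        self.max_lr = max_lr

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        # lr = d_model^-0.5 * min(step^-0.5, step * warmup^-1.5), optionally capped at max_lr
        lr = tf.math.rsqrt(self.d_model) * tf.math.minimum(
            tf.math.rsqrt(step), step * (self.warmup_steps ** -1.5))
        if self.max_lr is not None:
            lr = tf.math.minimum(lr, self.max_lr)
        return lr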