def save_to_file(
    self,
    filename: str = None,
):
    filename = self.decoder_config.vocabulary if filename is None else file_util.preprocess_paths(filename)
    filename_prefix = os.path.splitext(filename)[0]
    return self.subwords.save_to_file(filename_prefix)
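Every snippet on this page routes user-supplied paths through file_util.preprocess_paths before touching the filesystem. For orientation, here is a minimal sketch of what such a helper plausibly does; it is an assumption for illustration, not the library's actual implementation.

import os
from typing import List, Union

def preprocess_paths(paths: Union[str, List[str], None], isdir: bool = False):
    # Hypothetical normalizer: expand "~" and relative paths; optionally ensure the directory exists.
    if paths is None:
        return None
    if isinstance(paths, (list, tuple)):
        return [preprocess_paths(p, isdir=isdir) for p in paths]
    path = os.path.abspath(os.path.expanduser(paths))
    if isdir:
        os.makedirs(path, exist_ok=True)
    return path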
Example #2
def __init__(self, config: dict = None):
    if not config:
        config = {}
    self.stage = config.pop("stage", None)
    self.data_paths = file_util.preprocess_paths(config.pop("data_paths", None))
    self.tfrecords_dir = file_util.preprocess_paths(config.pop("tfrecords_dir", None), isdir=True)
    self.tfrecords_shards = config.pop("tfrecords_shards", 16)
    self.shuffle = config.pop("shuffle", False)
    self.cache = config.pop("cache", False)
    self.drop_remainder = config.pop("drop_remainder", True)
    self.buffer_size = config.pop("buffer_size", 100)
    self.use_tf = config.pop("use_tf", False)
    self.augmentations = Augmentation(config.pop("augmentation_config", {}))
    for k, v in config.items():
        setattr(self, k, v)
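The keys popped above describe the dataset section of the configuration this constructor consumes. A minimal sketch of such a dict, with illustrative (assumed) values:

dataset_config = {
    "stage": "train",
    "data_paths": ["/data/librispeech/train.tsv"],
    "tfrecords_dir": "/data/tfrecords",
    "tfrecords_shards": 16,
    "shuffle": True,
    "cache": True,
    "drop_remainder": True,
    "buffer_size": 100,
    "use_tf": False,
    "augmentation_config": {},
}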
Example #3
def __init__(self, data: Union[str, dict]):
    config = data if isinstance(data, dict) else file_util.load_yaml(file_util.preprocess_paths(data))
    self.speech_config = config.pop("speech_config", {})
    self.decoder_config = config.pop("decoder_config", {})
    self.model_config = config.pop("model_config", {})
    self.learning_config = LearningConfig(config.pop("learning_config", {}))
    for k, v in config.items():
        setattr(self, k, v)
Example #4
@classmethod
def load_from_file(
    cls,
    decoder_config: dict,
    filename: str = None,
):
    dconf = DecoderConfig(decoder_config.copy())
    filename = dconf.vocabulary if filename is None else file_util.preprocess_paths(filename)
    filename_prefix = os.path.splitext(filename)[0]
    subwords = tds.deprecated.text.SubwordTextEncoder.load_from_file(filename_prefix)
    return cls(decoder_config, subwords)
Example #5
def __init__(self, config: dict = None):
    if not config:
        config = {}
    self.beam_width = config.pop("beam_width", 0)
    self.blank_at_zero = config.pop("blank_at_zero", True)
    self.norm_score = config.pop("norm_score", True)
    self.lm_config = config.pop("lm_config", {})

    self.vocabulary = file_util.preprocess_paths(config.pop("vocabulary", None))
    self.target_vocab_size = config.pop("target_vocab_size", 1024)
    self.max_subword_length = config.pop("max_subword_length", 4)
    self.output_path_prefix = file_util.preprocess_paths(config.pop("output_path_prefix", None))
    self.model_type = config.pop("model_type", None)
    self.corpus_files = file_util.preprocess_paths(config.pop("corpus_files", []))
    self.max_corpus_chars = config.pop("max_corpus_chars", None)
    self.reserved_tokens = config.pop("reserved_tokens", None)

    for k, v in config.items():
        setattr(self, k, v)
Example #6
@classmethod
def load_from_file(
    cls,
    decoder_config: dict,
    filename: str = None,
):
    if filename is not None:
        filename_prefix = os.path.splitext(file_util.preprocess_paths(filename))[0]
    else:
        filename_prefix = decoder_config.get("output_path_prefix", None)
    processor = sp.SentencePieceProcessor()
    processor.load(filename_prefix + ".model")
    return cls(decoder_config, processor)
Example #7
def convert_tflite(
    model: BaseModel,
    output: str,
):
    concrete_func = model.make_tflite_function().get_concrete_function()
    converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
    converter.experimental_new_converter = True
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
    tflite_model = converter.convert()

    output = file_util.preprocess_paths(output)
    with open(output, "wb") as tflite_out:
        tflite_out.write(tflite_model)
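A hypothetical call, mirroring the inline conversion in Example #14 below; the model construction and file names here are assumptions for illustration:

config = Config("config.yml")
model = Jasper(**config.model_config, vocabulary_size=text_featurizer.num_classes)
model.make(speech_featurizer.shape)
model.load_weights("latest.h5", by_name=True)
convert_tflite(model, output="jasper.tflite")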
Example #8
def load_metadata(self, metadata: Union[str, dict] = None):
    if metadata is None:
        return
    content = None
    if isinstance(metadata, dict):
        content = metadata
    else:
        metadata = file_util.preprocess_paths(metadata)
        if tf.io.gfile.exists(metadata):
            logger.info(f"Loading metadata from {metadata} ...")
            with tf.io.gfile.GFile(metadata, "r") as f:
                try:
                    content = json.loads(f.read()).get(self.stage, {})
                except json.JSONDecodeError:
                    raise ValueError(f"File {metadata} must be in json format")
    if not content:
        return
    self.speech_featurizer.update_length(int(content.get("max_input_length", 0)))
    self.text_featurizer.update_length(int(content.get("max_label_length", 0)))
    self.total_steps = int(content.get("num_entries", 0))
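The metadata file read above is keyed by stage, the same layout that save_metadata in Example #9 below writes. A sketch of its JSON content, with illustrative values:

{
  "train": {
    "max_input_length": 3000,
    "max_label_length": 350,
    "num_entries": 281241
  }
}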
Example #9
def save_metadata(self, metadata: str = None):
    if metadata is None:
        return
    metadata = file_util.preprocess_paths(metadata)
    if tf.io.gfile.exists(metadata):
        with tf.io.gfile.GFile(metadata, "r") as f:
            try:
                content = json.loads(f.read())
            except json.JSONDecodeError:
                raise ValueError(f"File {metadata} is currently not in json format. Please update the file")
    else:
        content = {}
    content[self.stage] = {
        "max_input_length": self.speech_featurizer.max_length,
        "max_label_length": self.text_featurizer.max_length,
        "num_entries": self.total_steps,
    }
    with tf.io.gfile.GFile(metadata, "w") as f:
        f.write(json.dumps(content, indent=2))
    logger.info(f"Metadata written to {metadata}")
Example #10
def __init__(self, config: dict = None):
    if not config:
        config = {}
    self.batch_size = config.pop("batch_size", 1)
    self.accumulation_steps = config.pop("accumulation_steps", 1)
    self.num_epochs = config.pop("num_epochs", 20)
    for k, v in config.items():
        setattr(self, k, v)
        if k == "checkpoint":
            if v and v.get("filepath"):
                file_util.preprocess_paths(v.get("filepath"))
        elif k == "states_dir" and v:
            file_util.preprocess_paths(v)
        elif k == "tensorboard":
            if v and v.get("log_dir"):
                file_util.preprocess_paths(v.get("log_dir"))
Example #11
def run_testing(
    model: BaseModel,
    test_dataset: ASRSliceDataset,
    test_data_loader: tf.data.Dataset,
    output: str,
):
    with file_util.save_file(file_util.preprocess_paths(output)) as filepath:
        overwrite = True
        if tf.io.gfile.exists(filepath):
            overwrite = input(f"Overwrite existing result file {filepath} ? (y/n): ").lower() == "y"
        if overwrite:
            results = model.predict(test_data_loader, verbose=1)
            logger.info(f"Saving result to {output} ...")
            with open(filepath, "w") as openfile:
                openfile.write("PATH\tDURATION\tGROUNDTRUTH\tGREEDY\tBEAMSEARCH\n")
                progbar = tqdm(total=test_dataset.total_steps, unit="batch")
                for i, pred in enumerate(results):
                    groundtruth, greedy, beamsearch = [x.decode("utf-8") for x in pred]
                    path, duration, _ = test_dataset.entries[i]
                    openfile.write(f"{path}\t{duration}\t{groundtruth}\t{greedy}\t{beamsearch}\n")
                    progbar.update(1)
                progbar.close()
        app_util.evaluate_results(filepath)
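A hypothetical invocation, following the same construction shown inline in Example #12 below (dataset, model, and output names are assumptions):

test_dataset = ASRSliceDataset(
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    **vars(config.learning_config.test_dataset_config),
)
test_data_loader = test_dataset.create(config.learning_config.running_config.batch_size)
run_testing(model, test_dataset, test_data_loader, output="test_results.tsv")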
Example #12
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        **vars(config.learning_config.test_dataset_config))

# build model
jasper = Jasper(**config.model_config,
                vocabulary_size=text_featurizer.num_classes)
jasper.make(speech_featurizer.shape)
jasper.load_weights(args.saved)
jasper.summary(line_length=100)
jasper.add_featurizers(speech_featurizer, text_featurizer)

batch_size = args.bs or config.learning_config.running_config.batch_size
test_data_loader = test_dataset.create(batch_size)

with file_util.save_file(file_util.preprocess_paths(args.output)) as filepath:
    overwrite = True
    if tf.io.gfile.exists(filepath):
        overwrite = input(
            f"Overwrite existing result file {filepath} ? (y/n): ").lower(
            ) == "y"
    if overwrite:
        results = jasper.predict(test_data_loader, verbose=1)
        print(f"Saving result to {args.output} ...")
        with open(filepath, "w") as openfile:
            openfile.write("PATH\tDURATION\tGROUNDTRUTH\tGREEDY\tBEAMSEARCH\n")
            progbar = tqdm(total=test_dataset.total_steps, unit="batch")
            for i, pred in enumerate(results):
                groundtruth, greedy, beamsearch = [
                    x.decode('utf-8') for x in pred
                ]
Example #13
parser.add_argument("--tfrecords_dir", type=str, default=None, help="Directory to tfrecords")

parser.add_argument("--tfrecords_shards", type=int, default=16, help="Number of tfrecords shards")

parser.add_argument("--shuffle", default=False, action="store_true", help="Shuffle data or not")

parser.add_argument("--sentence_piece", default=False, action="store_true", help="Whether to use `SentencePiece` model")

parser.add_argument("--subwords", type=str, default=None, help="Path to file that stores generated subwords")

parser.add_argument("transcripts", nargs="+", type=str, default=None, help="Paths to transcript files")

args = parser.parse_args()

transcripts = preprocess_paths(args.transcripts)
tfrecords_dir = preprocess_paths(args.tfrecords_dir, isdir=True)

config = Config(args.config)

speech_featurizer = speech_featurizers.TFSpeechFeaturizer(config.speech_config)

if args.sentence_piece:
    print("Loading SentencePiece model ...")
    text_featurizer = text_featurizers.SentencePieceFeaturizer.load_from_file(config.decoder_config, args.subwords)
elif args.subwords and os.path.exists(args.subwords):
    print("Loading subwords ...")
    text_featurizer = text_featurizers.SubwordFeaturizer.load_from_file(config.decoder_config, args.subwords)
else:
    text_featurizer = text_featurizers.CharFeaturizer(config.decoder_config)
Example #14
assert args.saved and args.output

config = Config(args.config)
speech_featurizer = TFSpeechFeaturizer(config.speech_config)

if args.subwords:
    text_featurizer = SubwordFeaturizer(config.decoder_config)
else:
    text_featurizer = CharFeaturizer(config.decoder_config)

# build model
jasper = Jasper(**config.model_config,
                vocabulary_size=text_featurizer.num_classes)
jasper.make(speech_featurizer.shape)
jasper.load_weights(args.saved, by_name=True)
jasper.summary(line_length=100)
jasper.add_featurizers(speech_featurizer, text_featurizer)

concrete_func = jasper.make_tflite_function().get_concrete_function()
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
converter.experimental_new_converter = True
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS
]
tflite_model = converter.convert()

args.output = file_util.preprocess_paths(args.output)
with open(args.output, "wb") as tflite_out:
    tflite_out.write(tflite_model)
parser.add_argument("--dir",
                    "-d",
                    type=str,
                    default=None,
                    help="Directory of dataset")

parser.add_argument("output",
                    type=str,
                    default=None,
                    help="The output .tsv transcript file path")

args = parser.parse_args()

assert args.dir and args.output

args.dir = preprocess_paths(args.dir, isdir=True)
args.output = preprocess_paths(args.output)

transcripts = []

text_files = glob.glob(os.path.join(args.dir, "**", "*.txt"), recursive=True)

for text_file in tqdm(text_files, desc="[Loading]"):
    current_dir = os.path.dirname(text_file)
    with open(text_file, "r", encoding="utf-8") as txt:
        lines = txt.read().splitlines()
    for line in lines:
        line = line.split(" ", maxsplit=1)
        audio_file = os.path.join(current_dir, line[0] + ".flac")
        y, sr = librosa.load(audio_file, sr=None)
        duration = librosa.get_duration(y, sr)
Example #16
parser.add_argument("--subwords",
                    type=str,
                    default=None,
                    help="Path to file that stores generated subwords")

parser.add_argument("transcripts",
                    nargs="+",
                    type=str,
                    default=None,
                    help="Paths to transcript files")

args = parser.parse_args()

assert args.metadata is not None, "metadata must be defined"

transcripts = preprocess_paths(args.transcripts)

config = Config(args.config)

speech_featurizer = TFSpeechFeaturizer(config.speech_config)

if args.sentence_piece:
    print("Loading SentencePiece model ...")
    text_featurizer = SentencePieceFeaturizer.load_from_file(
        config.decoder_config, args.subwords)
elif args.subwords and os.path.exists(args.subwords):
    print("Loading subwords ...")
    text_featurizer = SubwordFeaturizer.load_from_file(config.decoder_config,
                                                       args.subwords)

dataset = ASRDataset(