def save_to_file(
    self,
    filename: str = None,
):
    filename = self.decoder_config.vocabulary if filename is None else file_util.preprocess_paths(filename)
    filename_prefix = os.path.splitext(filename)[0]
    return self.subwords.save_to_file(filename_prefix)
def __init__(self, config: dict = None):
    if not config:
        config = {}
    self.stage = config.pop("stage", None)
    self.data_paths = file_util.preprocess_paths(config.pop("data_paths", None))
    self.tfrecords_dir = file_util.preprocess_paths(config.pop("tfrecords_dir", None), isdir=True)
    self.tfrecords_shards = config.pop("tfrecords_shards", 16)
    self.shuffle = config.pop("shuffle", False)
    self.cache = config.pop("cache", False)
    self.drop_remainder = config.pop("drop_remainder", True)
    self.buffer_size = config.pop("buffer_size", 100)
    self.use_tf = config.pop("use_tf", False)
    self.augmentations = Augmentation(config.pop("augmentation_config", {}))
    for k, v in config.items():
        setattr(self, k, v)
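# A minimal dict the dataset-config __init__ above would accept (a sketch;
# all paths and values here are hypothetical). Any keys beyond the known ones
# become plain attributes:
dataset_config = {
    "stage": "train",
    "data_paths": ["transcripts/train.tsv"],
    "tfrecords_dir": "tfrecords",
    "tfrecords_shards": 16,
    "shuffle": True,
    "augmentation_config": {},
}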
def __init__(self, data: Union[str, dict]):
    config = data if isinstance(data, dict) else file_util.load_yaml(file_util.preprocess_paths(data))
    self.speech_config = config.pop("speech_config", {})
    self.decoder_config = config.pop("decoder_config", {})
    self.model_config = config.pop("model_config", {})
    self.learning_config = LearningConfig(config.pop("learning_config", {}))
    for k, v in config.items():
        setattr(self, k, v)
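# A minimal usage sketch, assuming the __init__ above is
# tensorflow_asr.configs.config.Config and that "config.yml" exists on disk:
from tensorflow_asr.configs.config import Config

config = Config("config.yml")  # path normalized via preprocess_paths, then parsed as YAML
print(config.model_config)  # extra top-level YAML keys also become attributes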
@classmethod
def load_from_file(
    cls,
    decoder_config: dict,
    filename: str = None,
):
    dconf = DecoderConfig(decoder_config.copy())
    filename = dconf.vocabulary if filename is None else file_util.preprocess_paths(filename)
    filename_prefix = os.path.splitext(filename)[0]
    subwords = tds.deprecated.text.SubwordTextEncoder.load_from_file(filename_prefix)
    return cls(decoder_config, subwords)
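# A round-trip sketch pairing load_from_file with save_to_file above, assuming
# both live on tensorflow_asr.featurizers.text_featurizers.SubwordFeaturizer;
# the vocabulary file names are hypothetical:
from tensorflow_asr.featurizers.text_featurizers import SubwordFeaturizer

decoder_config = {"vocabulary": "english.subwords"}  # hypothetical vocab path
featurizer = SubwordFeaturizer.load_from_file(decoder_config)  # filename=None falls back to dconf.vocabulary
featurizer.save_to_file("english_backup.subwords")  # written under the prefix "english_backup"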
def __init__(self, config: dict = None):
    if not config:
        config = {}
    self.beam_width = config.pop("beam_width", 0)
    self.blank_at_zero = config.pop("blank_at_zero", True)
    self.norm_score = config.pop("norm_score", True)
    self.lm_config = config.pop("lm_config", {})
    self.vocabulary = file_util.preprocess_paths(config.pop("vocabulary", None))
    self.target_vocab_size = config.pop("target_vocab_size", 1024)
    self.max_subword_length = config.pop("max_subword_length", 4)
    self.output_path_prefix = file_util.preprocess_paths(config.pop("output_path_prefix", None))
    self.model_type = config.pop("model_type", None)
    self.corpus_files = file_util.preprocess_paths(config.pop("corpus_files", []))
    self.max_corpus_chars = config.pop("max_corpus_chars", None)
    self.reserved_tokens = config.pop("reserved_tokens", None)
    for k, v in config.items():
        setattr(self, k, v)
@classmethod
def load_from_file(
    cls,
    decoder_config: dict,
    filename: str = None,
):
    if filename is not None:
        filename_prefix = os.path.splitext(file_util.preprocess_paths(filename))[0]
    else:
        filename_prefix = decoder_config.get("output_path_prefix", None)
    processor = sp.SentencePieceProcessor()
    processor.load(filename_prefix + ".model")
    return cls(decoder_config, processor)
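# The underlying sentencepiece round trip used above, shown directly (the
# model file name is hypothetical):
import sentencepiece as sp

processor = sp.SentencePieceProcessor()
processor.load("subwords.model")  # load_from_file strips the extension and appends ".model"
ids = processor.encode_as_ids("hello world")
print(processor.decode_ids(ids))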
def convert_tflite(
    model: BaseModel,
    output: str,
):
    concrete_func = model.make_tflite_function().get_concrete_function()
    converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
    converter.experimental_new_converter = True
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.target_spec.supported_ops = [
        tf.lite.OpsSet.TFLITE_BUILTINS,  # native TFLite ops
        tf.lite.OpsSet.SELECT_TF_OPS,  # fall back to TF ops the converter cannot lower
    ]
    tflite_model = converter.convert()
    output = file_util.preprocess_paths(output)
    with open(output, "wb") as tflite_out:
        tflite_out.write(tflite_model)
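# A quick sanity check of the file written by convert_tflite above, using the
# stock TFLite interpreter ("model.tflite" is a placeholder path):
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()
print(interpreter.get_input_details())  # expected input tensors and shapes
print(interpreter.get_output_details())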
def load_metadata(self, metadata: Union[str, dict] = None):
    if metadata is None:
        return
    content = None
    if isinstance(metadata, dict):
        content = metadata
    else:
        metadata = file_util.preprocess_paths(metadata)
        if tf.io.gfile.exists(metadata):
            logger.info(f"Loading metadata from {metadata} ...")
            with tf.io.gfile.GFile(metadata, "r") as f:
                try:
                    content = json.loads(f.read()).get(self.stage, {})
                except json.JSONDecodeError:
                    raise ValueError(f"File {metadata} must be in json format")
    if not content:
        return
    self.speech_featurizer.update_length(int(content.get("max_input_length", 0)))
    self.text_featurizer.update_length(int(content.get("max_label_length", 0)))
    self.total_steps = int(content.get("num_entries", 0))
def save_metadata(self, metadata: str = None):
    if metadata is None:
        return
    metadata = file_util.preprocess_paths(metadata)
    if tf.io.gfile.exists(metadata):
        with tf.io.gfile.GFile(metadata, "r") as f:
            try:
                content = json.loads(f.read())
            except json.JSONDecodeError:
                raise ValueError(f"File {metadata} is currently not in json format. Please update the file")
    else:
        content = {}
    content[self.stage] = {
        "max_input_length": self.speech_featurizer.max_length,
        "max_label_length": self.text_featurizer.max_length,
        "num_entries": self.total_steps,
    }
    with tf.io.gfile.GFile(metadata, "w") as f:
        f.write(json.dumps(content, indent=2))
    logger.info(f"Metadata written to {metadata}")
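# Shape of the metadata file that save_metadata writes and load_metadata reads,
# keyed by stage (values here are illustrative):
# {
#   "train": {
#     "max_input_length": 3200,
#     "max_label_length": 250,
#     "num_entries": 281241
#   }
# }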
def __init__(self, config: dict = None):
    if not config:
        config = {}
    self.batch_size = config.pop("batch_size", 1)
    self.accumulation_steps = config.pop("accumulation_steps", 1)
    self.num_epochs = config.pop("num_epochs", 20)
    for k, v in config.items():
        setattr(self, k, v)
        # the preprocess_paths return values below are discarded; the calls
        # appear to be made only for their side effects
        if k == "checkpoint":
            if v and v.get("filepath"):
                file_util.preprocess_paths(v.get("filepath"))
        elif k == "states_dir" and v:
            file_util.preprocess_paths(v)
        elif k == "tensorboard":
            if v and v.get("log_dir"):
                file_util.preprocess_paths(v.get("log_dir"))
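# A dict the running-config __init__ above would accept (a sketch; all paths
# are hypothetical). Keys beyond the three popped ones are stored verbatim:
running_config = {
    "batch_size": 4,
    "num_epochs": 20,
    "checkpoint": {"filepath": "checkpoints/{epoch:02d}.h5"},
    "states_dir": "states",
    "tensorboard": {"log_dir": "tensorboard"},
}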
def run_testing(
    model: BaseModel,
    test_dataset: ASRSliceDataset,
    test_data_loader: tf.data.Dataset,
    output: str,
):
    with file_util.save_file(file_util.preprocess_paths(output)) as filepath:
        overwrite = True
        if tf.io.gfile.exists(filepath):
            overwrite = input(f"Overwrite existing result file {filepath} ? (y/n): ").lower() == "y"
        if overwrite:
            results = model.predict(test_data_loader, verbose=1)
            logger.info(f"Saving result to {output} ...")
            with open(filepath, "w") as openfile:
                openfile.write("PATH\tDURATION\tGROUNDTRUTH\tGREEDY\tBEAMSEARCH\n")
                progbar = tqdm(total=test_dataset.total_steps, unit="batch")
                for i, pred in enumerate(results):
                    groundtruth, greedy, beamsearch = [x.decode("utf-8") for x in pred]
                    path, duration, _ = test_dataset.entries[i]
                    openfile.write(f"{path}\t{duration}\t{groundtruth}\t{greedy}\t{beamsearch}\n")
                    progbar.update(1)
                progbar.close()
            app_util.evaluate_results(filepath)
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    **vars(config.learning_config.test_dataset_config),
)

# build model
jasper = Jasper(**config.model_config, vocabulary_size=text_featurizer.num_classes)
jasper.make(speech_featurizer.shape)
jasper.load_weights(args.saved)
jasper.summary(line_length=100)
jasper.add_featurizers(speech_featurizer, text_featurizer)

batch_size = args.bs or config.learning_config.running_config.batch_size
test_data_loader = test_dataset.create(batch_size)

with file_util.save_file(file_util.preprocess_paths(args.output)) as filepath:
    overwrite = True
    if tf.io.gfile.exists(filepath):
        overwrite = input(f"Overwrite existing result file {filepath} ? (y/n): ").lower() == "y"
    if overwrite:
        results = jasper.predict(test_data_loader, verbose=1)
        print(f"Saving result to {args.output} ...")
        with open(filepath, "w") as openfile:
            openfile.write("PATH\tDURATION\tGROUNDTRUTH\tGREEDY\tBEAMSEARCH\n")
            progbar = tqdm(total=test_dataset.total_steps, unit="batch")
            for i, pred in enumerate(results):
                groundtruth, greedy, beamsearch = [x.decode("utf-8") for x in pred]
parser.add_argument("--tfrecords_dir", type=str, default=None, help="Directory to tfrecords") parser.add_argument("--tfrecords_shards", type=int, default=16, help="Number of tfrecords shards") parser.add_argument("--shuffle", default=False, action="store_true", help="Shuffle data or not") parser.add_argument("--sentence_piece", default=False, action="store_true", help="Whether to use `SentencePiece` model") parser.add_argument("--subwords", type=str, default=None, help="Path to file that stores generated subwords") parser.add_argument("transcripts", nargs="+", type=str, default=None, help="Paths to transcript files") args = parser.parse_args() transcripts = preprocess_paths(args.transcripts) tfrecords_dir = preprocess_paths(args.tfrecords_dir, isdir=True) config = Config(args.config) speech_featurizer = speech_featurizers.TFSpeechFeaturizer(config.speech_config) if args.sentence_piece: print("Loading SentencePiece model ...") text_featurizer = text_featurizers.SentencePieceFeaturizer.load_from_file(config.decoder_config, args.subwords) elif args.subwords and os.path.exists(args.subwords): print("Loading subwords ...") text_featurizer = text_featurizers.SubwordFeaturizer.load_from_file(config.decoder_config, args.subwords) else: text_featurizer = text_featurizers.CharFeaturizer(config.decoder_config)
assert args.saved and args.output

config = Config(args.config)
speech_featurizer = TFSpeechFeaturizer(config.speech_config)
if args.subwords:
    text_featurizer = SubwordFeaturizer(config.decoder_config)
else:
    text_featurizer = CharFeaturizer(config.decoder_config)

# build model
jasper = Jasper(**config.model_config, vocabulary_size=text_featurizer.num_classes)
jasper.make(speech_featurizer.shape)
jasper.load_weights(args.saved, by_name=True)
jasper.summary(line_length=100)
jasper.add_featurizers(speech_featurizer, text_featurizer)

concrete_func = jasper.make_tflite_function().get_concrete_function()
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
converter.experimental_new_converter = True
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]
tflite_model = converter.convert()

args.output = file_util.preprocess_paths(args.output)
with open(args.output, "wb") as tflite_out:
    tflite_out.write(tflite_model)
parser.add_argument("--dir", "-d", type=str, default=None, help="Directory of dataset") parser.add_argument("output", type=str, default=None, help="The output .tsv transcript file path") args = parser.parse_args() assert args.dir and args.output args.dir = preprocess_paths(args.dir, isdir=True) args.output = preprocess_paths(args.output) transcripts = [] text_files = glob.glob(os.path.join(args.dir, "**", "*.txt"), recursive=True) for text_file in tqdm(text_files, desc="[Loading]"): current_dir = os.path.dirname(text_file) with open(text_file, "r", encoding="utf-8") as txt: lines = txt.read().splitlines() for line in lines: line = line.split(" ", maxsplit=1) audio_file = os.path.join(current_dir, line[0] + ".flac") y, sr = librosa.load(audio_file, sr=None) duration = librosa.get_duration(y, sr)
parser.add_argument("--subwords", type=str, default=None, help="Path to file that stores generated subwords") parser.add_argument("transcripts", nargs="+", type=str, default=None, help="Paths to transcript files") args = parser.parse_args() assert args.metadata is not None, "metadata must be defined" transcripts = preprocess_paths(args.transcripts) config = Config(args.config) speech_featurizer = TFSpeechFeaturizer(config.speech_config) if args.sentence_piece: print("Loading SentencePiece model ...") text_featurizer = SentencePieceFeaturizer.load_from_file( config.decoder_config, args.subwords) elif args.subwords and os.path.exists(args.subwords): print("Loading subwords ...") text_featurizer = SubwordFeaturizer.load_from_file(config.decoder_config, args.subwords) dataset = ASRDataset(