def __init__(self, dataset_dir, manifest_filepath, labels, featurizer,
             max_duration=None, pad_to_max=False, min_duration=None,
             blank_index=0, max_utts=0, normalize=True,
             sort_by_duration=False, trim=False, speed_perturbation=False):
    """Dataset backed by a JSON manifest of audio paths, transcripts and durations.

    Each manifest entry describes one audio sample (path, transcript,
    duration in seconds).

    Args:
        dataset_dir: absolute path to the dataset folder.
        manifest_filepath: path(s) to the manifest JSON relative to
            dataset_dir; may be a comma-separated list of paths.
        labels: string containing every character transcripts map onto.
        featurizer: initialized featurizer that converts audio paths into
            feature tensors.
        max_duration: drop samples longer than this many seconds.
        pad_to_max: if set, pad model input sequences up to max_duration.
        min_duration: drop samples shorter than this many seconds.
        blank_index: blank index for the CTC loss / decoder.
        max_utts: cap on the number of utterances loaded.
        normalize: whether to normalize transcript text.
        sort_by_duration: sort samples by increasing duration.
        trim: if set, trim leading and trailing silence from each signal.
        speed_perturbation: set if the data contains speed perturbation.
    """
    paths = manifest_filepath.split(',')
    self.manifest = Manifest(
        dataset_dir, paths, labels, blank_index,
        pad_to_max=pad_to_max,
        max_duration=max_duration,
        sort_by_duration=sort_by_duration,
        min_duration=min_duration,
        max_utts=max_utts,
        normalize=normalize,
        speed_perturbation=speed_perturbation)
    self.featurizer = featurizer
    self.blank_index = blank_index
    self.trim = trim
    kept_hours = self.manifest.duration / 3600
    dropped_hours = self.manifest.filtered_duration / 3600
    print(
        "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours.".format(
            kept_hours, dropped_hours))
def __init__(self, dataset_dir, manifest_filepath, labels, sample_rate=16000, perf_count=None):
    """Build a LoadGen query-sample library over a single manifest.

    Args:
        dataset_dir: absolute path to the dataset folder.
        manifest_filepath: path to one manifest JSON file.
        labels: string containing every character transcripts map onto.
        sample_rate: audio sample rate in Hz (default 16000).
        perf_count: number of samples for the performance run; defaults to
            the full sample count when None.
    """
    # Samples longer than 15 s are filtered out by the Manifest.
    self.manifest = Manifest(dataset_dir, [manifest_filepath], labels,
                             len(labels), normalize=True, max_duration=15.0)
    self.sample_rate = sample_rate
    self.count = len(self.manifest)
    if perf_count is None:
        perf_count = self.count
    self.sample_id_to_sample = {}
    self.qsl = lg.ConstructQSL(self.count, perf_count,
                               self.load_query_samples,
                               self.unload_query_samples)
    print(
        "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours. Number of samples: {2}".format(
            self.manifest.duration / 3600,
            self.manifest.filtered_duration / 3600,
            self.count))
def main():
    """Score mlperf_log_accuracy.json against manifest transcripts, print WER."""
    args = get_args()
    # Character set: space, a-z, apostrophe (one-char labels, in this order).
    labels = list(" abcdefghijklmnopqrstuvwxyz'")
    manifest = Manifest(args.dataset_dir, [args.manifest], labels,
                        len(labels), normalize=True, max_duration=15.0)
    accuracy_log = os.path.join(args.log_dir, "mlperf_log_accuracy.json")
    with open(accuracy_log) as fh:
        results = json.load(fh)
    hypotheses = []
    references = []
    for entry in results:
        # Each log entry carries the decoded token ids as a hex blob.
        raw = bytes.fromhex(entry["data"])
        hypotheses.append(
            array.array(dtype_map[args.output_dtype], raw).tolist())
        references.append(manifest[entry["qsl_idx"]]["transcript"])
    references = __gather_predictions([references], labels=labels)
    hypotheses = __gather_predictions([hypotheses], labels=labels)
    wer = process_evaluation_epoch(
        dict(predictions=hypotheses, transcripts=references))
    print("Word Error Rate: {:}%, accuracy={:}%".format(
        wer * 100, (1 - wer) * 100))