Example #1
 def __init__(self, dataset_dir, manifest_filepath, labels, featurizer, max_duration=None, pad_to_max=False,
              min_duration=None, blank_index=0, max_utts=0, normalize=True, sort_by_duration=False,
              trim=False, speed_perturbation=False):
     """Dataset that loads tensors via a json file containing paths to audio files, transcripts, and durations
     (in seconds). Each entry is a different audio sample.
     Args:
         dataset_dir: absolute path to dataset folder
         manifest_filepath: relative path from the dataset folder to the manifest JSON described above; can be comma-separated paths
         labels: String containing all the possible characters to map to
         featurizer: Initialized featurizer class that converts paths of audio to feature tensors
         max_duration: If audio exceeds this length, do not include in dataset
         min_duration: If audio is less than this length, do not include in dataset
         pad_to_max: if True, input sequences fed to the DNN model are padded to max_duration
         blank_index: blank index for ctc loss / decoder
         max_utts: Limit number of utterances
         normalize: whether to normalize transcript text
         sort_by_duration: whether to sort sequences by increasing duration
         trim: if True, trim leading and trailing silence from each audio signal
         speed_perturbation: whether the data contains speed perturbation
     """
     m_paths = manifest_filepath.split(',')
     self.manifest = Manifest(dataset_dir, m_paths, labels, blank_index, pad_to_max=pad_to_max,
                              max_duration=max_duration,
                              sort_by_duration=sort_by_duration,
                              min_duration=min_duration, max_utts=max_utts,
                              normalize=normalize, speed_perturbation=speed_perturbation)
     self.featurizer = featurizer
     self.blank_index = blank_index
     self.trim = trim
     print(
         "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours.".format(
             self.manifest.duration / 3600,
             self.manifest.filtered_duration / 3600))
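
A minimal usage sketch for the constructor above. The owning class name (here AudioDataset), WaveformFeaturizer, and the paths are assumptions for illustration; only the argument names come from the example itself.

# Hypothetical usage; AudioDataset is the class this __init__ belongs to,
# and WaveformFeaturizer is an assumed featurizer implementation.
featurizer = WaveformFeaturizer(sample_rate=16000)
dataset = AudioDataset(
    dataset_dir="/data/LibriSpeech",          # assumed dataset root
    manifest_filepath="train.json,dev.json",  # comma-separated manifests
    labels=" abcdefghijklmnopqrstuvwxyz'",
    featurizer=featurizer,
    max_duration=16.7,   # drop utterances longer than ~16.7 seconds
    normalize=True,
    trim=True)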
Example #2
 def __init__(self, dataset_dir, manifest_filepath, labels,
              sample_rate=16000, perf_count=None):
     m_paths = [manifest_filepath]
     self.manifest = Manifest(dataset_dir, m_paths, labels, len(labels),
                              normalize=True, max_duration=15.0)
     self.sample_rate = sample_rate
     self.count = len(self.manifest)
     perf_count = self.count if perf_count is None else perf_count
     self.sample_id_to_sample = {}
     self.qsl = lg.ConstructQSL(self.count, perf_count,
                                self.load_query_samples,
                                self.unload_query_samples)
     print(
         "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours. Number of samples: {2}".format(
             self.manifest.duration / 3600,
             self.manifest.filtered_duration / 3600,
             self.count))
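
Here lg is MLPerf LoadGen (import mlperf_loadgen as lg); ConstructQSL takes the total sample count, the performance sample count, and load/unload callbacks that LoadGen invokes with lists of sample indices. A sketch of what those callbacks might look like, assuming a hypothetical _load_sample helper that featurizes one manifest entry:

# Sketch of the callbacks wired into lg.ConstructQSL above; _load_sample
# is a hypothetical helper, not part of the original example.
def load_query_samples(self, sample_list):
    for sample_id in sample_list:
        self.sample_id_to_sample[sample_id] = self._load_sample(sample_id)

def unload_query_samples(self, sample_list):
    for sample_id in sample_list:
        del self.sample_id_to_sample[sample_id]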
Example #3
import array
import json
import os

# get_args, dtype_map, Manifest, __gather_predictions, and
# process_evaluation_epoch are assumed to come from the surrounding
# project's helper modules; they are not defined in this snippet.
def main():
    args = get_args()
    labels = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"]
    manifest = Manifest(args.dataset_dir, [args.manifest], labels, len(labels), normalize=True, max_duration=15.0)
    with open(os.path.join(args.log_dir, "mlperf_log_accuracy.json")) as fh:
        results = json.load(fh)
    hypotheses = []
    references = []
    for result in results:
        hypotheses.append(array.array(dtype_map[args.output_dtype], bytes.fromhex(result["data"])).tolist())
        references.append(manifest[result["qsl_idx"]]["transcript"])

    references = __gather_predictions([references], labels=labels)
    hypotheses = __gather_predictions([hypotheses], labels=labels)

    d = dict(predictions=hypotheses,
             transcripts=references)
    wer = process_evaluation_epoch(d)
    print("Word Error Rate: {:}%, accuracy={:}%".format(wer * 100, (1 - wer) * 100))