コード例 #1
0
    def preprocess(self, num_workers=4, mode=SOURCE_AND_TARGET):
        """Run audio and/or text preprocessing jobs in a process pool.

        Creates ``self.out_dir`` when it is missing, then, depending on
        ``mode``:

        * ``TARGET_ONLY`` / ``SOURCE_AND_TARGET``: submits one
          ``_process_audio(self.out_dir, index, wav_path)`` job per wav file
          collected from ``self.in_dir`` and hands the gathered results to
          ``self._write_target_metadata``.
        * ``SOURCE_ONLY`` / ``SOURCE_AND_TARGET``: submits one
          ``_process_text(self.out_dir, index, text)`` job per transcription
          collected from ``self.in_dir`` and hands the gathered results to
          ``self._write_source_metadata``.

        Indices passed to the workers are 1-based, and results are gathered
        in submission order.

        Args:
            num_workers: Maximum number of worker processes in the pool.
            mode: Selects which of the two stages above are run.

        Raises:
            Whatever a worker job raised; it is re-raised here by
            ``future.result()``.
        """
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(self.out_dir, exist_ok=True)

        # The context manager guarantees the pool is shut down even when a
        # job (or a metadata writer) raises; a trailing shutdown() call would
        # be skipped in that case and leak the pool.
        with ProcessPoolExecutor(max_workers=num_workers) as executor:
            if mode in (TARGET_ONLY, SOURCE_AND_TARGET):
                wav_paths = jsut.WavFileDataSource(
                    self.in_dir,
                    subsets=jsut.available_subsets).collect_files()
                # submit() forwards positional arguments itself, so no
                # functools.partial wrapper is needed.
                futures = [
                    executor.submit(_process_audio, self.out_dir, index,
                                    wav_path)
                    for index, wav_path in enumerate(wav_paths, start=1)
                ]
                result = [
                    future.result()
                    for future in tqdm(futures, desc="targets")
                ]
                self._write_target_metadata(result)

            if mode in (SOURCE_ONLY, SOURCE_AND_TARGET):
                transcriptions = jsut.TranscriptionDataSource(
                    self.in_dir,
                    subsets=jsut.available_subsets).collect_files()
                futures = [
                    executor.submit(_process_text, self.out_dir, index, text)
                    for index, text in enumerate(transcriptions, start=1)
                ]
                result = [
                    future.result()
                    for future in tqdm(futures, desc="sources")
                ]
                self._write_source_metadata(result)
コード例 #2
0
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Process every (transcription, wav) pair found under ``in_dir``.

    Transcriptions and wav paths are collected from all available JSUT
    subsets, paired positionally, and each pair is submitted to a process
    pool as ``_process_utterance(out_dir, index, wav_path, text)`` with a
    1-based ``index``.

    Args:
        in_dir: Dataset root handed to the JSUT data sources.
        out_dir: Output location forwarded to ``_process_utterance``.
        num_workers: Maximum number of worker processes in the pool.
        tqdm: Callable wrapped around the list of futures while results are
            gathered (e.g. a progress bar); defaults to the identity.

    Returns:
        A list with the result of each job, in submission order.

    Raises:
        Whatever a worker job raised; it is re-raised here by
        ``future.result()``.
    """
    transcriptions = jsut.TranscriptionDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()
    wav_paths = jsut.WavFileDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()

    # The context manager shuts the pool down on both the success and the
    # error path; the executor was previously never released.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        # NOTE(review): zip() stops at the shorter sequence, so a length
        # mismatch between transcriptions and wav files is silently ignored
        # -- confirm both sources always yield the same number of items.
        futures = [
            executor.submit(_process_utterance, out_dir, index, wav_path, text)
            for index, (text, wav_path) in enumerate(
                zip(transcriptions, wav_paths), start=1)
        ]
        return [future.result() for future in tqdm(futures)]