Example #1
    def _run_segmentation(self):

        self.subsegments_manifest_path = os.path.join(self._speaker_dir, 'subsegments.json')
        self.subsegments_manifest_path = segments_manifest_to_subsegments_manifest(
            segments_manifest_file=self._speaker_manifest_path,
            subsegments_manifest_file=self.subsegments_manifest_path,
            window=self._speaker_params.window_length_in_sec,
            shift=self._speaker_params.shift_length_in_sec,
        )

        return None
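
For reference, a minimal, self-contained sketch of what the segments-to-subsegments conversion does: each speech segment from the source manifest is cut into overlapping windows of window seconds advanced by shift seconds, and one manifest entry is written per window. The helper names and the manifest field names (audio_filepath, offset, duration) below are assumptions for illustration, not taken from the example.

    import json

    # Hypothetical stand-in for segments_manifest_to_subsegments_manifest.
    def split_into_subsegments(offset, duration, window, shift):
        """Yield (start, length) pairs covering [offset, offset + duration)."""
        start, end = offset, offset + duration
        while start < end:
            yield start, min(window, end - start)
            start += shift

    def segments_to_subsegments(segments_manifest, subsegments_manifest, window, shift):
        """Expand every segment line into overlapping subsegment lines."""
        with open(segments_manifest) as fin, open(subsegments_manifest, 'w') as fout:
            for line in fin:
                seg = json.loads(line)  # assumed fields: audio_filepath, offset, duration
                for start, length in split_into_subsegments(seg['offset'], seg['duration'], window, shift):
                    fout.write(json.dumps(dict(seg, offset=start, duration=length)) + '\n')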
Example #2
    def _run_segmentation(self, window: float, shift: float, scale_tag: str = ''):

        self.subsegments_manifest_path = os.path.join(self._speaker_dir, f'subsegments{scale_tag}.json')
        logging.info(
            f"Subsegmentation for embedding extraction:{scale_tag.replace('_',' ')}, {self.subsegments_manifest_path}"
        )
        self.subsegments_manifest_path = segments_manifest_to_subsegments_manifest(
            segments_manifest_file=self._speaker_manifest_path,
            subsegments_manifest_file=self.subsegments_manifest_path,
            window=window,
            shift=shift,
        )
        return None
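
Example #2 generalizes Example #1: window and shift arrive as arguments and the output manifest name carries a scale tag, so a caller can run segmentation once per scale. A hedged usage sketch follows; the (window, shift) values and the tag format are illustrative assumptions, not taken from the examples.

        # Hypothetical multi-scale invocation from inside the same class;
        # the (window, shift) pairs are made-up example values.
        for idx, (window, shift) in enumerate([(1.5, 0.75), (1.0, 0.5), (0.5, 0.25)]):
            self._run_segmentation(window, shift, scale_tag=f'_scale{idx}')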
Example #3
    def diarize(self, paths2audio_files: List[str] = None, batch_size: int = 1):
        """
        """

        if paths2audio_files:
            self.paths2audio_files = paths2audio_files
        else:
            if self._cfg.diarizer.paths2audio_files is None:
                raise ValueError("Pass path2audio files either through config or to diarize method")
            else:
                self.paths2audio_files = self._cfg.diarizer.paths2audio_files

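        # Accept either a list of audio paths or a text file listing one path per line.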
        if type(self.paths2audio_files) is str and os.path.isfile(self.paths2audio_files):
            paths2audio_files = []
            with open(self.paths2audio_files, 'r') as path2file:
                for audiofile in path2file.readlines():
                    audiofile = audiofile.strip()
                    paths2audio_files.append(audiofile)

        elif type(self.paths2audio_files) in [list, ListConfig]:
            paths2audio_files = list(self.paths2audio_files)

        else:
            raise ValueError("paths2audio_files must be of type list or path to file containing audio files")

        self.AUDIO_RTTM_MAP = audio_rttm_map(paths2audio_files, self._cfg.diarizer.path2groundtruth_rttm_files)

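        # With a VAD model attached, detect speech regions; otherwise an oracle VAD manifest must already exist.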
        if self.has_vad_model:
            logging.info("Performing VAD")
            mfst_file = self.path2audio_files_to_manifest(paths2audio_files)
            self._dont_auto_split = False
            self._split_duration = 50
            manifest_vad_input = mfst_file

            if not self._dont_auto_split:
                logging.info("Split long audio file to avoid CUDA memory issue")
                logging.debug("Try smaller split_duration if you still have CUDA memory issue")
                config = {
                    'manifest_filepath': mfst_file,
                    'time_length': self._vad_window_length_in_sec,
                    'split_duration': self._split_duration,
                    'num_workers': self._cfg.num_workers,
                }
                manifest_vad_input = prepare_manifest(config)
            else:
                logging.warning(
                    "If you encounter CUDA memory issue, try splitting manifest entry by split_duration to avoid it."
                )

            self._setup_vad_test_data(manifest_vad_input)
            self._run_vad(manifest_vad_input)
        else:
            if not os.path.exists(self._speaker_manifest_path):
                raise NotFoundError("Oracle VAD based manifest file not found")

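        # Cut the speech segments into fixed-length, overlapping subsegments and extract speaker embeddings from them.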
        self.subsegments_manifest_path = os.path.join(self._out_dir, 'subsegments.json')
        self.subsegments_manifest_path = segments_manifest_to_subsegments_manifest(
            segments_manifest_file=self._speaker_manifest_path,
            subsegments_manifest_file=self.subsegments_manifest_path,
            window=self._cfg.diarizer.speaker_embeddings.window_length_in_sec,
            shift=self._cfg.diarizer.speaker_embeddings.shift_length_in_sec,
        )
        self._extract_embeddings(self.subsegments_manifest_path)
        out_rttm_dir = os.path.join(self._out_dir, 'pred_rttms')
        os.makedirs(out_rttm_dir, exist_ok=True)

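        # Cluster the subsegment embeddings and write per-file RTTM predictions.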
        perform_diarization(
            embeddings_file=self._embeddings_file,
            reco2num=self._num_speakers,
            manifest_path=self.subsegments_manifest_path,
            audio_rttm_map=self.AUDIO_RTTM_MAP,
            out_rttm_dir=out_rttm_dir,
            max_num_speakers=self.max_num_speakers,
        )
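
Finally, a hedged usage sketch of the diarize() entry point. The variable diarizer stands for an already-configured instance of the class these methods belong to, and the audio paths are placeholders.

    # 'diarizer' is assumed to be a configured instance of the class shown above.
    audio_files = ['/data/session1.wav', '/data/session2.wav']  # placeholder paths
    diarizer.diarize(paths2audio_files=audio_files, batch_size=1)
    # Predicted RTTM files end up under <out_dir>/pred_rttms via perform_diarization().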