def _run_segmentation(self):
    """Split the speaker segments manifest into fixed-size subsegments.

    Writes ``subsegments.json`` under the speaker directory using the window
    and shift configured in ``self._speaker_params``, and stores the path of
    the generated manifest on ``self.subsegments_manifest_path``.
    """
    # Destination of the subsegments manifest; the helper returns the final path.
    self.subsegments_manifest_path = os.path.join(self._speaker_dir, 'subsegments.json')
    params = self._speaker_params
    self.subsegments_manifest_path = segments_manifest_to_subsegments_manifest(
        segments_manifest_file=self._speaker_manifest_path,
        subsegments_manifest_file=self.subsegments_manifest_path,
        window=params.window_length_in_sec,
        shift=params.shift_length_in_sec,
    )
    return None
def _run_segmentation(self, window: float, shift: float, scale_tag: str = ''):
    """Create the subsegment manifest for one segmentation scale.

    Args:
        window: subsegment window length in seconds.
        shift: hop between consecutive subsegments in seconds.
        scale_tag: optional suffix that distinguishes per-scale output files
            (e.g. ``'_scale0'``); empty for single-scale runs.
    """
    manifest_name = f'subsegments{scale_tag}.json'
    self.subsegments_manifest_path = os.path.join(self._speaker_dir, manifest_name)
    logging.info(
        f"Subsegmentation for embedding extraction:{scale_tag.replace('_',' ')}, {self.subsegments_manifest_path}"
    )
    self.subsegments_manifest_path = segments_manifest_to_subsegments_manifest(
        segments_manifest_file=self._speaker_manifest_path,
        subsegments_manifest_file=self.subsegments_manifest_path,
        window=window,
        shift=shift,
    )
    return None
def diarize(self, paths2audio_files: List[str] = None, batch_size: int = 1):
    """Run speaker diarization end to end and write predicted RTTM files.

    Resolves the audio file list, runs (or expects oracle) VAD, builds the
    subsegment manifest, extracts speaker embeddings, and performs clustering,
    writing results under ``<out_dir>/pred_rttms``.

    Args:
        paths2audio_files: list of audio file paths, or a path to a text file
            listing one audio file per line. When omitted, falls back to
            ``self._cfg.diarizer.paths2audio_files``.
        batch_size: kept for interface compatibility; not used in this method.

    Raises:
        ValueError: if no audio paths are supplied via argument or config, or
            the supplied value is neither a list nor a readable list file.
        NotFoundError: if oracle VAD is expected but its manifest is missing.
    """
    # Guard clauses replace the original nested if/else for path resolution.
    if paths2audio_files:
        self.paths2audio_files = paths2audio_files
    elif self._cfg.diarizer.paths2audio_files is None:
        raise ValueError("Pass path2audio files either through config or to diarize method")
    else:
        self.paths2audio_files = self._cfg.diarizer.paths2audio_files

    # Accept either a text file listing audio paths (one per line) or a list.
    # isinstance() instead of type() checks: idiomatic and subclass-safe.
    if isinstance(self.paths2audio_files, str) and os.path.isfile(self.paths2audio_files):
        with open(self.paths2audio_files, 'r') as path2file:
            # Skip blank lines so trailing newlines don't yield empty paths.
            paths2audio_files = [line.strip() for line in path2file if line.strip()]
    elif isinstance(self.paths2audio_files, (list, ListConfig)):
        paths2audio_files = list(self.paths2audio_files)
    else:
        raise ValueError("paths2audio_files must be of type list or path to file containing audio files")

    self.AUDIO_RTTM_MAP = audio_rttm_map(paths2audio_files, self._cfg.diarizer.path2groundtruth_rttm_files)

    if self.has_vad_model:
        logging.info("Performing VAD")
        mfst_file = self.path2audio_files_to_manifest(paths2audio_files)
        # NOTE(review): _dont_auto_split is hard-coded False here, so the
        # warning branch below is currently unreachable — confirm intent.
        self._dont_auto_split = False
        self._split_duration = 50
        manifest_vad_input = mfst_file
        if not self._dont_auto_split:
            logging.info("Split long audio file to avoid CUDA memory issue")
            logging.debug("Try smaller split_duration if you still have CUDA memory issue")
            config = {
                'manifest_filepath': mfst_file,
                'time_length': self._vad_window_length_in_sec,
                'split_duration': self._split_duration,
                'num_workers': self._cfg.num_workers,
            }
            manifest_vad_input = prepare_manifest(config)
        else:
            logging.warning(
                "If you encounter CUDA memory issue, try splitting manifest entry by split_duration to avoid it."
            )
        self._setup_vad_test_data(manifest_vad_input)
        self._run_vad(manifest_vad_input)
    else:
        # Oracle-VAD path: the speech-segment manifest must already exist.
        if not os.path.exists(self._speaker_manifest_path):
            raise NotFoundError("Oracle VAD based manifest file not found")

    # Build fixed-window subsegments for embedding extraction.
    self.subsegments_manifest_path = os.path.join(self._out_dir, 'subsegments.json')
    self.subsegments_manifest_path = segments_manifest_to_subsegments_manifest(
        segments_manifest_file=self._speaker_manifest_path,
        subsegments_manifest_file=self.subsegments_manifest_path,
        window=self._cfg.diarizer.speaker_embeddings.window_length_in_sec,
        shift=self._cfg.diarizer.speaker_embeddings.shift_length_in_sec,
    )

    self._extract_embeddings(self.subsegments_manifest_path)

    out_rttm_dir = os.path.join(self._out_dir, 'pred_rttms')
    os.makedirs(out_rttm_dir, exist_ok=True)

    # Cluster embeddings per recording and write one RTTM per audio file.
    perform_diarization(
        embeddings_file=self._embeddings_file,
        reco2num=self._num_speakers,
        manifest_path=self.subsegments_manifest_path,
        audio_rttm_map=self.AUDIO_RTTM_MAP,
        out_rttm_dir=out_rttm_dir,
        max_num_speakers=self.max_num_speakers,
    )