def random_mixed(supervision_manifest: Pathlike, feature_manifest: Pathlike,
                 output_cut_manifest: Pathlike, snr_range: Tuple[float, float],
                 offset_range: Tuple[float, float]):
    """
    Create a CutSet stored in OUTPUT_CUT_MANIFEST that contains supervision regions
    from SUPERVISION_MANIFEST and features supplied by FEATURE_MANIFEST.
    It first creates a trivial CutSet, splits it into two equal, randomized parts
    and mixes their features. The parameters of the mix are controlled via
    SNR_RANGE and OFFSET_RANGE.
    """
    supervisions = SupervisionSet.from_json(supervision_manifest)
    features = FeatureSet.from_json(feature_manifest)
    # One trivial cut per supervised region, backed by the feature manifest.
    cuts = CutSet.from_manifests(supervisions=supervisions, features=features)
    first_half, second_half = cuts.split(num_splits=2, shuffle=True)
    num_pairs = len(first_half)
    # Draw one SNR and one relative offset per mixed pair of cuts.
    sampled_snrs = np.random.uniform(*snr_range, size=num_pairs).tolist()
    sampled_offsets = np.random.uniform(*offset_range, size=num_pairs).tolist()
    mixed = CutSet.from_cuts(
        cut.mix(other, offset_other_by=cut.duration * rel_offset, snr=snr)
        for cut, other, snr, rel_offset
        in zip(first_half, second_half, sampled_snrs, sampled_offsets)
    )
    mixed.to_json(output_cut_manifest)
def extract(audio_manifest: Pathlike, output_dir: Pathlike,
            segmentation_manifest: Optional[Pathlike], augmentation: str,
            feature_manifest: Optional[Pathlike], compressed: bool,
            lilcom_tick_power: int, root_dir: Optional[Pathlike], num_jobs: int):
    """
    Extract features for recordings in a given AUDIO_MANIFEST.
    The features are stored in OUTPUT_DIR, with one file per recording (or segment).
    """
    recordings = RecordingSet.from_json(audio_manifest)
    if feature_manifest is not None:
        extractor = FeatureExtractor.from_yaml(feature_manifest)
    else:
        extractor = Fbank()
    # TODO: to be used (actually, only the segmentation info will be used,
    # and all supervision info will be ignored)
    if segmentation_manifest is not None:
        segmentation_supervisions = SupervisionSet.from_json(segmentation_manifest)
    else:
        segmentation_supervisions = None
    out_path = Path(output_dir)
    out_path.mkdir(exist_ok=True, parents=True)
    augmenter = None
    if augmentation is not None:
        # A single effect chain is built for one sampling rate, so every
        # recording in the set must share it.
        rate = next(iter(recordings)).sampling_rate
        assert all(r.sampling_rate == rate for r in recordings), \
            "Wav augmentation effect chains expect all the recordings to have the same sampling rate at this time."
        augmenter = WavAugmenter.create_predefined(name=augmentation, sampling_rate=rate)
    builder = FeatureSetBuilder(
        feature_extractor=extractor,
        output_dir=out_path,
        root_dir=root_dir,
        augmenter=augmenter,
    )
    builder.process_and_store_recordings(
        recordings=recordings,
        segmentation=None,  # TODO: implement and use
        compressed=compressed,
        lilcom_tick_power=lilcom_tick_power,
        num_jobs=num_jobs,
    )
def supervision_set():
    """
    Fixture-style loader returning the SupervisionSet read from the JSON
    test fixture.
    """
    # FIX: the path previously said 'test/fixtures/supervision.yml', but
    # SupervisionSet.from_json parses JSON, and the sibling fixtures in this
    # codebase load 'test/fixtures/supervision.json' — use the same file here.
    return SupervisionSet.from_json('test/fixtures/supervision.json')
def external_supervision_set() -> SupervisionSet:
    """Load the JSON supervision fixture and enrich it with CTM alignments."""
    supervisions = SupervisionSet.from_json("test/fixtures/supervision.json")
    return supervisions.with_alignment_from_ctm("test/fixtures/supervision.ctm")
def external_supervision_set() -> SupervisionSet:
    """Return the SupervisionSet read from the JSON test fixture."""
    fixture_path = 'test/fixtures/supervision.json'
    return SupervisionSet.from_json(fixture_path)