Example 1
from typing import Tuple

import numpy as np

from lhotse import CutSet, FeatureSet, SupervisionSet
from lhotse.utils import Pathlike


def random_mixed(supervision_manifest: Pathlike, feature_manifest: Pathlike,
                 output_cut_manifest: Pathlike, snr_range: Tuple[float, float],
                 offset_range: Tuple[float, float]):
    """
    Create a CutSet stored in OUTPUT_CUT_MANIFEST that contains supervision regions from SUPERVISION_MANIFEST
    and features supplied by FEATURE_MANIFEST. It first creates a trivial CutSet, splits it into two equal, randomized
    parts and mixes their features.
    The parameters of the mix are controlled via SNR_RANGE and OFFSET_RANGE.
    """
    supervision_set = SupervisionSet.from_json(supervision_manifest)
    feature_set = FeatureSet.from_json(feature_manifest)

    source_cut_set = CutSet.from_manifests(supervisions=supervision_set,
                                           features=feature_set)
    left_cuts, right_cuts = source_cut_set.split(num_splits=2, shuffle=True)

    # Sample one random SNR and one random relative offset per pair of cuts.
    snrs = np.random.uniform(*snr_range, size=len(left_cuts)).tolist()
    relative_offsets = np.random.uniform(*offset_range,
                                         size=len(left_cuts)).tolist()

    # Mix each left cut with its paired right cut; the offset is expressed
    # as a fraction of the left cut's duration.
    mixed_cut_set = CutSet.from_cuts(
        left_cut.mix(right_cut,
                     offset_other_by=left_cut.duration * relative_offset,
                     snr=snr)
        for left_cut, right_cut, snr, relative_offset in zip(
            left_cuts, right_cuts, snrs, relative_offsets))
    mixed_cut_set.to_json(output_cut_manifest)
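Under the hood this is just a function call; a minimal usage sketch follows, with all manifest paths and parameter values as hypothetical placeholders.

# Hypothetical invocation; the paths and ranges below are placeholders.
random_mixed(
    supervision_manifest='data/supervisions.json',
    feature_manifest='data/features.json',
    output_cut_manifest='data/cuts_mixed.json',
    snr_range=(10.0, 20.0),     # per-pair SNRs drawn uniformly from 10-20 dB
    offset_range=(0.25, 0.75),  # offsets as fractions of the left cut's duration
)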
Example 2
from pathlib import Path
from typing import Optional

from lhotse import Fbank, FeatureExtractor, FeatureSetBuilder, RecordingSet, SupervisionSet
from lhotse.augmentation import WavAugmenter
from lhotse.utils import Pathlike


def extract(audio_manifest: Pathlike, output_dir: Pathlike,
            segmentation_manifest: Optional[Pathlike], augmentation: Optional[str],
            feature_manifest: Optional[Pathlike], compressed: bool,
            lilcom_tick_power: int, root_dir: Optional[Pathlike],
            num_jobs: int):
    """
    Extract features for recordings in a given AUDIO_MANIFEST. The features are stored in OUTPUT_DIR,
    with one file per recording (or segment).
    """
    audio_set = RecordingSet.from_json(audio_manifest)

    feature_extractor = (FeatureExtractor.from_yaml(feature_manifest)
                         if feature_manifest is not None else Fbank())

    # TODO: to be used (actually, only the segmentation info will be used, and all supervision info will be ignored)
    supervision_set = (SupervisionSet.from_json(segmentation_manifest)
                       if segmentation_manifest is not None else None)

    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)

    augmenter = None
    if augmentation is not None:
        sampling_rate = next(iter(audio_set)).sampling_rate
        assert all(rec.sampling_rate == sampling_rate for rec in audio_set), \
            "Wav augmentation effect chains expect all the recordings to have the same sampling rate at this time."
        augmenter = WavAugmenter.create_predefined(name=augmentation,
                                                   sampling_rate=sampling_rate)

    feature_set_builder = FeatureSetBuilder(
        feature_extractor=feature_extractor,
        output_dir=output_dir,
        root_dir=root_dir,
        augmenter=augmenter)
    feature_set_builder.process_and_store_recordings(
        recordings=audio_set,
        segmentation=None,  # TODO: implement and use
        compressed=compressed,
        lilcom_tick_power=lilcom_tick_power,
        num_jobs=num_jobs)
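A sketch of one possible call, assuming the default Fbank() extractor kicks in because no feature_manifest is passed; every path and value below is a placeholder.

# Hypothetical invocation; paths are placeholders.
extract(
    audio_manifest='data/recordings.json',
    output_dir='exp/feats',
    segmentation_manifest=None,   # segmentation is not used yet (see the TODOs)
    augmentation=None,            # or the name of a predefined WavAugmenter chain
    feature_manifest=None,        # fall back to the default Fbank() extractor
    compressed=True,
    lilcom_tick_power=-5,         # assumed reasonable lilcom quantization setting
    root_dir=None,
    num_jobs=4,
)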
Example 3
from lhotse import SupervisionSet


def supervision_set():
    # The fixture is a YAML file, so load it with the matching YAML deserializer.
    return SupervisionSet.from_yaml('test/fixtures/supervision.yml')
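Lhotse manifests serialize symmetrically, so the fixture can be round-tripped between formats; a small sketch, assuming the standard to_json/from_json pair and a hypothetical output path:

# Hypothetical round-trip; '/tmp/supervision.json' is a placeholder path.
supervisions = supervision_set()
supervisions.to_json('/tmp/supervision.json')
reloaded = SupervisionSet.from_json('/tmp/supervision.json')
assert len(reloaded) == len(supervisions)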
Example 4
from lhotse import SupervisionSet


def external_supervision_set() -> SupervisionSet:
    # Load the manifest and attach word-level alignments from the CTM file.
    return SupervisionSet.from_json(
        "test/fixtures/supervision.json").with_alignment_from_ctm(
            "test/fixtures/supervision.ctm")
Example 5
from lhotse import SupervisionSet


def external_supervision_set() -> SupervisionSet:
    return SupervisionSet.from_json('test/fixtures/supervision.json')
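These zero-argument helpers read like pytest fixtures; a sketch of how a test might consume one, assuming it is registered with the @pytest.fixture decorator (an assumption, since no decorator appears in the snippet):

import pytest

from lhotse import SupervisionSet


@pytest.fixture
def external_supervision_set() -> SupervisionSet:
    return SupervisionSet.from_json('test/fixtures/supervision.json')


def test_supervisions_load(external_supervision_set):
    # Hypothetical assertion: the manifest should contain at least one segment.
    assert len(external_supervision_set) > 0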