コード例 #1
0
def test_feature_set_serialization(format, compressed):
    """
    Round-trip a one-item FeatureSet through a temporary file and expect equality.

    ``format`` selects the JSON or YAML (de)serializer pair; ``compressed``
    only controls whether the temporary file name ends with ``.gz``.
    """
    single_item = Features(
        recording_id='irrelevant',
        channels=0,
        start=0.0,
        duration=20.0,
        type='fbank',
        num_frames=2000,
        num_features=20,
        sampling_rate=16000,
        storage_type='lilcom',
        storage_path='/irrelevant/',
        storage_key='path.llc',
    )
    feature_set = FeatureSet(features=[single_item])

    file_suffix = '.gz' if compressed else ''
    with NamedTemporaryFile(suffix=file_suffix) as f:
        if format == 'json':
            feature_set.to_json(f.name)
            feature_set_deserialized = FeatureSet.from_json(f.name)
        elif format == 'yaml':
            feature_set.to_yaml(f.name)
            feature_set_deserialized = FeatureSet.from_yaml(f.name)

    assert feature_set_deserialized == feature_set
コード例 #2
0
def random_mixed(supervision_manifest: Pathlike, feature_manifest: Pathlike,
                 output_cut_manifest: Pathlike, snr_range: Tuple[float, float],
                 offset_range: Tuple[float, float]):
    """
    Create a CutSet stored in OUTPUT_CUT_MANIFEST that contains supervision regions from SUPERVISION_MANIFEST
    and features supplied by FEATURE_MANIFEST. It first creates a trivial CutSet, splits it into two equal, randomized
    parts and mixes their features.
    The parameters of the mix are controlled via SNR_RANGE and OFFSET_RANGE.
    """
    # Load both input manifests (supervisions first, then features) and
    # combine them into the source CutSet.
    source_cut_set = CutSet.from_manifests(
        supervisions=SupervisionSet.from_json(supervision_manifest),
        features=FeatureSet.from_json(feature_manifest),
    )
    left_cuts, right_cuts = source_cut_set.split(num_splits=2, shuffle=True)

    # Draw one SNR and one relative offset per left-hand cut; SNRs are drawn
    # first so the order of random draws stays fixed.
    num_left = len(left_cuts)
    snrs = np.random.uniform(*snr_range, size=num_left).tolist()
    relative_offsets = np.random.uniform(*offset_range, size=num_left).tolist()

    # The offset of the right cut is expressed as a fraction of the left cut's duration.
    pairs = zip(left_cuts, right_cuts, snrs, relative_offsets)
    mixed = (
        left.mix(right, offset_other_by=left.duration * fraction, snr=snr)
        for left, right, snr, fraction in pairs
    )
    CutSet.from_cuts(mixed).to_json(output_cut_manifest)
コード例 #3
0
ファイル: test_feature_set.py プロジェクト: popcornell/lhotse
def test_feature_set_serialization():
    """Write a one-item FeatureSet to a temporary YAML file, read it back and expect equality."""
    extractor = FeatureExtractor()
    single_item = Features(
        recording_id='irrelevant',
        channel_id=0,
        start=0.0,
        duration=20.0,
        type='fbank',
        num_frames=2000,
        num_features=20,
        storage_type='lilcom',
        storage_path='/irrelevant/path.llc',
    )
    feature_set = FeatureSet(feature_extractor=extractor, features=[single_item])

    with NamedTemporaryFile() as tmp:
        feature_set.to_yaml(tmp.name)
        feature_set_deserialized = FeatureSet.from_yaml(tmp.name)

    assert feature_set_deserialized == feature_set
コード例 #4
0
ファイル: test_utils.py プロジェクト: leixin/lhotse
def DummyManifest(type_: Type, *, begin_id: int, end_id: int) -> Manifest:
    """
    Build a dummy manifest of the requested type.

    One dummy item is created for every index in ``range(begin_id, end_id)``.
    Supported types are RecordingSet, SupervisionSet and FeatureSet; for any
    other ``type_`` the function falls through and implicitly returns None.
    """
    if type_ == RecordingSet:
        recordings = map(dummy_recording, range(begin_id, end_id))
        return RecordingSet.from_recordings(recordings)
    elif type_ == SupervisionSet:
        segments = map(dummy_supervision, range(begin_id, end_id))
        return SupervisionSet.from_segments(segments)
    elif type_ == FeatureSet:
        feats = map(dummy_features, range(begin_id, end_id))
        # noinspection PyTypeChecker
        return FeatureSet.from_features(feats)
コード例 #5
0
ファイル: test_cut.py プロジェクト: underdogliu/lhotse
def dummy_feature_set_lazy():
    """
    Yield a FeatureSet opened with ``from_jsonl_lazy`` from a temporary ``.jsonl.gz`` file.

    The file holds a single dummy Features item and exists only while the
    generator is suspended inside the ``with`` block.
    """
    with NamedTemporaryFile(suffix=".jsonl.gz") as tmp:
        single_item = Features(
            recording_id="rec1",
            channels=0,
            start=0,
            duration=10,
            type="fbank",
            num_frames=1000,
            num_features=23,
            frame_shift=0.01,
            sampling_rate=16000,
            storage_type="lilcom_files",
            storage_path="feats",
            storage_key="dummy.llc",
        )
        eager_set = FeatureSet.from_features([single_item])
        eager_set.to_file(tmp.name)
        tmp.flush()
        yield FeatureSet.from_jsonl_lazy(tmp.name)
コード例 #6
0
ファイル: test_cut.py プロジェクト: twistedmove/lhotse
def dummy_feature_set():
    """Return a FeatureSet with one dummy 'fbank' Features item for recording 'rec1'."""
    single_item = Features(
        recording_id='rec1',
        channels=0,
        start=0,
        duration=10,
        type='fbank',
        num_frames=1000,
        num_features=23,
        sampling_rate=16000,
        storage_type='lilcom',
        storage_path='dummy.llc',
    )
    return FeatureSet.from_features([single_item])
コード例 #7
0
def test_feature_set_prefix_path():
    """Check that ``with_path_prefix('/data')`` prepends the prefix to each item's storage path."""
    single_item = Features(
        type='fbank',
        num_frames=1000,
        num_features=40,
        sampling_rate=16000,
        storage_type='lilcom',
        storage_path='feats/12345.llc',
        start=0,
        duration=10,
    )
    prefixed = FeatureSet.from_features([single_item]).with_path_prefix('/data')
    for item in prefixed:
        assert item.storage_path == '/data/feats/12345.llc'
コード例 #8
0
def windowed(feature_manifest: Pathlike, output_cut_manifest: Pathlike,
             cut_duration: float, cut_shift: Optional[float],
             keep_shorter_windows: bool):
    """
    Create a CutSet stored in OUTPUT_CUT_MANIFEST from feature regions in FEATURE_MANIFEST.
    The feature matrices are traversed in windows with CUT_SHIFT increments, creating cuts of constant CUT_DURATION.
    """
    # Load the features, build the windowed cuts and write them out in one pass.
    make_windowed_cuts_from_features(
        feature_set=FeatureSet.from_json(feature_manifest),
        cut_duration=cut_duration,
        cut_shift=cut_shift,
        keep_shorter_windows=keep_shorter_windows,
    ).to_json(output_cut_manifest)
コード例 #9
0
ファイル: manipulation.py プロジェクト: popcornell/lhotse
def split(manifest: Manifest,
          num_splits: int,
          randomize: bool = False) -> List[Manifest]:
    """
    Split a manifest into ``num_splits`` parts of (nearly) equal size.

    The items are distributed as evenly as possible: the sizes of any two
    parts differ by at most one item, with the larger parts coming first.
    No part is ever empty.

    :param manifest: a RecordingSet, SupervisionSet, FeatureSet or CutSet.
    :param num_splits: the number of parts to create; must be at least 1
        and not greater than the number of items in ``manifest``.
    :param randomize: when True, the items are shuffled (with the ``random``
        module) before being assigned to parts.
    :return: a list of ``num_splits`` manifests of the same type as the input.
    :raises ValueError: when ``num_splits`` is smaller than 1 or greater than
        the number of items, or when the manifest type is not supported.
    """
    num_items = len(manifest)
    if num_splits < 1:
        raise ValueError(
            f"The number of splits must be a positive integer (got {num_splits})"
        )
    if num_splits > num_items:
        raise ValueError(
            f"Cannot split manifest into more chunks ({num_splits}) than its number of items {num_items}"
        )

    # Spread the remainder over the first parts instead of rounding the chunk
    # size up: rounding up can exhaust the items early and leave the trailing
    # parts empty (e.g. 5 items in 4 splits would give sizes 2, 2, 1, 0).
    base_size, num_larger_parts = divmod(num_items, num_splits)
    split_indices = []
    begin = 0
    for part_idx in range(num_splits):
        end = begin + base_size + (1 if part_idx < num_larger_parts else 0)
        split_indices.append((begin, end))
        begin = end

    def maybe_randomize(items: Iterable[Any]) -> List[Any]:
        """Materialize ``items`` as a list, shuffled in place when ``randomize`` is set."""
        items = list(items)
        if randomize:
            random.shuffle(items)
        return items

    if isinstance(manifest, RecordingSet):
        contents = maybe_randomize(manifest.recordings.items())
        return [
            RecordingSet(recordings=dict(contents[begin:end]))
            for begin, end in split_indices
        ]

    if isinstance(manifest, SupervisionSet):
        contents = maybe_randomize(manifest.segments.items())
        return [
            SupervisionSet(segments=dict(contents[begin:end]))
            for begin, end in split_indices
        ]

    if isinstance(manifest, FeatureSet):
        contents = maybe_randomize(manifest.features)
        # Every part shares the feature extractor of the source manifest.
        return [
            FeatureSet(features=contents[begin:end],
                       feature_extractor=manifest.feature_extractor)
            for begin, end in split_indices
        ]

    if isinstance(manifest, CutSet):
        contents = maybe_randomize(manifest.cuts.items())
        return [
            CutSet(cuts=dict(contents[begin:end]))
            for begin, end in split_indices
        ]

    raise ValueError(f"Unknown type of manifest: {type(manifest)}")
コード例 #10
0
ファイル: test_feature_set.py プロジェクト: zcth428/lhotse
def test_load_features(recording_id: str, channel: int, start: float,
                       duration: float, exception_expectation,
                       expected_num_frames: Optional[float]):
    """
    Load a feature region from the dummy JSON manifest and check its shape.

    ``exception_expectation`` is a context manager wrapping the load and the
    assertions, so parametrized cases may expect the load to raise.
    """
    manifest_path = 'test/fixtures/dummy_feats/feature_manifest.json'
    feature_set = FeatureSet.from_json(manifest_path)
    with exception_expectation:
        loaded = feature_set.load(
            recording_id,
            channel_id=channel,
            start=start,
            duration=duration,
        )
        # The result must be a 2-D matrix ...
        assert len(loaded.shape) == 2
        # ... whose first dimension is the number of frames.
        assert loaded.shape[0] == expected_num_frames
コード例 #11
0
ファイル: cut.py プロジェクト: popcornell/lhotse
def simple(
        feature_manifest: Pathlike,
        output_cut_manifest: Pathlike,
        supervision_manifest: Optional[Pathlike],
):
    """
    Create a CutSet stored in OUTPUT_CUT_MANIFEST that contains the regions and features supplied by FEATURE_MANIFEST.
    Optionally it can use a SUPERVISION_MANIFEST to select the regions and attach the corresponding supervisions to
    the cuts. This is the simplest way to create Cuts.
    """
    feature_set = FeatureSet.from_yaml(feature_manifest)
    if supervision_manifest is not None:
        # Supervisions were given: build the cuts from the supervisions and the features.
        supervision_set = SupervisionSet.from_yaml(supervision_manifest)
        cut_set = make_cuts_from_supervisions(
            feature_set=feature_set,
            supervision_set=supervision_set,
        )
    else:
        # No supervisions: build the cuts from the features alone.
        cut_set = make_cuts_from_features(feature_set)
    cut_set.to_yaml(output_cut_manifest)
コード例 #12
0
ファイル: test_cut.py プロジェクト: underdogliu/lhotse
def dummy_feature_set():
    """Return a FeatureSet with one dummy 'fbank' Features item for recording 'rec1'."""
    single_item = Features(
        recording_id="rec1",
        channels=0,
        start=0,
        duration=10,
        type="fbank",
        num_frames=1000,
        num_features=23,
        frame_shift=0.01,
        sampling_rate=16000,
        storage_type="lilcom_files",
        storage_path="feats",
        storage_key="dummy.llc",
    )
    return FeatureSet.from_features([single_item])
コード例 #13
0
def test_feature_set_prefix_path():
    """Check the storage path produced by ``with_path_prefix("/data")`` for a keyed storage entry."""
    single_item = Features(
        type="fbank",
        num_frames=1000,
        num_features=40,
        frame_shift=0.01,
        sampling_rate=16000,
        storage_type="lilcom",
        storage_path="feats/",
        storage_key="12345.llc",
        start=0,
        duration=10,
    )
    prefixed = FeatureSet.from_features([single_item]).with_path_prefix("/data")
    for item in prefixed:
        # The expected path carries no trailing slash and no storage key.
        assert item.storage_path == "/data/feats"
コード例 #14
0
ファイル: cut.py プロジェクト: popcornell/lhotse
def make_cuts_from_supervisions(supervision_set: SupervisionSet,
                                feature_set: FeatureSet) -> CutSet:
    """
    Utility that converts a SupervisionSet to a CutSet without any adjustment of the segment boundaries.
    It attaches the relevant features from the corresponding FeatureSet.

    :param supervision_set: the supervisions; one cut is created per supervision.
    :param feature_set: the features, queried with ``find`` using each supervision's
        recording ID, channel ID, start and duration.
    :return: a CutSet in which every cut has a freshly generated UUID4 identifier,
        the boundaries of its supervision and that single supervision attached.
    """
    # Iterate over the supervisions directly; no positional index is needed.
    return CutSet.from_cuts(
        Cut(id=str(uuid4()),
            start=supervision.start,
            duration=supervision.duration,
            features=feature_set.find(
                recording_id=supervision.recording_id,
                channel_id=supervision.channel_id,
                start=supervision.start,
                duration=supervision.duration,
            ),
            supervisions=[supervision])
        for supervision in supervision_set)
コード例 #15
0
def test_compute_global_stats():
    """
    Check ``FeatureSet.compute_global_stats`` on the dummy feature manifest.

    Verifies the shape of the returned statistics, their agreement with a
    direct NumPy computation over all frames, and that the file at
    ``storage_path`` unpickles to the same statistics.
    """
    feature_set = FeatureSet.from_json('test/fixtures/dummy_feats/feature_manifest.json')
    with NamedTemporaryFile() as f:
        stats = feature_set.compute_global_stats(storage_path=f.name)
        f.flush()
        # The stats are expected to have been pickled to the temporary file's path.
        read_stats = pickle.load(f)
    # Post-condition 1: feature dim is consistent
    assert stats['norm_means'].shape == (feature_set[0].num_features,)
    assert stats['norm_stds'].shape == (feature_set[0].num_features,)
    # Post-condition 2: the iterative method yields very close results to
    # the "standard" method. All feature matrices are loaded and concatenated
    # once and reused for both reference statistics.
    all_frames = np.concatenate([feats.load() for feats in feature_set])
    true_means = np.mean(all_frames, axis=0)
    true_stds = np.std(all_frames, axis=0)
    np.testing.assert_almost_equal(stats['norm_means'], true_means, decimal=5)
    np.testing.assert_almost_equal(stats['norm_stds'], true_stds, decimal=5)
    # Post-condition 3: the serialization works correctly
    assert (stats['norm_means'] == read_stats['norm_means']).all()
    assert (stats['norm_stds'] == read_stats['norm_stds']).all()
コード例 #16
0
ファイル: test_feature_set.py プロジェクト: popcornell/lhotse
def test_load_features(recording_id: str, channel: int, start: float,
                       duration: float, exception_expectation):
    """
    Load a feature region from the dummy YAML manifest and check its shape.

    ``exception_expectation`` is a context manager wrapping the load and the
    assertions, so parametrized cases may expect the load to raise.
    """
    manifest_path = 'test/fixtures/dummy_feats/feature_manifest.yml'
    feature_set = FeatureSet.from_yaml(manifest_path)
    with exception_expectation:
        loaded = feature_set.load(
            recording_id,
            channel_id=channel,
            start=start,
            duration=duration,
        )
        # The result must be a 2-D matrix: time first, frequency second.
        assert len(loaded.shape) == 2
        num_frames, num_bins = loaded.shape

        extractor = feature_set.feature_extractor
        frame_shift = extractor.spectrogram_config.frame_shift
        if duration is not None:
            # The frame count must equal duration / frame_shift exactly;
            # the frame length is ignored and no tolerance is applied.
            assert duration / frame_shift == num_frames
        # The second dimension must match the configured number of mel bins.
        assert extractor.mfcc_fbank_common_config.num_mel_bins == num_bins
コード例 #17
0
def test_load_features_with_default_arguments():
    """Load 'recording-1' with only the recording ID given and check the matrix shape."""
    manifest_path = "test/fixtures/dummy_feats/feature_manifest.json"
    loaded = FeatureSet.from_json(manifest_path).load("recording-1")
    expected_shape = (50, 23)
    assert loaded.shape == expected_shape
コード例 #18
0
ファイル: test_feature_set.py プロジェクト: popcornell/lhotse
def test_load_features_with_default_arguments():
    """Smoke test: loading 'recording-1' with only the recording ID given must not raise."""
    manifest_path = 'test/fixtures/dummy_feats/feature_manifest.yml'
    manifest = FeatureSet.from_yaml(manifest_path)
    # Nothing is asserted about the result; the test passes if the load succeeds.
    manifest.load('recording-1')
コード例 #19
0
def libri_features_set():
    """Load the FeatureSet stored in the 'libri' fixture manifest."""
    manifest_path = 'test/fixtures/libri/feature_manifest.json.gz'
    return FeatureSet.from_json(manifest_path)
コード例 #20
0
def feature_set():
    """Return a FeatureSet with two items built by the ``features`` helper for 'rec-1' and 'rec-2'."""
    first = features("rec-1", 0.0, 600.0)
    second = features("rec-2", 0.0, 357.0)
    return FeatureSet(features=[first, second])
コード例 #21
0
ファイル: kaldi.py プロジェクト: AmirHussein96/lhotse
def load_kaldi_data_dir(
    path: Pathlike,
    sampling_rate: int,
    frame_shift: Optional[Seconds] = None,
    map_string_to_underscores: Optional[str] = None,
    num_jobs: int = 1,
) -> Tuple[RecordingSet, Optional[SupervisionSet], Optional[FeatureSet]]:
    """
    Load a Kaldi data directory and convert it to Lhotse RecordingSet,
    SupervisionSet and FeatureSet manifests. For this to work, at least the wav.scp
    file must exist.
    SupervisionSet is created only when a segments file exists; in that case the
    text, utt2spk, spk2gender and utt2lang files are also read.
    FeatureSet is created only when a feats.scp file exists, the ``kaldi_native_io``
    module is available and ``frame_shift`` is specified. When feats.scp exists and
    the module is available but ``frame_shift`` is None, a warning is emitted and
    the feature import is omitted.
    Other Kaldi files might not be handled yet.

    :param path: path to the Kaldi data directory; must be an existing directory.
    :param sampling_rate: sampling rate assigned to every created Recording
        and Features item, and used to compute the number of samples.
    :param frame_shift: frame shift of the features in feats.scp; used to compute
        the duration of each Features item from its number of rows.
    :param map_string_to_underscores: optional string, when specified, we will replace
        all instances of this string in SupervisonSegment IDs to underscores.
        This is to help with handling underscores in Kaldi
        (see :func:`.export_to_kaldi`). This is also done for speaker IDs.
    :param num_jobs: number of worker processes used to compute the recording durations.
    :return: a tuple of (RecordingSet, SupervisionSet or None, FeatureSet or None).
    :raises AssertionError: when ``path`` is not a directory.
    """
    path = Path(path)
    assert path.is_dir()

    def fix_id(t: str) -> str:
        """Replace ``map_string_to_underscores`` with "_" in ``t``; a no-op when it is None."""
        if map_string_to_underscores is None:
            return t
        return t.replace(map_string_to_underscores, "_")

    # must exist for RecordingSet
    recordings = load_kaldi_text_mapping(path / "wav.scp", must_exist=True)

    # Compute the duration of every wav.scp entry in a pool of worker processes,
    # then pair the results back with the recording IDs (same iteration order).
    with ProcessPoolExecutor(num_jobs) as ex:
        dur_vals = ex.map(get_duration, recordings.values())
    durations = dict(zip(recordings.keys(), dur_vals))

    # A wav.scp entry ending with "|" is treated as a command (the trailing pipe
    # character is stripped from the source); anything else is treated as a file.
    # Every recording gets a single source with channel 0.
    recording_set = RecordingSet.from_recordings(
        Recording(
            id=recording_id,
            sources=[
                AudioSource(
                    type="command" if path_or_cmd.endswith("|") else "file",
                    channels=[0],
                    source=path_or_cmd[:-1] if path_or_cmd.
                    endswith("|") else path_or_cmd,
                )
            ],
            sampling_rate=sampling_rate,
            num_samples=compute_num_samples(durations[recording_id],
                                            sampling_rate),
            duration=durations[recording_id],
        ) for recording_id, path_or_cmd in recordings.items())

    supervision_set = None
    segments = path / "segments"
    if segments.is_file():
        # Each line is split on whitespace and later unpacked as
        # (segment_id, recording_id, start, end).
        with segments.open() as f:
            supervision_segments = [
                sup_string.strip().split() for sup_string in f
            ]

        # NOTE(review): these files are loaded without must_exist=True and are indexed
        # directly below - presumably the returned mapping supplies a default for
        # missing files/keys; confirm against load_kaldi_text_mapping.
        texts = load_kaldi_text_mapping(path / "text")
        speakers = load_kaldi_text_mapping(path / "utt2spk")
        genders = load_kaldi_text_mapping(path / "spk2gender")
        languages = load_kaldi_text_mapping(path / "utt2lang")

        # The duration is computed as (end - start) via add_durations; segment and
        # speaker IDs go through fix_id, while the lookups into the Kaldi mappings
        # use the original (unfixed) IDs. Gender is looked up by speaker.
        supervision_set = SupervisionSet.from_segments(
            SupervisionSegment(
                id=fix_id(segment_id),
                recording_id=recording_id,
                start=float(start),
                duration=add_durations(
                    float(end), -float(start), sampling_rate=sampling_rate),
                channel=0,
                text=texts[segment_id],
                language=languages[segment_id],
                speaker=fix_id(speakers[segment_id]),
                gender=genders[speakers[segment_id]],
            ) for segment_id, recording_id, start, end in supervision_segments)

    feature_set = None
    feats_scp = path / "feats.scp"
    # Features are imported only when feats.scp exists and kaldi_native_io is available;
    # when the module is unavailable the import is skipped without a warning.
    if feats_scp.exists() and is_module_available("kaldi_native_io"):
        if frame_shift is not None:
            import kaldi_native_io

            from lhotse.features.io import KaldiReader

            # Only the matrix shapes are read here. Each Features item points at
            # feats.scp with the utterance ID as the storage key, starts at 0 and
            # lasts num_rows * frame_shift. The recording ID is taken from the matching
            # supervision when supervisions were loaded, otherwise the utterance ID is used.
            feature_set = FeatureSet.from_features(
                Features(
                    type="kaldi_native_io",
                    num_frames=mat_shape.num_rows,
                    num_features=mat_shape.num_cols,
                    frame_shift=frame_shift,
                    sampling_rate=sampling_rate,
                    start=0,
                    duration=mat_shape.num_rows * frame_shift,
                    storage_type=KaldiReader.name,
                    storage_path=str(feats_scp),
                    storage_key=utt_id,
                    recording_id=supervision_set[fix_id(utt_id)].
                    recording_id if supervision_set is not None else utt_id,
                    channels=0,
                ) for utt_id, mat_shape in kaldi_native_io.
                SequentialMatrixShapeReader(f"scp:{feats_scp}"))
        else:
            warnings.warn("Failed to import Kaldi 'feats.scp' to Lhotse: "
                          "frame_shift must be not None. "
                          "Feature import omitted.")

    return recording_set, supervision_set, feature_set
コード例 #22
0
def feature_set():
    """Return a FeatureSet with two items built by the ``features`` helper for 'rec-1' and 'rec-2'."""
    first = features('rec-1', 0.0, 600.0)
    second = features('rec-2', 0.0, 357.0)
    return FeatureSet(features=[first, second])
コード例 #23
0
def feature_set():
    """Return a FeatureSet with a default FeatureExtractor and two items for 'rec-1' and 'rec-2'."""
    extractor = FeatureExtractor()
    items = [
        features('rec-1', 0.0, 600.0),
        features('rec-2', 0.0, 357.0),
    ]
    return FeatureSet(feature_extractor=extractor, features=items)