def test_extract_and_store_features_from_cut_set(cut_set, executor, mix_eagerly):
    """Check ``compute_and_store_features`` on a whole cut set.

    The test computes Fbank features for every cut in ``cut_set``, writes them
    through a ``LilcomFilesWriter`` rooted in a temporary directory, and then
    verifies that:

    * the resulting cut set still holds two cuts,
    * each cut keeps its ID and gains features,
    * the recording is kept unless a ``MixedCut`` was mixed eagerly,
    * the stored feature matrices have the expected number of frames and a
      width matching ``extractor.feature_dim`` for that cut's sampling rate.

    :param cut_set: fixture providing the cuts to process.
    :param executor: executor factory, or ``None`` to use ``no_executor()``.
    :param mix_eagerly: forwarded to ``compute_and_store_features``.
    """
    extractor = Fbank()
    with TemporaryDirectory() as tmpdir, LilcomFilesWriter(tmpdir) as storage:
        # ``executor`` is a factory; when it is None we fall back to
        # ``no_executor()`` (presumably a no-op stand-in -- defined elsewhere).
        with executor() if executor is not None else no_executor() as ex:
            cut_set_with_feats = cut_set.compute_and_store_features(
                extractor=extractor,
                storage=storage,
                mix_eagerly=mix_eagerly,
                executor=ex,
            )

        # The same number of cuts
        assert len(cut_set_with_feats) == 2

        for orig_cut, feat_cut in zip(cut_set, cut_set_with_feats):
            # The ID is retained
            assert orig_cut.id == feat_cut.id
            # Features were attached
            assert feat_cut.has_features
            # Recording is retained unless mixing a MixedCut eagerly
            should_have_recording = not (
                mix_eagerly and isinstance(orig_cut, MixedCut)
            )
            assert feat_cut.has_recording == should_have_recording

        cuts = list(cut_set_with_feats)

        # Expected frame counts, in the order the cuts come back.
        # Each cut's feature width is validated against its OWN sampling rate
        # (previously the second cut was checked against the first cut's rate).
        for feat_cut, expected_num_frames in zip(cuts, (100, 300)):
            arr = feat_cut.load_features()
            assert arr.shape[0] == expected_num_frames
            assert arr.shape[1] == extractor.feature_dim(feat_cut.sampling_rate)
def test_extract_and_store_features_from_cut_set(
    cut_set, executor, num_jobs, storage_type, mix_eagerly
):
    """Check ``compute_and_store_features`` on a whole cut set.

    The test computes Fbank features for every cut in ``cut_set``, storing
    them under a temporary directory with the requested ``storage_type``, and
    then verifies that:

    * the resulting cut set still holds two cuts,
    * each cut keeps its ID and gains features,
    * the recording is kept unless a ``MixedCut`` was mixed eagerly,
    * the stored feature matrices have the expected number of frames and a
      width matching ``extractor.feature_dim`` for that cut's sampling rate.

    :param cut_set: fixture providing the cuts to process.
    :param executor: executor factory, or a falsy value to pass ``None``.
    :param num_jobs: forwarded to ``compute_and_store_features``.
    :param storage_type: forwarded to ``compute_and_store_features``.
    :param mix_eagerly: forwarded to ``compute_and_store_features``.
    """
    extractor = Fbank()
    with TemporaryDirectory() as tmpdir:
        cut_set_with_feats = cut_set.compute_and_store_features(
            extractor=extractor,
            storage_path=tmpdir,
            num_jobs=num_jobs,
            mix_eagerly=mix_eagerly,
            executor=executor() if executor else None,
            storage_type=storage_type,
        ).sort_by_duration()  # sort by duration to ensure the same order of cuts

        # The same number of cuts
        assert len(cut_set_with_feats) == 2

        for orig_cut, feat_cut in zip(cut_set, cut_set_with_feats):
            # The ID is retained
            assert orig_cut.id == feat_cut.id
            # Features were attached
            assert feat_cut.has_features
            # Recording is retained unless mixing a MixedCut eagerly
            should_have_recording = not (
                mix_eagerly and isinstance(orig_cut, MixedCut)
            )
            assert feat_cut.has_recording == should_have_recording

        cuts = list(cut_set_with_feats)

        # Expected frame counts, in the order produced by ``sort_by_duration``.
        # Each cut's feature width is validated against its OWN sampling rate
        # (previously the second cut was checked against the first cut's rate).
        for feat_cut, expected_num_frames in zip(cuts, (300, 100)):
            arr = feat_cut.load_features()
            assert arr.shape[0] == expected_num_frames
            assert arr.shape[1] == extractor.feature_dim(feat_cut.sampling_rate)
def test_extract_and_store_features(cut):
    """Compute Fbank features for a single cut, store them, and load them back.

    The features are written under a temporary directory (``output_dir``);
    the loaded matrix must have 100 rows and a width equal to
    ``feature_dim`` for the cut's sampling rate.
    """
    fbank = Fbank()
    with TemporaryDirectory() as workdir:
        featured_cut = cut.compute_and_store_features(
            extractor=fbank,
            output_dir=workdir,
        )
        feats = featured_cut.load_features()

        # Row count of the stored feature matrix
        num_rows = feats.shape[0]
        assert num_rows == 100

        # Width must agree with what the extractor reports
        num_cols = feats.shape[1]
        assert num_cols == fbank.feature_dim(cut.sampling_rate)
def test_extract_and_store_features(cut):
    """Compute Fbank features for a single cut, store them, and load them back.

    The extractor is configured with ``sampling_rate=8000`` and the features
    are written through a ``LilcomFilesWriter`` rooted in a temporary
    directory; the loaded matrix must have 100 rows and a width equal to
    ``feature_dim`` for the cut's sampling rate.
    """
    fbank_config = FbankConfig(sampling_rate=8000)
    fbank = Fbank(fbank_config)
    with TemporaryDirectory() as workdir, LilcomFilesWriter(workdir) as writer:
        featured_cut = cut.compute_and_store_features(
            extractor=fbank,
            storage=writer,
        )
        feats = featured_cut.load_features()

        # Row count of the stored feature matrix
        num_rows = feats.shape[0]
        assert num_rows == 100

        # Width must agree with what the extractor reports
        num_cols = feats.shape[1]
        assert num_cols == fbank.feature_dim(cut.sampling_rate)
def test_extract_and_store_features_from_mixed_cut(cut, mix_eagerly):
    """Compute and store Fbank features for a cut appended to itself.

    ``cut.append(cut)`` builds the mixed cut; features are extracted with an
    extractor configured for ``sampling_rate=8000`` and written through a
    ``LilcomFilesWriter`` in a temporary directory. The loaded matrix must
    have 200 rows and a width equal to ``feature_dim`` for the mixed cut's
    sampling rate.
    """
    doubled_cut = cut.append(cut)
    fbank_config = FbankConfig(sampling_rate=8000)
    fbank = Fbank(fbank_config)
    with TemporaryDirectory() as workdir, LilcomFilesWriter(workdir) as writer:
        featured_cut = doubled_cut.compute_and_store_features(
            extractor=fbank,
            storage=writer,
            mix_eagerly=mix_eagerly,
        )
        feats = featured_cut.load_features()

        # Row count of the stored feature matrix
        num_rows = feats.shape[0]
        assert num_rows == 200

        # Width must agree with what the extractor reports
        num_cols = feats.shape[1]
        assert num_cols == fbank.feature_dim(doubled_cut.sampling_rate)
def test_extract_and_store_features_from_mixed_cut(cut, mix_eagerly):
    """Compute and store Fbank features for a cut appended to itself.

    ``cut.append(cut)`` builds the mixed cut; features are written under a
    temporary directory (``output_dir``). The loaded matrix must have 200
    rows and a width equal to ``feature_dim`` for the mixed cut's sampling
    rate.
    """
    doubled_cut = cut.append(cut)
    fbank = Fbank()
    with TemporaryDirectory() as workdir:
        featured_cut = doubled_cut.compute_and_store_features(
            extractor=fbank,
            output_dir=workdir,
            mix_eagerly=mix_eagerly,
        )
        feats = featured_cut.load_features()

        # Row count of the stored feature matrix
        num_rows = feats.shape[0]
        assert num_rows == 200

        # Width must agree with what the extractor reports
        num_cols = feats.shape[1]
        assert num_cols == fbank.feature_dim(doubled_cut.sampling_rate)
def test_extract_features(cut):
    """Compute Fbank features for a single cut in memory (nothing is stored).

    The extractor is configured with ``sampling_rate=8000``; the returned
    matrix must have 100 rows and a width equal to ``feature_dim`` for the
    cut's sampling rate.
    """
    fbank_config = FbankConfig(sampling_rate=8000)
    fbank = Fbank(fbank_config)

    feats = cut.compute_features(extractor=fbank)

    # Row count of the computed feature matrix
    num_rows = feats.shape[0]
    assert num_rows == 100

    # Width must agree with what the extractor reports
    num_cols = feats.shape[1]
    assert num_cols == fbank.feature_dim(cut.sampling_rate)