def test_get_samples_from(self, timit_like_path, timit_like_datapath, timit_like_gtpath):
    """Test the sampling iterator of WindowedSegmentSampler.

    Three configurations are checked on the first sample yielded for the
    "train" set: padded raw audio alone, padded raw audio combined with
    MFCC, and raw audio without padding.
    """
    gt_getter = TimitGroundTruth(timit_like_path, timit_like_datapath, timit_like_gtpath)
    audio_extractor = WindowedAudio(1024, 512, 16000, normalize=True, padding=True)
    mfcc_normalized = WindowedMFCC(1024, 512, 16000, n_mfcc=32, normalize=True)

    # --- Padded raw audio only -------------------------------------------
    # NOTE(review): 10000 is presumably the segment size in samples; confirm
    # against WindowedSegmentSampler's signature.
    sampler = WindowedSegmentSampler([audio_extractor], gt_getter, 10000)
    iterator = sampler.get_samples_from("train")
    sample, ground_truth = next(iterator)
    # The ground-truth entries are fractional, so compare the sum with a
    # tolerance instead of exact float equality.
    assert np.isclose(np.sum(ground_truth), 1)
    assert sample.shape == (1, 18, 1024)
    assert np.isclose(ground_truth[gt_getter.get_index_from("h#")], .97, atol=0.01)
    assert np.isclose(ground_truth[gt_getter.get_index_from("p")], .03, atol=0.01)

    # --- Padded raw audio + MFCC -----------------------------------------
    sampler = WindowedSegmentSampler([audio_extractor, mfcc_normalized], gt_getter, 10000)
    iterator = sampler.get_samples_from("train")
    sample, ground_truth = next(iterator)
    # Last axis is expected to be 1056 = 1024 (window size) + 32 (n_mfcc).
    assert sample.shape == (1, 18, 1056)

    # Exhaust the iterator: iterating to the end must not raise.
    for _ in iterator:
        pass

    # --- Raw audio without padding ---------------------------------------
    audio_extractor_no_pad = WindowedAudio(1024, 512, 16000, normalize=True, padding=False)
    sampler = WindowedSegmentSampler([audio_extractor_no_pad], gt_getter, 10000)
    iterator = sampler.get_samples_from("train")
    sample, ground_truth = next(iterator)
    assert np.isclose(np.sum(ground_truth), 1)
    assert sample.shape == (1, 18, 1024)
    assert np.isclose(ground_truth[gt_getter.get_index_from("h#")], 0.93, atol=0.01)
    assert np.isclose(ground_truth[gt_getter.get_index_from("p")], 0.05, atol=0.01)
    assert np.isclose(ground_truth[gt_getter.get_index_from("iy")], 0.02, atol=0.01)
def test_get_samples_from(self, timit_like_path, timit_like_datapath, timit_like_gtpath):
    """Check the first sample yielded by FileSampler for the "train" set.

    Exercises padded raw audio alone, padded raw audio together with MFCC,
    and raw audio without padding.
    """
    truth_source = TimitGroundTruth(timit_like_path, timit_like_datapath, timit_like_gtpath)
    padded_audio = WindowedAudio(1024, 512, 16000, normalize=True, padding=True)
    mfcc = WindowedMFCC(1024, 512, 16000, n_mfcc=32, normalize=True)

    # Scenario 1: padded raw audio only.
    audio_sampler = FileSampler([padded_audio], truth_source)
    audio_iter = audio_sampler.get_samples_from("train")
    features, target = next(audio_iter)
    assert np.isclose(np.sum(target), 0.99, atol=0.01)
    assert features.shape == (1, 130, 1024)

    # Scenario 2: padded raw audio and MFCC side by side.
    combined_sampler = FileSampler([padded_audio, mfcc], truth_source)
    combined_iter = combined_sampler.get_samples_from("train")
    features, target = next(combined_iter)
    assert features.shape == (1, 130, 1056)

    # Drain whatever is left; reaching the end must not raise.
    for _remaining in combined_iter:
        continue

    # Scenario 3: raw audio without padding.
    unpadded_audio = WindowedAudio(1024, 512, 16000, normalize=True, padding=False)
    unpadded_sampler = FileSampler([unpadded_audio], truth_source, 4000)
    unpadded_iter = unpadded_sampler.get_samples_from("train")
    features, target = next(unpadded_iter)
    assert np.isclose(np.sum(target), 0.99, atol=0.01)
    assert features.shape == (1, 128, 1024)
def test_init(self, timit_like_path, timit_like_datapath, timit_like_gtpath):
    """Ensure FileSampler can be built with one or with two feature extractors."""
    truth_source = TimitGroundTruth(timit_like_path, timit_like_datapath, timit_like_gtpath)
    raw_audio = WindowedAudio(1024, 512, 16000, normalize=True, padding=True)
    mfcc = WindowedMFCC(1024, 512, 16000, n_mfcc=32, normalize=True)

    # Extractor combinations to instantiate, in the order they are tried.
    extractor_sets = ([raw_audio], [raw_audio, mfcc])
    try:
        for extractors in extractor_sets:
            FileSampler(extractors, truth_source)
    except Exception as exception:
        pytest.fail(f"Unexpected error: {exception}")
def test_get_output_description(self, timit_like_path, timit_like_datapath, timit_like_gtpath):
    """Verify the output description of a sampler combining audio and MFCC."""
    truth_source = TimitGroundTruth(timit_like_path, timit_like_datapath, timit_like_gtpath)
    raw_audio = WindowedAudio(1024, 512, 16000, normalize=True, padding=True)
    mfcc = WindowedMFCC(1024, 512, 16000, n_mfcc=32, normalize=True)
    sampler = WindowedSegmentSampler([raw_audio, mfcc], truth_source, 1536)

    # Each extractor name maps to the indices it is expected to occupy:
    # 0..1023 for the raw audio, 1024..1055 for the MFCC.
    sample_layout = {
        "WindowedAudio": list(range(1024)),
        "WindowedMFCC": list(range(1024, 1056)),
    }
    expected_description = {"samples": sample_layout, "ground_truth": PHON}

    assert sampler.get_output_description() == expected_description
def test_fill_cpu_ram(self, timit_like_path, timit_like_datapath, timit_like_gtpath):
    """Smoke-test get_dataloader_fixed_size for the "train" and "test" sets."""
    truth_source = TimitGroundTruth(timit_like_path, timit_like_datapath, timit_like_gtpath)
    raw_audio = WindowedAudio(1024, 512, 16000, normalize=True, padding=True)
    mfcc = WindowedMFCC(1024, 512, 16000, n_mfcc=32, normalize=True)
    sampler = WindowedSegmentSampler([raw_audio, mfcc], truth_source, 8000)

    # (second positional argument, set name) pairs, tried in this order.
    requests = ((32, "train"), (16, "test"))
    try:
        for size, subset in requests:
            get_dataloader_fixed_size(sampler, size, subset)
    except Exception as exception:
        pytest.fail(f"Unexpected error: {exception}")
def test_windowed(self, testing_audio):
    """Exercise WindowedMFCC with overlapping and non-overlapping windows."""
    signal, samplerate = testing_audio

    def expected_frames(hop):
        # Same arithmetic as the original assertions, with the hop factored out.
        return 1 + int((len(signal) - 1024 + 1024) / hop)

    try:
        mfcc_half_hop = WindowedMFCC(
            1024, 512, 16000, n_mfcc=32, normalize=True
        ).process(signal, samplerate)
        mfcc_full_hop = WindowedMFCC(
            1024, 1024, 16000, n_mfcc=32, normalize=True
        ).process(signal, samplerate)
    except Exception as exception:
        pytest.fail(f"Unexpected error: {exception}")

    # Normalized output must stay within [-1, 1].
    assert mfcc_half_hop.max() <= 1
    assert mfcc_half_hop.min() >= -1

    # Hop of 512: 32 coefficients per frame, frame count from the formula.
    assert mfcc_half_hop.shape[-1] == 32
    assert mfcc_half_hop.shape[-2] == expected_frames(512)

    # Hop of 1024 (no overlap): same checks.
    assert mfcc_full_hop.shape[-1] == 32
    assert mfcc_full_hop.shape[-2] == expected_frames(1024)
# `join` and `Path` are used below to locate the dataset; import them here so
# the script does not depend on them being brought into scope elsewhere.
from os.path import join
from pathlib import Path

from audio_loader.features.raw_audio import WindowedAudio
from audio_loader.features.mfcc import WindowedMFCC
from audio_loader.ground_truth.timit import TimitGroundTruth
from audio_loader.samplers.windowed_segments import WindowedSegmentSampler
from audio_loader.dl_frontends.pytorch.fill_ram import get_dataloader_fixed_size

# Load the TIMIT ground truth from a fixed location under the user's home
# directory.
timit_gt = TimitGroundTruth(
    join(Path.home(), "data/darpa-timit-acousticphonetic-continuous-speech"))
print("groundtruth loaded")

# Raw-audio pipeline: build the sampler, then one dataloader for "train" and
# one for "test".
raw_feature_processor = WindowedAudio(1024, 512, 16000)
raw_sampler = WindowedSegmentSampler([raw_feature_processor], timit_gt, 8000, overlap=0.5)
raw_train_dataloader = get_dataloader_fixed_size(raw_sampler, 32, "train")
raw_test_dataloader = get_dataloader_fixed_size(raw_sampler, 32, "test")
print("raw audio done")

# MFCC pipeline: same sampler settings and dataloader calls as above.
# NOTE(review): the 4th positional argument (20) is presumably n_mfcc, which
# the tests pass by keyword; confirm against WindowedMFCC's signature.
mfcc_feature_processor = WindowedMFCC(1024, 512, 16000, 20)
mfcc_sampler = WindowedSegmentSampler([mfcc_feature_processor], timit_gt, 8000, overlap=0.5)
mfcc_train_dataloader = get_dataloader_fixed_size(mfcc_sampler, 32, "train")
mfcc_test_dataloader = get_dataloader_fixed_size(mfcc_sampler, 32, "test")
print("mfcc done")