예제 #1
0
    def test_get_samples_from(self, timit_like_path, timit_like_datapath, timit_like_gtpath):
        """Test the sampling iterator of ``WindowedSegmentSampler``.

        Three configurations are exercised on the "train" split:

        * padded raw audio only,
        * padded raw audio combined with MFCC features,
        * raw audio without padding.

        For each one the first yielded ``(sample, ground_truth)`` pair is
        checked against the values expected for the TIMIT-like fixture data.
        """
        gt_getter = TimitGroundTruth(timit_like_path, timit_like_datapath, timit_like_gtpath)
        audio_extractor = WindowedAudio(1024, 512, 16000, normalize=True, padding=True)
        mfcc_extractor = WindowedMFCC(1024, 512, 16000, n_mfcc=32, normalize=True)

        # Padded raw audio only.
        sampler = WindowedSegmentSampler([audio_extractor], gt_getter, 10000)
        iterator = sampler.get_samples_from("train")
        sample, ground_truth = next(iterator)
        # The ground truth holds fractional values, so compare the sum with a
        # tolerance rather than with exact float equality.
        assert np.isclose(np.sum(ground_truth), 1)
        assert sample.shape == (1, 18, 1024)
        assert np.isclose(ground_truth[gt_getter.get_index_from("h#")], .97, atol=0.01)
        assert np.isclose(ground_truth[gt_getter.get_index_from("p")], .03, atol=0.01)

        # Raw audio + MFCC: the last dimension is expected to grow from 1024
        # to 1056 (1024 + the 32 requested MFCC coefficients).
        sampler = WindowedSegmentSampler([audio_extractor, mfcc_extractor], gt_getter, 10000)
        iterator = sampler.get_samples_from("train")
        sample, ground_truth = next(iterator)
        assert sample.shape == (1, 18, 1056)

        # Exhaust the iterator: running it to completion must not raise.
        for _ in iterator:
            pass

        # Raw audio without padding: the expected label proportions differ
        # from the padded case above.
        audio_extractor_no_pad = WindowedAudio(1024, 512, 16000, normalize=True, padding=False)
        sampler = WindowedSegmentSampler([audio_extractor_no_pad], gt_getter, 10000)
        iterator = sampler.get_samples_from("train")
        sample, ground_truth = next(iterator)
        assert np.isclose(np.sum(ground_truth), 1)
        assert sample.shape == (1, 18, 1024)
        assert np.isclose(ground_truth[gt_getter.get_index_from("h#")], 0.93, atol=0.01)
        assert np.isclose(ground_truth[gt_getter.get_index_from("p")], 0.05, atol=0.01)
        assert np.isclose(ground_truth[gt_getter.get_index_from("iy")], 0.02, atol=0.01)
예제 #2
0
    def test_get_samples_from(self, timit_like_path, timit_like_datapath, timit_like_gtpath):
        """Check the first sample yielded by ``FileSampler`` for several extractor setups."""
        truth_source = TimitGroundTruth(timit_like_path, timit_like_datapath, timit_like_gtpath)
        padded_audio = WindowedAudio(1024, 512, 16000, normalize=True, padding=True)
        mfcc = WindowedMFCC(1024, 512, 16000, n_mfcc=32, normalize=True)

        # Padded raw audio only.
        file_sampler = FileSampler([padded_audio], truth_source)
        stream = file_sampler.get_samples_from("train")
        features, labels = next(stream)
        label_total = np.sum(labels)
        assert np.isclose(label_total, 0.99, atol=0.01)
        assert features.shape == (1, 130, 1024)

        # Raw audio together with MFCC features.
        file_sampler = FileSampler([padded_audio, mfcc], truth_source)
        stream = file_sampler.get_samples_from("train")
        features, labels = next(stream)
        assert features.shape == (1, 130, 1056)

        # Drain whatever is left in the iterator.
        for _ in stream:
            pass

        # Raw audio without padding.
        unpadded_audio = WindowedAudio(1024, 512, 16000, normalize=True, padding=False)
        file_sampler = FileSampler([unpadded_audio], truth_source, 4000)
        stream = file_sampler.get_samples_from("train")
        features, labels = next(stream)
        label_total = np.sum(labels)
        assert np.isclose(label_total, 0.99, atol=0.01)
        assert features.shape == (1, 128, 1024)
예제 #3
0
 def test_init(self, timit_like_path, timit_like_datapath, timit_like_gtpath):
     """Check that ``FileSampler`` can be built with one or two feature extractors."""
     truth_source = TimitGroundTruth(timit_like_path, timit_like_datapath, timit_like_gtpath)
     raw_audio = WindowedAudio(1024, 512, 16000, normalize=True, padding=True)
     mfcc = WindowedMFCC(1024, 512, 16000, n_mfcc=32, normalize=True)
     extractor_sets = ([raw_audio], [raw_audio, mfcc])
     try:
         # Construction alone is the behavior under test.
         for extractors in extractor_sets:
             FileSampler(extractors, truth_source)
     except Exception as exception:
         pytest.fail(f"Unexpected  error: {exception}")
예제 #4
0
    def test_get_output_description(self, timit_like_path, timit_like_datapath, timit_like_gtpath):
        """Check the mapping returned by the sampler's ``get_output_description``."""
        truth_source = TimitGroundTruth(timit_like_path, timit_like_datapath, timit_like_gtpath)
        extractors = [
            WindowedAudio(1024, 512, 16000, normalize=True, padding=True),
            WindowedMFCC(1024, 512, 16000, n_mfcc=32, normalize=True),
        ]
        segment_sampler = WindowedSegmentSampler(extractors, truth_source, 1536)

        # Expected index lists: 0..1023 for the audio, 1024..1055 for the MFCC.
        audio_columns = list(range(1024))
        mfcc_columns = list(range(1024, 1056))
        expected_samples = {"WindowedAudio": audio_columns, "WindowedMFCC": mfcc_columns}
        expected_description = {"samples": expected_samples, "ground_truth": PHON}

        assert segment_sampler.get_output_description() == expected_description
예제 #5
0
 def test_fill_cpu_ram(self, timit_like_path, timit_like_datapath, timit_like_gtpath):
     """Test the fill CPU RAM functionality."""
     truth_source = TimitGroundTruth(timit_like_path, timit_like_datapath, timit_like_gtpath)
     raw_audio = WindowedAudio(1024, 512, 16000, normalize=True, padding=True)
     mfcc = WindowedMFCC(1024, 512, 16000, n_mfcc=32, normalize=True)
     segment_sampler = WindowedSegmentSampler([raw_audio, mfcc], truth_source, 8000)
     # (batch size, split) pairs to build, in order.
     loader_requests = ((32, "train"), (16, "test"))
     try:
         for batch_size, split in loader_requests:
             get_dataloader_fixed_size(segment_sampler, batch_size, split)
     except Exception as exception:
         pytest.fail(f"Unexpected  error: {exception}")
예제 #6
0
    def test_windowed(self, testing_audio):
        """Check value range and output shape of ``WindowedMFCC`` with and without overlap."""
        signal, samplerate = testing_audio
        try:
            # Hop of 512: consecutive 1024-sample windows overlap by half.
            half_overlap = WindowedMFCC(1024, 512, 16000, n_mfcc=32, normalize=True)
            mfcc_half_overlap = half_overlap.process(signal, samplerate)
            # Hop of 1024: windows do not overlap.
            no_overlap = WindowedMFCC(1024, 1024, 16000, n_mfcc=32, normalize=True)
            mfcc_without_overlap = no_overlap.process(signal, samplerate)

        except Exception as exception:
            pytest.fail(f"Unexpected  error: {exception}")

        n_samples = len(signal)
        frames_half_overlap = 1 + int((n_samples - 1024 + 1024) / 512)
        frames_without_overlap = 1 + int((n_samples - 1024 + 1024) / 1024)

        # Normalized output must stay within [-1, 1].
        assert mfcc_half_overlap.max() <= 1
        assert mfcc_half_overlap.min() >= -1
        assert mfcc_half_overlap.shape[-1] == 32
        assert mfcc_half_overlap.shape[-2] == frames_half_overlap
        assert mfcc_without_overlap.shape[-1] == 32
        assert mfcc_without_overlap.shape[-2] == frames_without_overlap
예제 #7
0
from audio_loader.features.raw_audio import WindowedAudio
from audio_loader.features.mfcc import WindowedMFCC
from audio_loader.ground_truth.timit import TimitGroundTruth
from audio_loader.samplers.windowed_segments import WindowedSegmentSampler
from audio_loader.dl_frontends.pytorch.fill_ram import get_dataloader_fixed_size

def _build_sampler_and_loaders(feature_processor):
    """Build a segment sampler for one feature processor and its two dataloaders.

    Returns the sampler, the "train" dataloader and the "test" dataloader,
    in that order.
    """
    sampler = WindowedSegmentSampler([feature_processor], timit_gt, 8000, overlap=0.5)
    train_loader = get_dataloader_fixed_size(sampler, 32, "train")
    test_loader = get_dataloader_fixed_size(sampler, 32, "test")
    return sampler, train_loader, test_loader


timit_gt = TimitGroundTruth(
    join(Path.home(), "data/darpa-timit-acousticphonetic-continuous-speech"))
print("groundtruth loaded")

# Raw audio windows.
raw_feature_processor = WindowedAudio(1024, 512, 16000)
raw_sampler, raw_train_dataloader, raw_test_dataloader = _build_sampler_and_loaders(
    raw_feature_processor)

print("raw audio done")

# MFCC features computed on the same windowing.
mfcc_feature_processor = WindowedMFCC(1024, 512, 16000, 20)
mfcc_sampler, mfcc_train_dataloader, mfcc_test_dataloader = _build_sampler_and_loaders(
    mfcc_feature_processor)

print("mfcc done")