def test_load_labels_from_txt(self):
     """Load TXT labels onto an AIFF data file and check the resulting labels.

     After ``load_labels`` the labeled data must contain both label values
     ``0`` and ``1``, and ``"whale"`` must appear in ``df.name_label``.
     """
     filename_data, filename_labels = get_labeled_txt()
     df = AudioDataFile().load(filename_data, formatter=AIFFormatter())
     df.load_labels(filename_labels,
                    labels_formatter=TXTLabelsFormatter(),
                    label="whale")
     # ``[0, 1] in array`` is evaluated by NumPy as ``(array == [0, 1]).any()``,
     # which is true as soon as a single position matches.  Compare as sets so
     # that the presence of *both* label values is actually verified.
     unique_labels = set(df.get_labeled_data().labels.unique())
     assert {0, 1} <= unique_labels
     assert "whale" in df.name_label
 def test_concatenate_data_files(self):
     """Check the duration of five concatenated audio files.

     The ``duration.seconds`` of the concatenated file must be at least the
     sum of the ``duration.seconds`` of the individual files.
     """
     parts = [AudioDataFile().load(path, formatter=AIFFormatter())
              for path in get_5_file_names()]
     combined = AudioDataFile().concatenate(parts)
     combined_seconds = combined.duration.seconds
     parts_seconds = sum(part.duration.seconds for part in parts)
     assert combined_seconds >= parts_seconds
 def test_copy_dataframe(self):
     """Build an ``AudioDataFile`` from another one and compare label metadata."""
     data_path, labels_path = get_labeled_txt()
     source = AudioDataFile().load(data_path, formatter=AIFFormatter())
     source.load_labels(
         labels_path,
         labels_formatter=TXTLabelsFormatter(),
         label="whale",
     )
     clone = AudioDataFile(source)
     # The copy must carry the same ``metadata["labels"]`` as its source.
     np.testing.assert_equal(
         clone.metadata["labels"],
         source.metadata["labels"],
     )
Beispiel #4
0
 def test_add_remove_datafile(self):
     """Add a data file to a ``OneDataFileOut`` and remove it again."""
     path = get_file_name()
     audio = AudioDataFile()
     audio.load(path, formatter=AIFFormatter())

     splitter = OneDataFileOut()

     splitter.add_data_file(audio)
     assert len(splitter.datafiles) == 1

     splitter.remove_data_file(audio)
     assert len(splitter.datafiles) == 0
    def test_add_windows(self):
        """Exercise ``add_window`` on a labeled audio file.

        Checks the window counter before and after adding windows, that an
        invalid window raises ``AttributeError``, and the string
        representation once two windows exist.
        """
        data_path, labels_path = get_labeled_txt()
        audio = AudioDataFile().load(data_path, formatter=AIFFormatter())
        audio.load_labels(
            labels_path,
            labels_formatter=TXTLabelsFormatter(),
            label="whale",
        )
        first_start = audio.data.index[0]
        first_end = audio.data.index[5]
        second_start = audio.metadata["labels"][0][0]
        second_end = second_start + 4

        # Before any window is added the counter is zero and window 0 is
        # compared against the whole data.
        assert audio.parameters["number_of_windows"] == 0
        whole = audio.get_window(0)
        assert all(whole == audio.data)

        # An incorrect window must be rejected.
        with pytest.raises(AttributeError):
            audio.add_window(first_start + 100, first_end - 1)

        # Two valid windows.
        audio.add_window(first_start, first_end)  # Length 6
        audio.add_window(second_start, second_end)  # Length 4
        assert audio.parameters["number_of_windows"] == 2
        assert str(audio) == "AudioDataFile (2 windows)"
def test_sliding_windows():
    """Run ``SlidingWindows`` on a loaded audio file and check its description."""
    path = get_file_name()

    slider = SlidingWindows()
    slider.parameters["sliding_window_width"] = "13s"
    slider.parameters["overlap"] = 0.12

    audio = AudioDataFile()
    audio.load(path, formatter=AIFFormatter())
    slider.parameters["data"] = audio

    windowed = slider.transform()
    # Only checks that building the windows data frame does not raise.
    windowed.get_windows_data_frame()
    assert slider.description != ""
 def test_load(self):
     """Check an ``AudioDataFile`` before and after loading an AIFF file.

     Before loading, the time-related attributes must raise ``RuntimeError``;
     after loading, the duration is positive and appears in ``str(df)``.
     """
     path = get_file_name()
     audio = AudioDataFile()
     # No attribute data
     assert str(audio) == "AudioDataFile"
     for attribute in ("duration", "start_time", "end_time"):
         with pytest.raises(RuntimeError):
             getattr(audio, attribute)
     audio.load(path, formatter=AIFFormatter())
     assert audio.duration.seconds > 0
     assert str(audio) == "AudioDataFile (0 days 00:14:59.999500)"
Beispiel #8
0
 def get_validation(self):
     """Yield the validation data file for each iteration.

     Generator.  Produces nothing when ``self.parameters["validation"]`` is
     falsy.  Otherwise, for every index ``i`` in ``0 .. len(datafiles) - 2``
     followed by ``-1``, element ``i + 1`` is popped from a copy of
     ``self.datafiles`` and wrapped through ``concatenate``.
     """
     if self.parameters["validation"]:
         positions = [*range(len(self.datafiles) - 1), -1]
         for position in positions:
             remaining = self.datafiles.copy()
             held_out = remaining.pop(position + 1)
             yield AudioDataFile().__class__().concatenate([held_out])
Beispiel #9
0
 def get_testing(self):
     """Yield the testing data file for each iteration.

     Generator.  Produces nothing when ``self.parameters["testing"]`` is
     falsy.  Otherwise, for every index of ``self.datafiles``, the element at
     that index is popped from a copy of the list and wrapped through
     ``concatenate``.
     """
     if self.parameters["testing"]:
         total = len(self.datafiles)
         for position in range(total):
             remaining = self.datafiles.copy()
             held_out = remaining.pop(position)
             yield AudioDataFile().__class__().concatenate([held_out])
def test_scale():
    """Run ``Scale`` on a loaded audio file and check the result's class."""
    path = get_file_name()
    audio = AudioDataFile().load(path, formatter=AIFFormatter())

    scaler = Scale()
    scaler.parameters["data"] = audio
    scaled = scaler.transform()

    assert scaler.description != ""
    # The transform must hand back an object of exactly the input's class.
    assert type(scaled) is audio.__class__
 def test_save(self):
     """Round-trip an audio file through HDF5 and check it can be reloaded.

     The file is written inside a temporary directory so the test does not
     leave a ``tmp.h5`` artifact in the current working directory.
     """
     import os
     import tempfile

     filename = get_file_name()
     df = AudioDataFile()
     df.load(filename, formatter=AIFFormatter())
     with tempfile.TemporaryDirectory() as tmp_dir:
         target = os.path.join(tmp_dir, "tmp.h5")
         df.save(target, formatter=HDF5Formatter())
         df2 = AudioDataFile().load(target, formatter=HDF5Formatter())
         # Assert while the file still exists, in case loading is lazy.
         assert df2.duration.seconds > 0
Beispiel #12
0
def test_identity():
    """Check that ``Identity`` returns data numerically close to its input."""
    path = get_file_name()
    audio = AudioDataFile().load(path, formatter=AIFFormatter())

    feature = Identity()
    feature.parameters["data"] = audio
    transformed = feature.transform()

    np.testing.assert_allclose(
        audio.data.values.ravel(),
        transformed.data.values.ravel(),
    )
    assert feature.description != ""
    assert transformed.data.values.ndim == 2
Beispiel #13
0
 def get_training(self):
     """Yield the concatenated training data for each iteration.

     Generator.  Produces nothing when ``self.parameters["training"]`` is
     falsy.  Otherwise, for every index ``i`` in ``0 .. len(datafiles) - 2``
     followed by ``-1``, a copy of ``self.datafiles`` is reduced by the
     validation file (index ``i + 1``) and the testing file (index ``i``),
     each only when the corresponding parameter is truthy, and the remainder
     is concatenated.
     """
     if not self.parameters["training"]:
         return
     positions = list(range(len(self.datafiles) - 1))
     positions.append(-1)
     for position in positions:
         pool = self.datafiles.copy()
         if self.parameters["validation"]:
             # position + 1 is the validation file
             pool.pop(position + 1)
         if self.parameters["testing"]:
             # position is the testing file
             pool.pop(position)
         yield AudioDataFile().concatenate(pool)
Beispiel #14
0
def test_skewness():
    """Run ``Skewness`` on mean-centred audio and check the output shape."""
    path = get_file_name()
    audio = AudioDataFile().load(path, formatter=AIFFormatter())
    # Centre the samples around zero before extracting the feature.
    audio.data -= audio.data.mean()

    feature = Skewness()
    feature.parameters["data"] = audio
    transformed = feature.transform()

    assert transformed.data.values.shape[1] == 1
    assert feature.description != ""
    assert transformed.data.values.ndim == 2
Beispiel #15
0
def test_spectral_frames():
    """Run ``SpectralFrames`` on mean-centred audio and check the output shape."""
    path = get_file_name()
    audio = AudioDataFile().load(path, formatter=AIFFormatter())
    # Centre the samples around zero before extracting the feature.
    audio.data -= audio.data.mean()

    feature = SpectralFrames()
    feature.parameters["sampling_rate"] = audio.sampling_rate
    feature.parameters["data"] = audio
    transformed = feature.transform()

    assert transformed.data.values.shape[0] == 1
    assert feature.description != ""
    assert transformed.data.values.ndim == 2
Beispiel #16
0
def test_mfcc():
    """Run ``MFCC`` with 25 components and check the output shape."""
    path = get_file_name()
    audio = AudioDataFile().load(path, formatter=AIFFormatter())
    # Centre the samples around zero before extracting the feature.
    audio.data -= audio.data.mean()

    feature = MFCC()
    feature.parameters["sampling_rate"] = audio.sampling_rate
    feature.parameters["n_components"] = 25
    feature.parameters["data"] = audio
    transformed = feature.transform()

    # One column per requested component.
    assert transformed.data.values.shape[1] == 25
    assert feature.description != ""
    assert transformed.data.values.ndim == 2
Beispiel #17
0
 def test_get_training_testing_validation_set(self):
     """Count the iterations produced by the three set generators together.

     Five files are added to a ``OneDataFileOut``; zipping the training,
     testing and validation generators must yield ``ds.iterations`` items.
     """
     splitter = OneDataFileOut()
     for path in get_5_file_names():
         loaded = AudioDataFile().load(path, formatter=AIFFormatter())
         splitter.add_data_file(loaded)
     assert len(splitter.datafiles) == 5

     training = splitter.get_training()
     testing = splitter.get_testing()
     validation = splitter.get_validation()
     expected_iterations = splitter.iterations

     observed = sum(1 for _ in zip(training, testing, validation))
     assert expected_iterations == observed
Beispiel #18
0
 def test_get_training_testing_set(self):
     """Count the iterations produced with validation disabled.

     Five files are added to a ``OneDataFileOut`` whose ``"validation"``
     parameter is switched off; zipping the training and testing generators
     must yield ``ds.iterations`` items.
     """
     splitter = OneDataFileOut()
     # Disable validation set generation
     splitter.parameters["validation"] = False
     for path in get_5_file_names():
         loaded = AudioDataFile().load(path, formatter=AIFFormatter())
         splitter.add_data_file(loaded)
     assert len(splitter.datafiles) == 5

     training = splitter.get_training()
     testing = splitter.get_testing()
     expected_iterations = splitter.iterations

     observed = sum(1 for _ in zip(training, testing))
     assert expected_iterations == observed
    def test_segments_repr(self):
        """Check ``str(AudioSegments)`` when empty and after adding five segments."""
        data_path, labels_path = get_labeled_txt()
        audio = AudioDataFile().load(data_path, formatter=AIFFormatter())
        audio.load_labels(
            labels_path,
            labels_formatter=TXTLabelsFormatter(),
            label="whale",
        )

        # Split the whole duration into 5 consecutive windows.
        origin = audio.start_time
        width = audio.duration / 5
        segments = AudioSegments()
        assert str(segments) == "AudioSegments"

        for index in range(5):
            window_start = origin + index * width
            window_end = origin + (index + 1) * width
            audio.add_window(window_start, window_end)
            segments.add_segment(*audio.get_window(index))

        assert str(segments) == "AudioSegments (5 segments)"
Beispiel #20
0
    def build_data_file(self, params: dict):
        """Load every configured input file and concatenate them.

        Each entry of ``params["input_data"]`` is read for its
        ``"file_name"``, ``"data_file"`` and ``"formatter"`` keys.  The last
        two are looked up in the registries returned by
        ``getters.get_available_data_files()`` and
        ``getters.get_available_formatters()``, instantiated, and used to
        load the file.

        Returns:
            A dict with the single key ``"input_data"`` holding the result of
            ``AudioDataFile().concatenate`` over all loaded files.
        """
        data_file_registry = getters.get_available_data_files()
        formatter_registry = getters.get_available_formatters()

        loaded = []
        for elem in params["input_data"]:
            self.logger.info(f"Loading and appending file {elem['file_name']}")
            file_name, data_file_name, formatter_name = (
                elem[key] for key in ("file_name", "data_file", "formatter")
            )
            data_file = data_file_registry[data_file_name]()
            formatter = formatter_registry[formatter_name]()
            data_file.load(file_name=file_name, formatter=formatter)
            loaded.append(data_file)

        # Merge the individual files into one big data file.
        return {"input_data": AudioDataFile().concatenate(loaded)}
    def load_feature_datafile(self):
        """Build a feature data file from five windows of a labeled audio file.

        The labeled audio is cut into five consecutive windows collected in an
        ``AudioSegments``; ``Energy`` and ``Kurtosis`` are computed on it and
        the two results are concatenated with ``FeatureDataFile().concatenate``.
        """
        data_path, labels_path = get_labeled_txt()
        audio = AudioDataFile().load(data_path, formatter=AIFFormatter())
        audio.load_labels(
            labels_path,
            labels_formatter=TXTLabelsFormatter(),
            label="whale",
        )

        # Split the whole duration into 5 consecutive windows.
        origin = audio.start_time
        width = audio.duration / 5
        segments = AudioSegments()
        for index in range(5):
            window_start = origin + index * width
            window_end = origin + (index + 1) * width
            audio.add_window(window_start, window_end)
            segments.add_segment(*audio.get_window(index))

        # Configure both feature extractors on the segments, then run them.
        energy = Energy()
        energy.parameters["data"] = segments
        kurtosis = Kurtosis()
        kurtosis.parameters["data"] = segments
        energy_features = energy.transform()
        kurtosis_features = kurtosis.transform()

        return FeatureDataFile().concatenate(
            [energy_features, kurtosis_features])
 def test_sampling_rate(self):
     """Check ``sampling_rate`` is ``None`` before loading and positive after."""
     path = get_file_name()
     audio = AudioDataFile()
     assert audio.sampling_rate is None

     audio.load(path, formatter=AIFFormatter())
     assert audio.sampling_rate > 0
    def test_get_window(self):
        """Exercise ``get_window`` and the ``labels_treatment`` parameter.

        Adds two windows, checks their lengths and the windows data frame,
        checks an out-of-range index returns ``None``, and runs the "mode",
        "mean" and an unknown labels treatment.
        """
        data_path, labels_path = get_labeled_txt()
        audio = AudioDataFile().load(data_path, formatter=AIFFormatter())
        audio.load_labels(
            labels_path,
            labels_formatter=TXTLabelsFormatter(),
            label="whale",
        )
        first_start = audio.data.index[0]
        first_end = audio.data.index[5]
        second_start = audio.metadata["labels"][0][0]
        second_end = second_start + 4

        audio.add_window(first_start, first_end)  # Length 6
        audio.add_window(second_start, second_end)  # Length 4
        windows_frame = audio.get_windows_data_frame()
        assert len(windows_frame) == 2
        first_window, _first_label = audio.get_window(0)
        second_window, _second_label = audio.get_window(1)
        assert len(first_window) == 6
        assert len(second_window) == 4

        # Out of range index
        assert audio.get_window(100) is None

        # Other label treatments
        audio.parameters["labels_treatment"] = "mode"
        audio.get_window(0)
        audio.parameters["labels_treatment"] = "mean"
        _, mean_label = audio.get_window(0)
        assert type(mean_label) is float
        with pytest.raises(ValueError):
            # There is actually no labels treatment called fourier
            audio.parameters["labels_treatment"] = "fourier"
            audio.get_window(0)  # Raises value error