def test_load_timestamp_before_warnmode(metadata_wav_str):
    with pytest.warns(UserWarning):
        correct_ts = Audio.from_file(metadata_wav_str).metadata["recording_start_time"]
        local_timestamp = datetime(2018, 4, 4, 0, 0, 0)  # 1 year before recording
        local_timezone = pytz.timezone("UTC")
        timestamp = local_timezone.localize(local_timestamp)
        s = Audio.from_file(
            metadata_wav_str, start_timestamp=timestamp, out_of_bounds_mode="warn"
        )
        # Assert the start time is the correct, original timestamp and has not been changed
        assert s.metadata["recording_start_time"] == correct_ts
def test_save(silence_10s_mp3_str, saved_wav, out_path):
    if not exists(out_path):
        os.system(f"mkdir {out_path}")
    if exists(saved_wav):
        os.system(f"rm {saved_wav}")
    Audio(silence_10s_mp3_str).save(saved_wav)
    assert exists(saved_wav)
def test_melspectrogram_to_image_with_reshape(veryshort_wav_str):
    audio = Audio.from_file(veryshort_wav_str, sample_rate=22050)
    mel_spec = MelSpectrogram.from_audio(audio)
    img = mel_spec.to_image(shape=(10, 20))
    assert img.size == (10, 20)
    arr = np.array(img)
    assert arr.shape == (20, 10, 3)
def test_non_integer_split_and_save_default(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clip_df = split_and_save(audio, "unnecessary", "unnecessary", dry_run=True)
    assert clip_df.shape[0] == 1
    assert clip_df.iloc[0]["begin_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[0]["clip_duration"] == 5.0
def test_detect_peak_sequence_cwt(rugr_wav_str):
    """test detection of ruffed grouse drumming

    the default parameters might change, but this should always return
    the same detection.
    """
    rugr_audio = Audio.from_file(rugr_wav_str)
    detections = sig.detect_peak_sequence_cwt(
        rugr_audio,
        sr=400,
        window_len=10,
        center_frequency=50,
        wavelet="morl",
        peak_threshold=0.2,
        peak_separation=15 / 400,
        dt_range=[0.05, 0.8],
        dy_range=[-0.2, 0],
        d2y_range=[-0.05, 0.15],
        max_skip=3,
        duration_range=[1, 15],
        points_range=[9, 100],
        plot=False,
    )
    assert len(detections) == 1
    assert detections.iloc[0].seq_len == 24
def pulse_finder_file(
    file, freq_range, pulse_rate_range, window_len, rejection_bands=None, plot=False
):
    """a wrapper for pulse_finder which takes an audio file path as an argument

    creates the audio object and spectrogram within the function

    Args:
        file: path to an audio file
        freq_range: range to bandpass the spectrogram, in Hz
        pulse_rate_range: how many pulses per second?
            (where to look in the fft of the smoothed amplitude), in Hz
        window_len: length of each analysis window, in seconds
        rejection_bands: list of frequency bands to subtract from the desired freq_range
        plot: if True, plot figures [default: False]

    Returns:
        array of pulse_score: pulse score (float) for each time window
        array of time: start time of each window
    """
    # make spectrogram from file path
    audio = Audio(file)
    spec = Spectrogram.from_audio(audio)

    pulse_scores, window_start_times = pulse_finder(
        spec, freq_range, pulse_rate_range, window_len, rejection_bands, plot
    )

    return pulse_scores, window_start_times
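# The sketch below is only a usage example for pulse_finder_file, not part of the
# module: the file path and all parameter values are hypothetical and are meant to
# illustrate the expected argument types (path, Hz ranges, seconds).
#
#     scores, times = pulse_finder_file(
#         "recording.wav",             # hypothetical path to an audio file
#         freq_range=[1000, 2000],     # bandpass range, in Hz
#         pulse_rate_range=[5, 10],    # expected pulses per second
#         window_len=5.0,              # analysis window length, in seconds
#         rejection_bands=[[0, 200]],  # frequency bands to subtract
#     )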
def test_property_trim_length_is_correct(silence_10s_mp3_str):
    audio = Audio.from_file(silence_10s_mp3_str, sample_rate=10000)
    duration = audio.duration()
    for _ in range(100):
        [first, second] = sorted([uniform(0, duration), uniform(0, duration)])
        assert isclose(
            audio.trim(first, second).duration(), second - first, abs_tol=1e-4
        )
def test_extend_length_is_correct(silence_10s_mp3_str):
    audio = Audio.from_file(silence_10s_mp3_str, sample_rate=10000)
    duration = audio.duration()
    for _ in range(100):
        extend_length = uniform(duration, duration * 10)
        assert isclose(
            audio.extend(extend_length).duration(), extend_length, abs_tol=1e-4
        )
def test_non_integer_source_split_and_save_default(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clip_df = audio.split_and_save("unnecessary", "unnecessary", 5, dry_run=True)
    assert clip_df.shape[0] == 1
    assert clip_df.iloc[0]["start_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
def test_load_timestamp_before_recording(metadata_wav_str):
    with pytest.raises(AudioOutOfBoundsError):
        local_timestamp = datetime(2018, 4, 4, 0, 0, 0)  # 1 year before recording
        local_timezone = pytz.timezone("UTC")
        timestamp = local_timezone.localize(local_timestamp)
        s = Audio.from_file(
            metadata_wav_str, start_timestamp=timestamp, out_of_bounds_mode="raise"
        )
def test_load_timestamp_after_end_of_recording(metadata_wav_str):
    with pytest.raises(AudioOutOfBoundsError):
        local_timestamp = datetime(2021, 4, 4, 0, 0, 0)  # 1 year after recording
        local_timezone = pytz.timezone("US/Eastern")
        timestamp = local_timezone.localize(local_timestamp)
        s = Audio.from_file(
            metadata_wav_str, start_timestamp=timestamp, out_of_bounds_mode="raise"
        )
def test_spectrogram_shape_of_veryshort(veryshort_wav_str):
    audio = Audio.from_file(veryshort_wav_str, sample_rate=22050)
    spec = Spectrogram.from_audio(audio, overlap_samples=384)
    assert spec.spectrogram.shape == (257, 21)
    assert spec.frequencies.shape == (257,)
    assert spec.times.shape == (21,)
    assert isclose(spec.window_length(), 0.02321995465, abs_tol=1e-4)
    assert isclose(spec.window_step(), 0.005804988662, abs_tol=1e-4)
    assert isclose(spec.duration(), audio.duration(), abs_tol=1e-2)
    assert isclose(spec.window_start_times()[0], 0, abs_tol=1e-4)
def test_non_integer_cliplen_split_and_save(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib)
    clip_df = audio.split_and_save("unnecessary", "unnecessary", 4.5, dry_run=True)
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["start_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 4.5
    assert clip_df.iloc[1]["start_time"] == 4.5
    assert clip_df.iloc[1]["end_time"] == 9.0
def test_split_and_save_default(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib)
    clip_df = audio.split_and_save("unnecessary", "unnecessary", 5.0, dry_run=True)
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["start_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[1]["start_time"] == 5.0
    assert clip_df.iloc[1]["end_time"] == 10.0
def test_non_integer_source_split_and_save_extend(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clip_df = audio.split_and_save(
        "unnecessary", "unnecessary", 5, dry_run=True, final_clip="extend"
    )
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["start_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[1]["start_time"] == 5.0
    assert (clip_df.iloc[1]["end_time"] - 10.0) < 0.1
def test_non_integer_split_and_save_remainder(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clip_df = split_and_save(
        audio, "unnecessary", "unnecessary", dry_run=True, final_clip="remainder"
    )
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["begin_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[1]["begin_time"] == 4.0
    assert clip_df.iloc[1]["end_time"] == 8.2
    assert clip_df.iloc[1]["clip_duration"] == 4.2
def test_split_and_save_default(silence_10s_mp3_pathlib):
    clip_df = split_and_save(
        Audio.from_file(silence_10s_mp3_pathlib),
        "unnecessary",
        "unnecessary",
        dry_run=True,
    )
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["begin_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[1]["begin_time"] == 4.0
    assert clip_df.iloc[1]["end_time"] == 9.0
    assert clip_df.iloc[1]["clip_duration"] == 5.0
def test_non_integer_source_split_and_save_full(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clip_df = split_and_save(
        audio, "unnecessary", "unnecessary", 5, dry_run=True, final_clip="full"
    )
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["begin_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert abs(clip_df.iloc[1]["begin_time"] - 3.2) < 0.1
    assert abs(clip_df.iloc[1]["end_time"] - 8.2) < 0.1
    assert clip_df.iloc[1]["clip_duration"] == 5.0
def test_pulse_finder():
    path = "./tests/silence_10s.mp3"
    audio = Audio(path)
    spec = Spectrogram.from_audio(audio)
    scores, times = pulse_finder(
        spec,
        pulse_rate_range=[5, 10],
        freq_range=[1000, 2000],
        window_len=5.0,
        rejection_bands=[[0, 200]],
        plot=True,
    )
    assert len(scores) > 0
def test_ribbit():
    path = "./tests/audio/silence_10s.mp3"
    audio = Audio.from_file(path, sample_rate=22050)
    spec = Spectrogram.from_audio(audio)
    scores, times = ribbit.ribbit(
        spec,
        pulse_rate_range=[5, 10],
        signal_band=[1000, 2000],
        window_len=5.0,
        noise_bands=[[0, 200]],
        plot=True,
    )
    assert len(scores) > 0
def test_non_integer_overlaplen_split_and_save(silence_10s_mp3_pathlib):
    clip_df = split_and_save(
        Audio.from_file(silence_10s_mp3_pathlib),
        "unnecessary",
        "unnecessary",
        5.0,
        0.5,
        dry_run=True,
    )
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["begin_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[1]["begin_time"] == 4.5
    assert clip_df.iloc[1]["end_time"] == 9.5
def process_audio(func_name, audio_io):
    """
    Check inputs and return spectrogram images

    Check that inputs are in the correct format (single-channel, between 5 and
    20 seconds long), and convert the audio to images

    Args:
        func_name (str): name of calling function (for printing)
        audio_io (bytes): in-memory audio data, passed to Audio.from_bytesio

    Returns:
        array of 224x224 images, each representing up to 5s of the original audio
    """
    print(f"runserver.py: {func_name}() checking inputs")

    # Just return an error if no data was posted
    if not audio_io:
        return {"error": "No data was given with post?"}

    # Make sure we can load the data given to us
    print(f"runserver.py: {func_name}() loading samples")
    try:
        audio = Audio.from_bytesio(
            audio_io, sample_rate=22050, resample_type="kaiser_fast"
        )
    except:
        return {"error": "Unable to load audio, multi-channel input is ignored"}
    print(
        f"runserver.py: {func_name}() loaded samples at sample_rate {audio.sample_rate}"
    )

    # Check the duration is between 5 and 20 seconds
    duration = audio.duration()
    if duration < 5:
        return {"error": "Audio is shorter than 5 seconds"}
    elif duration > 20:
        return {"error": "Audio is longer than 20 seconds"}

    # 1. split audio into 5 second chunks
    # 2. generate spectrograms
    # 3. generate images
    audio_splits = birds_detector.split_audio(audio)
    spectrograms = [Spectrogram.from_audio(x) for x in audio_splits]
    images = [x.to_image(shape=(224, 224)) for x in spectrograms]
    print(f"runserver.py: {func_name}(), opening audio as spectrograms")

    return {"images": images}
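# Usage sketch for process_audio (illustrative only; "clip.wav" and the caller name
# are hypothetical, and the audio data is wrapped in a BytesIO object for
# Audio.from_bytesio):
#
#     import io
#
#     with open("clip.wav", "rb") as f:
#         result = process_audio("example_caller", io.BytesIO(f.read()))
#     if "error" in result:
#         print(result["error"])
#     else:
#         print(f"got {len(result['images'])} spectrogram images")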
def test_split_and_save_default_extend(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib)
    clip_df = audio.split_and_save(
        "unnecessary", "unnecessary", 5.0, 1.0, final_clip="extend", dry_run=True
    )
    assert clip_df.shape[0] == 3
    assert clip_df.iloc[0]["start_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[1]["start_time"] == 4.0
    assert clip_df.iloc[1]["end_time"] == 9.0
    assert clip_df.iloc[2]["start_time"] == 8.0
    assert clip_df.iloc[2]["end_time"] == 13.0
def overlay_random_image(
    self, original_image, original_length, original_class, original_path
):
    """Overlay an image from another class

    Select a random file from a different class. Trim it if necessary to the
    same length as the original clip. Overlay the images on top of each other
    with a weight.
    """
    # Select a random file from a different class
    if self.overlay_class == "different":
        choose_from = self.df[self.df[self.label_column] != original_class]
    # Select a random file from a class of choice
    else:
        choose_from = self.df[self.df[self.label_column] == self.overlay_class]
    overlay_path = np.random.choice(choose_from[self.filename_column].values)
    overlay_audio = Audio.from_file(overlay_path, sample_rate=self.audio_sample_rate)

    # trim to same length as main clip
    overlay_audio_length = len(overlay_audio.samples) / overlay_audio.sample_rate
    if overlay_audio_length < original_length and not self.extend_short_clips:
        raise ValueError(
            f"the length of the overlay file ({overlay_audio_length} sec) was less "
            f"than the length of the file {original_path} ({original_length} sec). "
            f"To extend short clips, use extend_short_clips=True"
        )
    elif overlay_audio_length != original_length:
        overlay_audio = self.random_audio_trim(
            overlay_audio, original_length, overlay_path
        )
    overlay_image = self.image_from_audio(overlay_audio, mode="L")

    # create an image and add blur
    blur_r = np.random.randint(0, 8) / 10
    overlay_image = overlay_image.filter(ImageFilter.GaussianBlur(radius=blur_r))

    # Select weight; <0.5 means more emphasis on original image
    if self.overlay_weight == "random":
        weight = np.random.randint(2, 5) / 10
    else:
        weight = self.overlay_weight

    # use a weighted sum to overlay (blend) the images
    return Image.blend(original_image, overlay_image, weight)
def __getitem__(self, item_idx):
    row = self.df.iloc[item_idx]
    audio_path = Path(row[self.filename_column])
    audio = Audio.from_file(audio_path, sample_rate=self.audio_sample_rate)

    # trim to desired length if needed
    # (if self.random_trim_length is specified, select a clip of that length
    # at random from the original file)
    audio_length = len(audio.samples) / audio.sample_rate
    if self.random_trim_length is not None:
        audio = self.random_audio_trim(audio, audio_length, audio_path)
        audio_length = self.random_trim_length

    image = self.image_from_audio(audio, mode="L")

    # add a blended/overlayed image from another class directly on top
    for _ in range(self.max_overlay_num):
        if self.overlay_prob > np.random.uniform():
            image = self.overlay_random_image(
                original_image=image,
                original_length=audio_length,
                original_class=row[self.label_column],
                original_path=audio_path,
            )
        else:
            break

    if self.save_dir:
        image.save(f"{self.save_dir}/{audio_path.stem}_{time()}.png")

    # apply desired random transformations to image and convert to tensor
    image = image.convert("RGB")
    X = self.transform(image)

    if self.debug:
        from torchvision.utils import save_image

        save_image(X, f"{self.debug}/{audio_path.stem}_{time()}.png")

    # Return data : label pairs (training/validation)
    if self.label_column:
        labels = np.array([row[self.label_column]])
        return {"X": X, "y": torch.from_numpy(labels)}

    # Return data only (prediction)
    return {"X": X}
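# A minimal sketch of how a Dataset with this __getitem__ might be consumed, assuming
# `dataset` is an instance of the class above (the DataLoader settings here are
# illustrative, not defaults):
#
#     from torch.utils.data import DataLoader
#
#     loader = DataLoader(dataset, batch_size=16, shuffle=True)
#     for batch in loader:
#         X = batch["X"]      # image tensors, shape [batch, 3, H, W]
#         if "y" in batch:    # labels are only returned when label_column is set
#             y = batch["y"]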
def test_ribbit_short_audio(veryshort_wav_str):
    audio = Audio.from_file(veryshort_wav_str, sample_rate=22050)
    spec = Spectrogram.from_audio(
        audio, window_samples=512, overlap_samples=256, decibel_limits=(-100, -20)
    )
    df = ribbit.ribbit(
        spec,
        pulse_rate_range=[5, 10],
        signal_band=[1000, 2000],
        clip_duration=5.0,
        clip_overlap=2.5,
        final_clip=None,
        noise_bands=[[0, 200]],
        plot=False,
    )
    assert len(df) == 0
def test_summarize_top_scores(gpt_path):
    df = pd.DataFrame(
        columns=[
            "species",
            "pulse_rate_low",
            "pulse_rate_high",
            "low_f",
            "high_f",
            "reject_low",
            "reject_high",
            "window_length",
        ]
    )
    df.at[0, :] = ["sp1", 5, 10, 1000, 2000, 0, 500, 1.0]
    df.at[1, :] = ["sp2", 10, 15, 1000, 2000, 0, 500, 1.0]
    audio = Audio.from_file(gpt_path, sample_rate=32000)
    spec = Spectrogram.from_audio(audio, overlap_samples=256)
    df = ribbit.pulse_finder_species_set(spec, df)
    ribbit.summarize_top_scores(["1", "2"], [df, df], scale_factor=10.0)
def test_ribbit_high_spec_overlap(gpt_path):
    """spec params should not affect the number of clips in the results"""
    audio = Audio.from_file(gpt_path, sample_rate=22050).trim(0, 16)
    spec = Spectrogram.from_audio(
        audio, window_samples=512, overlap_samples=500, decibel_limits=(-100, -20)
    )
    df = ribbit.ribbit(
        spec,
        pulse_rate_range=[5, 10],
        signal_band=[1000, 2000],
        clip_duration=5.0,
        clip_overlap=0,
        final_clip=None,
        noise_bands=[[0, 200]],
        plot=False,
    )
    assert len(df) == 3
    assert isclose(max(df["start_time"]), 10.0, abs_tol=1e-4)
def test_pulsefinder_species_set(gpt_path):
    df = pd.DataFrame(
        columns=[
            "species",
            "pulse_rate_low",
            "pulse_rate_high",
            "low_f",
            "high_f",
            "reject_low",
            "reject_high",
            "window_length",
        ]
    )
    df.at[0, :] = ["sp1", 5, 10, 1000, 2000, 0, 500, 1.0]
    df.at[1, :] = ["sp2", 10, 15, 1000, 2000, 0, 500, 1.0]
    audio = Audio.from_file(gpt_path, sample_rate=32000)
    spec = Spectrogram.from_audio(audio, overlap_samples=256)
    df = ribbit.pulse_finder_species_set(spec, df)
    assert type(df) == pd.DataFrame
def test_ribbit(gpt_path):
    audio = Audio.from_file(gpt_path, sample_rate=22050).trim(0, 16)
    spec = Spectrogram.from_audio(
        audio, window_samples=512, overlap_samples=256, decibel_limits=(-100, -20)
    )
    df = ribbit.ribbit(
        spec,
        pulse_rate_range=[5, 10],
        signal_band=[1000, 2000],
        clip_duration=5.0,
        clip_overlap=0,
        final_clip=None,
        noise_bands=[[0, 200]],
        plot=False,
    )
    assert len(df) == 3
    assert isclose(max(df["score"]), 0.0392323, abs_tol=1e-4)