def test_load_timestamp_before_warnmode(metadata_wav_str):
    with pytest.warns(UserWarning):
        correct_ts = Audio.from_file(metadata_wav_str).metadata["recording_start_time"]
        local_timestamp = datetime(2018, 4, 4, 0, 0, 0)  # 1 year before recording
        local_timezone = pytz.timezone("UTC")
        timestamp = local_timezone.localize(local_timestamp)
        s = Audio.from_file(
            metadata_wav_str, start_timestamp=timestamp, out_of_bounds_mode="warn"
        )
        # Assert the start time is the correct, original timestamp and has not been changed
        assert s.metadata["recording_start_time"] == correct_ts
def test_non_integer_split_and_save_default(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clip_df = split_and_save(audio, "unnecessary", "unnecessary", dry_run=True)
    assert clip_df.shape[0] == 1
    assert clip_df.iloc[0]["begin_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[0]["clip_duration"] == 5.0
def test_detect_peak_sequence_cwt(rugr_wav_str):
    """test detection of ruffed grouse drumming

    the default parameters might change, but this should always return
    the same detection.
    """
    rugr_audio = Audio.from_file(rugr_wav_str)
    detections = sig.detect_peak_sequence_cwt(
        rugr_audio,
        sr=400,
        window_len=10,
        center_frequency=50,
        wavelet="morl",
        peak_threshold=0.2,
        peak_separation=15 / 400,
        dt_range=[0.05, 0.8],
        dy_range=[-0.2, 0],
        d2y_range=[-0.05, 0.15],
        max_skip=3,
        duration_range=[1, 15],
        points_range=[9, 100],
        plot=False,
    )
    assert len(detections) == 1
    assert detections.iloc[0].seq_len == 24
def test_melspectrogram_to_image_with_reshape(veryshort_wav_str):
    audio = Audio.from_file(veryshort_wav_str, sample_rate=22050)
    mel_spec = MelSpectrogram.from_audio(audio)
    img = mel_spec.to_image(shape=(10, 20))
    assert img.size == (10, 20)
    arr = np.array(img)
    assert arr.shape == (20, 10, 3)
def test_property_trim_length_is_correct(silence_10s_mp3_str):
    audio = Audio.from_file(silence_10s_mp3_str, sample_rate=10000)
    duration = audio.duration()
    for _ in range(100):
        [first, second] = sorted([uniform(0, duration), uniform(0, duration)])
        assert isclose(
            audio.trim(first, second).duration(), second - first, abs_tol=1e-4
        )
def test_extend_length_is_correct(silence_10s_mp3_str):
    audio = Audio.from_file(silence_10s_mp3_str, sample_rate=10000)
    duration = audio.duration()
    for _ in range(100):
        extend_length = uniform(duration, duration * 10)
        assert isclose(
            audio.extend(extend_length).duration(), extend_length, abs_tol=1e-4
        )
def test_load_timestamp_before_recording(metadata_wav_str):
    with pytest.raises(AudioOutOfBoundsError):
        local_timestamp = datetime(2018, 4, 4, 0, 0, 0)  # 1 year before recording
        local_timezone = pytz.timezone("UTC")
        timestamp = local_timezone.localize(local_timestamp)
        Audio.from_file(
            metadata_wav_str, start_timestamp=timestamp, out_of_bounds_mode="raise"
        )
def test_load_timestamp_after_end_of_recording(metadata_wav_str):
    with pytest.raises(AudioOutOfBoundsError):
        local_timestamp = datetime(2021, 4, 4, 0, 0, 0)  # 1 year after recording
        local_timezone = pytz.timezone("US/Eastern")
        timestamp = local_timezone.localize(local_timestamp)
        Audio.from_file(
            metadata_wav_str, start_timestamp=timestamp, out_of_bounds_mode="raise"
        )
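# The three timestamp tests above exercise Audio.from_file's start_timestamp
# loading: a timezone-aware datetime selects where in the recording to start
# reading, and out_of_bounds_mode controls what happens when the requested
# timestamp falls outside the file ("warn" keeps the original metadata and
# emits a UserWarning; "raise" throws AudioOutOfBoundsError).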
def test_non_integer_source_split_and_save_default(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clip_df = audio.split_and_save("unnecessary", "unnecessary", 5, dry_run=True)
    assert clip_df.shape[0] == 1
    assert clip_df.iloc[0]["start_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
def test_spectrogram_shape_of_veryshort(veryshort_wav_str):
    audio = Audio.from_file(veryshort_wav_str, sample_rate=22050)
    spec = Spectrogram.from_audio(audio, overlap_samples=384)
    assert spec.spectrogram.shape == (257, 21)
    assert spec.frequencies.shape == (257,)
    assert spec.times.shape == (21,)
    assert isclose(spec.window_length(), 0.02321995465, abs_tol=1e-4)
    assert isclose(spec.window_step(), 0.005804988662, abs_tol=1e-4)
    assert isclose(spec.duration(), audio.duration(), abs_tol=1e-2)
    assert isclose(spec.window_start_times()[0], 0, abs_tol=1e-4)
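# Arithmetic behind the window assertions above, assuming the default
# 512-sample window implied by the 257 (= 512 // 2 + 1) frequency bins
# at a 22050 Hz sample rate:
#   window_length = 512 / 22050          ~= 0.02321995 s
#   window_step   = (512 - 384) / 22050  ~= 0.00580499 s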
def test_non_integer_cliplen_split_and_save(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib)
    clip_df = audio.split_and_save("unnecessary", "unnecessary", 4.5, dry_run=True)
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["start_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 4.5
    assert clip_df.iloc[1]["start_time"] == 4.5
    assert clip_df.iloc[1]["end_time"] == 9.0
def test_split_and_save_default(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib)
    clip_df = audio.split_and_save("unnecessary", "unnecessary", 5.0, dry_run=True)
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["start_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[1]["start_time"] == 5.0
    assert clip_df.iloc[1]["end_time"] == 10.0
def test_non_integer_source_split_and_save_extend(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clip_df = audio.split_and_save(
        "unnecessary", "unnecessary", 5, dry_run=True, final_clip="extend"
    )
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["start_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[1]["start_time"] == 5.0
    assert abs(clip_df.iloc[1]["end_time"] - 10.0) < 0.1
def test_split_and_save_default(silence_10s_mp3_pathlib):
    clip_df = split_and_save(
        Audio.from_file(silence_10s_mp3_pathlib),
        "unnecessary",
        "unnecessary",
        dry_run=True,
    )
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["begin_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[1]["begin_time"] == 4.0
    assert clip_df.iloc[1]["end_time"] == 9.0
    assert clip_df.iloc[1]["clip_duration"] == 5.0
def test_non_integer_split_and_save_remainder(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clip_df = split_and_save(
        audio, "unnecessary", "unnecessary", dry_run=True, final_clip="remainder"
    )
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["begin_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[1]["begin_time"] == 4.0
    assert clip_df.iloc[1]["end_time"] == 8.2
    assert clip_df.iloc[1]["clip_duration"] == 4.2
def test_non_integer_source_split_and_save_full(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clip_df = split_and_save(
        audio, "unnecessary", "unnecessary", 5, dry_run=True, final_clip="full"
    )
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["begin_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert abs(clip_df.iloc[1]["begin_time"] - 3.2) < 0.1
    assert abs(clip_df.iloc[1]["end_time"] - 8.2) < 0.1
    assert clip_df.iloc[1]["clip_duration"] == 5.0
def test_non_integer_overlaplen_split_and_save(silence_10s_mp3_pathlib):
    clip_df = split_and_save(
        Audio.from_file(silence_10s_mp3_pathlib),
        "unnecessary",
        "unnecessary",
        5.0,
        0.5,
        dry_run=True,
    )
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["begin_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[1]["begin_time"] == 4.5
    assert clip_df.iloc[1]["end_time"] == 9.5
def test_ribbit():
    path = "./tests/audio/silence_10s.mp3"
    audio = Audio.from_file(path, sample_rate=22050)
    spec = Spectrogram.from_audio(audio)
    scores, times = ribbit.ribbit(
        spec,
        pulse_rate_range=[5, 10],
        signal_band=[1000, 2000],
        window_len=5.0,
        noise_bands=[[0, 200]],
        plot=True,
    )
    assert len(scores) > 0
def test_split_and_save_default_extend(silence_10s_mp3_pathlib):
    audio = Audio.from_file(silence_10s_mp3_pathlib)
    clip_df = audio.split_and_save(
        "unnecessary", "unnecessary", 5.0, 1.0, final_clip="extend", dry_run=True
    )
    assert clip_df.shape[0] == 3
    assert clip_df.iloc[0]["start_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[1]["start_time"] == 4.0
    assert clip_df.iloc[1]["end_time"] == 9.0
    assert clip_df.iloc[2]["start_time"] == 8.0
    assert clip_df.iloc[2]["end_time"] == 13.0
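# Taken together, the split_and_save tests above pin down how a partial
# final window is handled when an 8.2 s file is split into 5 s clips:
#   final_clip=None (default) - drop the partial remainder (one clip, 0-5 s)
#   final_clip="remainder"    - keep the short final clip as-is (4.2 s long)
#   final_clip="full"         - slide the final window back so it ends at
#                               the end of the file (3.2-8.2 s, 5 s long)
#   final_clip="extend"       - pad the final clip out to the full 5 s
#                               duration (end_time ~= 10 s)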
def overlay_random_image(
    self, original_image, original_length, original_class, original_path
):
    """Overlay an image from another class

    Select a random file from a different class. Trim if necessary to the
    same length as the given image. Overlay the images on top of each other
    with a weight.
    """
    # Select a random file from a different class
    if self.overlay_class == "different":
        choose_from = self.df[self.df[self.label_column] != original_class]
    # Select a random file from a class of choice
    else:
        choose_from = self.df[self.df[self.label_column] == self.overlay_class]
    overlay_path = np.random.choice(choose_from[self.filename_column].values)
    overlay_audio = Audio.from_file(overlay_path, sample_rate=self.audio_sample_rate)

    # trim to same length as main clip
    overlay_audio_length = len(overlay_audio.samples) / overlay_audio.sample_rate
    if overlay_audio_length < original_length and not self.extend_short_clips:
        raise ValueError(
            f"the length of the overlay file ({overlay_audio_length} sec) "
            f"was less than the length of the file {original_path} "
            f"({original_length} sec). To extend short clips, "
            f"use extend_short_clips=True"
        )
    elif overlay_audio_length != original_length:
        overlay_audio = self.random_audio_trim(
            overlay_audio, original_length, overlay_path
        )
    overlay_image = self.image_from_audio(overlay_audio, mode="L")

    # create an image and add blur
    blur_r = np.random.randint(0, 8) / 10
    overlay_image = overlay_image.filter(ImageFilter.GaussianBlur(radius=blur_r))

    # Select weight; <0.5 means more emphasis on original image
    if self.overlay_weight == "random":
        weight = np.random.randint(2, 5) / 10
    else:
        weight = self.overlay_weight

    # use a weighted sum to overlay (blend) the images
    return Image.blend(original_image, overlay_image, weight)
def __getitem__(self, item_idx):
    row = self.df.iloc[item_idx]
    audio_path = Path(row[self.filename_column])
    audio = Audio.from_file(audio_path, sample_rate=self.audio_sample_rate)

    # trim to desired length if needed
    # (if self.random_trim_length is specified, select a clip of that
    # length at random from the original file)
    audio_length = len(audio.samples) / audio.sample_rate
    if self.random_trim_length is not None:
        audio = self.random_audio_trim(audio, audio_length, audio_path)
        audio_length = self.random_trim_length

    image = self.image_from_audio(audio, mode="L")

    # add a blended/overlayed image from another class directly on top
    for _ in range(self.max_overlay_num):
        if self.overlay_prob > np.random.uniform():
            image = self.overlay_random_image(
                original_image=image,
                original_length=audio_length,
                original_class=row[self.label_column],
                original_path=audio_path,
            )
        else:
            break

    if self.save_dir:
        image.save(f"{self.save_dir}/{audio_path.stem}_{time()}.png")

    # apply desired random transformations to image and convert to tensor
    image = image.convert("RGB")
    X = self.transform(image)

    if self.debug:
        from torchvision.utils import save_image

        save_image(X, f"{self.debug}/{audio_path.stem}_{time()}.png")

    # Return data : label pairs (training/validation)
    if self.label_column:
        labels = np.array([row[self.label_column]])
        return {"X": X, "y": torch.from_numpy(labels)}

    # Return data only (prediction)
    return {"X": X}
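# A minimal usage sketch for the dataset above (the constructor signature is
# not shown in this excerpt, so the call below is hypothetical; only the
# {"X": ..., "y": ...} batch format is taken from __getitem__):
#
#   from torch.utils.data import DataLoader
#
#   dataset = SingleTargetAudioDataset(df)
#   loader = DataLoader(dataset, batch_size=32, shuffle=True)
#   for batch in loader:
#       X = batch["X"]
#       y = batch["y"]  # present only when label_column is set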
def test_ribbit_short_audio(veryshort_wav_str):
    audio = Audio.from_file(veryshort_wav_str, sample_rate=22050)
    spec = Spectrogram.from_audio(
        audio, window_samples=512, overlap_samples=256, decibel_limits=(-100, -20)
    )
    df = ribbit.ribbit(
        spec,
        pulse_rate_range=[5, 10],
        signal_band=[1000, 2000],
        clip_duration=5.0,
        clip_overlap=2.5,
        final_clip=None,
        noise_bands=[[0, 200]],
        plot=False,
    )
    assert len(df) == 0
def test_summarize_top_scores(gpt_path):
    df = pd.DataFrame(
        columns=[
            "species",
            "pulse_rate_low",
            "pulse_rate_high",
            "low_f",
            "high_f",
            "reject_low",
            "reject_high",
            "window_length",
        ]
    )
    df.loc[0] = ["sp1", 5, 10, 1000, 2000, 0, 500, 1.0]
    df.loc[1] = ["sp2", 10, 15, 1000, 2000, 0, 500, 1.0]
    audio = Audio.from_file(gpt_path, sample_rate=32000)
    spec = Spectrogram.from_audio(audio, overlap_samples=256)
    df = ribbit.pulse_finder_species_set(spec, df)
    ribbit.summarize_top_scores(["1", "2"], [df, df], scale_factor=10.0)
def test_pulsefinder_species_set(gpt_path):
    df = pd.DataFrame(
        columns=[
            "species",
            "pulse_rate_low",
            "pulse_rate_high",
            "low_f",
            "high_f",
            "reject_low",
            "reject_high",
            "window_length",
        ]
    )
    df.loc[0] = ["sp1", 5, 10, 1000, 2000, 0, 500, 1.0]
    df.loc[1] = ["sp2", 10, 15, 1000, 2000, 0, 500, 1.0]
    audio = Audio.from_file(gpt_path, sample_rate=32000)
    spec = Spectrogram.from_audio(audio, overlap_samples=256)
    df = ribbit.pulse_finder_species_set(spec, df)
    assert type(df) == pd.DataFrame
def test_ribbit_high_spec_overlap(gpt_path):
    """spec params should not affect number of clips in results"""
    audio = Audio.from_file(gpt_path, sample_rate=22050).trim(0, 16)
    spec = Spectrogram.from_audio(
        audio, window_samples=512, overlap_samples=500, decibel_limits=(-100, -20)
    )
    df = ribbit.ribbit(
        spec,
        pulse_rate_range=[5, 10],
        signal_band=[1000, 2000],
        clip_duration=5.0,
        clip_overlap=0,
        final_clip=None,
        noise_bands=[[0, 200]],
        plot=False,
    )
    assert len(df) == 3
    assert isclose(max(df["start_time"]), 10.0, abs_tol=1e-4)
def test_ribbit(gpt_path):
    audio = Audio.from_file(gpt_path, sample_rate=22050).trim(0, 16)
    spec = Spectrogram.from_audio(
        audio, window_samples=512, overlap_samples=256, decibel_limits=(-100, -20)
    )
    df = ribbit.ribbit(
        spec,
        pulse_rate_range=[5, 10],
        signal_band=[1000, 2000],
        clip_duration=5.0,
        clip_overlap=0,
        final_clip=None,
        noise_bands=[[0, 200]],
        plot=False,
    )
    assert len(df) == 3
    assert isclose(max(df["score"]), 0.0392323, abs_tol=1e-4)
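# Sanity check for the two ribbit tests above: a 16 s clip with
# clip_duration=5.0, clip_overlap=0, and final_clip=None yields exactly
# three full windows (0-5, 5-10, 10-15 s); the 1 s remainder is dropped,
# so len(df) == 3 and the last start_time is 10.0.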
def test_detect_peak_sequence_cwt_no_results(rugr_wav_str):
    """tests that an empty dataframe is returned (instead of an error)
    if input audio is shorter than window_len
    """
    rugr_audio = Audio.from_file(rugr_wav_str).trim(0, 1)
    detections = sig.detect_peak_sequence_cwt(
        rugr_audio,
        sr=400,
        window_len=10,
        center_frequency=50,
        wavelet="morl",
        peak_threshold=0.2,
        peak_separation=15 / 400,
        dt_range=[0.05, 0.8],
        dy_range=[-0.2, 0],
        d2y_range=[-0.05, 0.15],
        max_skip=3,
        duration_range=[1, 15],
        points_range=[9, 100],
        plot=False,
    )
    assert len(detections) == 0
def test_detect_peak_sequence_cwt_uneven_length_results(rugr_wav_str):
    """
    this test is for the (resolved) issue #410 in which uneven lengths of
    detected sequences caused a TypeError
    """
    rugr_audio = Audio.from_file(rugr_wav_str).trim(1, 8).loop(length=20)
    detections = sig.detect_peak_sequence_cwt(
        rugr_audio,
        sr=400,
        window_len=3,
        center_frequency=50,
        wavelet="morl",
        peak_threshold=0.2,
        peak_separation=15 / 400,
        dt_range=[0.05, 0.8],
        dy_range=[-0.2, 0],
        d2y_range=[-0.05, 0.15],
        max_skip=3,
        duration_range=[1, 15],
        points_range=[9, 100],
        plot=False,
    )
    assert len(detections) == 2
def __getitem__(self, item_idx):
    wav = self.wavs[item_idx]
    annotation_prefix = self.wavs[item_idx].stem.split(".")[0]

    if self.annotations:
        annotation_file = Path(
            f"{wav.parent}/{annotation_prefix}.Table.1.selections.txt.lower"
        )
        if not annotation_file.is_file():
            stderr.write(f"Warning: Found no Raven annotations for {wav}\n")
            return {"data": []}

    audio_obj = Audio.from_file(wav)
    wav_duration = audio_obj.duration()
    wav_times = np.arange(0.0, wav_duration, wav_duration / len(audio_obj.samples))

    if self.annotations:
        annotation_df = pd.read_csv(annotation_file, sep="\t").sort_values(
            by=["begin time (s)"]
        )

        if self.label_corrections:
            annotation_df["class"] = annotation_df["class"].fillna("unknown")
            annotation_df["class"] = annotation_df["class"].apply(
                lambda cls: self.labels_df[self.labels_df["raw"] == cls][
                    "corrected"
                ].values[0]
            )

    num_segments = ceil(
        (wav_duration - self.overlap) / (self.duration - self.overlap)
    )

    outputs = []
    for idx in range(num_segments):
        if idx == num_segments - 1:
            if self.include_last_segment:
                end = wav_duration
                begin = end - self.duration
            else:
                continue
        else:
            begin = self.duration * idx - self.overlap * idx
            end = begin + self.duration

        if self.annotations:
            overlaps = annotations_with_overlaps_with_clip(annotation_df, begin, end)

        unique_string = f"{wav}-{begin}-{end}"
        destination = f"{self.output_directory}/{get_md5_digest(unique_string)}"

        if self.annotations:
            if overlaps.shape[0] > 0:
                segment_sample_begin = audio_obj.time_to_sample(begin)
                segment_sample_end = audio_obj.time_to_sample(end)
                audio_to_write = audio_obj.trim(begin, end)
                audio_to_write.save(f"{destination}.wav")

                if idx == num_segments - 1:
                    to_append = [
                        wav,
                        annotation_file,
                        wav_times[segment_sample_begin],
                        wav_times[-1],
                        f"{destination}.wav",
                    ]
                else:
                    to_append = [
                        wav,
                        annotation_file,
                        wav_times[segment_sample_begin],
                        wav_times[segment_sample_end],
                        f"{destination}.wav",
                    ]
                to_append.append(
                    self.species_separator.join(overlaps["class"].unique())
                )

                outputs.append(
                    self.column_separator.join([str(x) for x in to_append])
                )
        else:
            segment_sample_begin = audio_obj.time_to_sample(begin)
            segment_sample_end = audio_obj.time_to_sample(end)
            audio_to_write = audio_obj.trim(begin, end)
            audio_to_write.save(f"{destination}.wav")

            if idx == num_segments - 1:
                to_append = [
                    wav,
                    wav_times[segment_sample_begin],
                    wav_times[-1],
                    f"{destination}.wav",
                ]
            else:
                to_append = [
                    wav,
                    wav_times[segment_sample_begin],
                    wav_times[segment_sample_end],
                    f"{destination}.wav",
                ]

            outputs.append(self.column_separator.join([str(x) for x in to_append]))

    return {"data": outputs}
def entrypoint():
    """The Opensoundscape entrypoint for console interaction"""
    args = docopt(
        OPSO_DOCOPT, version=f"opensoundscape version {opensoundscape_version}"
    )

    if args["completions"]:
        print(COMPLETIONS)

    elif args["default_config"]:
        print(DEFAULT_CONFIG)

    elif args["raven_annotation_check"]:
        raven.annotation_check(args["<directory>"])

    elif args["raven_generate_class_corrections"]:
        csv = raven.generate_class_corrections(
            args["<directory>"], lower=args["--lower"]
        )
        with open(args["<output.csv>"], "w") as f:
            f.write(csv)

    elif args["raven_query_annotations"]:
        raven.query_annotations(args["<directory>"], args["<class>"])

    elif args["split_audio"]:
        config = get_default_config()
        if args["--config"]:
            config = validate_file(args["--config"])

        input_p = checks.directory_exists(args, "--input_directory")
        output_p = checks.directory_exists(args, "--output_directory")

        segments = Path(args["--segments"])
        if segments.exists():
            segments.rename(segments.with_suffix(".csv.bak"))

        wavs = chain(
            input_p.rglob("**/*.WAV"),
            input_p.rglob("**/*.wav"),
            input_p.rglob("**/*.mp3"),
            input_p.rglob("**/*.MP3"),
        )

        dataset = datasets.SplitterDataset(
            wavs,
            annotations=config["raven"]["annotations"],
            label_corrections=config["raven"]["label_corrections"],
            overlap=config["audio"]["overlap"],
            duration=config["audio"]["duration"],
            output_directory=args["--output_directory"],
        )

        dataloader = DataLoader(
            dataset,
            batch_size=config["runtime"]["batch_size"],
            shuffle=False,
            num_workers=config["runtime"]["cores_per_node"],
            collate_fn=datasets.SplitterDataset.collate_fn,
        )

        with open(args["--segments"], "w") as f:
            if config["raven"]["annotations"]:
                f.write("Source,Annotations,Begin (s),End (s),Destination,Labels\n")
            else:
                f.write("Source,Begin (s),End (s),Destination\n")
            for idx, data in enumerate(dataloader):
                for output in data:
                    f.write(f"{output}\n")

    elif args["predict_from_directory"]:
        config = get_default_config()
        if args["--config"]:
            config = validate_file(args["--config"])

        input_p = checks.directory_exists(args, "--input_directory")

        wavs = chain(
            input_p.rglob("**/*.WAV"),
            input_p.rglob("**/*.wav"),
            input_p.rglob("**/*.mp3"),
            input_p.rglob("**/*.MP3"),
        )

        with TemporaryDirectory() as segments_dir:
            dataset = datasets.SplitterDataset(
                wavs,
                overlap=config["audio"]["overlap"],
                duration=config["audio"]["duration"],
                output_directory=segments_dir,
            )

            dataloader = DataLoader(
                dataset,
                batch_size=config["runtime"]["batch_size"],
                shuffle=False,
                num_workers=config["runtime"]["cores_per_node"],
                collate_fn=datasets.SplitterDataset.collate_fn,
            )

            segments_csv = f"{segments_dir}/segments.csv"
            with open(segments_csv, "w") as f:
                f.write("Source,Begin (s),End (s),Destination\n")
                for idx, data in enumerate(dataloader):
                    for output in data:
                        f.write(f"{output}\n")

            input_df = pd.read_csv(segments_csv)
            dataset = datasets.SingleTargetAudioDataset(input_df)
            dataloader = DataLoader(
                dataset,
                batch_size=config["runtime"]["batch_size"],
                shuffle=False,
                num_workers=config["runtime"]["cores_per_node"],
            )

            try:
                model = resnet18(pretrained=False)
                model.fc = nn.Linear(
                    in_features=model.fc.in_features, out_features=2
                )
                model.load_state_dict(torch.load(args["--state_dict"]))
            except Exception:
                exit(
                    f"I was unable to load the state dictionary from "
                    f"`{args['--state_dict']}`"
                )

            model.eval()
            with torch.no_grad():
                for idx, data in enumerate(dataloader):
                    X = data["X"]
                    # forward pass on the batch, then take the argmax class
                    outputs = model(X)
                    predictions = outputs.clone().detach().argmax(dim=1)
                    start = config["runtime"]["batch_size"] * idx
                    end = start + config["runtime"]["batch_size"]
                    for fname, pred in zip(
                        input_df["Destination"][start:end], predictions
                    ):
                        print(f"{fname},{pred}")

    elif args["split_and_save"]:
        config = get_default_config()
        if args["--config"]:
            config = validate_file(args["--config"])

        output_p = checks.directory_exists(args, "--output_directory")

        audio = Audio.from_file(args["--audio_file"], **config["audio"])
        clip_df = split_and_save(
            audio, args["--output_directory"], "segment", **config["split_and_save"]
        )
        clip_df.to_csv(args["--segments"], index=None)

    else:
        raise NotImplementedError(
            "The requested command is not implemented. Please submit an issue."
        )