def test_non_integer_split_and_save_default(silence_10s_mp3_pathlib):
    """Default splitting of audio trimmed with trim(0, 8.2) yields one clip.

    Only a single clip covering 0.0-5.0 is expected; the leftover audio after
    it is not reported as a clip when no `final_clip` option is passed.
    """
    trimmed = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clips = split_and_save(trimmed, "unnecessary", "unnecessary", dry_run=True)

    assert len(clips) == 1

    only_clip = clips.iloc[0]
    assert only_clip["begin_time"] == 0.0
    assert only_clip["end_time"] == 5.0
    assert only_clip["clip_duration"] == 5.0
def test_non_integer_split_and_save_remainder(silence_10s_mp3_pathlib):
    """With final_clip="remainder" the short trailing clip is kept as-is.

    The source is trimmed with trim(0, 8.2); the second clip is expected to
    run from 4.0 to 8.2 and report a duration of 4.2 rather than 5.0.
    """
    trimmed = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clips = split_and_save(
        trimmed,
        "unnecessary",
        "unnecessary",
        dry_run=True,
        final_clip="remainder",
    )

    assert len(clips) == 2

    first_clip = clips.iloc[0]
    assert first_clip["begin_time"] == 0.0
    assert first_clip["end_time"] == 5.0

    last_clip = clips.iloc[1]
    assert last_clip["begin_time"] == 4.0
    assert last_clip["end_time"] == 8.2
    assert last_clip["clip_duration"] == 4.2
def test_split_and_save_default(silence_10s_mp3_pathlib):
    """Default splitting of the untrimmed fixture yields two full clips.

    The clips are expected at 0.0-5.0 and 4.0-9.0, the second one reporting a
    duration of 5.0.
    """
    source = Audio.from_file(silence_10s_mp3_pathlib)
    clips = split_and_save(source, "unnecessary", "unnecessary", dry_run=True)

    assert len(clips) == 2

    first_clip = clips.iloc[0]
    assert first_clip["begin_time"] == 0.0
    assert first_clip["end_time"] == 5.0

    second_clip = clips.iloc[1]
    assert second_clip["begin_time"] == 4.0
    assert second_clip["end_time"] == 9.0
    assert second_clip["clip_duration"] == 5.0
def test_non_integer_cliplen_split_and_save(silence_10s_mp3_pathlib):
    """A non-integer clip length (4.5, passed positionally) is honoured.

    Two back-to-back clips are expected: 0.0-4.5 and 4.5-9.0.
    """
    source = Audio.from_file(silence_10s_mp3_pathlib)
    clips = split_and_save(source, "unnecessary", "unnecessary", 4.5, dry_run=True)

    assert len(clips) == 2

    first_clip = clips.iloc[0]
    assert first_clip["begin_time"] == 0.0
    assert first_clip["end_time"] == 4.5

    second_clip = clips.iloc[1]
    assert second_clip["begin_time"] == 4.5
    assert second_clip["end_time"] == 9.0
def test_non_integer_source_split_and_save_extend(silence_10s_mp3_pathlib):
    """With final_clip="extend" the trailing clip reports the full duration.

    The source is trimmed with trim(0, 8.2) and split with a clip length of 5.
    The second clip is expected to start at 5.0, end at roughly 8.2 and report
    a clip_duration of 5.0.
    """
    audio = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clip_df = split_and_save(
        audio,
        "unnecessary",
        "unnecessary",
        5,
        dry_run=True,
        final_clip="extend",
    )

    assert clip_df.shape[0] == 2

    first_clip = clip_df.iloc[0]
    assert first_clip["begin_time"] == 0.0
    assert first_clip["end_time"] == 5.0

    last_clip = clip_df.iloc[1]
    assert last_clip["begin_time"] == 5.0
    # Two-sided tolerance: without abs() any end time well *below* 8.2
    # (e.g. 0.0) would also satisfy the check.
    assert abs(last_clip["end_time"] - 8.2) < 0.1
    assert last_clip["clip_duration"] == 5.0
def _load_config(args):
    """Return the configuration to use for the parsed command line `args`.

    The default configuration is always built first; when `--config` is set,
    the result of `validate_file` on that path replaces it.
    """
    config = get_default_config()
    if args["--config"]:
        config = validate_file(args["--config"])
    return config


def _find_audio_files(directory):
    """Lazily chain the .WAV/.wav/.mp3/.MP3 matches found under `directory`."""
    return chain(
        directory.rglob("**/*.WAV"),
        directory.rglob("**/*.wav"),
        directory.rglob("**/*.mp3"),
        directory.rglob("**/*.MP3"),
    )


def entrypoint():
    """The Opensoundscape entrypoint for console interaction

    Parses the command line with docopt (usage text `OPSO_DOCOPT`) and runs
    the branch for the selected command:

    - completions / default_config: print the corresponding constant
    - raven_annotation_check, raven_generate_class_corrections,
      raven_query_annotations: delegate to the `raven` module
    - split_audio: split every audio file under --input_directory and write
      one line per segment to the --segments CSV
    - predict_from_directory: split into a temporary directory, run a
      2-class resnet18 loaded from --state_dict over the segments and print
      `<segment file>,<predicted class index>` lines
    - split_and_save: split a single --audio_file and write the clip table to
      the --segments CSV

    Raises:
        SystemExit: if the model state dictionary cannot be loaded
        NotImplementedError: if none of the known commands was selected
    """
    args = docopt(OPSO_DOCOPT, version=f"opensoundscape version {opensoundscape_version}")

    if args["completions"]:
        print(COMPLETIONS)

    elif args["default_config"]:
        print(DEFAULT_CONFIG)

    elif args["raven_annotation_check"]:
        raven.annotation_check(args["<directory>"])

    elif args["raven_generate_class_corrections"]:
        corrections_csv = raven.generate_class_corrections(
            args["<directory>"], lower=args["--lower"]
        )
        with open(args["<output.csv>"], "w") as f:
            f.write(corrections_csv)

    elif args["raven_query_annotations"]:
        raven.query_annotations(args["<directory>"], args["<class>"])

    elif args["split_audio"]:
        config = _load_config(args)

        input_p = checks.directory_exists(args, "--input_directory")
        # Called only for its check; the returned value is not needed because
        # the dataset below receives the raw --output_directory argument.
        checks.directory_exists(args, "--output_directory")

        # Keep a backup of a previous segments file instead of overwriting it.
        segments = Path(args["--segments"])
        if segments.exists():
            segments.rename(segments.with_suffix(".csv.bak"))

        dataset = datasets.SplitterDataset(
            _find_audio_files(input_p),
            annotations=config["raven"]["annotations"],
            label_corrections=config["raven"]["label_corrections"],
            overlap=config["audio"]["overlap"],
            duration=config["audio"]["duration"],
            output_directory=args["--output_directory"],
        )
        dataloader = DataLoader(
            dataset,
            batch_size=config["runtime"]["batch_size"],
            shuffle=False,
            num_workers=config["runtime"]["cores_per_node"],
            collate_fn=datasets.SplitterDataset.collate_fn,
        )

        with open(args["--segments"], "w") as f:
            if config["raven"]["annotations"]:
                f.write("Source,Annotations,Begin (s),End (s),Destination,Labels\n")
            else:
                f.write("Source,Begin (s),End (s),Destination\n")
            for data in dataloader:
                for output in data:
                    f.write(f"{output}\n")

    elif args["predict_from_directory"]:
        config = _load_config(args)
        batch_size = config["runtime"]["batch_size"]
        num_workers = config["runtime"]["cores_per_node"]

        input_p = checks.directory_exists(args, "--input_directory")

        # Segments are written to a temporary directory, so every step that
        # reads them has to stay inside this `with` block.
        with TemporaryDirectory() as segments_dir:
            dataset = datasets.SplitterDataset(
                _find_audio_files(input_p),
                overlap=config["audio"]["overlap"],
                duration=config["audio"]["duration"],
                output_directory=segments_dir,
            )
            dataloader = DataLoader(
                dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=num_workers,
                collate_fn=datasets.SplitterDataset.collate_fn,
            )

            segments_csv = f"{segments_dir}/segments.csv"
            with open(segments_csv, "w") as f:
                f.write("Source,Begin (s),End (s),Destination\n")
                for data in dataloader:
                    for output in data:
                        f.write(f"{output}\n")

            input_df = pd.read_csv(segments_csv)
            dataset = datasets.SingleTargetAudioDataset(input_df)
            dataloader = DataLoader(
                dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=num_workers,
            )

            try:
                model = resnet18(pretrained=False)
                model.fc = nn.Linear(in_features=model.fc.in_features, out_features=2)
                # NOTE(review): torch.load unpickles the file; only state
                # dictionaries from trusted sources should be passed here.
                model.load_state_dict(torch.load(args["--state_dict"]))
            except Exception as exc:
                # `except Exception` (not a bare except) so Ctrl-C still
                # interrupts; SystemExit is what the builtin exit() raised.
                raise SystemExit(
                    f"I was unable to load the state dictionary from `{args['--state_dict']}`"
                ) from exc

            model.eval()
            with torch.no_grad():
                for idx, data in enumerate(dataloader):
                    X = data["X"]
                    # Run the forward pass; previously `outputs` was read
                    # without ever being assigned (NameError on first batch).
                    outputs = model(X)
                    predictions = outputs.clone().detach().argmax(dim=1)

                    # shuffle=False, so batch `idx` corresponds to this slice
                    # of rows in the segments table.
                    start = batch_size * idx
                    end = start + batch_size
                    for fname, pred in zip(input_df["Destination"][start:end], predictions):
                        print(f"{fname},{pred}")

    elif args["split_and_save"]:
        config = _load_config(args)

        # Called only for its check; the raw argument is passed on below.
        checks.directory_exists(args, "--output_directory")

        audio = Audio.from_file(args["--audio_file"], **config["audio"])
        clip_df = split_and_save(
            audio, args["--output_directory"], "segment", **config["split_and_save"]
        )
        clip_df.to_csv(args["--segments"], index=None)

    else:
        raise NotImplementedError(
            "The requested command is not implemented. Please submit an issue."
        )
def raven_audio_split_and_save(
    raven_directory,
    audio_directory,
    destination,
    col,
    sample_rate,
    clip_duration,
    clip_overlap=0,
    final_clip=None,
    extensions=("wav", "WAV", "mp3"),
    csv_name="labels.csv",
    labeled_clips_only=False,
    min_label_len=0,
    species=None,
    dry_run=False,
    verbose=False,
):
    """Split audio and annotations files simultaneously

    Splits audio into short clips with the desired overlap. Saves these clips
    and a one-hot encoded labels CSV into the directory of choice. Labels for
    csv are selected based on all labels in clips.

    Requires that audio and annotation filenames are unique, and that the
    "stem" of annotation filenames is the same as the corresponding stem of
    the audio filename (Raven saves files using this convention by default).
    The stem used for matching is everything before the first "." in the file
    name; if two files share a stem, only the last one listed is kept.

    E.g. The following format is correct:
        audio_directory/audio_file_1.wav
        raven_directory/audio_file_1.Table.1.selections.txt

    Args:
        raven_directory (str or pathlib.Path): The path which contains
            lowercase Raven annotations file(s)
        audio_directory (str or pathlib.Path): The path which contains audio
            file(s) with names the same as annotation files
        destination (str or pathlib.Path): The path at which to save the
            splits and the one-hot encoded labels file
        col (str): The column containing species labels in the Raven files
        sample_rate (int): Desired sample rate of split audio clips
        clip_duration (float): Length of each clip
        clip_overlap (float): Amount of overlap between subsequent clips
            [default: 0]
        final_clip (str or None): Behavior if final_clip is less than
            clip_duration seconds long. [default: None]
            By default, ignores final clip entirely.
            Possible options (any other input will ignore the final clip
            entirely),
                - "full": Increase the overlap with previous audio to yield a
                    clip with clip_duration length
                - "remainder": Include the remainder of the Audio (clip will
                    NOT have clip_duration length)
                - "extend": Similar to remainder but extend the clip with
                    silence to reach clip_duration length
                - "loop": Similar to remainder but loop (repeat) the clip to
                    reach clip_duration length
        extensions (list or tuple): Audio filename extensions to look for.
            [default: `('wav', 'WAV', 'mp3')`]
        csv_name (str): Filename of the output csv, to be saved in the
            specified destination [default: 'labels.csv']
        labeled_clips_only (bool): Whether to only save clips that contain
            labels of the species of interest. [default: False]
        min_label_len (float): the minimum amount a label must overlap with
            the split to be considered a label. Useful for excluding short
            annotations or annotations that barely overlap the split. For
            example, if 1, the label will only be included if the annotation
            is at least 1s long and either starts at least 1s before the end
            of the split, or ends at least 1s after the start of the split.
            By default, any label is kept [default: 0]
        species (str, list, or None): Species labels to get. If None, gets a
            list of labels from all selections files. [default: None]
        dry_run (bool): If True, skip writing audio and just return clip
            DataFrame [default: False]
        verbose (bool): If True, prints progress information [default: False]

    Returns:
        one_hot_encoded_df (pandas.DataFrame): the per-file label tables
            concatenated into one table. Its index is named "filename" and is
            taken from the index of the clip table returned by
            `audio.split_and_save`; the "seg_start" and "seg_end" columns are
            removed. Unless `dry_run` is True, the same table is also written
            to `destination/csv_name`.
    """
    # List all label files
    all_selections = _get_lower_selections(Path(raven_directory))

    # List all audio files
    audio_directory = Path(audio_directory)
    all_audio = [
        f for f in audio_directory.glob("**/*") if f.suffix.strip(".") in extensions
    ]

    # Get audio files and selection files with same stem
    def _truestem(path_obj):
        return path_obj.name.split(".")[0]

    sel_dict = {_truestem(Path(s)): s for s in all_selections}
    aud_dict = {_truestem(Path(a)): a for a in all_audio}
    keep_keys = list(set(sel_dict).intersection(aud_dict))
    matched_audio = [aud_dict[k] for k in keep_keys]
    matched_selections = [sel_dict[k] for k in keep_keys]

    # Print results for user
    print(
        f"Found {len(matched_audio)} sets of matching audio files and selection tables out of {len(all_audio)} audio files and {len(all_selections)} selection tables"
    )
    has_unmatched = (
        len(all_audio) > len(matched_audio)
        or len(all_selections) > len(matched_audio)
    )
    if has_unmatched:
        if not verbose:
            print("To see unmatched files, use `verbose = True`")
        else:
            print("Unmatched audio files:")
            print(" " + str(set(all_audio) - set(matched_audio)))
            print("Unmatched selection tables:")
            print(" " + str(set(all_selections) - set(matched_selections)))

    # Get all species in labels file
    if species is None:
        species = get_labels_in_dataset(selections_files=matched_selections, col=col)

    # Create output directory if needed
    destination = Path(destination)
    if not destination.exists():
        if verbose:
            print("Making directory", destination)
        if not dry_run:
            destination.mkdir()

    # If saving labeled clips only, don't split audio on first run: the clips
    # that turn out to carry labels are trimmed and saved individually below.
    audio_initial_dry_run = bool(labeled_clips_only or dry_run)

    # for each label file:
    # run split_and_save on associated audio
    dfs = []
    for idx, (key, aud_file, sel_file) in enumerate(
        zip(keep_keys, matched_audio, matched_selections)
    ):
        # Split audio and get corresponding start and end times
        a = audio.Audio.from_file(aud_file, sample_rate=sample_rate)
        total_duration = a.duration()
        begin_end_times_df = audio.split_and_save(
            audio=a,
            destination=destination,
            clip_duration=clip_duration,
            clip_overlap=clip_overlap,
            final_clip=final_clip,
            dry_run=audio_initial_dry_run,
            prefix=key,
        )

        # Use start and end times to split label file
        df = split_starts_ends(
            raven_file=sel_file,
            col=col,
            starts=begin_end_times_df["begin_time"].values,
            ends=begin_end_times_df["end_time"].values,
            species=species,
            min_label_len=min_label_len,
        )

        # Keep track of clip filenames
        df.index = begin_end_times_df.index

        # For saving only labeled clips:
        if labeled_clips_only:
            # Keep rows where at least one species column is set
            df = df[pd.DataFrame(df[species]).sum(axis=1) > 0]

            for clip_name, clip_info in df.iterrows():
                seg_start = clip_info["seg_start"]
                seg_end = clip_info["seg_end"]
                trimmed = a.trim(seg_start, seg_end)

                if seg_end > total_duration:
                    # The results of extend()/loop() used to be discarded.
                    # Presumably, like trim(), they return a new Audio object
                    # (TODO confirm), in which case the padded clip was never
                    # saved. Keep the returned object when there is one; if
                    # they modify in place and return None, `trimmed` is
                    # already up to date.
                    padded = None
                    if final_clip == "extend":
                        padded = trimmed.extend(clip_duration)
                    elif final_clip == "loop":
                        padded = trimmed.loop(clip_duration)
                    if padded is not None:
                        trimmed = padded

                if not dry_run:
                    trimmed.save(clip_name)

        dfs.append(df)
        if verbose:
            print(f"{idx+1}. Finished {aud_file}")

    # Format dataframes as single df with columns filename, label1, label2, ...
    one_hot_encoded_df = pd.concat(dfs)
    one_hot_encoded_df = one_hot_encoded_df.drop(columns=["seg_start", "seg_end"])
    one_hot_encoded_df.index.name = "filename"

    # Save labels file if needed
    if not dry_run:
        one_hot_encoded_df.to_csv(destination.joinpath(csv_name))

    return one_hot_encoded_df