Beispiel #1
0
def test_non_integer_split_and_save_default(silence_10s_mp3_pathlib):
    """Check the default split of audio trimmed with ``trim(0, 8.2)``.

    With no ``final_clip`` argument the dry run is expected to report exactly
    one clip, running from 0.0 to 5.0 with a ``clip_duration`` of 5.0.
    """
    trimmed = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clips = split_and_save(trimmed, "unnecessary", "unnecessary", dry_run=True)

    n_clips = clips.shape[0]
    assert n_clips == 1

    # Every expectation below concerns the single reported clip.
    only_clip = clips.iloc[0]
    for column, expected in (
        ("begin_time", 0.0),
        ("end_time", 5.0),
        ("clip_duration", 5.0),
    ):
        assert only_clip[column] == expected
Beispiel #2
0
def test_non_integer_split_and_save_remainder(silence_10s_mp3_pathlib):
    """Check ``final_clip="remainder"`` on audio trimmed with ``trim(0, 8.2)``.

    The dry run is expected to report two clips: 0.0-5.0, followed by a final
    clip from 4.0 to 8.2 whose ``clip_duration`` is 4.2.
    """
    source = Audio.from_file(silence_10s_mp3_pathlib)
    options = {"dry_run": True, "final_clip": "remainder"}
    clips = split_and_save(
        source.trim(0, 8.2), "unnecessary", "unnecessary", **options
    )

    assert clips.shape[0] == 2

    first, last = clips.iloc[0], clips.iloc[1]
    assert first["begin_time"] == 0.0
    assert first["end_time"] == 5.0
    assert last["begin_time"] == 4.0
    assert last["end_time"] == 8.2
    assert last["clip_duration"] == 4.2
Beispiel #3
0
def test_split_and_save_default(silence_10s_mp3_pathlib):
    """Check the default split of the untrimmed fixture audio.

    The dry run is expected to report two clips: 0.0-5.0 and 4.0-9.0, the
    second having a ``clip_duration`` of 5.0.
    """
    source = Audio.from_file(silence_10s_mp3_pathlib)
    clips = split_and_save(source, "unnecessary", "unnecessary", dry_run=True)

    assert clips.shape[0] == 2

    # (row position, column, expected value), checked in this order.
    expectations = [
        (0, "begin_time", 0.0),
        (0, "end_time", 5.0),
        (1, "begin_time", 4.0),
        (1, "end_time", 9.0),
        (1, "clip_duration", 5.0),
    ]
    for position, column, expected in expectations:
        assert clips.iloc[position][column] == expected
Beispiel #4
0
def test_non_integer_cliplen_split_and_save(silence_10s_mp3_pathlib):
    """Check splitting when 4.5 is passed as the fourth positional argument.

    NOTE(review): the fourth argument is presumably the clip length, as the
    test name suggests -- confirm against ``split_and_save``'s signature.
    The dry run is expected to report two clips: 0.0-4.5 and 4.5-9.0.
    """
    clip_length = 4.5
    source = Audio.from_file(silence_10s_mp3_pathlib)
    clips = split_and_save(
        source, "unnecessary", "unnecessary", clip_length, dry_run=True
    )

    assert clips.shape[0] == 2

    first_clip = clips.iloc[0]
    assert first_clip["begin_time"] == 0.0
    assert first_clip["end_time"] == 4.5

    second_clip = clips.iloc[1]
    assert second_clip["begin_time"] == 4.5
    assert second_clip["end_time"] == 9.0
Beispiel #5
0
def test_non_integer_source_split_and_save_extend(silence_10s_mp3_pathlib):
    """Check ``final_clip="extend"`` on audio trimmed with ``trim(0, 8.2)``.

    The dry run is expected to report two clips: 0.0-5.0, and a final clip
    starting at 5.0 whose reported ``end_time`` is within 0.1 of 8.2 while its
    ``clip_duration`` is 5.0.
    """
    audio = Audio.from_file(silence_10s_mp3_pathlib).trim(0, 8.2)
    clip_df = split_and_save(audio,
                             "unnecessary",
                             "unnecessary",
                             5,
                             dry_run=True,
                             final_clip="extend")
    assert clip_df.shape[0] == 2
    assert clip_df.iloc[0]["begin_time"] == 0.0
    assert clip_df.iloc[0]["end_time"] == 5.0
    assert clip_df.iloc[1]["begin_time"] == 5.0
    # Two-sided tolerance: without abs() any end_time below 8.3 (even zero or
    # a negative value) would satisfy the check.
    assert abs(clip_df.iloc[1]["end_time"] - 8.2) < 0.1
    assert clip_df.iloc[1]["clip_duration"] == 5.0
Beispiel #6
0
def _load_config(args):
    """Return the default config, or the validated ``--config`` file if given."""
    config = get_default_config()
    if args["--config"]:
        config = validate_file(args["--config"])
    return config


def _find_audio_files(input_p):
    """Return a lazy iterator over WAV/MP3 files found recursively in ``input_p``.

    Both upper- and lower-case suffixes are searched, in the order
    ``.WAV``, ``.wav``, ``.mp3``, ``.MP3``.
    """
    patterns = ("**/*.WAV", "**/*.wav", "**/*.mp3", "**/*.MP3")
    return chain.from_iterable(input_p.rglob(pattern) for pattern in patterns)


def _write_segment_rows(dataloader, handle):
    """Write every item of every batch from ``dataloader`` to ``handle``, one per line."""
    for batch in dataloader:
        handle.writelines(f"{row}\n" for row in batch)


def entrypoint():
    """The Opensoundscape entrypoint for console interaction

    Parses the command line with docopt (``OPSO_DOCOPT``) and dispatches on
    the sub-command flag that is set:

    - ``completions`` / ``default_config``: print the corresponding constant.
    - ``raven_annotation_check``, ``raven_generate_class_corrections``,
      ``raven_query_annotations``: delegate to the ``raven`` module.
    - ``split_audio``: split every audio file found under
      ``--input_directory`` and write one CSV row per segment to
      ``--segments`` (an existing file is first renamed to ``.csv.bak``).
    - ``predict_from_directory``: split into a temporary directory, then
      print ``<segment>,<prediction>`` for every segment using a ResNet-18
      with a two-output head whose weights come from ``--state_dict``.
    - ``split_and_save``: split the single ``--audio_file`` and save the clip
      table to ``--segments``.

    Raises:
        NotImplementedError: if none of the known sub-commands is set.
        SystemExit: if the state dictionary cannot be loaded.
    """

    args = docopt(OPSO_DOCOPT,
                  version=f"opensoundscape version {opensoundscape_version}")

    if args["completions"]:
        print(COMPLETIONS)

    elif args["default_config"]:
        print(DEFAULT_CONFIG)

    elif args["raven_annotation_check"]:
        raven.annotation_check(args["<directory>"])

    elif args["raven_generate_class_corrections"]:
        csv = raven.generate_class_corrections(args["<directory>"],
                                               lower=args["--lower"])
        with open(args["<output.csv>"], "w") as f:
            f.write(csv)

    elif args["raven_query_annotations"]:
        raven.query_annotations(args["<directory>"], args["<class>"])

    elif args["split_audio"]:
        config = _load_config(args)

        input_p = checks.directory_exists(args, "--input_directory")
        # Return value unused. NOTE(review): presumably called only to
        # validate the output directory -- confirm against `checks`.
        checks.directory_exists(args, "--output_directory")

        # Keep a previous segments file as a backup rather than overwrite it
        segments = Path(args["--segments"])
        if segments.exists():
            segments.rename(segments.with_suffix(".csv.bak"))

        dataset = datasets.SplitterDataset(
            _find_audio_files(input_p),
            annotations=config["raven"]["annotations"],
            label_corrections=config["raven"]["label_corrections"],
            overlap=config["audio"]["overlap"],
            duration=config["audio"]["duration"],
            output_directory=args["--output_directory"],
        )

        dataloader = DataLoader(
            dataset,
            batch_size=config["runtime"]["batch_size"],
            shuffle=False,
            num_workers=config["runtime"]["cores_per_node"],
            collate_fn=datasets.SplitterDataset.collate_fn,
        )

        with open(args["--segments"], "w") as f:
            # The header gains annotation/label columns when annotations are on
            if config["raven"]["annotations"]:
                f.write(
                    "Source,Annotations,Begin (s),End (s),Destination,Labels\n"
                )
            else:
                f.write("Source,Begin (s),End (s),Destination\n")
            _write_segment_rows(dataloader, f)

    elif args["predict_from_directory"]:
        config = _load_config(args)
        batch_size = config["runtime"]["batch_size"]

        input_p = checks.directory_exists(args, "--input_directory")

        with TemporaryDirectory() as segments_dir:
            dataset = datasets.SplitterDataset(
                _find_audio_files(input_p),
                overlap=config["audio"]["overlap"],
                duration=config["audio"]["duration"],
                output_directory=segments_dir,
            )

            dataloader = DataLoader(
                dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=config["runtime"]["cores_per_node"],
                collate_fn=datasets.SplitterDataset.collate_fn,
            )

            segments_csv = f"{segments_dir}/segments.csv"
            with open(segments_csv, "w") as f:
                f.write("Source,Begin (s),End (s),Destination\n")
                _write_segment_rows(dataloader, f)

            input_df = pd.read_csv(segments_csv)
            dataset = datasets.SingleTargetAudioDataset(input_df)

            dataloader = DataLoader(
                dataset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=config["runtime"]["cores_per_node"],
            )

            model = resnet18(pretrained=False)
            model.fc = nn.Linear(in_features=model.fc.in_features,
                                 out_features=2)
            # Only the loading step is guarded, so that the message below is
            # never reported for an unrelated failure. `Exception` (not a bare
            # except) lets KeyboardInterrupt/SystemExit propagate untouched.
            try:
                model.load_state_dict(torch.load(args["--state_dict"]))
            except Exception as err:
                raise SystemExit(
                    f"I was unable to load the state dictionary from `{args['--state_dict']}`"
                ) from err

            model.eval()
            with torch.no_grad():
                for idx, data in enumerate(dataloader):
                    X = data["X"]
                    # Forward pass; `outputs` was previously used without ever
                    # being assigned, which raised NameError.
                    outputs = model(X)
                    predictions = outputs.clone().detach().argmax(dim=1)
                    # shuffle=False, so batch `idx` lines up with this slice
                    # of the segment table.
                    start = batch_size * idx
                    end = start + batch_size
                    for fname, pred in zip(input_df["Destination"][start:end],
                                           predictions):
                        print(f"{fname},{pred}")

    elif args["split_and_save"]:
        config = _load_config(args)

        # Return value unused. NOTE(review): presumably called only to
        # validate the output directory -- confirm against `checks`.
        checks.directory_exists(args, "--output_directory")

        audio = Audio.from_file(args["--audio_file"], **config["audio"])

        clip_df = split_and_save(audio, args["--output_directory"], "segment",
                                 **config["split_and_save"])

        clip_df.to_csv(args["--segments"], index=None)

    else:
        raise NotImplementedError(
            "The requested command is not implemented. Please submit an issue."
        )
Beispiel #7
0
def raven_audio_split_and_save(
    raven_directory,
    audio_directory,
    destination,
    col,
    sample_rate,
    clip_duration,
    clip_overlap=0,
    final_clip=None,
    extensions=["wav", "WAV", "mp3"],
    csv_name="labels.csv",
    labeled_clips_only=False,
    min_label_len=0,
    species=None,
    dry_run=False,
    verbose=False,
):
    """Split audio and annotations files simultaneously

    Splits audio into short clips with the desired overlap. Saves these clips
    and a one-hot encoded labels CSV into the directory of choice. Labels for
    csv are selected based on all labels in clips.

    Requires that audio and annotation filenames are unique, and that the "stem"
    of annotation filenames is the same as the corresponding stem of the audio
    filename (Raven saves files using this convention by default). Files are
    paired on the part of the filename before its first ".".

    E.g. The following format is correct:
    audio_directory/audio_file_1.wav
    raven_directory/audio_file_1.Table.1.selections.txt

    Args:
        raven_directory (str or pathlib.Path):  The path which contains lowercase Raven annotations file(s)
        audio_directory (str or pathlib.Path):  The path which contains audio file(s) with names the same as annotation files
        destination (str or pathlib.Path):      The path at which to save the splits and the one-hot encoded labels file
        col (str):                              The column containing species labels in the Raven files
        sample_rate (int):                      Desired sample rate of split audio clips
        clip_duration (float):                  Length of each clip
        clip_overlap (float):                   Amount of overlap between subsequent clips [default: 0]
        final_clip (str or None):               Behavior if final_clip is less than clip_duration seconds long. [default: None]
            By default, ignores final clip entirely.
            Possible options (any other input will ignore the final clip entirely),
                - "full":       Increase the overlap with previous audio to yield a clip with clip_duration length
                - "remainder":  Include the remainder of the Audio (clip will NOT have clip_duration length)
                - "extend":     Similar to remainder but extend the clip with silence to reach clip_duration length
                - "loop":       Similar to remainder but loop (repeat) the clip to reach clip_duration length
        extensions (list):                      List of audio filename extensions to look for. [default: `['wav', 'WAV', 'mp3']`]
        csv_name (str):                         Filename of the output csv, to be saved in the specified destination [default: 'labels.csv']
        labeled_clips_only (bool):              Whether to only save clips that contain labels of the species of interest. [default: False]
        min_label_len (float):                  the minimum amount a label must overlap with the split to be considered a label.
                                                Useful for excluding short annotations or annotations that barely overlap the split.
                                                For example, if 1, the label will only be included if the annotation is at least 1s long
                                                and either starts at least 1s before the end of the split, or ends at least 1s
                                                after the start of the split. By default, any label is kept [default: 0]
        species (str, list, or None):           Species labels to get. If None, gets a list of labels from all selections files. [default: None]
        dry_run (bool):                         If True, skip writing audio and just return clip DataFrame [default: False]
        verbose (bool):                         If True, prints progress information [default:False]

    Returns:
        pandas.DataFrame: the concatenated per-file label tables with the
        "seg_start" and "seg_end" columns dropped and the index named
        "filename". Unless dry_run is True, the same table is also written to
        `destination/csv_name`.
    """

    # List all label files
    all_selections = _get_lower_selections(Path(raven_directory))

    # List all audio files (the default `extensions` list is only read here,
    # never mutated)
    audio_directory = Path(audio_directory)
    all_audio = [
        f for f in audio_directory.glob("**/*")
        if f.suffix.lstrip(".") in extensions
    ]

    # Get audio files and selection files with same stem
    def _truestem(path_obj):
        # Everything before the first "." so that "x.Table.1.selections.txt"
        # and "x.wav" both map to "x"
        return path_obj.name.split(".")[0]

    sel_dict = {_truestem(Path(s)): s for s in all_selections}
    aud_dict = {_truestem(Path(a)): a for a in all_audio}
    # Sorted so that files are processed (and rows emitted) in a
    # reproducible order rather than in set-iteration order
    keep_keys = sorted(set(sel_dict).intersection(aud_dict))
    matched_audio = [aud_dict[k] for k in keep_keys]
    matched_selections = [sel_dict[k] for k in keep_keys]

    # Print results for user
    print(
        f"Found {len(matched_audio)} sets of matching audio files and selection tables out of {len(all_audio)} audio files and {len(all_selections)} selection tables"
    )
    has_unmatched = (len(all_audio) > len(matched_audio)
                     or len(all_selections) > len(matched_audio))
    if has_unmatched:
        if not verbose:
            print("To see unmatched files, use `verbose = True`")
        else:
            print("Unmatched audio files:")
            print("  " + str(set(all_audio) - set(matched_audio)))
            print("Unmatched selection tables:")
            print("  " + str(set(all_selections) - set(matched_selections)))

    # Get all species in labels file
    if species is None:
        species = get_labels_in_dataset(selections_files=matched_selections,
                                        col=col)

    # Create output directory if needed
    destination = Path(destination)
    if not destination.exists():
        if verbose:
            print("Making directory", destination)
        if not dry_run:
            # parents=True so a nested destination does not fail;
            # exist_ok=True tolerates the directory appearing in the meantime
            destination.mkdir(parents=True, exist_ok=True)

    # If saving labeled clips only, don't split audio on first run
    audio_initial_dry_run = bool(labeled_clips_only or dry_run)

    # for each label file:
    # run split_and_save on associated audio
    dfs = []
    for file_number, (key, aud_file, sel_file) in enumerate(
            zip(keep_keys, matched_audio, matched_selections), start=1):

        # Split audio and get corresponding start and end times
        a = audio.Audio.from_file(aud_file, sample_rate=sample_rate)
        total_duration = a.duration()
        begin_end_times_df = audio.split_and_save(
            audio=a,
            destination=destination,
            clip_duration=clip_duration,
            clip_overlap=clip_overlap,
            final_clip=final_clip,
            dry_run=audio_initial_dry_run,
            prefix=key,
        )

        # Use start and end times to split label file
        df = split_starts_ends(
            raven_file=sel_file,
            col=col,
            starts=begin_end_times_df["begin_time"].values,
            ends=begin_end_times_df["end_time"].values,
            species=species,
            min_label_len=min_label_len,
        )

        # Keep track of clip filenames
        df.index = begin_end_times_df.index

        # For saving only labeled clips:
        if labeled_clips_only:
            # Keep rows where at least one requested species is present
            df = df[pd.DataFrame(df[species]).sum(axis=1) > 0]
            if not dry_run:
                # Nothing is written during a dry run, so clips are only
                # trimmed when they will actually be saved
                for clip_name, clip_info in df.iterrows():
                    seg_start = clip_info["seg_start"]
                    seg_end = clip_info["seg_end"]
                    trimmed = a.trim(seg_start, seg_end)

                    # A clip that runs past the end of the audio is padded to
                    # clip_duration. Use the object returned by extend/loop
                    # when there is one: `trim` above returns its result, and
                    # discarding the return value here would save the
                    # un-padded clip. If the methods work in place and return
                    # None, `trimmed` is already up to date.
                    padded = None
                    if seg_end > total_duration:
                        if final_clip == "extend":
                            padded = trimmed.extend(clip_duration)
                        elif final_clip == "loop":
                            padded = trimmed.loop(clip_duration)
                    if padded is not None:
                        trimmed = padded

                    trimmed.save(clip_name)

        dfs.append(df)

        if verbose:
            print(f"{file_number}. Finished {aud_file}")

    # Format dataframes as single df with columns filename, label1, label2, ...
    # NOTE: with no matched file pairs `dfs` is empty and pd.concat raises
    # ValueError.
    one_hot_encoded_df = pd.concat(dfs).drop(columns=["seg_start", "seg_end"])
    one_hot_encoded_df.index.name = "filename"

    # Save labels file if needed
    if not dry_run:
        one_hot_encoded_df.to_csv(destination.joinpath(csv_name))
    return one_hot_encoded_df