def test_overlay_no_valid_samples(dataset_df, overlay_df_all_positive):
    pre = SpectrogramPreprocessor(
        sample_duration=2.0, overlay_df=overlay_df_all_positive
    )
    dataset = AudioFileDataset(dataset_df, pre)
    dataset.preprocessor.pipeline.overlay.set(overlay_class="different")
    with pytest.raises(PreprocessingError):
        dataset[0]["X"]  # no samples with "different" labels

def test_audio_file_dataset(dataset_df, pre):
    """should return tensor and labels"""
    dataset = AudioFileDataset(dataset_df, pre)
    # note: the flag lives on the dataset and is plural (`bypass_augmentations`)
    dataset.bypass_augmentations = False
    sample1 = dataset[0]["X"]
    assert sample1.numpy().shape == (3, 224, 224)
    assert dataset[0]["y"].numpy().shape == (2,)

def test_overlay_update_labels(dataset_df, overlay_pre):
    """with update_labels=True, the sample's labels should include the
    labels of the overlayed sample"""
    dataset = AudioFileDataset(dataset_df, overlay_pre)
    dataset.preprocessor.pipeline.overlay.set(overlay_class="different")
    dataset.preprocessor.pipeline.overlay.set(update_labels=True)
    sample = dataset[0]
    assert np.array_equal(sample["y"].numpy(), [1, 1])

def test_predict_wrong_input_error(test_df):
    """cannot pass a preprocessor or dataset to predict;
    only file paths as a list or a df"""
    model = cnn.CNN("resnet18", classes=[0, 1], sample_duration=5.0)
    pre = SpectrogramPreprocessor(2.0)
    with pytest.raises(AssertionError):
        model.predict(pre)
    with pytest.raises(AssertionError):
        ds = AudioFileDataset(test_df, pre)
        model.predict(ds)

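# For reference, the accepted inputs to `predict` are a list (or np.ndarray)
# of audio file paths, or a dataframe whose index contains the paths.
# A minimal sketch (the paths here are hypothetical placeholders):
#
#   model.predict(["a.wav", "b.wav"])
#   model.predict(pd.DataFrame(index=["a.wav", "b.wav"]))
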
def test_overlay_with_invalid_weight_range(dataset_df, overlay_pre):
    """overlay should raise an error for invalid overlay_weight ranges"""
    dataset = AudioFileDataset(dataset_df, overlay_pre)
    with pytest.raises(PreprocessingError):
        # upper bound > 1 is invalid
        dataset.preprocessor.pipeline.overlay.set(overlay_weight=[0.1, 1.1])
        dataset[0]["X"]
    with pytest.raises(PreprocessingError):
        # a range must contain exactly two values, [min, max]
        dataset.preprocessor.pipeline.overlay.set(overlay_weight=[0.1, 0.5, 0.9])
        dataset[0]["X"]

def test_overlay_update_labels_duplicated_index(dataset_df, overlay_df):
    """duplicate indices of overlay_df are now removed,
    resolving a bug that caused duplicated indices to return 2-d labels
    """
    overlay_df = pd.concat([overlay_df, overlay_df])
    overlay_pre = SpectrogramPreprocessor(2.0, overlay_df=overlay_df)
    dataset = AudioFileDataset(dataset_df, overlay_pre)
    dataset.preprocessor.pipeline.overlay.set(overlay_class="different")
    dataset.preprocessor.pipeline.overlay.set(update_labels=True)
    sample = dataset[0]
    assert np.array_equal(sample["y"].numpy(), [1, 1])

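# A plausible mechanism for the 2-d labels bug mentioned above: pandas .loc
# returns a DataFrame (2-d) rather than a Series (1-d) when a label occurs
# more than once in the index. An illustrative sketch in plain pandas
# (the names here are hypothetical):
#
#   df = pd.DataFrame({"a": [1]}, index=["x.wav"])
#   doubled = pd.concat([df, df])
#   df.loc["x.wav"]       # Series, shape (1,)
#   doubled.loc["x.wav"]  # DataFrame, shape (2, 1)
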
def test_overlay_different_class(dataset_df, overlay_pre):
    """just make sure it runs and doesn't hang"""
    overlay_pre.pipeline.overlay.set(overlay_class="different")
    dataset = AudioFileDataset(dataset_df, overlay_pre)
    dataset[0]["X"]

def predict(
    self,
    samples,
    batch_size=1,
    num_workers=0,
    activation_layer=None,
    binary_preds=None,
    threshold=None,
    split_files_into_clips=True,
    overlap_fraction=0,
    final_clip=None,
    bypass_augmentations=True,
    unsafe_samples_log=None,
):
    """Generate predictions on a set of samples

    Choose an activation layer for the scores (softmax, sigmoid, softmax
    then logit, or None), and optionally return single-target or
    multi-target binary predictions. Binary predictions are performed
    after the activation layer.

    Note: the order of returned values is (scores, predictions, unsafe_samples)

    Args:
        samples: the files to generate predictions for. Can be:
            - a dataframe with index containing audio paths, OR
            - a list (or np.ndarray) of audio file paths
        batch_size: Number of files to load simultaneously [default: 1]
        num_workers: parallelization (ie cpus or cores), use 0 for current
            process [default: 0]
        activation_layer: Optionally apply an activation layer such as
            sigmoid or softmax to the raw outputs of the model. Options:
            - None: no activation, return raw scores (ie logit, [-inf:inf])
            - 'softmax': scores for all classes sum to 1
            - 'sigmoid': all scores in [0,1] but don't sum to 1
            - 'softmax_and_logit': applies softmax first, then logit
            [default: None]
        binary_preds: Optionally return binary (thresholded 0/1) predictions.
            Options:
            - 'single_target': highest-scoring class = 1, others = 0
            - 'multi_target': scores above threshold = 1, others = 0
            - None: do not create or return binary predictions
            [default: None]
            Note: if you choose 'multi_target', you must specify `threshold`
        threshold: prediction threshold(s) for post-activation-layer scores.
            Only relevant when binary_preds == 'multi_target'.
            If activation layer is sigmoid, choose a value in [0,1].
            If activation layer is None or softmax_and_logit, in [-inf,inf].
        split_files_into_clips: If True, split each audio file into clips
            for prediction; if False, treat each file as a single sample
            [default: True]
        overlap_fraction: fraction of overlap between consecutive clips when
            predicting on clips of longer audio files. For instance, 0.5
            gives 50% overlap between consecutive clips.
        final_clip: see `opensoundscape.helpers.generate_clip_times_df`
        bypass_augmentations: If False, Actions with is_augmentation==True
            are performed [default: True]
        unsafe_samples_log: if not None, samples that failed to preprocess
            will be listed in this text file

    Returns:
        scores: df of post-activation_layer scores
        predictions: df of 0/1 preds for each class
        unsafe_samples: list of samples that failed to preprocess

    Note: if loading an audio file raises a PreprocessingError, the scores
        and predictions for that sample will be np.nan

    Note: if no return type is selected for `binary_preds`, returns None
        instead of a DataFrame for `predictions`
    """
    if binary_preds == "multi_target":
        assert threshold is not None, (
            "Must specify a threshold when generating multi_target predictions"
        )

    # set up prediction Dataset
    if split_files_into_clips:
        prediction_dataset = AudioSplittingDataset(
            samples=samples,
            preprocessor=self.preprocessor,
            overlap_fraction=overlap_fraction,
            final_clip=final_clip,
        )
    else:
        prediction_dataset = AudioFileDataset(
            samples=samples, preprocessor=self.preprocessor, return_labels=False
        )
    prediction_dataset.bypass_augmentations = bypass_augmentations

    ## Input Validation ##
    if len(prediction_dataset.classes) > 0 and list(self.classes) != list(
        prediction_dataset.classes
    ):
        warnings.warn(
            "The columns of input samples df differ from `model.classes`."
        )

    if len(prediction_dataset) < 1:
        warnings.warn(
            "prediction_dataset has zero samples. No predictions will be generated."
        )
        scores = pd.DataFrame(columns=self.classes)
        preds = None if binary_preds is None else pd.DataFrame(columns=self.classes)
        return scores, preds, prediction_dataset.unsafe_samples

    # SafeDataset will not fail on bad files,
    # but will provide a different sample! Later we go back and replace scores
    # with np.nan for the bad samples (using safe_dataset._unsafe_indices).
    # This approach to error handling feels hacky;
    # however, returning None would break the batching of samples.
    safe_dataset = SafeDataset(prediction_dataset, unsafe_behavior="substitute")

    dataloader = torch.utils.data.DataLoader(
        safe_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=False,
        # use pin_memory=True when loading files on CPU and training on GPU
        pin_memory=torch.cuda.is_available(),
    )
    # add any paths that failed to generate a clip df to _unsafe_samples
    dataloader.dataset._unsafe_samples += prediction_dataset.unsafe_samples

    ### Prediction/Inference ###

    # move network to device
    self.network.to(self.device)
    self.network.eval()

    # initialize scores and preds
    total_scores = []
    total_preds = []

    # disable gradient updates during inference
    with torch.set_grad_enabled(False):
        for batch in dataloader:
            # get batch of Tensors
            batch_tensors = batch["X"].to(self.device)
            batch_tensors.requires_grad = False

            # forward pass of network: feature extractor + classifier
            logits = self.network.forward(batch_tensors)

            ### Activation layer ###
            scores = apply_activation_layer(logits, activation_layer)

            ### Binary predictions ###
            batch_preds = tensor_binary_predictions(
                scores=scores, mode=binary_preds, threshold=threshold
            )

            # disable gradients on returned values
            total_scores.append(scores.detach().cpu().numpy())
            total_preds.append(batch_preds.float().detach().cpu().numpy())

    # aggregate across all batches
    total_scores = np.concatenate(total_scores, axis=0)
    total_preds = np.concatenate(total_preds, axis=0)

    # replace scores/preds with nan for samples that failed in preprocessing.
    # This feels hacky (we predicted on substitute samples rather than
    # skipping the samples that failed preprocessing)
    total_scores[dataloader.dataset._unsafe_indices, :] = np.nan
    if binary_preds is not None:
        total_preds[dataloader.dataset._unsafe_indices, :] = np.nan

    # return 2 DataFrames with same index/columns as prediction_dataset's df;
    # use None as a placeholder if no preds
    samples = prediction_dataset.label_df.index.values
    score_df = pd.DataFrame(index=samples, data=total_scores, columns=self.classes)
    if split_files_into_clips:  # return a multi-index
        score_df.index = pd.MultiIndex.from_frame(
            prediction_dataset.clip_times_df.reset_index()
        )

    # binary 0/1 predictions
    if binary_preds is None:
        pred_df = None
    else:
        pred_df = pd.DataFrame(index=samples, data=total_preds, columns=self.classes)
        if split_files_into_clips:  # return a multi-index
            pred_df.index = pd.MultiIndex.from_frame(
                prediction_dataset.clip_times_df.reset_index()
            )

    # warn the user if there were unsafe samples (failed to preprocess)
    # and log them to a file
    unsafe_samples = dataloader.dataset.report(log=unsafe_samples_log)

    return score_df, pred_df, unsafe_samples

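# A minimal usage sketch for `predict` (the paths below are hypothetical
# placeholders, not files shipped with the repo):
#
#   model = cnn.CNN("resnet18", classes=[0, 1], sample_duration=5.0)
#   scores, preds, unsafe = model.predict(
#       ["recording_1.wav", "recording_2.wav"],
#       activation_layer="sigmoid",
#       binary_preds="multi_target",
#       threshold=0.5,  # required when binary_preds == 'multi_target'
#   )
#   # with the default split_files_into_clips=True, the returned dataframes
#   # carry a multi-index built from clip_times_df (one row per clip)
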
def _set_train(self, train_df, batch_size, num_workers):
    """Prepare network for training on train_df

    Args:
        train_df: dataframe of training samples
            (index: audio paths; columns: classes)
        batch_size: number of training files to load/process before
            re-calculating the loss function and backpropagation
        num_workers: parallelization (number of cores or cpus)

    Effects:
        Sets up the optimization, loss function, and network
    """

    ###########################
    # Move network to device  #
    ###########################
    self.network.to(self.device)

    ######################
    #  Dataloader setup  #
    ######################
    train_dataset = AudioFileDataset(train_df, self.preprocessor)
    train_dataset.bypass_augmentations = False

    # SafeDataset loads a new sample if loading a sample throws an error;
    # indices of bad samples are appended to ._unsafe_indices
    train_safe_dataset = SafeDataset(train_dataset, unsafe_behavior="substitute")

    # train_loader samples batches of images + labels from training set
    self.train_loader = self._init_dataloader(
        train_safe_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=True,
    )

    ###########################
    #   Setup loss function   #
    ###########################
    self._init_loss_fn()

    ######################
    # Optimization setup #
    ######################

    # Set up optimizer parameters for each network component.
    # Note: we re-create bc the user may have changed self.optimizer_cls.
    # If the optimizer already exists, keep the same state dict
    # (for instance, user may be resuming training w/ a saved state dict)
    if self.opt_net is not None:
        optim_state_dict = self.opt_net.state_dict()
        self.opt_net = self._init_optimizer()
        self.opt_net.load_state_dict(optim_state_dict)
    else:
        self.opt_net = self._init_optimizer()

    # Set up learning rate cooling schedule
    self.scheduler = optim.lr_scheduler.StepLR(
        self.opt_net,
        step_size=self.lr_update_interval,
        gamma=self.lr_cooling_factor,
        last_epoch=self.current_epoch - 1,
    )

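# A minimal sketch (generic names, not part of this module) of the optimizer
# state-preservation pattern used in _set_train above: re-create the optimizer
# object but carry over its state dict so momentum buffers survive.
def _optimizer_state_preservation_sketch():
    import torch.nn as nn
    import torch.optim as optim

    net = nn.Linear(10, 2)
    opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    # ... training steps would populate momentum buffers in opt.state_dict() ...
    saved_state = opt.state_dict()
    # re-create the optimizer (e.g., its class or settings may have changed)
    opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    # resume with the saved optimizer state instead of starting fresh
    opt.load_state_dict(saved_state)
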
@pytest.fixture()
def dataset(preprocessor):
    paths = ["tests/audio/veryshort.wav", "tests/audio/silence_10s.mp3"]
    labels = [[0, 1], [1, 0]]
    df = pd.DataFrame(index=paths, data=labels, columns=[0, 1])
    return AudioFileDataset(df, preprocessor)

def test_overlay_tries_different_sample(dataset_df, bad_good_df):
    pre = SpectrogramPreprocessor(sample_duration=2.0, overlay_df=bad_good_df)
    dataset = AudioFileDataset(dataset_df, pre)
    # should try to load the bad sample, then load the good one
    dataset[0]["X"]

def test_spec_preprocessor_overlay(dataset_df, overlay_df, overlay_pre):
    dataset = AudioFileDataset(dataset_df, overlay_pre)
    sample1 = dataset[0]["X"]
    dataset.preprocessor.pipeline.overlay.bypass = True
    sample2 = dataset[0]["X"]
    assert not np.array_equal(sample1, sample2)

def test_spec_preprocessor_augment_on(dataset_df, pre):
    """should return different images each time"""
    dataset = AudioFileDataset(dataset_df, pre)
    sample1 = dataset[0]["X"]
    sample2 = dataset[0]["X"]
    assert not np.array_equal(sample1, sample2)

def test_spec_preprocessor_augment_off(dataset_df, pre):
    """should return same image each time"""
    dataset = AudioFileDataset(dataset_df, pre, bypass_augmentations=True)
    sample1 = dataset[0]["X"].numpy()
    sample2 = dataset[0]["X"].numpy()
    assert np.array_equal(sample1, sample2)

def test_return_labels_no_columns_warning(dataset_df, pre):
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        # raises warning bc return_labels=True but no columns in df
        AudioFileDataset(dataset_df[[]], pre)
        assert "return_labels" in str(w[0].message)

@pytest.fixture()
def small_dataset(dataset_df, overlay_pre):
    return AudioFileDataset(dataset_df, overlay_pre)

def test_overlay_specific_class(dataset_df, overlay_pre):
    """just make sure it runs and doesn't hang"""
    dataset = AudioFileDataset(dataset_df, overlay_pre)
    dataset.preprocessor.pipeline.overlay.set(overlay_class=1)
    dataset[0]["X"]

def test_overlay_with_weight_range(dataset_df, overlay_pre):
    """overlay should allow a range [min, max] for overlay_weight"""
    dataset = AudioFileDataset(dataset_df, overlay_pre)
    dataset.preprocessor.pipeline.overlay.set(overlay_weight=[0.3, 0.7])
    dataset[0]["X"]