Example #1
def test_safe_preprocessor_handles_short_file(preprocessor):
    """should raise warning but not fail"""
    dataset = SafeDataset(preprocessor)

    sample = dataset[0]

    # skips first sample when it fails and loads next
    assert np.array_equal(sample["y"].numpy(), [1, 0])
    # stores failed samples in ._unsafe_indices
    assert len(dataset._unsafe_indices) == 1
Example #2
def test_safe_dataset_returns_none(preprocessor):
    """should give None for the sample"""
    dataset = SafeDataset(preprocessor, unsafe_behavior="none")

    sample = dataset[0]

    # returns None for the sample
    assert sample is None

    # stores failed samples in ._unsafe_indices
    assert len(dataset._unsafe_indices) == 1
Example #3
def test_safe_dataset_handles_short_file(dataset):
    """should raise warning but not fail"""
    dataset = SafeDataset(dataset, unsafe_behavior="substitute")
    dataset.dataset.preprocessor.pipeline.trim_audio.set(extend=False)
    dataset.dataset.preprocessor.pipeline.random_trim_audio.set(extend=False)
    sample = dataset[0]

    # skips first sample when it fails and loads next
    assert np.array_equal(sample["y"].numpy(), [1, 0])

    # stores failed samples in ._unsafe_indices
    assert len(dataset._unsafe_indices) == 1
Example #4
    def predict(
        self,
        samples,
        batch_size=1,
        num_workers=0,
        activation_layer=None,
        binary_preds=None,
        threshold=None,
        split_files_into_clips=True,
        overlap_fraction=0,
        final_clip=None,
        bypass_augmentations=True,
        unsafe_samples_log=None,
    ):
        """Generate predictions on a dataset

        Choose to return raw scores and/or single-target or multi-target binary
        predictions. Also choose an activation layer for the scores
        (softmax, sigmoid, softmax then logit, or None). Binary predictions are
        thresholded after the activation layer is applied.

        Note: the order of returned values is (scores, predictions, unsafe_samples)

        Args:
            samples:
                the files to generate predictions for. Can be:
                - a dataframe with index containing audio paths, OR
                - a list (or np.ndarray) of audio file paths
            batch_size:
                Number of files to load simultaneously [default: 1]
            num_workers:
                parallelization (i.e., number of CPUs or cores); use 0 for the
                current process [default: 0]
            activation_layer:
                Optionally apply an activation layer such as sigmoid or
                softmax to the raw outputs of the model.
                options:
                - None: no activation, return raw scores (i.e., logits in [-inf, inf])
                - 'softmax': scores all classes sum to 1
                - 'sigmoid': all scores in [0,1] but don't sum to 1
                - 'softmax_and_logit': applies softmax first then logit
                [default: None]
            binary_preds:
                Optionally return binary (thresholded 0/1) predictions
                options:
                - 'single_target': max scoring class = 1, others = 0
                - 'multi_target': scores above threshold = 1, others = 0
                - None: do not create or return binary predictions
                [default: None]
                Note: if you choose multi-target, you must specify `threshold`
            threshold:
                prediction threshold(s) for post-activation-layer scores.
                Only relevant when binary_preds == 'multi_target'.
                If the activation layer is sigmoid, choose a value in [0,1];
                if it is None or softmax_and_logit, in [-inf,inf]
            split_files_into_clips: If True, split each audio file into clips
                and predict on each clip; if False, treat each file as a
                single sample [default: True]
            overlap_fraction: fraction of overlap between consecutive clips when
                predicting on clips of longer audio files. For instance, 0.5
                gives 50% overlap between consecutive clips.
            final_clip: see `opensoundscape.helpers.generate_clip_times_df`
            bypass_augmentations: If False, Actions with
                is_augmentation==True are performed. Default True.
            unsafe_samples_log: if not None, samples that failed to preprocess
                will be listed in this text file.

        Returns:
            scores: df of post-activation_layer scores
            predictions: df of 0/1 preds for each class
            unsafe_samples: list of samples that failed to preprocess

        Note: if loading an audio file raises a PreprocessingError, the scores
            and predictions for that sample will be np.nan

        Note: if no return type is selected for `binary_preds`, returns None
        instead of a DataFrame for `predictions`
        """
        if binary_preds == "multi_target":
            assert threshold is not None, (
                "Must specify a threshold when"
                " generating multi_target predictions")

        # set up prediction Dataset
        if split_files_into_clips:
            prediction_dataset = AudioSplittingDataset(
                samples=samples,
                preprocessor=self.preprocessor,
                overlap_fraction=overlap_fraction,
                final_clip=final_clip,
            )
        else:
            prediction_dataset = AudioFileDataset(
                samples=samples,
                preprocessor=self.preprocessor,
                return_labels=False)
        prediction_dataset.bypass_augmentations = bypass_augmentations

        ## Input Validation ##
        if len(prediction_dataset.classes) > 0 and list(self.classes) != list(
                prediction_dataset.classes):
            warnings.warn(
                "The columns of input samples df differ from `model.classes`.")

        if len(prediction_dataset) < 1:
            warnings.warn(
                "prediction_dataset has zero samples. No predictions will be generated."
            )
            scores = pd.DataFrame(columns=self.classes)
            preds = None if binary_preds is None else pd.DataFrame(
                columns=self.classes)
            return scores, preds, prediction_dataset.unsafe_samples

        # SafeDataset will not fail on bad files,
        # but will provide a different sample! Later we go back and replace scores
        # with np.nan for the bad samples (using safe_dataset._unsafe_indices)
        # this approach to error handling feels hacky
        # however, returning None would break the batching of samples
        safe_dataset = SafeDataset(prediction_dataset,
                                   unsafe_behavior="substitute")

        dataloader = torch.utils.data.DataLoader(
            safe_dataset,
            batch_size=batch_size,
            num_workers=num_workers,
            shuffle=False,
            # use pin_memory=True when loading files on CPU and training on GPU
            pin_memory=torch.cuda.is_available(),
        )
        # add any paths that failed to generate a clip df to _unsafe_samples
        dataloader.dataset._unsafe_samples += prediction_dataset.unsafe_samples

        ### Prediction/Inference ###

        # move network to device
        self.network.to(self.device)
        self.network.eval()

        # initialize scores and preds
        total_scores = []
        total_preds = []

        # disable gradient updates during inference
        with torch.set_grad_enabled(False):

            for batch in dataloader:
                # get batch of Tensors
                batch_tensors = batch["X"].to(self.device)
                batch_tensors.requires_grad = False

                # forward pass of network: feature extractor + classifier
                logits = self.network.forward(batch_tensors)

                ### Activation layer ###
                scores = apply_activation_layer(logits, activation_layer)

                ### Binary predictions ###
                batch_preds = tensor_binary_predictions(scores=scores,
                                                        mode=binary_preds,
                                                        threshold=threshold)

                # disable gradients on returned values
                total_scores.append(scores.detach().cpu().numpy())
                total_preds.append(batch_preds.float().detach().cpu().numpy())

        # aggregate across all batches
        total_scores = np.concatenate(total_scores, axis=0)
        total_preds = np.concatenate(total_preds, axis=0)

        # replace scores/preds with nan for samples that failed in preprocessing
        # this feels hacky (we predicted on substitute-samples rather than
        # skipping the samples that failed preprocessing)
        total_scores[dataloader.dataset._unsafe_indices, :] = np.nan
        if binary_preds is not None:
            total_preds[dataloader.dataset._unsafe_indices, :] = np.nan

        # return 2 DataFrames with same index/columns as prediction_dataset's df
        # use None for placeholder if no preds
        samples = prediction_dataset.label_df.index.values
        score_df = pd.DataFrame(index=samples,
                                data=total_scores,
                                columns=self.classes)
        if split_files_into_clips:  # return a multi-index
            score_df.index = pd.MultiIndex.from_frame(
                prediction_dataset.clip_times_df.reset_index())
        # binary 0/1 predictions
        if binary_preds is None:
            pred_df = None
        else:
            pred_df = pd.DataFrame(index=samples,
                                   data=total_preds,
                                   columns=self.classes)
            if split_files_into_clips:  # return a multi-index
                pred_df.index = pd.MultiIndex.from_frame(
                    prediction_dataset.clip_times_df.reset_index())

        # warn the user if there were unsafe samples (failed to preprocess)
        # and log them to a file
        unsafe_samples = dataloader.dataset.report(log=unsafe_samples_log)

        return score_df, pred_df, unsafe_samples
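
A hedged usage sketch of this method follows; `model` stands in for any trained CNN object exposing the `predict` above, and the file paths are placeholders:

# usage sketch (illustrative, not library source): `model` and the paths
# below are placeholders
audio_files = ["audio/file1.wav", "audio/file2.wav"]
scores, preds, unsafe = model.predict(
    audio_files,
    batch_size=4,
    activation_layer="sigmoid",   # map scores to [0, 1]
    binary_preds="multi_target",  # requires `threshold`
    threshold=0.5,
    unsafe_samples_log="unsafe_samples.log",
)
# scores: per-clip sigmoid scores (MultiIndex over files and clip times)
# preds:  0/1 predictions thresholded at 0.5
# unsafe: samples that failed preprocessing (their scores/preds are np.nan)
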
Example #5
    def _set_train(self, train_df, batch_size, num_workers):
        """Prepare network for training on train_df

        Args:
            train_df: dataframe of training samples, with audio file paths in
                      the index and one-hot class labels in the columns
            batch_size: number of training files to load/process before
                        re-calculating the loss function and backpropagating
            num_workers: parallelization (number of cores or CPUs)

        Effects:
            Sets up the optimization, loss function, and network
        """

        ###########################
        # Move network to device  #
        ###########################
        self.network.to(self.device)

        ######################
        # Dataloader setup   #
        ######################
        train_dataset = AudioFileDataset(train_df, self.preprocessor)
        train_dataset.bypass_augmentations = False

        # SafeDataset loads a new sample if loading a sample throws an error
        # indices of bad samples are appended to ._unsafe_indices
        train_safe_dataset = SafeDataset(train_dataset,
                                         unsafe_behavior="substitute")

        # train_loader samples batches of images + labels from training set
        self.train_loader = self._init_dataloader(
            train_safe_dataset,
            batch_size=batch_size,
            num_workers=num_workers,
            shuffle=True,
        )

        ###########################
        # Setup loss function     #
        ###########################
        self._init_loss_fn()

        ######################
        # Optimization setup #
        ######################

        # Setup optimizer parameters for each network component
        # Note: we re-create it because the user may have changed self.optimizer_cls
        # If optimizer already exists, keep the same state dict
        # (for instance, user may be resuming training w/saved state dict)
        if self.opt_net is not None:
            optim_state_dict = self.opt_net.state_dict()
            self.opt_net = self._init_optimizer()
            self.opt_net.load_state_dict(optim_state_dict)
        else:
            self.opt_net = self._init_optimizer()

        # Set up learning rate cooling schedule
        self.scheduler = optim.lr_scheduler.StepLR(
            self.opt_net,
            step_size=self.lr_update_interval,
            gamma=self.lr_cooling_factor,
            last_epoch=self.current_epoch - 1,
        )
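
The optimizer-recreation pattern above (rebuild the optimizer but carry over its accumulated state, e.g. momentum buffers, when resuming training) can be shown in isolation. A minimal standalone sketch, using a toy linear layer in place of the model's actual network:

import torch.nn as nn
import torch.optim as optim

net = nn.Linear(10, 2)  # toy layer standing in for self.network
opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

# ... training steps would populate momentum buffers in opt.state_dict() ...

# re-create the optimizer while preserving accumulated state,
# mirroring the opt_net logic in _set_train above
saved_state = opt.state_dict()
opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
opt.load_state_dict(saved_state)
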
Example #6
    def predict(
        self,
        prediction_dataset,
        batch_size=1,
        num_workers=0,
        activation_layer=None,  # 'softmax', 'sigmoid', 'softmax_and_logit', or None
        binary_preds=None,  # 'single_target', 'multi_target', or None
        threshold=0.5,
        error_log=None,
    ):
        """Generate predictions on a dataset

        Choose to return any combination of scores, labels, and single-target or
        multi-target binary predictions. Also choose activation layer for scores
        (softmax, sigmoid, softmax then logit, or None).

        Note: the order of returned dataframes is (scores, preds, labels)

        Args:
            prediction_dataset:
                a Preprocessor or Dataset object that returns tensors,
                such as AudioToSpectrogramPreprocessor (no augmentation)
                or CnnPreprocessor (with augmentation) from opensoundscape.datasets
            batch_size:
                Number of files to load simultaneously [default: 1]
            num_workers:
                parallelization (i.e., number of CPUs or cores); use 0 for the
                current process [default: 0]
            activation_layer:
                Optionally apply an activation layer such as sigmoid or
                softmax to the raw outputs of the model.
                options:
                - None: no activation, return raw scores (i.e., logits in [-inf, inf])
                - 'softmax': scores all classes sum to 1
                - 'sigmoid': all scores in [0,1] but don't sum to 1
                - 'softmax_and_logit': applies softmax first then logit
                [default: None]
            binary_preds:
                Optionally return binary (thresholded 0/1) predictions
                options:
                - 'single_target': max scoring class = 1, others = 0
                - 'multi_target': scores above threshold = 1, others = 0
                - None: do not create or return binary predictions
                [default: None]
            threshold:
                prediction threshold for sigmoid scores. Only relevant when
                binary_preds == 'multi_target'
            error_log:
                if not None, saves a list of files that raised errors to
                the specified file location [default: None]

        Returns: 3 DataFrames (or Nones), with index matching prediction_dataset.df
            scores: post-activation_layer scores
            predictions: 0/1 preds for each class
            labels: labels from dataset (if available)

        Note: if loading an audio file raises a PreprocessingError, the scores
            and predictions for that sample will be np.nan

        Note: if no return type is selected for labels/scores/preds, returns None
        instead of a DataFrame in the returned tuple
        """
        err_msg = ("Prediction dataset must have same classes "
                   "and class order as model object, or no classes.")
        if len(prediction_dataset.df.columns) > 0:
            assert list(self.classes) == list(
                prediction_dataset.df.columns), err_msg

        if torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")
        self.network.to(self.device)

        self.network.eval()

        # SafeDataset will not fail on bad files,
        # but will provide a different sample! Later we go back and replace scores
        # with np.nan for the bad samples (using safe_dataset._unsafe_indices)
        # this approach to error handling feels hacky
        safe_dataset = SafeDataset(prediction_dataset)

        dataloader = torch.utils.data.DataLoader(
            safe_dataset,
            batch_size=batch_size,
            num_workers=num_workers,
            shuffle=False,
            # use pin_memory=True when loading files on CPU and training on GPU
            pin_memory=torch.cuda.is_available(),
        )

        ### Prediction ###
        total_scores = []
        total_preds = []
        total_tgts = []

        failed_files = []  # keep list of any samples that raise errors

        has_labels = False

        # disable gradient updates during inference
        with torch.set_grad_enabled(False):

            for batch in dataloader:
                # get batch of Tensors
                batch_tensors = batch["X"].to(self.device)
                batch_tensors.requires_grad = False
                # get batch's labels if available
                batch_targets = torch.Tensor([]).to(self.device)
                if "y" in batch.keys():
                    batch_targets = batch["y"].to(self.device)
                    batch_targets.requires_grad = False
                    has_labels = True

                # forward pass of network: feature extractor + classifier
                logits = self.network.forward(batch_tensors)

                ### Activation layer ###
                if activation_layer is None:  # scores [-inf,inf]
                    scores = logits
                elif activation_layer == "softmax":
                    # "softmax" activation: preds across all classes sum to 1
                    scores = softmax(logits, 1)
                elif activation_layer == "sigmoid":  # map [-inf,inf] to [0,1]
                    scores = torch.sigmoid(logits)
                elif activation_layer == "softmax_and_logit":  # scores [-inf,inf]
                    scores = torch.logit(softmax(logits, 1))
                else:
                    raise ValueError(
                        f"invalid option for activation_layer: {activation_layer}"
                    )

                ### Binary predictions ###
                # generate binary predictions
                if binary_preds == "single_target":
                    # predict highest scoring class only
                    batch_preds = F.one_hot(logits.argmax(1), len(logits[0]))
                elif binary_preds == "multi_target":
                    # predict 0 or 1 based on a fixed threshold
                    batch_preds = torch.sigmoid(logits) >= threshold
                elif binary_preds is None:
                    batch_preds = torch.Tensor([])
                else:
                    raise ValueError(
                        f"invalid option for binary_preds: {binary_preds}")

                # detach the returned values: currently tethered to gradients
                # and updates via optimizer/backprop. detach() returns
                # just numeric values.
                total_scores.append(scores.detach().cpu().numpy())
                total_preds.append(batch_preds.float().detach().cpu().numpy())
                total_tgts.append(batch_targets.int().detach().cpu().numpy())

        # aggregate across all batches
        total_tgts = np.concatenate(total_tgts, axis=0)
        total_scores = np.concatenate(total_scores, axis=0)
        total_preds = np.concatenate(total_preds, axis=0)

        # replace scores/preds with nan for samples that failed in preprocessing
        # this feels hacky (we predicted on substitute-samples rather than
        # skipping the samples that failed preprocessing)
        total_scores[safe_dataset._unsafe_indices, :] = np.nan
        if binary_preds is not None:
            total_preds[safe_dataset._unsafe_indices, :] = np.nan

        # return 3 DataFrames with same index/columns as prediction_dataset's df
        # use None for placeholder if no preds / labels
        samples = prediction_dataset.df.index.values
        score_df = pd.DataFrame(index=samples,
                                data=total_scores,
                                columns=self.classes)
        pred_df = (None if binary_preds is None else pd.DataFrame(
            index=samples, data=total_preds, columns=self.classes))
        label_df = (None if not has_labels else pd.DataFrame(
            index=samples, data=total_tgts, columns=self.classes))

        return score_df, pred_df, label_df
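
The nan-masking step used near the end of both predict() versions is plain numpy fancy indexing; a minimal standalone sketch:

import numpy as np

# scores for 4 samples x 3 classes; samples 1 and 3 failed preprocessing,
# so the SafeDataset substituted other samples and recorded the indices
total_scores = np.random.rand(4, 3)
unsafe_indices = [1, 3]

# overwrite every class score for the failed samples with nan
total_scores[unsafe_indices, :] = np.nan
assert np.isnan(total_scores[1]).all() and np.isnan(total_scores[3]).all()
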
Example #7
def test_safe_dataset_raises(preprocessor):
    """should raise an exception on bad sample"""
    dataset = SafeDataset(preprocessor, unsafe_behavior="raise")

    with pytest.raises(Exception):
        sample = dataset[0]
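
Taken together, Examples #1-#3 and #7 pin down the wrapper's contract: attempt an index, record failures in ._unsafe_indices, then substitute the next sample, return None, or re-raise depending on unsafe_behavior. A minimal sketch of such a wrapper, assuming the wrapped dataset raises on bad samples (an illustration of the contract, not opensoundscape's actual SafeDataset implementation):

class MinimalSafeDataset:
    """Illustrative stand-in for SafeDataset; not the library's code."""

    def __init__(self, dataset, unsafe_behavior="substitute"):
        self.dataset = dataset
        self.unsafe_behavior = unsafe_behavior
        self._unsafe_indices = []  # indices of samples that failed to load

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        try:
            return self.dataset[index]
        except Exception:
            self._unsafe_indices.append(index)
            if self.unsafe_behavior == "raise":
                raise
            if self.unsafe_behavior == "none":
                return None
            # "substitute": walk forward until a sample loads successfully
            for next_index in range(index + 1, len(self.dataset)):
                try:
                    return self.dataset[next_index]
                except Exception:
                    self._unsafe_indices.append(next_index)
            raise IndexError("no loadable sample found after failed index")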