def test_safe_preprocessor_handles_short_file(preprocessor):
    """should raise warning but not fail"""
    dataset = SafeDataset(preprocessor)
    sample = dataset[0]
    # skips first sample when it fails and loads next
    assert np.array_equal(sample["y"].numpy(), [1, 0])
    # stores failed samples in ._unsafe_indices
    assert len(dataset._unsafe_indices) == 1
def test_safe_dataset_returns_none(preprocessor):
    """should give None for the sample"""
    dataset = SafeDataset(preprocessor, unsafe_behavior="none")
    sample = dataset[0]
    # returns None for the sample
    assert sample is None
    # stores failed samples in ._unsafe_indices
    assert len(dataset._unsafe_indices) == 1
def test_safe_dataset_handles_short_file(dataset):
    """should raise warning but not fail"""
    dataset = SafeDataset(dataset, unsafe_behavior="substitute")
    dataset.dataset.preprocessor.pipeline.trim_audio.set(extend=False)
    dataset.dataset.preprocessor.pipeline.random_trim_audio.set(extend=False)
    sample = dataset[0]
    # skips first sample when it fails and loads next
    assert np.array_equal(sample["y"].numpy(), [1, 0])
    # stores failed samples in ._unsafe_indices
    assert len(dataset._unsafe_indices) == 1
def predict(
    self,
    samples,
    batch_size=1,
    num_workers=0,
    activation_layer=None,
    binary_preds=None,
    threshold=None,
    split_files_into_clips=True,
    overlap_fraction=0,
    final_clip=None,
    bypass_augmentations=True,
    unsafe_samples_log=None,
):
    """Generate predictions on a dataset

    Choose to return any combination of scores and single-target or
    multi-target binary predictions. Also choose an activation layer for
    scores (softmax, sigmoid, softmax then logit, or None). Binary
    predictions are performed post-activation layer.

    Note: the order of returned dataframes is (scores, preds, unsafe_samples)

    Args:
        samples: the files to generate predictions for. Can be:
            - a dataframe with index containing audio paths, OR
            - a list (or np.ndarray) of audio file paths
        batch_size: Number of files to load simultaneously [default: 1]
        num_workers: parallelization (ie cpus or cores),
            use 0 for current process [default: 0]
        activation_layer: Optionally apply an activation layer such as
            sigmoid or softmax to the raw outputs of the model.
            options:
            - None: no activation, return raw scores (ie logit, [-inf:inf])
            - 'softmax': scores for all classes sum to 1
            - 'sigmoid': all scores in [0,1] but don't sum to 1
            - 'softmax_and_logit': applies softmax first, then logit
            [default: None]
        binary_preds: Optionally return binary (thresholded 0/1) predictions
            options:
            - 'single_target': highest-scoring class = 1, others = 0
            - 'multi_target': scores above threshold = 1, others = 0
            - None: do not create or return binary predictions
            [default: None]
            Note: if you choose 'multi_target', you must specify `threshold`
        threshold: prediction threshold(s) for post-activation-layer scores.
            Only relevant when binary_preds == 'multi_target'.
            If the activation layer is sigmoid, choose a value in [0,1];
            if it is None or softmax_and_logit, in [-inf,inf].
        overlap_fraction: fraction of overlap between consecutive clips when
            predicting on clips of longer audio files. For instance, 0.5
            gives 50% overlap between consecutive clips.
        final_clip: see `opensoundscape.helpers.generate_clip_times_df`
        bypass_augmentations: If False, Actions with is_augmentation==True
            are performed. [default: True]
        unsafe_samples_log: if not None, samples that failed to preprocess
            will be listed in this text file.

    Returns:
        scores: df of post-activation_layer scores
        predictions: df of 0/1 preds for each class
        unsafe_samples: list of samples that failed to preprocess

    Note: if loading an audio file raises a PreprocessingError, the scores
        and predictions for that sample will be np.nan

    Note: if no return type is selected for `binary_preds`, returns None
        instead of a DataFrame for `predictions`
    """
    if binary_preds == "multi_target":
        assert threshold is not None, (
            "Must specify a threshold when generating multi_target predictions"
        )

    # set up prediction Dataset
    if split_files_into_clips:
        prediction_dataset = AudioSplittingDataset(
            samples=samples,
            preprocessor=self.preprocessor,
            overlap_fraction=overlap_fraction,
            final_clip=final_clip,
        )
    else:
        prediction_dataset = AudioFileDataset(
            samples=samples, preprocessor=self.preprocessor, return_labels=False
        )
    prediction_dataset.bypass_augmentations = bypass_augmentations

    ## Input Validation ##
    if len(prediction_dataset.classes) > 0 and not list(self.classes) == list(
        prediction_dataset.classes
    ):
        warnings.warn(
            "The columns of input samples df differ from `model.classes`."
        )

    if len(prediction_dataset) < 1:
        warnings.warn(
            "prediction_dataset has zero samples. No predictions will be generated."
        )
        scores = pd.DataFrame(columns=self.classes)
        preds = None if binary_preds is None else pd.DataFrame(columns=self.classes)
        return scores, preds, prediction_dataset.unsafe_samples

    # SafeDataset will not fail on bad files, but will provide a different
    # sample! Later we go back and replace scores with np.nan for the bad
    # samples (using safe_dataset._unsafe_indices).
    # This approach to error handling feels hacky; however, returning None
    # would break the batching of samples.
    safe_dataset = SafeDataset(prediction_dataset, unsafe_behavior="substitute")

    dataloader = torch.utils.data.DataLoader(
        safe_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=False,
        # use pin_memory=True when loading files on CPU and training on GPU
        pin_memory=torch.cuda.is_available(),
    )
    # add any paths that failed to generate a clip df to _unsafe_samples
    dataloader.dataset._unsafe_samples += prediction_dataset.unsafe_samples

    ### Prediction/Inference ###

    # move network to device
    self.network.to(self.device)
    self.network.eval()

    # initialize scores and preds
    total_scores = []
    total_preds = []

    # disable gradient updates during inference
    with torch.set_grad_enabled(False):
        for batch in dataloader:
            # get batch of Tensors
            batch_tensors = batch["X"].to(self.device)
            batch_tensors.requires_grad = False

            # forward pass of network: feature extractor + classifier
            logits = self.network.forward(batch_tensors)

            ### Activation layer ###
            scores = apply_activation_layer(logits, activation_layer)

            ### Binary predictions ###
            batch_preds = tensor_binary_predictions(
                scores=scores, mode=binary_preds, threshold=threshold
            )

            # disable gradients on returned values
            total_scores.append(scores.detach().cpu().numpy())
            total_preds.append(batch_preds.float().detach().cpu().numpy())

    # aggregate across all batches
    total_scores = np.concatenate(total_scores, axis=0)
    total_preds = np.concatenate(total_preds, axis=0)

    # replace scores/preds with nan for samples that failed in preprocessing
    # this feels hacky (we predicted on substitute samples rather than
    # skipping the samples that failed preprocessing)
    total_scores[dataloader.dataset._unsafe_indices, :] = np.nan
    if binary_preds is not None:
        total_preds[dataloader.dataset._unsafe_indices, :] = np.nan

    # return 2 DataFrames with same index/columns as prediction_dataset's df
    # use None as a placeholder if there are no preds
    samples = prediction_dataset.label_df.index.values
    score_df = pd.DataFrame(index=samples, data=total_scores, columns=self.classes)
    if split_files_into_clips:  # return a multi-index
        score_df.index = pd.MultiIndex.from_frame(
            prediction_dataset.clip_times_df.reset_index()
        )

    # binary 0/1 predictions
    if binary_preds is None:
        pred_df = None
    else:
        pred_df = pd.DataFrame(index=samples, data=total_preds, columns=self.classes)
        if split_files_into_clips:  # return a multi-index
            pred_df.index = pd.MultiIndex.from_frame(
                prediction_dataset.clip_times_df.reset_index()
            )

    # warn the user if there were unsafe samples (failed to preprocess)
    # and log them to a file
    unsafe_samples = dataloader.dataset.report(log=unsafe_samples_log)

    return score_df, pred_df, unsafe_samples
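# Usage sketch (illustrative, not part of the class; the file paths and
# option choices below are hypothetical): calling `predict` on a list of
# audio paths and unpacking the three return values.
#
#   scores, preds, unsafe = model.predict(
#       ["a.wav", "b.wav"],           # hypothetical audio paths
#       activation_layer="sigmoid",   # scores in [0,1]
#       binary_preds="multi_target",  # requires `threshold`
#       threshold=0.5,
#   )
#   # rows for files that failed preprocessing contain np.nan,
#   # and `unsafe` lists those file paths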
def _set_train(self, train_df, batch_size, num_workers):
    """Prepare network for training on train_df

    Args:
        batch_size: number of training files to load/process before
            re-calculating the loss function and backpropagation
        num_workers: parallelization (number of cores or cpus)

    Effects:
        Sets up the optimization, loss function, and network
    """

    ###########################
    # Move network to device  #
    ###########################
    self.network.to(self.device)

    ######################
    # Dataloader setup   #
    ######################
    train_dataset = AudioFileDataset(train_df, self.preprocessor)
    train_dataset.bypass_augmentations = False

    # SafeDataset loads a new sample if loading a sample throws an error;
    # indices of bad samples are appended to ._unsafe_indices
    train_safe_dataset = SafeDataset(train_dataset, unsafe_behavior="substitute")

    # train_loader samples batches of images + labels from the training set
    self.train_loader = self._init_dataloader(
        train_safe_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=True,
    )

    ###########################
    # Setup loss function     #
    ###########################
    self._init_loss_fn()

    ######################
    # Optimization setup #
    ######################

    # Set up optimizer parameters for each network component.
    # Note: we re-create the optimizer because the user may have changed
    # self.optimizer_cls. If an optimizer already exists, keep the same
    # state dict (for instance, the user may be resuming training with a
    # saved state dict).
    if self.opt_net is not None:
        optim_state_dict = self.opt_net.state_dict()
        self.opt_net = self._init_optimizer()
        self.opt_net.load_state_dict(optim_state_dict)
    else:
        self.opt_net = self._init_optimizer()

    # Set up learning rate cooling schedule
    self.scheduler = optim.lr_scheduler.StepLR(
        self.opt_net,
        step_size=self.lr_update_interval,
        gamma=self.lr_cooling_factor,
        last_epoch=self.current_epoch - 1,
    )
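# Illustrative sketch (standalone, not part of this class; assumes a plain
# torch module and Adam): the state-dict round-trip above preserves
# per-parameter buffers such as Adam's momentum/variance estimates when the
# optimizer object is re-created, so resuming training does not reset them.
#
#   import torch
#   net = torch.nn.Linear(4, 2)
#   opt = torch.optim.Adam(net.parameters(), lr=0.001)
#   # ... train for a while ...
#   state = opt.state_dict()                            # capture buffers
#   opt = torch.optim.Adam(net.parameters(), lr=0.001)  # fresh optimizer
#   opt.load_state_dict(state)                          # buffers restored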
def predict(
    self,
    prediction_dataset,
    batch_size=1,
    num_workers=0,
    activation_layer=None,  # 'softmax', 'sigmoid', 'softmax_and_logit', None
    binary_preds=None,  # 'single_target', 'multi_target', None
    threshold=0.5,
    error_log=None,
):
    """Generate predictions on a dataset

    Choose to return any combination of scores, labels, and single-target
    or multi-target binary predictions. Also choose an activation layer for
    scores (softmax, sigmoid, softmax then logit, or None).

    Note: the order of returned dataframes is (scores, preds, labels)

    Args:
        prediction_dataset: a Preprocessor or Dataset object that returns
            tensors, such as AudioToSpectrogramPreprocessor (no
            augmentation) or CnnPreprocessor (w/augmentation) from
            opensoundscape.datasets
        batch_size: Number of files to load simultaneously [default: 1]
        num_workers: parallelization (ie cpus or cores),
            use 0 for current process [default: 0]
        activation_layer: Optionally apply an activation layer such as
            sigmoid or softmax to the raw outputs of the model.
            options:
            - None: no activation, return raw scores (ie logit, [-inf:inf])
            - 'softmax': scores for all classes sum to 1
            - 'sigmoid': all scores in [0,1] but don't sum to 1
            - 'softmax_and_logit': applies softmax first, then logit
            [default: None]
        binary_preds: Optionally return binary (thresholded 0/1) predictions
            options:
            - 'single_target': highest-scoring class = 1, others = 0
            - 'multi_target': scores above threshold = 1, others = 0
            - None: do not create or return binary predictions
            [default: None]
        threshold: prediction threshold for sigmoid scores. Only relevant
            when binary_preds == 'multi_target'
        error_log: if not None, saves a list of files that raised errors
            to the specified file location [default: None]

    Returns:
        3 DataFrames (or Nones), with index matching prediction_dataset.df
            scores: post-activation_layer scores
            predictions: 0/1 preds for each class
            labels: labels from dataset (if available)

    Note: if loading an audio file raises a PreprocessingError, the scores
        and predictions for that sample will be np.nan

    Note: if no return type is selected for labels/scores/preds, returns
        None instead of a DataFrame in the returned tuple
    """
    err_msg = (
        "Prediction dataset must have same classes "
        "and class order as model object, or no classes."
    )
    if len(prediction_dataset.df.columns) > 0:
        assert list(self.classes) == list(prediction_dataset.df.columns), err_msg

    if torch.cuda.is_available():
        self.device = torch.device("cuda")
    else:
        self.device = torch.device("cpu")
    self.network.to(self.device)
    self.network.eval()

    # SafeDataset will not fail on bad files,
    # but will provide a different sample!
    # Later we go back and replace scores with np.nan for the bad samples
    # (using safe_dataset._unsafe_indices).
    # This approach to error handling feels hacky.
    safe_dataset = SafeDataset(prediction_dataset)

    dataloader = torch.utils.data.DataLoader(
        safe_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=False,
        # use pin_memory=True when loading files on CPU and training on GPU
        pin_memory=torch.cuda.is_available(),
    )

    ### Prediction ###
    total_scores = []
    total_preds = []
    total_tgts = []
    failed_files = []  # keep list of any samples that raise errors
    has_labels = False

    # disable gradient updates during inference
    with torch.set_grad_enabled(False):
        for batch in dataloader:
            # get batch of Tensors
            batch_tensors = batch["X"].to(self.device)
            batch_tensors.requires_grad = False

            # get batch's labels if available
            batch_targets = torch.Tensor([]).to(self.device)
            if "y" in batch.keys():
                batch_targets = batch["y"].to(self.device)
                batch_targets.requires_grad = False
                has_labels = True

            # forward pass of network: feature extractor + classifier
            logits = self.network.forward(batch_tensors)

            ### Activation layer ###
            if activation_layer is None:
                # scores [-inf,inf]
                scores = logits
            elif activation_layer == "softmax":
                # "softmax" activation: preds across all classes sum to 1
                scores = softmax(logits, 1)
            elif activation_layer == "sigmoid":
                # map [-inf,inf] to [0,1]
                scores = torch.sigmoid(logits)
            elif activation_layer == "softmax_and_logit":
                # scores [-inf,inf]
                scores = torch.logit(softmax(logits, 1))
            else:
                raise ValueError(
                    f"invalid option for activation_layer: {activation_layer}"
                )

            ### Binary predictions ###
            if binary_preds == "single_target":
                # predict highest scoring class only
                batch_preds = F.one_hot(logits.argmax(1), len(logits[0]))
            elif binary_preds == "multi_target":
                # predict 0 or 1 based on a fixed threshold
                batch_preds = torch.sigmoid(logits) >= threshold
            elif binary_preds is None:
                batch_preds = torch.Tensor([])
            else:
                raise ValueError(
                    f"invalid option for binary_preds: {binary_preds}"
                )

            # detach the returned values: they are currently tethered to
            # gradients and updates via optimizer/backprop. detach()
            # returns just the numeric values.
            total_scores.append(scores.detach().cpu().numpy())
            total_preds.append(batch_preds.float().detach().cpu().numpy())
            total_tgts.append(batch_targets.int().detach().cpu().numpy())

    # aggregate across all batches
    total_tgts = np.concatenate(total_tgts, axis=0)
    total_scores = np.concatenate(total_scores, axis=0)
    total_preds = np.concatenate(total_preds, axis=0)

    # replace scores/preds with nan for samples that failed in preprocessing
    # this feels hacky (we predicted on substitute samples rather than
    # skipping the samples that failed preprocessing)
    total_scores[safe_dataset._unsafe_indices, :] = np.nan
    if binary_preds is not None:
        total_preds[safe_dataset._unsafe_indices, :] = np.nan

    # return 3 DataFrames with same index/columns as prediction_dataset's df
    # use None as a placeholder if there are no preds / labels
    samples = prediction_dataset.df.index.values
    score_df = pd.DataFrame(index=samples, data=total_scores, columns=self.classes)
    pred_df = (
        None
        if binary_preds is None
        else pd.DataFrame(index=samples, data=total_preds, columns=self.classes)
    )
    label_df = (
        None
        if not has_labels
        else pd.DataFrame(index=samples, data=total_tgts, columns=self.classes)
    )

    return score_df, pred_df, label_df
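# Worked example (illustrative, not part of the class): how the activation
# layer options above transform a raw logit vector, e.g. for two classes:
#
#   import torch
#   from torch.nn.functional import softmax
#   logits = torch.tensor([[2.0, 0.0]])
#   softmax(logits, 1)               # tensor([[0.8808, 0.1192]]) - sums to 1
#   torch.sigmoid(logits)            # tensor([[0.8808, 0.5000]]) - independent
#   torch.logit(softmax(logits, 1))  # tensor([[ 2.0000, -2.0000]])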
def test_safe_dataset_raises(preprocessor):
    """should raise an exception on bad sample"""
    dataset = SafeDataset(preprocessor, unsafe_behavior="raise")
    with pytest.raises(Exception):
        sample = dataset[0]
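# Minimal sketch of the behavior these tests exercise (an illustrative
# stand-in, not the library's SafeDataset implementation): wrap another
# dataset and, when __getitem__ fails, either substitute the next sample,
# return None, or re-raise, recording the failed index in ._unsafe_indices.


class SketchSafeDataset:
    """Hypothetical stand-in illustrating SafeDataset's unsafe_behavior modes"""

    def __init__(self, dataset, unsafe_behavior="substitute"):
        self.dataset = dataset
        self.unsafe_behavior = unsafe_behavior
        self._unsafe_indices = []

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        try:
            return self.dataset[idx]
        except Exception:
            # record the failed sample's index
            self._unsafe_indices.append(idx)
            if self.unsafe_behavior == "raise":
                raise
            if self.unsafe_behavior == "none":
                return None
            # "substitute": fall back to the next sample
            return self[(idx + 1) % len(self.dataset)]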