def test_overlay_no_valid_samples(dataset_df, overlay_df_all_positive):
    pre = SpectrogramPreprocessor(
        sample_duration=2.0, overlay_df=overlay_df_all_positive
    )
    dataset = AudioFileDataset(dataset_df, pre)
    dataset.preprocessor.pipeline.overlay.set(overlay_class="different")
    with pytest.raises(PreprocessingError):
        dataset[0]["X"]  # no samples with "different" labels

def test_audio_file_dataset(dataset_df, pre):
    """should return tensor and labels"""
    dataset = AudioFileDataset(dataset_df, pre)
    # note: the flag lives on the dataset and is plural (`bypass_augmentations`)
    dataset.bypass_augmentations = False
    sample1 = dataset[0]["X"]
    assert sample1.numpy().shape == (3, 224, 224)
    assert dataset[0]["y"].numpy().shape == (2,)

def test_overlay_update_labels(dataset_df, overlay_pre):
    """with update_labels=True, the sample's labels should include the
    labels of the overlayed sample"""
    dataset = AudioFileDataset(dataset_df, overlay_pre)
    dataset.preprocessor.pipeline.overlay.set(overlay_class="different")
    dataset.preprocessor.pipeline.overlay.set(update_labels=True)
    sample = dataset[0]
    assert np.array_equal(sample["y"].numpy(), [1, 1])

def test_predict_wrong_input_error(test_df):
    """cannot pass a preprocessor or dataset to predict;
    only file paths as a list or a df"""
    model = cnn.CNN("resnet18", classes=[0, 1], sample_duration=5.0)
    pre = SpectrogramPreprocessor(2.0)
    with pytest.raises(AssertionError):
        model.predict(pre)
    with pytest.raises(AssertionError):
        ds = AudioFileDataset(test_df, pre)
        model.predict(ds)

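# For reference, the accepted inputs to `predict` are a list (or np.ndarray)
# of audio file paths, or a dataframe whose index contains the paths.
# A minimal sketch (the paths here are hypothetical placeholders):
#
#   model.predict(["a.wav", "b.wav"])
#   model.predict(pd.DataFrame(index=["a.wav", "b.wav"]))
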
def test_overlay_with_invalid_weight_range(dataset_df, overlay_pre):
    """overlay should raise an error for invalid overlay_weight ranges"""
    dataset = AudioFileDataset(dataset_df, overlay_pre)
    with pytest.raises(PreprocessingError):
        # upper bound > 1 is invalid
        dataset.preprocessor.pipeline.overlay.set(overlay_weight=[0.1, 1.1])
        dataset[0]["X"]
    with pytest.raises(PreprocessingError):
        # a range must contain exactly two values, [min, max]
        dataset.preprocessor.pipeline.overlay.set(overlay_weight=[0.1, 0.5, 0.9])
        dataset[0]["X"]

def test_overlay_update_labels_duplicated_index(dataset_df, overlay_df):
    """duplicate indices of overlay_df are now removed,
    resolving a bug that caused duplicated indices to return 2-d labels
    """
    overlay_df = pd.concat([overlay_df, overlay_df])
    overlay_pre = SpectrogramPreprocessor(2.0, overlay_df=overlay_df)
    dataset = AudioFileDataset(dataset_df, overlay_pre)
    dataset.preprocessor.pipeline.overlay.set(overlay_class="different")
    dataset.preprocessor.pipeline.overlay.set(update_labels=True)
    sample = dataset[0]
    assert np.array_equal(sample["y"].numpy(), [1, 1])

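# A plausible mechanism for the 2-d labels bug mentioned above: pandas .loc
# returns a DataFrame (2-d) rather than a Series (1-d) when a label occurs
# more than once in the index. An illustrative sketch in plain pandas
# (the names here are hypothetical):
#
#   df = pd.DataFrame({"a": [1]}, index=["x.wav"])
#   doubled = pd.concat([df, df])
#   df.loc["x.wav"]       # Series, shape (1,)
#   doubled.loc["x.wav"]  # DataFrame, shape (2, 1)
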
def test_overlay_different_class(dataset_df, overlay_pre):
    """just make sure it runs and doesn't hang"""
    overlay_pre.pipeline.overlay.set(overlay_class="different")
    dataset = AudioFileDataset(dataset_df, overlay_pre)
    dataset[0]["X"]

def predict(
    self,
    samples,
    batch_size=1,
    num_workers=0,
    activation_layer=None,
    binary_preds=None,
    threshold=None,
    split_files_into_clips=True,
    overlap_fraction=0,
    final_clip=None,
    bypass_augmentations=True,
    unsafe_samples_log=None,
):
    """Generate predictions on a set of samples

    Choose an activation layer for the scores (softmax, sigmoid, softmax
    then logit, or None), and optionally return single-target or
    multi-target binary predictions. Binary predictions are performed
    after the activation layer.

    Note: the order of returned values is (scores, predictions, unsafe_samples)

    Args:
        samples: the files to generate predictions for. Can be:
            - a dataframe with index containing audio paths, OR
            - a list (or np.ndarray) of audio file paths
        batch_size: Number of files to load simultaneously [default: 1]
        num_workers: parallelization (ie cpus or cores), use 0 for current
            process [default: 0]
        activation_layer: Optionally apply an activation layer such as
            sigmoid or softmax to the raw outputs of the model. Options:
            - None: no activation, return raw scores (ie logit, [-inf:inf])
            - 'softmax': scores for all classes sum to 1
            - 'sigmoid': all scores in [0,1] but don't sum to 1
            - 'softmax_and_logit': applies softmax first, then logit
            [default: None]
        binary_preds: Optionally return binary (thresholded 0/1) predictions.
            Options:
            - 'single_target': highest-scoring class = 1, others = 0
            - 'multi_target': scores above threshold = 1, others = 0
            - None: do not create or return binary predictions
            [default: None]
            Note: if you choose 'multi_target', you must specify `threshold`
        threshold: prediction threshold(s) for post-activation-layer scores.
            Only relevant when binary_preds == 'multi_target'.
            If activation layer is sigmoid, choose a value in [0,1].
            If activation layer is None or softmax_and_logit, in [-inf,inf].
        split_files_into_clips: If True, split each audio file into clips
            for prediction; if False, treat each file as a single sample
            [default: True]
        overlap_fraction: fraction of overlap between consecutive clips when
            predicting on clips of longer audio files. For instance, 0.5
            gives 50% overlap between consecutive clips.
        final_clip: see `opensoundscape.helpers.generate_clip_times_df`
        bypass_augmentations: If False, Actions with is_augmentation==True
            are performed [default: True]
        unsafe_samples_log: if not None, samples that failed to preprocess
            will be listed in this text file

    Returns:
        scores: df of post-activation_layer scores
        predictions: df of 0/1 preds for each class
        unsafe_samples: list of samples that failed to preprocess

    Note: if loading an audio file raises a PreprocessingError, the scores
        and predictions for that sample will be np.nan

    Note: if no return type is selected for `binary_preds`, returns None
        instead of a DataFrame for `predictions`
    """
    if binary_preds == "multi_target":
        assert threshold is not None, (
            "Must specify a threshold when generating multi_target predictions"
        )

    # set up prediction Dataset
    if split_files_into_clips:
        prediction_dataset = AudioSplittingDataset(
            samples=samples,
            preprocessor=self.preprocessor,
            overlap_fraction=overlap_fraction,
            final_clip=final_clip,
        )
    else:
        prediction_dataset = AudioFileDataset(
            samples=samples, preprocessor=self.preprocessor, return_labels=False
        )
    prediction_dataset.bypass_augmentations = bypass_augmentations

    ## Input Validation ##
    if len(prediction_dataset.classes) > 0 and list(self.classes) != list(
        prediction_dataset.classes
    ):
        warnings.warn(
            "The columns of input samples df differ from `model.classes`."
        )

    if len(prediction_dataset) < 1:
        warnings.warn(
            "prediction_dataset has zero samples. No predictions will be generated."
        )
        scores = pd.DataFrame(columns=self.classes)
        preds = None if binary_preds is None else pd.DataFrame(columns=self.classes)
        return scores, preds, prediction_dataset.unsafe_samples

    # SafeDataset will not fail on bad files,
    # but will provide a different sample! Later we go back and replace scores
    # with np.nan for the bad samples (using safe_dataset._unsafe_indices).
    # This approach to error handling feels hacky;
    # however, returning None would break the batching of samples.
    safe_dataset = SafeDataset(prediction_dataset, unsafe_behavior="substitute")

    dataloader = torch.utils.data.DataLoader(
        safe_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=False,
        # use pin_memory=True when loading files on CPU and training on GPU
        pin_memory=torch.cuda.is_available(),
    )
    # add any paths that failed to generate a clip df to _unsafe_samples
    dataloader.dataset._unsafe_samples += prediction_dataset.unsafe_samples

    ### Prediction/Inference ###

    # move network to device
    self.network.to(self.device)
    self.network.eval()

    # initialize scores and preds
    total_scores = []
    total_preds = []

    # disable gradient updates during inference
    with torch.set_grad_enabled(False):
        for batch in dataloader:
            # get batch of Tensors
            batch_tensors = batch["X"].to(self.device)
            batch_tensors.requires_grad = False

            # forward pass of network: feature extractor + classifier
            logits = self.network.forward(batch_tensors)

            ### Activation layer ###
            scores = apply_activation_layer(logits, activation_layer)

            ### Binary predictions ###
            batch_preds = tensor_binary_predictions(
                scores=scores, mode=binary_preds, threshold=threshold
            )

            # disable gradients on returned values
            total_scores.append(scores.detach().cpu().numpy())
            total_preds.append(batch_preds.float().detach().cpu().numpy())

    # aggregate across all batches
    total_scores = np.concatenate(total_scores, axis=0)
    total_preds = np.concatenate(total_preds, axis=0)

    # replace scores/preds with nan for samples that failed in preprocessing.
    # This feels hacky (we predicted on substitute samples rather than
    # skipping the samples that failed preprocessing)
    total_scores[dataloader.dataset._unsafe_indices, :] = np.nan
    if binary_preds is not None:
        total_preds[dataloader.dataset._unsafe_indices, :] = np.nan

    # return 2 DataFrames with same index/columns as prediction_dataset's df;
    # use None as a placeholder if no preds
    samples = prediction_dataset.label_df.index.values
    score_df = pd.DataFrame(index=samples, data=total_scores, columns=self.classes)
    if split_files_into_clips:  # return a multi-index
        score_df.index = pd.MultiIndex.from_frame(
            prediction_dataset.clip_times_df.reset_index()
        )

    # binary 0/1 predictions
    if binary_preds is None:
        pred_df = None
    else:
        pred_df = pd.DataFrame(index=samples, data=total_preds, columns=self.classes)
        if split_files_into_clips:  # return a multi-index
            pred_df.index = pd.MultiIndex.from_frame(
                prediction_dataset.clip_times_df.reset_index()
            )

    # warn the user if there were unsafe samples (failed to preprocess)
    # and log them to a file
    unsafe_samples = dataloader.dataset.report(log=unsafe_samples_log)

    return score_df, pred_df, unsafe_samples

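# A minimal usage sketch for `predict` (the paths below are hypothetical
# placeholders, not files shipped with the repo):
#
#   model = cnn.CNN("resnet18", classes=[0, 1], sample_duration=5.0)
#   scores, preds, unsafe = model.predict(
#       ["recording_1.wav", "recording_2.wav"],
#       activation_layer="sigmoid",
#       binary_preds="multi_target",
#       threshold=0.5,  # required when binary_preds == 'multi_target'
#   )
#   # with the default split_files_into_clips=True, the returned dataframes
#   # carry a multi-index built from clip_times_df (one row per clip)
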
def _set_train(self, train_df, batch_size, num_workers):
    """Prepare network for training on train_df

    Args:
        train_df: dataframe of training samples
            (index: audio paths; columns: classes)
        batch_size: number of training files to load/process before
            re-calculating the loss function and backpropagation
        num_workers: parallelization (number of cores or cpus)

    Effects:
        Sets up the optimization, loss function, and network
    """

    ###########################
    # Move network to device  #
    ###########################
    self.network.to(self.device)

    ######################
    #  Dataloader setup  #
    ######################
    train_dataset = AudioFileDataset(train_df, self.preprocessor)
    train_dataset.bypass_augmentations = False

    # SafeDataset loads a new sample if loading a sample throws an error;
    # indices of bad samples are appended to ._unsafe_indices
    train_safe_dataset = SafeDataset(train_dataset, unsafe_behavior="substitute")

    # train_loader samples batches of images + labels from training set
    self.train_loader = self._init_dataloader(
        train_safe_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=True,
    )

    ###########################
    #   Setup loss function   #
    ###########################
    self._init_loss_fn()

    ######################
    # Optimization setup #
    ######################

    # Set up optimizer parameters for each network component.
    # Note: we re-create bc the user may have changed self.optimizer_cls.
    # If the optimizer already exists, keep the same state dict
    # (for instance, user may be resuming training w/ a saved state dict)
    if self.opt_net is not None:
        optim_state_dict = self.opt_net.state_dict()
        self.opt_net = self._init_optimizer()
        self.opt_net.load_state_dict(optim_state_dict)
    else:
        self.opt_net = self._init_optimizer()

    # Set up learning rate cooling schedule
    self.scheduler = optim.lr_scheduler.StepLR(
        self.opt_net,
        step_size=self.lr_update_interval,
        gamma=self.lr_cooling_factor,
        last_epoch=self.current_epoch - 1,
    )

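# A minimal sketch (generic names, not part of this module) of the optimizer
# state-preservation pattern used in _set_train above: re-create the optimizer
# object but carry over its state dict so momentum buffers survive.
def _optimizer_state_preservation_sketch():
    import torch.nn as nn
    import torch.optim as optim

    net = nn.Linear(10, 2)
    opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    # ... training steps would populate momentum buffers in opt.state_dict() ...
    saved_state = opt.state_dict()
    # re-create the optimizer (e.g., its class or settings may have changed)
    opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    # resume with the saved optimizer state instead of starting fresh
    opt.load_state_dict(saved_state)
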
@pytest.fixture()
def dataset(preprocessor):
    paths = ["tests/audio/veryshort.wav", "tests/audio/silence_10s.mp3"]
    labels = [[0, 1], [1, 0]]
    df = pd.DataFrame(index=paths, data=labels, columns=[0, 1])
    return AudioFileDataset(df, preprocessor)

def test_overlay_tries_different_sample(dataset_df, bad_good_df):
    pre = SpectrogramPreprocessor(sample_duration=2.0, overlay_df=bad_good_df)
    dataset = AudioFileDataset(dataset_df, pre)
    # should try to load the bad sample, then load the good one
    dataset[0]["X"]

def test_spec_preprocessor_overlay(dataset_df, overlay_df, overlay_pre):
    dataset = AudioFileDataset(dataset_df, overlay_pre)
    sample1 = dataset[0]["X"]
    dataset.preprocessor.pipeline.overlay.bypass = True
    sample2 = dataset[0]["X"]
    assert not np.array_equal(sample1, sample2)

def test_spec_preprocessor_augment_on(dataset_df, pre):
    """should return different images each time"""
    dataset = AudioFileDataset(dataset_df, pre)
    sample1 = dataset[0]["X"]
    sample2 = dataset[0]["X"]
    assert not np.array_equal(sample1, sample2)

def test_spec_preprocessor_augment_off(dataset_df, pre):
    """should return same image each time"""
    dataset = AudioFileDataset(dataset_df, pre, bypass_augmentations=True)
    sample1 = dataset[0]["X"].numpy()
    sample2 = dataset[0]["X"].numpy()
    assert np.array_equal(sample1, sample2)

def test_return_labels_no_columns_warning(dataset_df, pre):
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        # raises warning bc return_labels=True but no columns in df
        AudioFileDataset(dataset_df[[]], pre)
        assert "return_labels" in str(w[0].message)

@pytest.fixture()
def small_dataset(dataset_df, overlay_pre):
    return AudioFileDataset(dataset_df, overlay_pre)

def test_overlay_specific_class(dataset_df, overlay_pre):
    """just make sure it runs and doesn't hang"""
    dataset = AudioFileDataset(dataset_df, overlay_pre)
    dataset.preprocessor.pipeline.overlay.set(overlay_class=1)
    dataset[0]["X"]

def test_overlay_with_weight_range(dataset_df, overlay_pre):
    """overlay should allow a range [min, max] for overlay_weight"""
    dataset = AudioFileDataset(dataset_df, overlay_pre)
    dataset.preprocessor.pipeline.overlay.set(overlay_weight=[0.3, 0.7])
    dataset[0]["X"]