def test_check_params(art_warning):
    """
    Check that ``LFilterPyTorch`` raises ``ValueError`` for every invalid argument set listed below.

    :param art_warning: Callback invoked with the exception when an ``ARTTestException`` is raised.
    """
    try:
        four_taps = [0.1, 0.2, -0.1, -0.2]
        rejected_arguments = [
            # denominator_coef passed as a plain list
            {
                "numerator_coef": np.array(four_taps),
                "denominator_coef": [0.0, 0.1, 0.3, 0.4],
            },
            # denominator_coef array whose first entry is 0.0
            {
                "numerator_coef": np.array(four_taps),
                "denominator_coef": np.array([0.0, 0.1, 0.3, 0.4]),
            },
            # numerator_coef passed as a plain list
            {
                "numerator_coef": list(four_taps),
                "denominator_coef": np.array([1.0, 0.1, 0.3, 0.4]),
            },
            # denominator_coef with five entries against four numerator entries
            {
                "numerator_coef": np.array(four_taps),
                "denominator_coef": np.array([1.0, 0.1, 0.3, 0.4, 0.2]),
            },
            # verbose passed as the string "True"
            {
                "numerator_coef": np.array(four_taps),
                "denominator_coef": np.array([1.0, 0.1, 0.3, 0.4]),
                "verbose": "True",
            },
        ]

        # Cases run in the listed order; the first one that does not raise fails the test.
        for kwargs in rejected_arguments:
            with pytest.raises(ValueError):
                _ = LFilterPyTorch(**kwargs)
    except ARTTestException as e:
        art_warning(e)
def test_relation_clip_values_error(art_warning):
    """
    Check that ``clip_values=(1, 0)`` makes ``LFilterPyTorch`` raise the "min >= max" ``ValueError``.

    :param art_warning: Callback invoked with the exception when an ``ARTTestException`` is raised.
    """
    try:
        coefficients = [0.1, 0.2, 0.3]
        arguments = {
            "numerator_coef": np.array(coefficients),
            "denominator_coef": np.array(coefficients),
            # Lower bound above upper bound.
            "clip_values": (1, 0),
        }
        with pytest.raises(ValueError, match="Invalid `clip_values`: min >= max."):
            LFilterPyTorch(**arguments)
    except ARTTestException as e:
        art_warning(e)
def test_triple_clip_values_error(art_warning):
    """
    Check that a three-element ``clip_values`` makes ``LFilterPyTorch`` raise the "tuple of 2 floats" ``ValueError``.

    :param art_warning: Callback invoked with the exception when an ``ARTTestException`` is raised.
    """
    try:
        coefficients = [0.1, 0.2, 0.3]
        arguments = {
            "numerator_coef": np.array(coefficients),
            "denominator_coef": np.array(coefficients),
            # Three entries instead of a (min, max) pair.
            "clip_values": (0, 1, 2),
        }
        with pytest.raises(
            ValueError,
            match="`clip_values` should be a tuple of 2 floats containing the allowed data range.",
        ):
            LFilterPyTorch(**arguments)
    except ARTTestException as e:
        art_warning(e)
def test_audio_filter(fir_filter, art_warning, expected_values):
    """
    Filter three signals with ``LFilterPyTorch`` and compare the output against the stored expectations.

    :param fir_filter: When truthy the denominator is ``[1, 0, 0, 0]``; otherwise ``[1.0, 0.1, 0.3, 0.4]`` is used.
    :param art_warning: Callback invoked with the exception when an ``ARTTestException`` is raised.
    :param expected_values: Callable returning an indexable whose items 0-2 are the input signals and
        items 3-5 the expected filtered signals.
    """
    try:
        # Load data for testing
        fixture = expected_values()
        raw_signals = [fixture[index] for index in (0, 1, 2)]
        expected_outputs = [fixture[index] for index in (3, 4, 5)]

        # Create signal data
        x = np.array(
            [np.array(signal * 2, dtype=ART_NUMPY_DTYPE) for signal in raw_signals],
            dtype=object,
        )

        # Filter params
        numerator_coef = np.array([0.1, 0.2, -0.1, -0.2], dtype=ART_NUMPY_DTYPE)
        denominator_values = [1.0, 0.0, 0.0, 0.0] if fir_filter else [1.0, 0.1, 0.3, 0.4]
        denominator_coef = np.array(denominator_values, dtype=ART_NUMPY_DTYPE)

        # Create and apply the filter
        audio_filter = LFilterPyTorch(numerator_coef=numerator_coef, denominator_coef=denominator_coef)
        result = audio_filter(x)

        # The second returned item must be None; the first holds the filtered signals.
        assert result[1] is None
        for position, expected in enumerate(expected_outputs):
            np.testing.assert_array_almost_equal(expected, result[0][position], decimal=0)
    except ARTTestException as e:
        art_warning(e)
def test_default(art_warning):
    """
    Check that an ``LFilterPyTorch`` built with default arguments returns (approximately) its input.

    :param art_warning: Callback invoked with the exception when an ``ARTTestException`` is raised.
    """
    try:
        # Small single-row signal for testing
        samples = np.array([[0.37, 0.68, 0.63, 0.48, 0.48, 0.18, 0.19]], dtype=ART_NUMPY_DTYPE)

        # Build the filter with defaults and apply it in one step
        output = LFilterPyTorch()(samples)

        # Second returned item must be None; first must match the input at decimal=0.
        assert output[1] is None
        np.testing.assert_array_almost_equal(samples, output[0], decimal=0)
    except ARTTestException as e:
        art_warning(e)
def load_audio_channel(delay, attenuation, pytorch=True):
    """
    Build an art LFilter object modelling a simple delay (multipath) channel.

    When ``delay == 0`` or ``attenuation == 0`` an identity channel (both
    coefficient arrays equal to ``[1.0]``) is used. Otherwise the numerator has
    ``delay + 1`` taps: 1.0 at index 0 and ``attenuation`` at index ``delay``,
    with a denominator of the same length that is 1.0 at index 0.

    NOTE: lfilter truncates the end of the echo, so output length equals input length

    :param delay: Echo delay in samples; converted with ``int()``.
    :param attenuation: Gain of the delayed path; converted with ``float()``.
        A warning is logged when it lies outside [-1, 1].
    :param pytorch: When truthy, try ``LFilterPyTorch`` first and fall back to
        ``LFilter`` if constructing it raises ``ImportError``.
    :return: An ``LFilterPyTorch`` or ``LFilter`` instance.
    :raises ValueError: If ``delay`` is negative after integer conversion.
    """
    delay = int(delay)
    attenuation = float(attenuation)
    if delay < 0:
        raise ValueError(f"delay {delay} must be a nonnegative number (of samples)")

    is_identity = delay == 0 or attenuation == 0
    if is_identity:
        logger.warning("Using an identity channel")
        numerator_coef = np.array([1.0])
        denominator_coef = np.array([1.0])
    else:
        if not (-1 <= attenuation <= 1):
            logger.warning(f"filter attenuation {attenuation} not in [-1, 1]")

        # Simple FIR filter with a single multipath delay
        taps = delay + 1
        numerator_coef = np.zeros(taps)
        numerator_coef[0] = 1.0
        numerator_coef[delay] = attenuation

        denominator_coef = np.zeros(taps)
        denominator_coef[0] = 1.0

    coefficients = {
        "numerator_coef": numerator_coef,
        "denominator_coef": denominator_coef,
    }

    if pytorch:
        try:
            return LFilterPyTorch(**coefficients)
        except ImportError:
            logger.exception("PyTorch not available. Resorting to scipy filter")

    # Reached when pytorch is falsy or the PyTorch filter could not be created.
    logger.warning("Scipy LFilter does not currently implement proper gradients")
    return LFilter(**coefficients)
def test_imperceptible_asr_pytorch(art_warning, expected_values, use_amp, device_type):
    """
    Run ``ImperceptibleASRPyTorch`` against a ``PyTorchDeepSpeech`` estimator that uses an
    ``LFilterPyTorch`` preprocessing defence, then check transcriptions and adversarial outputs.

    :param art_warning: Callback invoked with the exception when an ``ARTTestException`` is raised.
    :param expected_values: Callable returning a mapping with the keys ``"x1"``, ``"x2"`` and ``"x3"``.
    :param use_amp: Forwarded to the estimator and the attack; when truthy and CUDA is unavailable
        the test returns early without running.
    :param device_type: Forwarded to the filter and the estimator.
    """
    # Only import if deepspeech_pytorch module is available
    import torch

    from art.estimators.speech_recognition.pytorch_deep_speech import PyTorchDeepSpeech
    from art.attacks.evasion.imperceptible_asr.imperceptible_asr_pytorch import ImperceptibleASRPyTorch
    from art.preprocessing.audio import LFilterPyTorch

    try:
        # Skip test if gpu is not available and use_amp is true
        if use_amp and not torch.cuda.is_available():
            return

        # Load data for testing
        expected_data = expected_values()

        x1 = expected_data["x1"]
        x2 = expected_data["x2"]
        x3 = expected_data["x3"]

        # Create signal data.
        # dtype=object keeps one array per signal: without it NumPy refuses to build an array
        # from sequences of differing lengths (ragged input raises ValueError on recent releases).
        # NOTE(review): the signal lengths are not visible here - presumably they differ; confirm.
        x = np.array(
            [
                np.array(x1 * 200, dtype=ART_NUMPY_DTYPE),
                np.array(x2 * 200, dtype=ART_NUMPY_DTYPE),
                np.array(x3 * 200, dtype=ART_NUMPY_DTYPE),
            ],
            dtype=object,
        )

        # Create labels
        y = np.array(["S", "I", "GD"])

        # Create DeepSpeech estimator with preprocessing
        numerator_coef = np.array([0.0000001, 0.0000002, -0.0000001, -0.0000002], dtype=ART_NUMPY_DTYPE)
        denominator_coef = np.array([1.0, 0.0, 0.0, 0.0], dtype=ART_NUMPY_DTYPE)
        audio_filter = LFilterPyTorch(
            numerator_coef=numerator_coef, denominator_coef=denominator_coef, device_type=device_type
        )

        speech_recognizer = PyTorchDeepSpeech(
            pretrained_model="librispeech",
            device_type=device_type,
            use_amp=use_amp,
            preprocessing_defences=audio_filter,
        )

        # Create attack
        asr_attack = ImperceptibleASRPyTorch(
            estimator=speech_recognizer,
            eps=0.001,
            max_iter_1=5,
            max_iter_2=5,
            learning_rate_1=0.00001,
            learning_rate_2=0.001,
            optimizer_1=torch.optim.Adam,
            optimizer_2=torch.optim.Adam,
            global_max_length=3200,
            initial_rescale=1.0,
            decrease_factor_eps=0.8,
            num_iter_decrease_eps=5,
            alpha=0.01,
            increase_factor_alpha=1.2,
            num_iter_increase_alpha=5,
            decrease_factor_alpha=0.8,
            num_iter_decrease_alpha=5,
            win_length=2048,
            hop_length=512,
            n_fft=2048,
            batch_size=2,
            use_amp=use_amp,
            opt_level="O1",
        )

        # Test transcription output: every transcription is expected to be the empty string
        transcriptions_preprocessing = speech_recognizer.predict(x, batch_size=2, transcription_output=True)

        expected_transcriptions = np.array(["", "", ""])

        assert (expected_transcriptions == transcriptions_preprocessing).all()

        # Generate attack
        x_adv_preprocessing = asr_attack.generate(x, y)

        # Test shape: each adversarial signal keeps the shape of its input
        assert x_adv_preprocessing[0].shape == x[0].shape
        assert x_adv_preprocessing[1].shape == x[1].shape
        assert x_adv_preprocessing[2].shape == x[2].shape

        # Test content: each adversarial signal differs from its input somewhere
        assert not (x_adv_preprocessing[0] == x[0]).all()
        assert not (x_adv_preprocessing[1] == x[1]).all()
        assert not (x_adv_preprocessing[2] == x[2]).all()

        # The sums must be neither +inf nor exactly zero
        assert np.sum(x_adv_preprocessing[0]) != np.inf
        assert np.sum(x_adv_preprocessing[1]) != np.inf
        assert np.sum(x_adv_preprocessing[2]) != np.inf

        assert np.sum(x_adv_preprocessing[0]) != 0
        assert np.sum(x_adv_preprocessing[1]) != 0
        assert np.sum(x_adv_preprocessing[2]) != 0

    except ARTTestException as e:
        art_warning(e)