def testConfusionMatrix(self):
  """Verifies the confusion matrix total, shape and per-class row sums."""
  spec = audio_spec.BrowserFFTSpec()
  tmp_dir = self.get_temp_dir()
  wav_paths = [
      write_sample(tmp_dir, 'cat', '1.wav', 44100, duration_sec=1),
      write_sample(tmp_dir, 'cat', '2.wav', 44100, duration_sec=2),
      write_sample(tmp_dir, 'dog', '1.wav', 44100, duration_sec=3),
      write_sample(tmp_dir, 'dog', '2.wav', 44100, duration_sec=4),
  ]
  index_to_labels = ['cat', 'dog']

  # Prepare data.
  ds = tf.data.Dataset.from_tensor_slices((wav_paths, [0, 0, 1, 1]))
  data_loader = audio_dataloader.DataLoader(ds, len(ds), index_to_labels,
                                            spec)

  # Train a floating point model.
  task = audio_classifier.create(data_loader, spec, batch_size=1, epochs=15)

  confusion_matrix = task.confusion_matrix(data_loader)

  # BrowserFFTSpec generates 1 sample per 1 second of audio, so the
  # 1 + 2 + 3 + 4 seconds above yield 10 samples in total.
  self.assertEqual(tf.math.reduce_sum(confusion_matrix), 10)
  # confusion_matrix is of shape (truth, prediction). We have 2 classes,
  # 3 cat samples and 7 dog samples.
  self.assertEqual(confusion_matrix.shape, (2, 2))
  self.assertAllEqual(
      tf.math.reduce_sum(confusion_matrix, axis=-1).numpy(),
      np.array([3, 7]))
def test_dynamic_range_quantization(self):
  """Exports a dynamic-range quantized binary model and checks its size."""
  quantization_config = configs.QuantizationConfig.for_dynamic()
  self._train_and_export(
      audio_spec.BrowserFFTSpec(),
      num_classes=2,
      filename='binary_classification.tflite',
      expected_model_size=1 * 1000 * 1000,
      quantization_config=quantization_config)
def testBrowserFFT(self):
  """Trains on random PCM data and verifies the SavedModel export."""

  def pcm(shape):
    # Scale random floats in [0, 1) up to the int16 amplitude range.
    return np.random.rand(*shape) * (1 << 15)

  np.random.seed(123)

  spec = audio_spec.BrowserFFTSpec()
  dataset_shape = (1, spec.expected_waveform_len)
  sounds = [pcm(dataset_shape) for _ in range(2)]
  labels = list(range(2))
  index_to_labels = ['sound1', 'sound2']

  sounds_ds = tf.data.Dataset.from_tensor_slices(sounds)
  labels_ds = tf.data.Dataset.from_tensor_slices(labels)
  ds = tf.data.Dataset.zip((sounds_ds, labels_ds)).map(spec.preprocess)
  data_loader = audio_dataloader.DataLoader(ds, len(ds), index_to_labels)

  task = audio_classifier.create(data_loader, spec, batch_size=1, epochs=100)
  _, acc = task.evaluate(data_loader)
  # Better than random guessing.
  self.assertGreater(acc, .5)

  # Export the model to saved model and check it is non-empty on disk.
  saved_model_output_path = os.path.join(spec.model_dir, 'saved_model')
  task.export(spec.model_dir, export_format=ExportFormat.SAVED_MODEL)
  self.assertTrue(os.path.isdir(saved_model_output_path))
  self.assertNotEqual(len(os.listdir(saved_model_output_path)), 0)
def test_spec(self):
  """from_folder works with both YAMNet and BrowserFFT specs."""
  folder_path = self._get_folder_path('test_examples_helper')
  write_sample(folder_path, 'unknown', '2s.wav', 44100, 2, value=1)

  # Both specs should load the same folder without raising.
  for spec in (audio_spec.YAMNetSpec(), audio_spec.BrowserFFTSpec()):
    audio_dataloader.DataLoader.from_folder(spec, folder_path)
def test_spec(self):
  """from_esc50 accepts YAMNetSpec but rejects BrowserFFTSpec."""
  folder_path = self._get_folder_path('test_examples_helper')

  yamnet_spec = audio_spec.YAMNetSpec()
  audio_dataloader.DataLoader.from_esc50(yamnet_spec, folder_path)

  # ESC-50 loading is only supported for YAMNet; BrowserFFT must assert.
  browser_fft_spec = audio_spec.BrowserFFTSpec()
  with self.assertRaises(AssertionError):
    audio_dataloader.DataLoader.from_esc50(browser_fft_spec, folder_path)
def testBrowserFFT(self):
  """End-to-end: train, export SavedModel and TFLite, check TFLite accuracy."""

  def pcm(shape):
    # Scale random floats in [0, 1) up to the int16 amplitude range.
    return np.random.rand(*shape) * (1 << 15)

  np.random.seed(123)
  tf.random.set_seed(123)

  # Prepare data.
  spec = audio_spec.BrowserFFTSpec()
  dataset_shape = (1, spec.expected_waveform_len)
  sounds = [pcm(dataset_shape) for _ in range(2)]
  labels = list(range(2))
  index_to_labels = ['sound1', 'sound2']
  sounds_ds = tf.data.Dataset.from_tensor_slices(sounds)
  labels_ds = tf.data.Dataset.from_tensor_slices(labels)
  ds = tf.data.Dataset.zip((sounds_ds, labels_ds))
  preprocessed_ds = ds.map(spec.preprocess)
  data_loader = audio_dataloader.DataLoader(
      preprocessed_ds, len(preprocessed_ds), index_to_labels)

  # Train a floating point model.
  task = audio_classifier.create(data_loader, spec, batch_size=1, epochs=50)

  # Evaluate the trained model; it should beat random guessing.
  _, acc = task.evaluate(data_loader)
  self.assertGreater(acc, .5)

  # Export the model to saved model.
  output_path = os.path.join(spec.model_dir, 'saved_model')
  task.export(spec.model_dir, export_format=ExportFormat.SAVED_MODEL)
  self.assertTrue(os.path.isdir(output_path))
  self.assertNotEqual(len(os.listdir(output_path)), 0)

  # Export the model to TFLite.
  output_path = os.path.join(spec.model_dir, 'float.tflite')
  task.export(
      spec.model_dir,
      tflite_filename='float.tflite',
      export_format=ExportFormat.TFLITE)
  self.assertTrue(tf.io.gfile.exists(output_path))
  self.assertGreater(os.path.getsize(output_path), 0)

  # Evaluate accuracy on the TFLite model. Use a new dataset without
  # preprocessing since preprocessing is packaged inside the TFLite model.
  squeezed_ds = ds.map(lambda x, y: (tf.squeeze(tf.cast(x, tf.float32)), y))
  tflite_dataloader = audio_dataloader.DataLoader(squeezed_ds,
                                                  len(squeezed_ds),
                                                  index_to_labels)
  result = task.evaluate_tflite(output_path, tflite_dataloader)
  self.assertGreater(result['accuracy'], .5)
def testBrowserFFT(self):
  """End-to-end on wav files: train, export, and check TFLite accuracy."""
  tmp_dir = self.get_temp_dir()
  wav_paths = [
      write_sample(tmp_dir, 'cat', '1.wav', 44100, duration_sec=1),
      write_sample(tmp_dir, 'cat', '2.wav', 44100, duration_sec=2),
      write_sample(tmp_dir, 'dog', '1.wav', 44100, duration_sec=3),
      write_sample(tmp_dir, 'dog', '2.wav', 44100, duration_sec=4),
  ]
  index_to_labels = ['cat', 'dog']

  np.random.seed(123)
  tf.random.set_seed(123)

  # Prepare data.
  spec = audio_spec.BrowserFFTSpec()
  ds = tf.data.Dataset.from_tensor_slices((wav_paths, [0, 0, 1, 1]))
  data_loader = audio_dataloader.DataLoader(ds, len(ds), index_to_labels,
                                            spec)

  # Train a floating point model.
  task = audio_classifier.create(data_loader, spec, batch_size=1, epochs=15)

  # Evaluate the trained model; it should beat random guessing.
  _, acc = task.evaluate(data_loader)
  self.assertGreater(acc, .5)

  # Export the model to saved model.
  output_path = os.path.join(spec.model_dir, 'saved_model')
  task.export(spec.model_dir, export_format=ExportFormat.SAVED_MODEL)
  self.assertTrue(os.path.isdir(output_path))
  self.assertNotEqual(len(os.listdir(output_path)), 0)

  # Export the model to TFLite.
  output_path = os.path.join(spec.model_dir, 'float.tflite')
  task.export(
      spec.model_dir,
      tflite_filename='float.tflite',
      export_format=ExportFormat.TFLITE)
  self.assertTrue(tf.io.gfile.exists(output_path))
  self.assertGreater(os.path.getsize(output_path), 0)

  # Evaluate accuracy on the TFLite model. Use a spec without preprocessing
  # since preprocessing is packaged inside the TFLite model.
  spec = BrowserFFTWithoutPreprocessing()
  tflite_dataloader = audio_dataloader.DataLoader(ds, len(ds),
                                                  index_to_labels, spec)
  result = task.evaluate_tflite(output_path, tflite_dataloader)
  self.assertGreaterEqual(result['accuracy'], .5)
def test_basic_training(self):
  """Trains a 5-class BrowserFFT model, then verifies the exported TFLite
  model's size and its embedded metadata JSON.

  NOTE(review): the expected string below must match the metadata emitted by
  model_util.extract_tflite_metadata_json byte-for-byte, including its
  whitespace layout — do not reformat it.
  """
  tflite_path = self._train_and_export(
      audio_spec.BrowserFFTSpec(),
      num_classes=5,
      filename='basic_5_classes_training.tflite',
      expected_model_size=6 * 1000 * 1000)
  self.assertEqual(
      model_util.extract_tflite_metadata_json(tflite_path), """{
  "name": "AudioClassifier",
  "description": "Identify the most prominent type in the audio clip from a known set of categories.",
  "version": "v1",
  "subgraph_metadata": [
    {
      "input_tensor_metadata": [
        {
          "name": "audio_clip",
          "description": "Input audio clip to be classified.",
          "content": {
            "content_properties_type": "AudioProperties",
            "content_properties": {
              "sample_rate": 44100,
              "channels": 1
            }
          },
          "stats": {
          }
        }
      ],
      "output_tensor_metadata": [
        {
          "name": "probability",
          "description": "Scores of the labels respectively.",
          "content": {
            "content_properties_type": "FeatureProperties",
            "content_properties": {
            }
          },
          "stats": {
            "max": [
              1.0
            ],
            "min": [
              0.0
            ]
          },
          "associated_files": [
            {
              "name": "probability_labels.txt",
              "description": "Labels for categories that the model can recognize.",
              "type": "TENSOR_AXIS_LABELS"
            }
          ]
        }
      ]
    }
  ],
  "author": "TensorFlow Lite Model Maker",
  "license": "Apache License. Version 2.0 http://www.apache.org/licenses/LICENSE-2.0.",
  "min_parser_version": "1.3.0"
}
""")
def test_binary_classification(self):
  """Trains a 2-class float model and checks the exported model size."""
  self._train_and_export(
      audio_spec.BrowserFFTSpec(),
      num_classes=2,
      filename='binary_classification.tflite',
      expected_model_size=6 * 1000 * 1000)
def setUpClass(cls):
  """Creates the shared BrowserFFTSpec once for all tests in this case."""
  super(BrowserFFTSpecTest, cls).setUpClass()
  # Building the spec is relatively expensive (it downloads/loads the base
  # model), so share one instance across the whole test case.
  cls._spec = audio_spec.BrowserFFTSpec()
def testBrowserFFT(self):
  """Runs the shared spec checks for BrowserFFT, comparing the standard
  spec against the variant without built-in preprocessing."""
  self._test_spec(audio_spec.BrowserFFTSpec(),
                  BrowserFFTWithoutPreprocessing())