def test_dynamic_range_quantization(self):
    """Trains a 5-class model and exports it with dynamic-range quantization.

    The quantized export is expected to shrink to roughly 5 MB (vs ~15 MB
    for the unquantized two-head export).
    """
    quantized_spec = audio_spec.YAMNetSpec(keep_yamnet_and_custom_heads=True)
    self._train_and_export(
        quantized_spec,
        num_classes=5,
        filename='basic_5_classes_training.tflite',
        expected_model_size=5 * 1000 * 1000,
        quantization_config=configs.QuantizationConfig.for_dynamic())
def test_from_esc50(self):
    """Loads a minimal ESC-50 style dataset and verifies per-file labels."""
    folder_path = self._get_folder_path('test_examples_helper')
    headers = [
        'filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take'
    ]
    rows = [
        ['1-100032-A-0.wav', '1', '0', 'dog', 'True', '100032', 'A'],
        ['1-100210-B-36.wav', '2', '36', 'vacuum_cleaner', 'False', '100210',
         'B'],
        ['1-100210-A-36.wav', '1', '36', 'vacuum_cleaner', 'False', '100210',
         'A'],
    ]
    write_csv(folder_path, 'meta', 'esc50.csv', headers, rows)

    spec = audio_spec.YAMNetSpec()
    loader = audio_dataloader.DataLoader.from_esc50(spec, folder_path)

    self.assertEqual(len(loader), 3)
    self.assertEqual(loader.index_to_label, ['dog', 'vacuum_cleaner'])
    # Expected label index keyed by audio file name.
    expected_label_by_file = {
        '1-100032-A-0.wav': 0,
        '1-100210-B-36.wav': 1,
        '1-100210-A-36.wav': 1,
    }
    for path_tensor, label in loader._dataset:
        basename = path_tensor.numpy().decode('utf-8').split('/')[-1]
        self.assertEqual(expected_label_by_file[basename], label)
def test_create_model(self):
    """Creating the model several times must not cause graph name conflicts."""
    spec = audio_spec.YAMNetSpec()
    # Repeated creation from the same spec exercises layer/variable naming.
    for _ in range(3):
        model = spec.create_model(10)
    self.assertEqual(model.input_shape, (None, 1024))
    self.assertEqual(model.output_shape, (None, 10))
def test_yamnet_single_head(self):
    """A single-head export must expose exactly one output tensor."""
    single_head_spec = audio_spec.YAMNetSpec(keep_yamnet_and_custom_heads=False)
    tflite_path = self._train_and_export(
        single_head_spec,
        num_classes=2,
        filename='single_head.tflite',
        expected_model_size=13 * 1000 * 1000)
    output_details = model_util.get_lite_runner(tflite_path).output_details
    self.assertEqual(1, len(output_details))
def test_spec(self):
    """Both YAMNet and BrowserFFT specs can load data from a wav folder."""
    folder_path = self._get_folder_path('test_examples_helper')
    write_sample(folder_path, 'unknown', '2s.wav', 44100, 2, value=1)

    yamnet_spec = audio_spec.YAMNetSpec()
    audio_dataloader.DataLoader.from_folder(yamnet_spec, folder_path)

    browser_fft_spec = audio_spec.BrowserFFTSpec()
    audio_dataloader.DataLoader.from_folder(browser_fft_spec, folder_path)
def test_spec(self):
    """from_esc50 accepts a YAMNet spec but rejects a BrowserFFT spec."""
    folder_path = self._get_folder_path('test_examples_helper')

    yamnet_spec = audio_spec.YAMNetSpec()
    audio_dataloader.DataLoader.from_esc50(yamnet_spec, folder_path)

    browser_fft_spec = audio_spec.BrowserFFTSpec()
    # ESC-50 loading is only supported for YAMNet.
    with self.assertRaises(AssertionError):
        audio_dataloader.DataLoader.from_esc50(browser_fft_spec, folder_path)
def test_no_metadata(self):
    """Exported model carries no metadata when ENABLE_METADATA is off.

    The module-level ``audio_spec.ENABLE_METADATA`` flag is disabled for the
    duration of this test only. Restoration happens in a ``finally`` block so
    that a failing assertion cannot leak the disabled flag into the rest of
    the test suite (the original code restored it with a plain trailing
    statement, which was skipped on any failure above it).
    """
    audio_spec.ENABLE_METADATA = False
    try:
        tflite_path = self._train_and_export(
            audio_spec.YAMNetSpec(keep_yamnet_and_custom_heads=True),
            num_classes=2,
            filename='two_heads.tflite',
            expected_model_size=15 * 1000 * 1000)
        self.assertEqual(
            2, len(model_util.get_lite_runner(tflite_path).output_details))
        with self.assertRaisesRegex(ValueError,
                                    'The model does not have metadata'):
            model_util.extract_tflite_metadata_json(tflite_path)
    finally:
        # Always re-enable metadata for subsequent tests.
        audio_spec.ENABLE_METADATA = True
def _test_preprocess(self, input_shape, input_count, output_shape,
                     output_count):
    """Checks that preprocess_ds chunks and relabels samples as expected.

    Builds ``input_count`` identical waveforms of ``input_shape`` with labels
    0..input_count-1, runs them through ``spec.preprocess_ds`` and verifies
    every produced item's shape, label, and the total item count.

    The original implementation iterated the dataset twice (one pass only to
    count items); a single pass performs all checks and asserts the final
    count, avoiding a redundant traversal of the tf.data pipeline.

    Args:
      input_shape: Shape of each synthetic input waveform.
      input_count: Number of input samples fed to the pipeline.
      output_shape: Expected shape of each preprocessed item.
      output_count: Expected total number of items after preprocessing.
    """
    spec = audio_spec.YAMNetSpec()
    wav_ds = tf.data.Dataset.from_tensor_slices([tf.ones(input_shape)] *
                                                input_count)
    label_ds = tf.data.Dataset.range(input_count).map(
        lambda x: tf.cast(x, tf.int32))
    ds = spec.preprocess_ds(tf.data.Dataset.zip((wav_ds, label_ds)))

    # Each input is expected to be split into `chunks` consecutive items,
    # all carrying the label of the originating input.
    chunks = output_count // input_count
    cnt = 0
    for item, label in ds:
        self.assertEqual(output_shape, item.shape)
        self.assertEqual(label, cnt // chunks)
        cnt += 1
    self.assertEqual(cnt, output_count)
def test_binary_classification(self):
    """Trains and exports a two-class (binary) model end to end."""
    binary_spec = audio_spec.YAMNetSpec(keep_yamnet_and_custom_heads=True)
    self._train_and_export(
        binary_spec,
        num_classes=2,
        filename='binary_classification.tflite',
        expected_model_size=15 * 1000 * 1000)
def test_yamnet_single_head(self):
    # Exports a single-head (custom-head-only) model and verifies:
    #   1. exactly one output tensor exists,
    #   2. its shape is [1, num_classes] = [1, 2],
    #   3. the attached TFLite metadata matches the expected JSON verbatim.
    tflite_path = self._train_and_export(
        audio_spec.YAMNetSpec(keep_yamnet_and_custom_heads=False),
        num_classes=2,
        filename='single_head.tflite',
        expected_model_size=13 * 1000 * 1000)
    # With keep_yamnet_and_custom_heads=False only the custom head remains.
    self.assertEqual(
        1, len(model_util.get_lite_runner(tflite_path).output_details))
    self.assertAllEqual(
        [1, 2],
        model_util.get_lite_runner(tflite_path).output_details[0]['shape'])
    # Golden metadata JSON — must match the exporter output byte-for-byte.
    self.assertEqual(
        model_util.extract_tflite_metadata_json(tflite_path), """{
  "name": "yamnet/classification",
  "description": "Recognizes sound events",
  "version": "v1",
  "subgraph_metadata": [
    {
      "input_tensor_metadata": [
        {
          "name": "audio_clip",
          "description": "Input audio clip to be classified.",
          "content": {
            "content_properties_type": "AudioProperties",
            "content_properties": {
              "sample_rate": 16000,
              "channels": 1
            }
          },
          "stats": {
          }
        }
      ],
      "output_tensor_metadata": [
        {
          "name": "custom",
          "description": "Scores in range 0..1.0 for each output classes.",
          "content": {
            "content_properties_type": "FeatureProperties",
            "content_properties": {
            }
          },
          "stats": {
            "max": [
              1.0
            ],
            "min": [
              0.0
            ]
          },
          "associated_files": [
            {
              "name": "custom_labels.txt",
              "description": "Labels for categories that the model can recognize.",
              "type": "TENSOR_AXIS_LABELS"
            }
          ]
        }
      ]
    }
  ],
  "author": "TensorFlow Lite Model Maker",
  "license": "Apache License. Version 2.0 http://www.apache.org/licenses/LICENSE-2.0.",
  "min_parser_version": "1.3.0"
}
""")
def setUpClass(cls):
    """One-time setup: cache a shared YAMNetSpec for the whole test class."""
    super(YAMNetSpecTest, cls).setUpClass()
    shared_spec = audio_spec.YAMNetSpec()
    cls._spec = shared_spec
def test_basic_training(self):
    """Trains and exports a 5-class model with both heads kept."""
    two_head_spec = audio_spec.YAMNetSpec(keep_yamnet_and_custom_heads=True)
    self._train_and_export(
        two_head_spec,
        num_classes=5,
        filename='basic_5_classes_training.tflite',
        expected_model_size=15 * 1000 * 1000)
def testYAMNet(self):
    """Runs the shared spec checks against YAMNet."""
    # NOTE: `YAMNetWithoutPreprcessing` (sic) is a project-defined helper
    # class; its misspelled name is referenced as declared elsewhere.
    spec = audio_spec.YAMNetSpec()
    reference_model = YAMNetWithoutPreprcessing()
    self._test_spec(spec, reference_model)
def test_from_esc50(self):
    """Covers ESC-50 loading plus fold/category filtering.

    Writes a three-row ESC-50 style CSV, loads it, checks labels, then
    verifies that fold and category filters select the expected subsets and
    that filters matching nothing raise ValueError.

    Uses ``assertRaisesRegex``; the original ``assertRaisesRegexp`` is a
    deprecated alias (removed in Python 3.12) and the sibling tests in this
    file already use the modern name.
    """
    folder_path = self._get_folder_path('test_from_esc50')
    headers = [
        'filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take'
    ]
    rows = [
        ['1-100032-A-0.wav', '1', '0', 'dog', 'True', '100032', 'A'],
        ['1-100210-B-36.wav', '2', '36', 'vacuum_cleaner', 'False', '100210',
         'B'],
        ['1-100210-A-36.wav', '1', '36', 'vacuum_cleaner', 'False', '100210',
         'A'],
    ]
    write_csv(folder_path, 'meta', 'esc50.csv', headers, rows)

    spec = audio_spec.YAMNetSpec()
    loader = audio_dataloader.DataLoader.from_esc50(spec, folder_path)

    self.assertEqual(len(loader), 3)
    self.assertEqual(loader.index_to_label, ['dog', 'vacuum_cleaner'])
    expected_results = {
        '1-100032-A-0.wav': 0,
        '1-100210-B-36.wav': 1,
        '1-100210-A-36.wav': 1,
    }
    for full_path, label in loader._dataset:
        filename = full_path.numpy().decode('utf-8').split('/')[-1]
        self.assertEqual(expected_results[filename], label)

    # Filtered dataset: filters that match no files must raise.
    with self.assertRaisesRegex(ValueError, 'No audio files found'):
        audio_dataloader.DataLoader.from_esc50(spec, folder_path, folds=[3])
    with self.assertRaisesRegex(ValueError, 'No audio files found'):
        audio_dataloader.DataLoader.from_esc50(
            spec, folder_path, categories=['unknown'])

    # Fold filter alone: two of the three rows are in fold 1.
    loader = audio_dataloader.DataLoader.from_esc50(
        spec, folder_path, folds=[1])
    self.assertEqual(len(loader), 2)
    # Category filter alone.
    loader = audio_dataloader.DataLoader.from_esc50(
        spec, folder_path, categories=['vacuum_cleaner'])
    self.assertEqual(len(loader), 2)
    # Both filters combined intersect.
    loader = audio_dataloader.DataLoader.from_esc50(
        spec, folder_path, folds=[1], categories=['vacuum_cleaner'])
    self.assertEqual(len(loader), 1)
    loader = audio_dataloader.DataLoader.from_esc50(
        spec, folder_path, folds=[1, 2], categories=['vacuum_cleaner'])
    self.assertEqual(len(loader), 2)