Example #1
 def test_dynamic_range_quantization(self):
     self._train_and_export(
         audio_spec.YAMNetSpec(keep_yamnet_and_custom_heads=True),
         num_classes=5,
         filename='basic_5_classes_training.tflite',
         expected_model_size=5 * 1000 * 1000,
         quantization_config=configs.QuantizationConfig.for_dynamic())
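The _train_and_export helper used by this and several later examples is
defined elsewhere in the test class. A rough sketch of what such a helper
could look like, assuming the tflite_model_maker audio_classifier API,
"import os", and a sample folder already populated with labeled clips; the
real fixture may differ:

 def _train_and_export(self, spec, num_classes, filename,
                       expected_model_size, quantization_config=None):
     # Sketch only. Assumes "from tflite_model_maker import audio_classifier"
     # and that the returned folder already holds num_classes labeled
     # sub-folders of wav samples (neither is shown in the source).
     folder = self._get_folder_path('train_and_export')
     data = audio_dataloader.DataLoader.from_folder(spec, folder)
     model = audio_classifier.create(data, spec, epochs=1)
     model.export(folder, tflite_filename=filename,
                  quantization_config=quantization_config)
     tflite_path = os.path.join(folder, filename)
     # Loose sanity check against the expected_model_size argument.
     self.assertLess(os.path.getsize(tflite_path), expected_model_size)
     return tflite_path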
Example #2
    def test_from_esc50(self):
        folder_path = self._get_folder_path('test_examples_helper')

        headers = [
            'filename', 'fold', 'target', 'category', 'esc10', 'src_file',
            'take'
        ]
        rows = []
        rows.append(
            ['1-100032-A-0.wav', '1', '0', 'dog', 'True', '100032', 'A'])
        rows.append([
            '1-100210-B-36.wav', '2', '36', 'vacuum_cleaner', 'False',
            '100210', 'B'
        ])
        rows.append([
            '1-100210-A-36.wav', '1', '36', 'vacuum_cleaner', 'False',
            '100210', 'A'
        ])
        write_csv(folder_path, 'meta', 'esc50.csv', headers, rows)

        spec = audio_spec.YAMNetSpec()
        loader = audio_dataloader.DataLoader.from_esc50(spec, folder_path)

        self.assertEqual(len(loader), 3)
        self.assertEqual(loader.index_to_label, ['dog', 'vacuum_cleaner'])

        expected_results = {
            '1-100032-A-0.wav': 0,
            '1-100210-B-36.wav': 1,
            '1-100210-A-36.wav': 1,
        }
        for full_path, label in loader._dataset:
            filename = full_path.numpy().decode('utf-8').split('/')[-1]
            self.assertEqual(expected_results[filename], label)
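The write_csv helper above is external to the snippet as well. A plausible
stand-in that creates the subfolder and writes the header plus data rows
with Python's csv module (the helper in the actual test suite may differ):

import csv
import os

def write_csv(folder_path, subfolder, filename, headers, rows):
    # Write <folder_path>/<subfolder>/<filename> as a CSV file with one
    # header row followed by the given data rows.
    target_dir = os.path.join(folder_path, subfolder)
    os.makedirs(target_dir, exist_ok=True)
    with open(os.path.join(target_dir, filename), 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(rows)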
Example #3
 def test_create_model(self):
     # Make sure that there are no naming conflicts in the graph.
     spec = audio_spec.YAMNetSpec()
     model = spec.create_model(10)
     model = spec.create_model(10)
     model = spec.create_model(10)
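     # YAMNet embeddings are 1024-dimensional, which is what the input-shape
     # assertion below checks; the output is one score per requested class.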
     self.assertEqual(model.input_shape, (None, 1024))
     self.assertEqual(model.output_shape, (None, 10))
Example #4
 def test_yamnet_single_head(self):
     tflite_path = self._train_and_export(
         audio_spec.YAMNetSpec(keep_yamnet_and_custom_heads=False),
         num_classes=2,
         filename='single_head.tflite',
         expected_model_size=13 * 1000 * 1000)
     self.assertEqual(
         1, len(model_util.get_lite_runner(tflite_path).output_details))
Example #5
  def test_spec(self):
    folder_path = self._get_folder_path('test_examples_helper')
    write_sample(folder_path, 'unknown', '2s.wav', 44100, 2, value=1)

    spec = audio_spec.YAMNetSpec()
    audio_dataloader.DataLoader.from_folder(spec, folder_path)

    spec = audio_spec.BrowserFFTSpec()
    audio_dataloader.DataLoader.from_folder(spec, folder_path)
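write_sample is another helper that is not shown; judging from the call
above it writes a short synthetic WAV clip into a per-label subfolder. A
rough stand-in using tf.audio.encode_wav, with the argument meanings
(sample rate, duration in seconds, constant sample value) inferred from the
call rather than taken from the source:

import os

import tensorflow as tf

def write_sample(folder_path, category, filename, sample_rate, duration_sec,
                 value=0):
    # Write a mono WAV of duration_sec seconds filled with the constant
    # value into <folder_path>/<category>/<filename>, matching the
    # folder-per-label layout that from_folder expects.
    target_dir = os.path.join(folder_path, category)
    os.makedirs(target_dir, exist_ok=True)
    samples = tf.fill([sample_rate * duration_sec, 1], float(value))
    wav_bytes = tf.audio.encode_wav(samples, sample_rate)
    tf.io.write_file(os.path.join(target_dir, filename), wav_bytes)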
Example #6
    def test_spec(self):
        folder_path = self._get_folder_path('test_examples_helper')

        spec = audio_spec.YAMNetSpec()
        audio_dataloader.DataLoader.from_esc50(spec, folder_path)

        spec = audio_spec.BrowserFFTSpec()
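        # Only YAMNetSpec is expected to work with the ESC-50 loader here;
        # BrowserFFTSpec should trip the assertion inside from_esc50.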
        with self.assertRaises(AssertionError):
            audio_dataloader.DataLoader.from_esc50(spec, folder_path)
Example #7
 def test_no_metadata(self):
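   # Temporarily disable metadata writing; it is switched back on at the end
   # of the test.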
   audio_spec.ENABLE_METADATA = False
   tflite_path = self._train_and_export(
       audio_spec.YAMNetSpec(keep_yamnet_and_custom_heads=True),
       num_classes=2,
       filename='two_heads.tflite',
       expected_model_size=15 * 1000 * 1000)
   self.assertEqual(
       2, len(model_util.get_lite_runner(tflite_path).output_details))
   with self.assertRaisesRegex(ValueError, 'The model does not have metadata'):
     model_util.extract_tflite_metadata_json(tflite_path)
   audio_spec.ENABLE_METADATA = True
Example #8
    def _test_preprocess(self, input_shape, input_count, output_shape,
                         output_count):
        spec = audio_spec.YAMNetSpec()
        wav_ds = tf.data.Dataset.from_tensor_slices([tf.ones(input_shape)] *
                                                    input_count)
        label_ds = tf.data.Dataset.range(input_count).map(
            lambda x: tf.cast(x, tf.int32))

        ds = tf.data.Dataset.zip((wav_ds, label_ds))
        ds = spec.preprocess_ds(ds)

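        # preprocess_ds splits every clip into fixed-size chunks and repeats
        # the clip's label once per chunk, so output_count should be an exact
        # multiple of input_count.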
        chunks = output_count // input_count

        cnt = 0
        for item, label in ds:
            cnt += 1
        self.assertEqual(cnt, output_count)

        # More thorough checks.
        cnt = 0
        for item, label in ds:
            self.assertEqual(output_shape, item.shape)
            self.assertEqual(label, cnt // chunks)
            cnt += 1
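To make the chunking contract concrete, here is a small self-contained
tf.data illustration of the behaviour the assertions above encode: each
clip is split into equal chunks and its label is repeated once per chunk.
It mimics the test's expectations only; it is not YAMNetSpec's actual
preprocessing, and the lengths are illustrative:

import tensorflow as tf

chunk_len = 4   # illustrative chunk length, not a YAMNet constant
clip_len = 8    # each fake clip yields clip_len // chunk_len = 2 chunks

wav_ds = tf.data.Dataset.from_tensor_slices([tf.ones(clip_len)] * 3)
label_ds = tf.data.Dataset.range(3).map(lambda x: tf.cast(x, tf.int32))
ds = tf.data.Dataset.zip((wav_ds, label_ds))

def split_into_chunks(wav, label):
  # Reshape one clip into its chunks and pair every chunk with the label.
  chunks = tf.reshape(wav, [-1, chunk_len])
  labels = tf.fill([tf.shape(chunks)[0]], label)
  return tf.data.Dataset.from_tensor_slices((chunks, labels))

ds = ds.flat_map(split_into_chunks)

# Labels come out as 0, 0, 1, 1, 2, 2 -- i.e. label == index // chunks,
# which is exactly the relationship _test_preprocess asserts.
for i, (item, label) in enumerate(ds):
  assert item.shape == (chunk_len,)
  assert int(label) == i // (clip_len // chunk_len)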
Example #9
 def test_binary_classification(self):
     self._train_and_export(
         audio_spec.YAMNetSpec(keep_yamnet_and_custom_heads=True),
         num_classes=2,
         filename='binary_classification.tflite',
         expected_model_size=15 * 1000 * 1000)
Example #10
    def test_yamnet_single_head(self):
        tflite_path = self._train_and_export(
            audio_spec.YAMNetSpec(keep_yamnet_and_custom_heads=False),
            num_classes=2,
            filename='single_head.tflite',
            expected_model_size=13 * 1000 * 1000)
        self.assertEqual(
            1, len(model_util.get_lite_runner(tflite_path).output_details))
        self.assertAllEqual(
            [1, 2],
            model_util.get_lite_runner(tflite_path).output_details[0]['shape'])
        self.assertEqual(
            model_util.extract_tflite_metadata_json(tflite_path), """{
  "name": "yamnet/classification",
  "description": "Recognizes sound events",
  "version": "v1",
  "subgraph_metadata": [
    {
      "input_tensor_metadata": [
        {
          "name": "audio_clip",
          "description": "Input audio clip to be classified.",
          "content": {
            "content_properties_type": "AudioProperties",
            "content_properties": {
              "sample_rate": 16000,
              "channels": 1
            }
          },
          "stats": {
          }
        }
      ],
      "output_tensor_metadata": [
        {
          "name": "custom",
          "description": "Scores in range 0..1.0 for each output classes.",
          "content": {
            "content_properties_type": "FeatureProperties",
            "content_properties": {
            }
          },
          "stats": {
            "max": [
              1.0
            ],
            "min": [
              0.0
            ]
          },
          "associated_files": [
            {
              "name": "custom_labels.txt",
              "description": "Labels for categories that the model can recognize.",
              "type": "TENSOR_AXIS_LABELS"
            }
          ]
        }
      ]
    }
  ],
  "author": "TensorFlow Lite Model Maker",
  "license": "Apache License. Version 2.0 http://www.apache.org/licenses/LICENSE-2.0.",
  "min_parser_version": "1.3.0"
}
""")
Example #11
 def setUpClass(cls):
     super(YAMNetSpecTest, cls).setUpClass()
     cls._spec = audio_spec.YAMNetSpec()
Example #12
 def test_basic_training(self):
     self._train_and_export(
         audio_spec.YAMNetSpec(keep_yamnet_and_custom_heads=True),
         num_classes=5,
         filename='basic_5_classes_training.tflite',
         expected_model_size=15 * 1000 * 1000)
Example #13
 def testYAMNet(self):
     self._test_spec(audio_spec.YAMNetSpec(), YAMNetWithoutPreprcessing())
Example #14
    def test_from_esc50(self):
        folder_path = self._get_folder_path('test_from_esc50')

        headers = [
            'filename', 'fold', 'target', 'category', 'esc10', 'src_file',
            'take'
        ]
        rows = []
        rows.append(
            ['1-100032-A-0.wav', '1', '0', 'dog', 'True', '100032', 'A'])
        rows.append([
            '1-100210-B-36.wav', '2', '36', 'vacuum_cleaner', 'False',
            '100210', 'B'
        ])
        rows.append([
            '1-100210-A-36.wav', '1', '36', 'vacuum_cleaner', 'False',
            '100210', 'A'
        ])
        write_csv(folder_path, 'meta', 'esc50.csv', headers, rows)

        spec = audio_spec.YAMNetSpec()
        loader = audio_dataloader.DataLoader.from_esc50(spec, folder_path)

        self.assertEqual(len(loader), 3)
        self.assertEqual(loader.index_to_label, ['dog', 'vacuum_cleaner'])

        expected_results = {
            '1-100032-A-0.wav': 0,
            '1-100210-B-36.wav': 1,
            '1-100210-A-36.wav': 1,
        }
        for full_path, label in loader._dataset:
            filename = full_path.numpy().decode('utf-8').split('/')[-1]
            self.assertEqual(expected_results[filename], label)

        # Filtered dataset: select rows by fold and/or category.
        with self.assertRaisesRegex(ValueError, 'No audio files found'):
            loader = audio_dataloader.DataLoader.from_esc50(
                spec, folder_path, folds=[3])

        with self.assertRaisesRegex(ValueError, 'No audio files found'):
            loader = audio_dataloader.DataLoader.from_esc50(
                spec, folder_path, categories=['unknown'])

        loader = audio_dataloader.DataLoader.from_esc50(
            spec, folder_path, folds=[1])
        self.assertEqual(len(loader), 2)

        loader = audio_dataloader.DataLoader.from_esc50(
            spec, folder_path, categories=['vacuum_cleaner'])
        self.assertEqual(len(loader), 2)

        loader = audio_dataloader.DataLoader.from_esc50(
            spec, folder_path, folds=[1], categories=['vacuum_cleaner'])
        self.assertEqual(len(loader), 1)

        loader = audio_dataloader.DataLoader.from_esc50(
            spec, folder_path, folds=[1, 2], categories=['vacuum_cleaner'])
        self.assertEqual(len(loader), 2)
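The folds and categories arguments act as row filters over the ESC-50
metadata CSV written above. A small pandas illustration of the filtering
semantics the assertions imply (it mirrors the expected counts only, it is
not the loader's implementation, and it reuses folder_path from the test):

import os

import pandas as pd

meta = pd.read_csv(os.path.join(folder_path, 'meta', 'esc50.csv'))
assert len(meta[meta.fold == 1]) == 2                        # folds=[1]
assert len(meta[meta.category == 'vacuum_cleaner']) == 2     # categories
assert len(meta[(meta.fold == 1)
                & (meta.category == 'vacuum_cleaner')]) == 1  # both filters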