Beispiel #1
0
  def test_get(self):
    """Checks the type returned by `ms.get` for string names and a spec value."""
    expectations = (
        ('mobilenet_v2', image_spec.ImageModelSpec),
        ('average_word_vec', text_spec.AverageWordVecModelSpec),
        (image_spec.mobilenet_v2_spec, image_spec.ImageModelSpec),
    )
    for name_or_spec, expected_cls in expectations:
      resolved = ms.get(name_or_spec)
      self.assertIsInstance(resolved, expected_cls)
Beispiel #2
0
    def test_get(self):
        """Checks `ms.get` return types and that an unknown name raises KeyError."""
        lookups = (
            ('mobilenet_v2', ms.ImageModelSpec),
            ('average_word_vec', ms.AverageWordVecModelSpec),
            (ms.mobilenet_v2_spec, ms.ImageModelSpec),
        )
        for key, expected_cls in lookups:
            self.assertIsInstance(ms.get(key), expected_cls)

        # A name that is not registered must surface as a KeyError.
        self.assertRaises(KeyError, ms.get, 'not_exist_model_spec')
Beispiel #3
0
def run(train_data_path,
        validation_data_path,
        export_dir,
        spec='bert_qa',
        **kwargs):
    """Runs the question-answer demo: load SQuAD data, fine-tune, evaluate, export.

    Args:
      train_data_path: File handed to `QuestionAnswerDataLoader.from_squad`
        with `is_training=True`.
      validation_data_path: File handed to `QuestionAnswerDataLoader.from_squad`
        with `is_training=False`.
      export_dir: Destination passed to `model.export`.
      spec: Model specification (or its name) resolved through
        `model_spec.get`.
      **kwargs: Extra keyword arguments forwarded to `question_answer.create`.
    """
    qa_spec = model_spec.get(spec)

    def load_squad(path, for_training):
        # Both splits share the same spec; only the training flag differs.
        return QuestionAnswerDataLoader.from_squad(path,
                                                   qa_spec,
                                                   is_training=for_training)

    training_set = load_squad(train_data_path, True)
    eval_set = load_squad(validation_data_path, False)

    # Fine-tune on the training split.
    qa_model = question_answer.create(training_set,
                                      model_spec=qa_spec,
                                      **kwargs)

    # Report the F1 score measured on the validation split.
    eval_metrics = qa_model.evaluate(eval_set)
    f1_message = 'Eval F1 score:%f' % eval_metrics['final_f1']
    tf.compat.v1.logging.info(f1_message)

    # Exports to TFLite format.
    qa_model.export(export_dir)
Beispiel #4
0
    def testEfficientDetLite0(self):
        """Trains `efficientdet_lite0` and checks evaluation plus every export path.

        Covers in-memory evaluation, SavedModel export, float TFLite export
        (including the metadata JSON file), evaluation of the exported TFLite
        file, and the default quantized TFLite export.
        """
        # Gets model specification.
        spec = model_spec.get('efficientdet_lite0')

        # Prepare data.
        images_dir, annotations_dir, label_map = test_util.create_pascal_voc(
            self.get_temp_dir())
        data = object_detector_dataloader.DataLoader.from_pascal_voc(
            images_dir, annotations_dir, label_map)

        # Train the model.
        task = object_detector.create(data, spec, batch_size=1, epochs=1)
        self.assertEqual(spec.config.num_classes, 2)

        # Evaluate trained model
        metrics = task.evaluate(data)
        self.assertIsInstance(metrics, dict)
        self.assertGreaterEqual(metrics['AP'], 0)

        # Export the model to saved model.
        output_path = os.path.join(self.get_temp_dir(), 'saved_model')
        task.export(self.get_temp_dir(),
                    export_format=ExportFormat.SAVED_MODEL)
        self.assertTrue(os.path.isdir(output_path))
        self.assertNotEqual(len(os.listdir(output_path)), 0)

        # Export the model to the float TFLite model.
        output_path = os.path.join(self.get_temp_dir(), 'float.tflite')
        task.export(self.get_temp_dir(),
                    tflite_filename='float.tflite',
                    quantization_config=None,
                    export_format=ExportFormat.TFLITE,
                    with_metadata=True,
                    export_metadata_json_file=True)
        # Checks the sizes of the float32 TFLite model files in bytes.
        model_size = 13476379
        self.assertNear(os.path.getsize(output_path), model_size, 50000)

        json_output_file = os.path.join(self.get_temp_dir(), 'float.json')
        self.assertTrue(os.path.isfile(json_output_file))
        self.assertGreater(os.path.getsize(json_output_file), 0)
        expected_json_file = test_util.get_test_data_path(
            'efficientdet_lite0_metadata.json')
        self.assertTrue(filecmp.cmp(json_output_file, expected_json_file))

        # Evaluate the TFLite model. The result must be captured here;
        # otherwise the assertions below would only re-check the metrics from
        # the in-memory evaluation above.
        metrics = task.evaluate_tflite(output_path, data)
        self.assertIsInstance(metrics, dict)
        self.assertGreaterEqual(metrics['AP'], 0)

        # Tests the default quantized model.
        filename = 'model_quant.tflite'
        output_path = os.path.join(self.get_temp_dir(), filename)
        task.export(self.get_temp_dir(),
                    tflite_filename=filename,
                    export_format=ExportFormat.TFLITE)
        model_size = 4312187
        # Allow a 5% deviation from the expected quantized model size.
        err = model_size * 0.05
        self.assertTrue(os.path.isfile(output_path))
        self.assertNear(os.path.getsize(output_path), model_size, err)
Beispiel #5
0
    def create(cls,
               train_data,
               model_spec,
               batch_size=None,
               epochs=2,
               shuffle=False,
               do_train=True):
        """Builds a question-answer model and, unless disabled, trains it.

        Args:
          train_data: Training data.
          model_spec: Specification for the model, resolved through `ms.get`.
          batch_size: Batch size for training.
          epochs: Number of epochs for training.
          shuffle: Whether the data should be shuffled.
          do_train: Whether to run training. When False only `create_model`
            is called.

        Returns:
          The newly constructed `cls` instance.

        Raises:
          ValueError: If `compat.get_tf_behavior()` is not listed in
            `model_spec.compat_tf_versions`.
        """
        model_spec = ms.get(model_spec)
        if compat.get_tf_behavior() not in model_spec.compat_tf_versions:
            message = 'Incompatible versions. Expect {}, but got {}.'.format(
                model_spec.compat_tf_versions, compat.get_tf_behavior())
            raise ValueError(message)

        qa_model = cls(model_spec, shuffle=shuffle)

        if not do_train:
            qa_model.create_model()
            return qa_model

        tf.compat.v1.logging.info('Retraining the models...')
        qa_model.train(train_data, epochs, batch_size)
        return qa_model
Beispiel #6
0
def run(spec, data_dir, dataset_type, export_dir, **kwargs):
    """Runs the audio classification demo: load data, train, evaluate, export.

    Args:
      spec: Model specification (or its name) resolved via `model_spec.get`.
      data_dir: Directory handed to the audio data loader.
      dataset_type: 'esc50' selects the fold-based ESC-50 loader; any other
        value loads from folders and splits 80/10/10.
      export_dir: Destination passed to `model.export`.
      **kwargs: Extra keyword arguments forwarded to `audio_classifier.create`.
    """
    audio_spec = model_spec.get(spec)
    loader = audio_dataloader.DataLoader

    if dataset_type != 'esc50':
        all_data = loader.from_folder(audio_spec, data_dir)
        train_split, remainder = all_data.split(0.8)
        validation_split, test_split = remainder.split(0.5)
    else:
        # Limit to 2 categories to speed up the demo
        esc50_categories = ['dog', 'cat']
        train_split = loader.from_esc50(audio_spec,
                                        data_dir,
                                        folds=[0, 1, 2, 3],
                                        categories=esc50_categories)
        validation_split = loader.from_esc50(audio_spec,
                                             data_dir,
                                             folds=[4],
                                             categories=esc50_categories)
        test_split = loader.from_esc50(audio_spec,
                                       data_dir,
                                       folds=[5],
                                       categories=esc50_categories)

    print('Training the model')
    classifier = audio_classifier.create(train_split, audio_spec,
                                         validation_split, **kwargs)

    print('Evaluating the model')
    _, accuracy = classifier.evaluate(test_split)
    print('Test accuracy: %f' % accuracy)

    classifier.export(export_dir)
  def test_export_and_evaluation(self):
    """Exports a recommendation model to TFLite and SavedModel, then evaluates."""
    export_dir = os.path.join(self.test_tempdir, 'recommendation_export')
    rec_spec = ms.get(
        'recommendation',
        input_spec=self.input_spec,
        model_hparams=self.model_hparams)
    rec_model = recommendation.create(
        self.train_loader,
        model_spec=rec_spec,
        model_dir=export_dir,
        steps_per_epoch=1)
    rec_model.export(
        export_dir,
        export_format=[ExportFormat.TFLITE, ExportFormat.SAVED_MODEL])

    # Both artifacts must exist and be non-empty: first the tflite file, then
    # the saved model protobuf.
    tflite_path = os.path.join(export_dir, 'model.tflite')
    saved_model_pb = os.path.join(export_dir, 'saved_model', 'saved_model.pb')
    for artifact in (tflite_path, saved_model_pb):
      self.assertTrue(os.path.exists(artifact))
      self.assertGreater(os.path.getsize(artifact), 0)

    # Evaluate tflite model.
    self._test_evaluate_tflite(rec_model, tflite_path)
Beispiel #8
0
def create(train_data,
           model_spec,
           validation_data=None,
           epochs=None,
           batch_size=None,
           do_train=True):
    """Builds an object detector and, unless disabled, trains it.

    Args:
      train_data: Training data; its `label_map` is given to the detector.
      model_spec: Specification for the model, resolved through `ms.get`.
      validation_data: Validation data. If None, skips validation process.
      epochs: Number of epochs for training.
      batch_size: Batch size for training.
      do_train: Whether to run training. When False only `create_model` is
        called.

    Returns:
      ObjectDetector

    Raises:
      ValueError: If `compat.get_tf_behavior()` is not listed in
        `model_spec.compat_tf_versions`.
    """
    model_spec = ms.get(model_spec)
    if compat.get_tf_behavior() not in model_spec.compat_tf_versions:
        message = 'Incompatible versions. Expect {}, but got {}.'.format(
            model_spec.compat_tf_versions, compat.get_tf_behavior())
        raise ValueError(message)

    detector = ObjectDetector(model_spec, train_data.label_map)

    if not do_train:
        detector.create_model()
        return detector

    tf.compat.v1.logging.info('Retraining the models...')
    detector.train(train_data, validation_data, epochs, batch_size)
    return detector
Beispiel #9
0
def run(data_dir, export_dir, spec='bert_classifier', **kwargs):
    """Runs the text classification demo: load TSV data, fine-tune, evaluate, export.

    Args:
      data_dir: Directory containing `train.tsv` and `dev.tsv`.
      export_dir: Destination passed to `model.export`.
      spec: Model specification (or its name) resolved via `model_spec.get`.
      **kwargs: Extra keyword arguments forwarded to `text_classifier.create`.
    """
    classifier_spec = model_spec.get(spec)

    def load_tsv(basename, for_training):
        # Both splits are tab-separated files with `sentence`/`label` columns.
        return TextClassifierDataLoader.from_csv(
            filename=os.path.join(data_dir, basename),
            text_column='sentence',
            label_column='label',
            model_spec=classifier_spec,
            delimiter='\t',
            is_training=for_training)

    training_set = load_tsv('train.tsv', True)
    dev_set = load_tsv('dev.tsv', False)

    # Fine-tunes the model.
    classifier = text_classifier.create(training_set,
                                        model_spec=classifier_spec,
                                        validation_data=dev_set,
                                        **kwargs)

    # Gets evaluation results.
    _, accuracy = classifier.evaluate(dev_set)
    print('Eval accuracy: %f' % accuracy)

    # Exports to TFLite format.
    classifier.export(export_dir)
Beispiel #10
0
def create(train_data,
           model_spec,
           model_spec_options=None,
           model_dir=None,
           validation_data=None,
           batch_size=16,
           steps_per_epoch=10000,
           epochs=1,
           learning_rate=0.1,
           gradient_clip_norm=1.0,
           shuffle=True,
           do_train=True,
           max_history_length=10):
    """Builds a recommendation model and, unless disabled, trains it.

    Args:
      train_data: Training data.
      model_spec: Specification for the model; `ms.get(model_spec)` is called
        with `model_spec_options` to build the actual spec.
      model_spec_options: dict, additional options to create a model spec.
        Treated as empty when None.
      model_dir: str, path to export model checkpoints and summaries. A
        temporary directory from `tempfile.mkdtemp` is used when None.
      validation_data: Validation data.
      batch_size: Batch size for training.
      steps_per_epoch: int, Number of step per epoch.
      epochs: int, Number of epochs for training.
      learning_rate: float, learning rate.
      gradient_clip_norm: float, clip threshold (<= 0 meaning no clip).
      shuffle: boolean, whether the training data should be shuffled.
      do_train: boolean, whether to run training.
      max_history_length: int, max history length as model input (for
        inference).

    Returns:
      The constructed `Recommendation` object.
    """
    # Create model spec.
    spec_options = {} if model_spec_options is None else model_spec_options
    model_spec = ms.get(model_spec)(**spec_options)

    # Use model_dir or a temp folder to store intermediate checkpoints, etc.
    if model_dir is None:
        model_dir = tempfile.mkdtemp()

    recommender = Recommendation(model_spec,
                                 model_dir=model_dir,
                                 shuffle=shuffle,
                                 max_history_length=max_history_length,
                                 learning_rate=learning_rate,
                                 gradient_clip_norm=gradient_clip_norm)

    if not do_train:
        recommender.create_model(do_train=False)
        return recommender

    tf.compat.v1.logging.info('Training recommendation model...')
    recommender.train(train_data,
                      validation_data,
                      batch_size=batch_size,
                      steps_per_epoch=steps_per_epoch,
                      epochs=epochs)
    return recommender
    def testEfficientDetLite0(self):
        """Trains `efficientdet_lite0` and checks evaluation and the export formats.

        Exports a SavedModel, a TFLite model with metadata JSON and, except on
        TensorFlow 2.4, a full-integer quantized TFLite model.
        """
        detector_spec = model_spec.get('efficientdet_lite0')

        # Build the Pascal VOC fixture and wrap it in a data loader.
        images_dir, annotations_dir, label_map = test_util.create_pascal_voc(
            self.get_temp_dir())
        voc_data = object_detector_dataloader.DataLoader.from_pascal_voc(
            images_dir, annotations_dir, label_map)

        # One epoch with batch size 1 is enough for this test.
        detector = object_detector.create(voc_data,
                                          detector_spec,
                                          batch_size=1,
                                          epochs=1)
        self.assertEqual(detector_spec.config.num_classes, 2)

        # In-memory evaluation of the trained model.
        eval_metrics = detector.evaluate(voc_data, batch_size=1)
        self.assertIsInstance(eval_metrics, dict)
        self.assertGreaterEqual(eval_metrics['AP'], 0)

        # SavedModel export: the directory must exist and be non-empty.
        saved_model_dir = os.path.join(self.get_temp_dir(), 'saved_model')
        detector.export(self.get_temp_dir(),
                        export_format=ExportFormat.SAVED_MODEL)
        self.assertTrue(os.path.isdir(saved_model_dir))
        self.assertNotEqual(len(os.listdir(saved_model_dir)), 0)

        # TFLite export together with its metadata JSON file.
        tflite_path = os.path.join(self.get_temp_dir(), 'float.tflite')
        detector.export(self.get_temp_dir(),
                        tflite_filename='float.tflite',
                        export_format=ExportFormat.TFLITE,
                        with_metadata=True,
                        export_metadata_json_file=True)
        self.assertTrue(tf.io.gfile.exists(tflite_path))
        self.assertGreater(os.path.getsize(tflite_path), 0)

        # The metadata JSON must match the checked-in expected file.
        metadata_json = os.path.join(self.get_temp_dir(), 'float.json')
        self.assertTrue(os.path.isfile(metadata_json))
        self.assertGreater(os.path.getsize(metadata_json), 0)
        golden_json = test_util.get_test_data_path(
            'efficientdet_lite0_metadata.json')
        self.assertTrue(filecmp.cmp(metadata_json, golden_json))

        # Export the model to quantized TFLite model.
        # TODO(b/175173304): Skips the test for stable tensorflow 2.4 for now since
        # it fails. Will revert this change after TF upgrade.
        if tf.__version__.startswith('2.4'):
            return
        quantized_path = os.path.join(self.get_temp_dir(),
                                      'model_quantized.tflite')
        quant_config = (
            configs.QuantizationConfig.create_full_integer_quantization(
                voc_data, is_integer_only=True))
        detector.export(self.get_temp_dir(),
                        tflite_filename='model_quantized.tflite',
                        quantization_config=quant_config,
                        export_format=ExportFormat.TFLITE)
        self.assertTrue(os.path.isfile(quantized_path))
        self.assertGreater(os.path.getsize(quantized_path), 0)
 def test_mobilebert_model(self, spec, trainable):
   """Trains the given spec for one epoch and checks F1 score and TFLite export."""
   qa_spec = ms.get(spec)
   qa_spec.trainable = trainable
   qa_spec.predict_batch_size = 1

   # Only test squad1.1 since it takes too long time for this.
   squad_version = '1.1'
   train_set, validation_set = _get_data(qa_spec, squad_version)

   qa_model = question_answer.create(
       train_set, model_spec=qa_spec, epochs=1, batch_size=1)
   self._test_f1_score(qa_model, validation_set, 0.0)
   self._test_export_to_tflite(qa_model, validation_set, atol=1e-02)
 def test_evaluate(self):
   """Checks that evaluating a recommendation model returns a non-empty list."""
   checkpoint_dir = os.path.join(self.test_tempdir, 'recommendation_evaluate')
   rec_spec = ms.get(
       'recommendation',
       input_spec=self.input_spec,
       model_hparams=self.model_hparams)
   rec_model = recommendation.create(
       self.train_loader,
       model_spec=rec_spec,
       model_dir=checkpoint_dir,
       steps_per_epoch=1)

   eval_result = rec_model.evaluate(self.test_loader)
   self.assertIsInstance(eval_result, list)
   # Truthiness check: the list must not be empty.
   self.assertTrue(eval_result)
 def test_mobilebert_model(self, spec):
     """Currently skipped; otherwise trains one epoch and checks F1 and export."""
     self.skipTest('TODO(b/164095081): Fix breakage and re-enable')

     qa_spec = ms.get(spec)
     qa_spec.trainable = False
     qa_spec.predict_batch_size = 1

     # Only test squad1.1 since it takes too long time for this.
     squad_version = '1.1'
     train_set, validation_set = _get_data(qa_spec, squad_version)

     qa_model = question_answer.create(train_set,
                                       model_spec=qa_spec,
                                       epochs=1,
                                       batch_size=1)
     self._test_f1_score(qa_model, validation_set, 0.0)
     self._test_export_to_tflite(qa_model, validation_set, atol=1e-02)
  def test_create(self, encoder_type):
    """Checks that `recommendation.create` yields an object with a model set."""
    checkpoint_dir = os.path.join(self.test_tempdir, 'recommendation_create')
    encoder_input_spec = _testutil.get_input_spec(encoder_type)

    rec_spec = ms.get(
        'recommendation',
        input_spec=encoder_input_spec,
        model_hparams=self.model_hparams)
    rec_model = recommendation.create(
        self.train_loader,
        model_spec=rec_spec,
        model_dir=checkpoint_dir,
        steps_per_epoch=1)

    self.assertIsNotNone(rec_model.model)
def run(data_dir, export_dir, spec='efficientnet_lite0', **kwargs):
    """Runs the image classification demo: split data, train, evaluate, export.

    Args:
      data_dir: Folder handed to `ImageClassifierDataLoader.from_folder`.
      export_dir: Destination passed to `model.export`.
      spec: Model specification (or its name) resolved via `model_spec.get`.
      **kwargs: Extra keyword arguments forwarded to `image_classifier.create`.
    """
    image_spec = model_spec.get(spec)

    # 80% train; the remaining 20% is halved into validation and test.
    all_images = ImageClassifierDataLoader.from_folder(data_dir)
    train_split, remainder = all_images.split(0.8)
    validation_split, test_split = remainder.split(0.5)

    classifier = image_classifier.create(train_split,
                                         model_spec=image_spec,
                                         validation_data=validation_split,
                                         **kwargs)

    _, accuracy = classifier.evaluate(test_split)
    print('Test accuracy: %f' % accuracy)
    classifier.export(export_dir)
Beispiel #17
0
def run(data_dir, export_dir, spec='audio_browser_fft', **kwargs):
  """Runs the audio classification demo: split data, train, evaluate, export.

  Args:
    data_dir: Folder handed to `audio_dataloader.DataLoader.from_folder`.
    export_dir: Destination passed to `model.export`.
    spec: Model specification (or its name) resolved via `model_spec.get`.
    **kwargs: Extra keyword arguments forwarded to `audio_classifier.create`.
  """
  audio_spec = model_spec.get(spec)

  # 80% train; the remaining 20% is halved into validation and test.
  all_audio = audio_dataloader.DataLoader.from_folder(audio_spec, data_dir)
  train_split, remainder = all_audio.split(0.8)
  validation_split, test_split = remainder.split(0.5)

  print('Training the model')
  classifier = audio_classifier.create(
      train_split, audio_spec, validation_split, **kwargs)

  print('Evaluating the model')
  _, accuracy = classifier.evaluate(test_split)
  print('Test accuracy: %f' % accuracy)

  classifier.export(export_dir)
Beispiel #18
0
    def create(cls,
               train_data: object_detector_dataloader.DataLoader,
               model_spec: object_detector_spec.EfficientDetModelSpec,
               validation_data: Optional[
                   object_detector_dataloader.DataLoader] = None,
               epochs: Optional[int] = None,
               batch_size: Optional[int] = None,
               train_whole_model: bool = False,
               do_train: bool = True) -> T:
        """Loads data and train the model for object detection.

        Args:
          train_data: Training data.
          model_spec: Specification for the model.
          validation_data: Validation data. If None, skips validation process.
          epochs: Number of epochs for training. When given, it also
            overrides `model_spec.config.num_epochs`.
          batch_size: Batch size for training. When given, it also overrides
            `model_spec.config.batch_size`.
          train_whole_model: Boolean, False by default. If true, train the
            whole model. Otherwise, only train the layers that are not match
            `model_spec.config.var_freeze_expr`.
          do_train: Whether to run training.

        Returns:
          An instance based on ObjectDetector.

        Raises:
          ValueError: If `compat.get_tf_behavior()` is not listed in
            `model_spec.compat_tf_versions`.
        """
        model_spec = ms.get(model_spec)
        # Explicit arguments take precedence over the values stored in the
        # spec's config.
        if epochs is not None:
            model_spec.config.num_epochs = epochs
        if batch_size is not None:
            model_spec.config.batch_size = batch_size
        if train_whole_model:
            # Clearing the freeze expression leaves no layer frozen.
            model_spec.config.var_freeze_expr = None
        if compat.get_tf_behavior() not in model_spec.compat_tf_versions:
            raise ValueError(
                'Incompatible versions. Expect {}, but got {}.'.format(
                    model_spec.compat_tf_versions, compat.get_tf_behavior()))

        object_detector = cls(model_spec, train_data.label_map, train_data)

        if do_train:
            tf.compat.v1.logging.info('Retraining the models...')
            object_detector.train(train_data, validation_data, epochs,
                                  batch_size)
        else:
            object_detector.create_model()

        return object_detector
Beispiel #19
0
    def from_squad(cls,
                   filename,
                   model_spec,
                   is_training=True,
                   version_2_with_negative=False,
                   cache_dir=None):
        """Loads data in SQuAD format and preprocesses text according to `model_spec`.

        Args:
          filename: Name of the file.
          model_spec: Specification for the model.
          is_training: Whether the loaded data is for training or not.
          version_2_with_negative: Whether it's SQuAD 2.0 format.
          cache_dir: The cache directory to save preprocessed data. If None,
            generates a temporary directory to cache preprocessed data.

        Returns:
          QuestionAnswerDataLoader object. When served from the cache, its
          `examples` and `features` are empty lists.
        """
        model_spec = ms.get(model_spec)
        squad_basename = os.path.basename(filename)
        cache_info = _get_cache_info(cache_dir, squad_basename, model_spec,
                                     is_training)
        is_cached, tfrecord_file, meta_data_file, _ = cache_info

        # The cache is only reused for training data.
        use_cache = is_cached and is_training
        if not use_cache:
            generated = cls._generate_tf_record_from_squad_file(
                filename, model_spec, tfrecord_file, is_training,
                version_2_with_negative)
            meta_data, examples, features = generated

            file_util.write_json_file(meta_data_file, meta_data)

            # Reload from the files just written.
            dataset, meta_data = _load(tfrecord_file, meta_data_file,
                                       model_spec, is_training)
            return QuestionAnswerDataLoader(
                dataset, meta_data['size'],
                meta_data['version_2_with_negative'], examples, features,
                filename)

        # If cached, directly loads data from cache directory.
        dataset, meta_data = _load(tfrecord_file, meta_data_file, model_spec,
                                   is_training)
        return QuestionAnswerDataLoader(
            dataset=dataset,
            size=meta_data['size'],
            version_2_with_negative=meta_data['version_2_with_negative'],
            examples=[],
            features=[],
            squad_file=filename)
Beispiel #20
0
    def create(cls,
               train_data,
               model_spec='average_word_vec',
               validation_data=None,
               batch_size=None,
               epochs=3,
               steps_per_epoch=None,
               shuffle=False,
               do_train=True):
        """Builds a text classifier and, unless disabled, trains it.

        Args:
          train_data: Training data; its `index_to_label` is given to the
            classifier.
          model_spec: Specification for the model, resolved through `ms.get`.
          validation_data: Validation data. If None, skips validation process.
          batch_size: Batch size for training.
          epochs: Number of epochs for training.
          steps_per_epoch: Integer or None. Total number of steps (batches of
            samples) before declaring one epoch finished and starting the next
            epoch. If `steps_per_epoch` is None, the epoch will run until the
            input dataset is exhausted.
          shuffle: Whether the data should be shuffled.
          do_train: Whether to run training. When False only `create_model`
            is called.

        Returns:
          An instance based on TextClassifier.

        Raises:
          ValueError: If `compat.get_tf_behavior()` is not listed in
            `model_spec.compat_tf_versions`.
        """
        model_spec = ms.get(model_spec)
        if compat.get_tf_behavior() not in model_spec.compat_tf_versions:
            message = 'Incompatible versions. Expect {}, but got {}.'.format(
                model_spec.compat_tf_versions, compat.get_tf_behavior())
            raise ValueError(message)

        classifier = cls(model_spec,
                         train_data.index_to_label,
                         shuffle=shuffle)

        if not do_train:
            classifier.create_model()
            return classifier

        tf.compat.v1.logging.info('Retraining the models...')
        classifier.train(train_data, validation_data, epochs, batch_size,
                         steps_per_epoch)
        return classifier
def run(data_dir, export_dir, spec='efficientnet_lite0', **kwargs):
    """Runs the image classification demo and exports in two formats.

    Args:
      data_dir: Folder handed to `ImageClassifierDataLoader.from_folder`.
      export_dir: Destination passed to `model.export`.
      spec: Model specification (or its name) resolved via `model_spec.get`.
      **kwargs: Extra keyword arguments forwarded to `image_classifier.create`.
    """
    image_spec = model_spec.get(spec)

    # 80% train; the remaining 20% is halved into validation and test.
    all_images = ImageClassifierDataLoader.from_folder(data_dir)
    train_split, remainder = all_images.split(0.8)
    validation_split, test_split = remainder.split(0.5)

    classifier = image_classifier.create(train_split,
                                         model_spec=image_spec,
                                         validation_data=validation_split,
                                         **kwargs)

    _, accuracy = classifier.evaluate(test_split)
    print('Test accuracy: %f' % accuracy)

    # Exports to TFLite and SavedModel, with label file.
    classifier.export(
        export_dir,
        export_format=[ExportFormat.TFLITE, ExportFormat.SAVED_MODEL])
def run(data_dir,
        tflite_filename,
        label_filename,
        spec='efficientnet_b0',
        **kwargs):
  """Runs the image classification demo and exports model and label files.

  Args:
    data_dir: Folder handed to `ImageClassifierDataLoader.from_folder`.
    tflite_filename: First argument passed to `model.export`.
    label_filename: Second argument passed to `model.export`.
    spec: Model specification (or its name) resolved via `model_spec.get`.
    **kwargs: Extra keyword arguments forwarded to `image_classifier.create`.
  """
  image_spec = model_spec.get(spec)

  # 80% train; the remaining 20% is halved into validation and test.
  all_images = ImageClassifierDataLoader.from_folder(data_dir)
  train_split, remainder = all_images.split(0.8)
  validation_split, test_split = remainder.split(0.5)

  classifier = image_classifier.create(
      train_split,
      model_export_format=ModelExportFormat.TFLITE,
      model_spec=image_spec,
      validation_data=validation_split,
      **kwargs)

  _, accuracy = classifier.evaluate(test_split)
  print('Test accuracy: %f' % accuracy)
  classifier.export(tflite_filename, label_filename)
Beispiel #23
0
    def create(cls,
               train_data,
               model_spec='average_word_vec',
               validation_data=None,
               batch_size=None,
               epochs=3,
               shuffle=False,
               do_train=True):
        """Builds a text classifier and, unless disabled, trains it.

        Args:
          train_data: Training data; its `index_to_label` is given to the
            classifier.
          model_spec: Specification for the model, resolved through `ms.get`.
          validation_data: Validation data. If None, skips validation process.
          batch_size: Batch size for training.
          epochs: Number of epochs for training.
          shuffle: Whether the data should be shuffled.
          do_train: Whether to run training. When False only `create_model`
            is called.

        Returns:
          An instance based on TextClassifier.

        Raises:
          ValueError: If `compat.get_tf_behavior()` is not listed in
            `model_spec.compat_tf_versions`.
        """
        model_spec = ms.get(model_spec)
        if compat.get_tf_behavior() not in model_spec.compat_tf_versions:
            message = 'Incompatible versions. Expect {}, but got {}.'.format(
                model_spec.compat_tf_versions, compat.get_tf_behavior())
            raise ValueError(message)

        classifier = cls(model_spec,
                         train_data.index_to_label,
                         shuffle=shuffle)

        if not do_train:
            classifier.create_model()
            return classifier

        tf.compat.v1.logging.info('Retraining the models...')
        classifier.train(train_data, validation_data, epochs, batch_size)
        return classifier
Beispiel #24
0
    def from_csv(cls,
                 filename,
                 text_column,
                 label_column,
                 fieldnames=None,
                 model_spec='average_word_vec',
                 is_training=True,
                 delimiter=',',
                 quotechar='"',
                 shuffle=False,
                 cache_dir=None):
        """Loads labelled text from a csv file and preprocesses it per `model_spec`.

        Args:
          filename: Name of the file.
          text_column: String, Column name for input text.
          label_column: String, Column name for labels.
          fieldnames: A sequence, used in csv.DictReader. If fieldnames is
            omitted, the values in the first row of file f will be used as the
            fieldnames.
          model_spec: Specification for the model.
          is_training: Whether the loaded data is for training or not.
          delimiter: Character used to separate fields.
          quotechar: Character used to quote fields containing special
            characters.
          shuffle: boolean, if shuffle, random shuffle data.
          cache_dir: The cache directory to save preprocessed data. If None,
            generates a temporary directory to cache preprocessed data.

        Returns:
          TextDataset containing text, labels and other related info.
        """
        model_spec = ms.get(model_spec)
        csv_name = os.path.basename(filename)

        cache_info = cls._get_cache_info(cache_dir, csv_name, model_spec,
                                         is_training)
        is_cached, tfrecord_file, meta_data_file, vocab_file = cache_info
        # A cache hit skips reading and preprocessing the csv entirely.
        if is_cached:
            return cls._load_data(tfrecord_file, meta_data_file, model_spec)

        rows = cls._read_csv(filename, fieldnames, delimiter, quotechar)
        if shuffle:
            random.shuffle(rows)

        # Distinct labels, sorted.
        label_names = sorted({row[label_column] for row in rows})

        # One example per row, with a guid built from the file name and the
        # row index.
        examples = [
            classifier_data_lib.InputExample('%s-%d' % (csv_name, row_index),
                                             row[text_column], None,
                                             row[label_column])
            for row_index, row in enumerate(rows)
        ]

        # Saves preprocessed data and other assets into files.
        cls._save_data(examples, model_spec, label_names, tfrecord_file,
                       meta_data_file, vocab_file, is_training)

        # Loads data from cache directory.
        return cls._load_data(tfrecord_file, meta_data_file, model_spec)
Beispiel #25
0
 def test_get_raises(self):
   """Checks that `ms.get` raises KeyError for an unknown spec name."""
   self.assertRaises(KeyError, ms.get, 'not_exist_model_spec')
Beispiel #26
0
 def test_get_not_none(self, model):
   """Checks that `ms.get` returns something other than None for `model`."""
   self.assertIsNotNone(ms.get(model))
 def test_get_not_none_recommendation_models(self, model):
   """Checks that `ms.get` returns a non-None spec when given the test-util args."""
   test_input_spec = recommendation_testutil.get_input_spec()
   test_hparams = recommendation_testutil.get_model_hparams()
   resolved = ms.get(
       model, input_spec=test_input_spec, model_hparams=test_hparams)
   self.assertIsNotNone(resolved)
Beispiel #28
0
    def from_folder(cls,
                    filename,
                    model_spec='average_word_vec',
                    is_training=True,
                    class_labels=None,
                    shuffle=True,
                    cache_dir=None):
        """Loads labelled text from a folder and preprocesses it per `model_spec`.

        Assume the text data of the same label are in the same subdirectory.
        Each file is one text.

        Args:
          filename: Path of the data folder.
          model_spec: Specification for the model.
          is_training: Whether the loaded data is for training or not.
          class_labels: Class labels that should be considered. Name of the
            subdirectory not in `class_labels` will be ignored. If None, all
            the subdirectories will be considered.
          shuffle: boolean, if shuffle, random shuffle data.
          cache_dir: The cache directory to save preprocessed data. If None,
            generates a temporary directory to cache preprocessed data.

        Returns:
          TextDataset containing text, labels and other related info.

        Raises:
          ValueError: If no text file is found.
        """
        model_spec = ms.get(model_spec)
        data_root = os.path.abspath(filename)
        folder_name = os.path.basename(data_root)

        cache_info = cls._get_cache_info(cache_dir, folder_name, model_spec,
                                         is_training)
        is_cached, tfrecord_file, meta_data_file, vocab_file = cache_info
        # A cache hit skips reading and preprocessing the folder entirely.
        if is_cached:
            return cls._load_data(tfrecord_file, meta_data_file, model_spec)

        # One glob pattern per requested label, or a single pattern covering
        # every subdirectory.
        if class_labels:
            glob_patterns = [
                os.path.join(data_root, class_label) + r'/*'
                for class_label in class_labels
            ]
        else:
            glob_patterns = [data_root + r'/*/*']

        text_paths = []
        for pattern in glob_patterns:
            text_paths.extend(tf.io.gfile.glob(pattern))

        if not text_paths:
            raise ValueError('Text size is zero')

        if shuffle:
            random.shuffle(text_paths)

        # Label names: the requested labels, or every subdirectory name.
        if class_labels:
            label_names = sorted(class_labels)
        else:
            label_names = sorted(
                entry for entry in os.listdir(data_root)
                if os.path.isdir(os.path.join(data_root, entry)))

        # Each file becomes one example labelled with its parent directory.
        examples = []
        for file_index, text_path in enumerate(text_paths):
            with tf.io.gfile.GFile(text_path, 'r') as text_file:
                content = text_file.read()
            example_guid = '%s-%d' % (folder_name, file_index)
            example_label = os.path.basename(os.path.dirname(text_path))
            examples.append(
                classifier_data_lib.InputExample(example_guid, content, None,
                                                 example_label))

        # Saves preprocessed data and other assets into files.
        cls._save_data(examples, model_spec, label_names, tfrecord_file,
                       meta_data_file, vocab_file, is_training)

        # Loads data from cache directory.
        return cls._load_data(tfrecord_file, meta_data_file, model_spec)
Beispiel #29
0
def create(train_data,
           model_spec='efficientnet_lite0',
           validation_data=None,
           batch_size=None,
           epochs=None,
           train_whole_model=None,
           dropout_rate=None,
           learning_rate=None,
           momentum=None,
           shuffle=False,
           use_augmentation=False,
           use_hub_library=True,
           warmup_steps=None,
           model_dir=None,
           do_train=True):
    """Builds an image classifier and, unless disabled, retrains it.

    Args:
        train_data: Training data. Its `index_to_label` is handed to the
            classifier, and the data itself is passed as the representative
            data.
        model_spec: Specification for the model; resolved through `ms.get`.
        validation_data: Validation data forwarded to training. If None,
            the validation process is skipped.
        batch_size: Number of samples per training step.
        epochs: Number of epochs for training.
        train_whole_model: If true, the Hub module is trained together with
            the classification layer on top. Otherwise only the top
            classification layer is trained.
        dropout_rate: The rate for dropout.
        learning_rate: Base learning rate when train batch size is 256.
            Linear to the batch size.
        momentum: A Python float forwarded to the optimizer. Only used when
            `use_hub_library` is True.
        shuffle: Whether the data should be shuffled.
        use_augmentation: Use data augmentation for preprocessing.
        use_hub_library: Use `make_image_classifier_lib` from tensorflow hub
            to retrain the model.
        warmup_steps: Number of warmup steps for the warmup schedule on the
            learning rate. If None, the default warmup_steps is used, which
            is the total training steps in two epochs. Only used when
            `use_hub_library` is False.
        model_dir: The location of the model checkpoint files. Only used
            when `use_hub_library` is False.
        do_train: Whether to run training. When False, the model is only
            created (with loss and metrics) so it can be evaluated.

    Returns:
        An instance of ImageClassifier class.

    Raises:
        ValueError: If the current TF behavior is not listed in the model
            spec's `compat_tf_versions`.
    """
    model_spec = ms.get(model_spec)

    # Refuse to continue when the running TF behavior is unsupported.
    tf_behavior = compat.get_tf_behavior()
    supported_versions = model_spec.compat_tf_versions
    if tf_behavior not in supported_versions:
        raise ValueError(
            'Incompatible versions. Expect {}, but got {}.'.format(
                supported_versions, tf_behavior))

    # Hyperparameters accepted by both hparams builders.
    shared_hparams = {
        'batch_size': batch_size,
        'train_epochs': epochs,
        'do_fine_tuning': train_whole_model,
        'dropout_rate': dropout_rate,
        'learning_rate': learning_rate,
    }
    if use_hub_library:
        hparams = get_hub_lib_hparams(momentum=momentum, **shared_hparams)
    else:
        hparams = train_image_classifier_lib.HParams.get_hparams(
            warmup_steps=warmup_steps,
            model_dir=model_dir,
            **shared_hparams)

    classifier = ImageClassifier(
        model_spec,
        train_data.index_to_label,
        shuffle=shuffle,
        hparams=hparams,
        use_augmentation=use_augmentation,
        representative_data=train_data)

    if not do_train:
        # Used in evaluation.
        classifier.create_model(with_loss_and_metrics=True)
        return classifier

    tf.compat.v1.logging.info('Retraining the models...')
    classifier.train(train_data, validation_data)
    return classifier
Beispiel #30
0
    def testEfficientDetLite0(self):
        """Trains, evaluates and exports an `efficientdet_lite0` detector.

        Covers training on a generated Pascal VOC dataset, evaluation of the
        trained model, export to SavedModel, export to a float TFLite model
        with metadata, evaluation of that TFLite model, and export to the
        quantized TFLite variants with a size check for each.
        """
        # Gets model specification.
        spec = model_spec.get('efficientdet_lite0')

        # Prepare data.
        images_dir, annotations_dir, label_map = test_util.create_pascal_voc(
            self.get_temp_dir())
        data = object_detector_dataloader.DataLoader.from_pascal_voc(
            images_dir, annotations_dir, label_map)

        # Train the model.
        task = object_detector.create(data, spec, batch_size=1, epochs=1)
        self.assertEqual(spec.config.num_classes, 2)

        # Evaluate trained model
        metrics = task.evaluate(data)
        self.assertIsInstance(metrics, dict)
        self.assertGreaterEqual(metrics['AP'], 0)

        # Export the model to saved model.
        output_path = os.path.join(self.get_temp_dir(), 'saved_model')
        task.export(self.get_temp_dir(),
                    export_format=ExportFormat.SAVED_MODEL)
        self.assertTrue(os.path.isdir(output_path))
        self.assertNotEqual(len(os.listdir(output_path)), 0)

        # Export the model to TFLite model.
        output_path = os.path.join(self.get_temp_dir(), 'float.tflite')
        task.export(self.get_temp_dir(),
                    tflite_filename='float.tflite',
                    quantization_type=QuantizationType.FP32,
                    export_format=ExportFormat.TFLITE,
                    with_metadata=True,
                    export_metadata_json_file=True)
        # Checks the sizes of the float32 TFLite model files in bytes.
        model_size = 13476379
        self.assertNear(os.path.getsize(output_path), model_size, 50000)

        # The metadata JSON exported next to the model must exist, be
        # non-empty and match the checked-in expected file.
        json_output_file = os.path.join(self.get_temp_dir(), 'float.json')
        self.assertTrue(os.path.isfile(json_output_file))
        self.assertGreater(os.path.getsize(json_output_file), 0)
        expected_json_file = test_util.get_test_data_path(
            'efficientdet_lite0_metadata.json')
        self.assertTrue(filecmp.cmp(json_output_file, expected_json_file))

        # Evaluate the TFLite model.
        # Assert on the result of this evaluation itself; checking `metrics`
        # here would only re-verify the earlier `task.evaluate` call.
        tflite_metrics = task.evaluate_tflite(output_path, data)
        self.assertIsInstance(tflite_metrics, dict)
        self.assertGreaterEqual(tflite_metrics['AP'], 0)

        # Export the model to quantized TFLite model.
        # TODO(b/175173304): Skips the test for stable tensorflow 2.4 for now since
        # it fails. Will revert this change after TF upgrade.
        if tf.__version__.startswith('2.4'):
            return

        # Not include QuantizationType.FP32 here since we have already tested it
        # above together with metadata file test.
        types = (QuantizationType.INT8, QuantizationType.FP16,
                 QuantizationType.DYNAMIC)
        # The sizes of the TFLite model files in bytes.
        model_sizes = (4439987, 6840331, 4289875)
        for quantization_type, model_size in zip(types, model_sizes):
            filename = quantization_type.name.lower() + '.tflite'
            output_path = os.path.join(self.get_temp_dir(), filename)
            task.export(self.get_temp_dir(),
                        quantization_type=quantization_type,
                        tflite_filename=filename,
                        export_format=ExportFormat.TFLITE)
            self.assertNear(os.path.getsize(output_path), model_size, 50000)