Beispiel #1
0
    def _train_and_test(self,
                        model_path,
                        output_model_path,
                        training_datapoints,
                        test_datapoints,
                        keep_classes=False):
        """Imprints a model class by class, saves it and verifies predictions.

        Args:
          model_path: path of the base model handed to ImprintingEngine.
          output_model_path: path the retrained model is saved to and then
            loaded from for classification.
          training_datapoints: iterable of dicts with keys 'image_names' and
            'label_id'; each entry triggers one train() call.
          test_datapoints: iterable of dicts with keys 'image_name',
            'label_id' and 'score', forwarded to self._classify_image().
          keep_classes: forwarded to ImprintingEngine. When falsy, the output
            tensor size must equal the number of training datapoints.
        """
        trainer = ImprintingEngine(model_path, keep_classes)
        image_shape = self._get_image_shape(model_path)
        data_dir = test_utils.test_data_path('imprinting')

        # Training phase: one train() call per datapoint.
        for datapoint in training_datapoints:
            images = test_utils.prepare_images(datapoint['image_names'],
                                               data_dir, image_shape)
            trainer.train(images, datapoint['label_id'])
        trainer.save_model(output_model_path)

        # Verification phase: reload the saved model as a classifier.
        classifier = ClassificationEngine(output_model_path)
        self.assertEqual(1, classifier.get_num_of_output_tensors())
        if not keep_classes:
            expected_size = len(training_datapoints)
            self.assertEqual(expected_size,
                             classifier.get_output_tensor_size(0))
        for datapoint in test_datapoints:
            self._classify_image(classifier, data_dir,
                                 datapoint['image_name'],
                                 datapoint['label_id'],
                                 datapoint['score'])
Beispiel #2
0
    def test_train_all(self):
        """Trains three categories via train_all() and checks each prediction.

        For every model in self._MODEL_LIST the engine is trained from scratch
        (keep_classes=False), saved to a temporary file, reloaded as a
        ClassificationEngine and queried with one test image per category.
        """
        # Position in this list is the class id assigned by train_all().
        train_set = [['cat_train_0.bmp'], ['dog_train_0.bmp'],
                     ['hotdog_train_0.bmp', 'hotdog_train_1.bmp']]
        label_to_id_map = {'cat': 0, 'dog': 1, 'hot_dog': 2}
        test_images = [('cat_test_0.bmp', 'cat'), ('dog_test_0.bmp', 'dog'),
                       ('hotdog_test_0.bmp', 'hot_dog')]

        for model_path in self._MODEL_LIST:
            # Passing the model path makes a failing sub-test identifiable in
            # the test report.
            with self.subTest(model_path=model_path):
                with test_utils.TemporaryFile(
                        suffix='.tflite') as output_model_path:
                    data_dir = test_utils.test_data_path('imprinting')
                    engine = ImprintingEngine(model_path, keep_classes=False)
                    image_shape = self._get_image_shape(model_path)

                    # Train.
                    train_input = [
                        test_utils.prepare_images(image_list, data_dir,
                                                  image_shape)
                        for image_list in train_set
                    ]
                    engine.train_all(train_input)
                    engine.save_model(output_model_path.name)

                    # Test.
                    engine = ClassificationEngine(output_model_path.name)
                    self.assertEqual(1, engine.get_num_of_output_tensors())
                    self.assertEqual(3, engine.get_output_tensor_size(0))

                    for image_name, label in test_images:
                        self._classify_image(engine, data_dir, image_name,
                                             label_to_id_map[label], 0.99)
def run_benchmark(model):
    """Times imprinting training on randomly generated input tensors.

    The random generator is seeded with a fixed value, so the generated data
    is the same on every run. The measured span covers all train() calls plus
    saving the model to a temporary file.

    Args:
      model: string, file name of the input model.

    Returns:
      float, training time.
    """
    input_size = input_tensor_size(model)
    engine = ImprintingEngine(test_utils.test_data_path(model),
                              keep_classes=False)

    np.random.seed(12345)
    # 10 categories with 20 random images each.
    category_count = 10
    images_per_category = 20
    data_by_category = {
        category: [
            np.random.randint(0, 255, input_size)
            for _ in range(images_per_category)
        ]
        for category in range(category_count)
    }

    began = time.perf_counter()
    # Keys are 0..9 in insertion order, so each key doubles as the class id.
    for class_id, tensors in data_by_category.items():
        engine.train(tensors, class_id)
    with tempfile.NamedTemporaryFile() as model_file:
        engine.save_model(model_file.name)
    training_time = time.perf_counter() - began

    print('Model: %s' % model)
    print('Training time: %.2fs' % training_time)
    return training_time
def main():
    """Retrains a model by imprinting and reports top-1 to top-5 accuracy.

    Steps: parse arguments, split the data set into train/test lists, prepare
    the training images per category, train with ImprintingEngine.train_all(),
    save the model and its labels, then classify every test image and print
    the share of images whose category appears within the top k candidates.
    """
    args = _parse_args()
    print('---------------      Parsing data set    -----------------')
    print('Dataset path:', args.data)

    train_set, test_set = _read_data(args.data, args.test_ratio)
    print('Image list successfully parsed! Category Num = ', len(train_set))
    shape = _get_required_shape(args.model_path)

    print('---------------- Processing training data ----------------')
    print('This process may take more than 30 seconds.')
    train_input = []
    labels_map = {}
    # The position of a category in train_input is its class id.
    for class_id, (category, image_list) in enumerate(train_set.items()):
        print('Processing category:', category)
        train_input.append(
            _prepare_images(image_list, os.path.join(args.data, category),
                            shape))
        labels_map[class_id] = category
    print('----------------      Start training     -----------------')
    engine = ImprintingEngine(args.model_path)
    engine.train_all(train_input)
    print('----------------     Training finished!  -----------------')

    engine.save_model(args.output)
    print('Model saved as : ', args.output)
    _save_labels(labels_map, args.output)

    print('------------------   Start evaluating   ------------------')
    engine = ClassificationEngine(args.output)
    top_k = 5
    # correct[i] counts images whose category is among the first i+1
    # candidates.
    correct = [0] * top_k
    total = 0
    for category, image_list in test_set.items():
        print('Evaluating category [', category, ']')
        for img_name in image_list:
            # Close each image file once it has been classified so file
            # handles do not pile up on large test sets.
            with Image.open(os.path.join(args.data, category,
                                         img_name)) as img:
                candidates = engine.classify_with_image(img,
                                                        threshold=0.1,
                                                        top_k=top_k)
            total += 1
            hit_rank = next(
                (rank for rank, candidate in enumerate(candidates[:top_k])
                 if labels_map[candidate[0]] == category), None)
            if hit_rank is not None:
                # A hit at rank r counts for top-(r+1) and every larger k.
                for i in range(hit_rank, top_k):
                    correct[i] += 1
    print('----------------     Evaluation result   -----------------')
    if total == 0:
        # Avoid dividing by zero when the test split contains no images.
        print('No test images were evaluated.')
        return
    for i in range(top_k):
        print('Top {} : {:.0%}'.format(i + 1, correct[i] / total))
Beispiel #5
0
 def test_imprinting_engine_saving_without_training(self):
   """Checks that saving an untrained imprinting model is rejected.

   For every listed model a fresh engine is created and save_model() is
   called without any prior training; a RuntimeError carrying the expected
   message must be raised.
   """
   model_list = [
       'imprinting/mobilenet_v1_1.0_224_l2norm_quant.tflite',
       'imprinting/mobilenet_v1_1.0_224_l2norm_quant_edgetpu.tflite'
   ]
   for model in model_list:
     # subTest keeps checking the remaining models after one fails and names
     # the offending model in the report.
     with self.subTest(model=model):
       engine = ImprintingEngine(
           test_utils.test_data_path(model), keep_classes=False)
       with self.assertRaises(RuntimeError) as raised:
         with tempfile.NamedTemporaryFile(
             suffix='.tflite') as output_model_path:
           engine.save_model(output_model_path.name)
       self.assertEqual('Model without training won\'t be saved!',
                        str(raised.exception))
def _benchmark_for_training(model, data_set):
    """Measures training time for given model and data set.

    Image pre-processing is timed and printed separately; the returned value
    covers the train() calls plus saving the model.

    Args:
      model: string, file name of the input model.
      data_set: string, name of the folder storing images. Labels file is also
        named as '[data_set].csv'.

    Returns:
      float, training time in seconds.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    shape = _get_shape(model)
    engine = ImprintingEngine(test_utils.test_data_path('imprinting', model),
                              keep_classes=False)

    data_dir = test_utils.test_data_path(data_set)

    # The labels file is named as '[data_set].csv'.
    image_list_by_category = test_utils.prepare_classification_data_set(
        test_utils.test_data_path(data_set + '.csv'))

    start_time = time.monotonic()
    # One entry per category, in the mapping's iteration order; the position
    # is used as class id below.
    tensors_by_category = [
        test_utils.prepare_images(image_list,
                                  os.path.join(data_dir, category), shape)
        for category, image_list in image_list_by_category.items()
    ]
    end_time = time.monotonic()
    print('Image pre-processing time: ', end_time - start_time, 's')

    # A temporary file replaces the fixed '/tmp' path: it cannot collide with
    # a concurrent run and is removed even when training or saving fails.
    with tempfile.NamedTemporaryFile(suffix='.tflite') as output_model:
        start_time = time.monotonic()
        for class_id, tensors in enumerate(tensors_by_category):
            engine.train(tensors, class_id)
        engine.save_model(output_model.name)
        training_time = time.monotonic() - start_time

    print('Model: ', model)
    print('Data set : ', data_set)
    print('Training time : ', training_time, 's')
    return training_time
Beispiel #7
0
def _ask_validation_count(net_pictures):
    """Prompts until a valid number of validation pictures is entered.

    A count is accepted when it is at least 2 and at most 25% of
    net_pictures. The caller must make sure such a count exists.

    Args:
      net_pictures: int, number of pictures available for the user.

    Returns:
      int, the accepted number of validation pictures.
    """
    max_count = int(0.25 * net_pictures)
    while True:
        # type=int lets click re-prompt on non-numeric input instead of
        # raising ValueError.
        k = click.prompt(
            'How many pictures do you want for validating the training?',
            type=int)
        if k > max_count:
            click.echo(
                click.style(
                    'At most 25% ({} pictures) of the training data can be used for testing the model!'
                    .format(max_count),
                    fg='yellow'))
        elif k < 2:
            click.echo(
                click.style(
                    'At least 2 pictures must be used for testing the model!',
                    fg='yellow'))
        else:
            return k


def retrain_model(props):
    """Retrains the classification model by imprinting, one class per user.

    Only the last layer is changed. The engine is created with
    keep_classes=False, so the base model's classes are dropped. For every
    user with pictures, the first k files (k chosen interactively) are held
    back for evaluation and the rest is used for training. The new model and
    its labels map are written to disk, the paths in props are updated, the
    model is evaluated (top-1 to top-5) and the properties are saved.

    Args:
      props: dict; reads props['classification'] ('default_path', 'path',
        'labels') and props['user'][<name>]['images'], and overwrites
        props['classification']['path'] and ['labels'].
    """
    model_path = props['classification']['default_path']

    click.echo('Parsing data for retraining...')
    train_set = {}
    test_set = {}
    for user, user_props in props['user'].items():
        image_dir = user_props['images']
        images = [
            f for f in os.listdir(image_dir)
            if os.path.isfile(os.path.join(image_dir, f))
        ]
        if not images:
            continue
        # Allocate the number of images for training and validation.
        net_pictures = len(images)
        click.echo(
            click.style('We found {} pictures for {}'.format(
                net_pictures, user),
                        fg='green'))
        if int(0.25 * net_pictures) < 2:
            # No count can satisfy both "at least 2" and "at most 25%", so
            # prompting would never terminate.
            click.echo(
                click.style(
                    'Skipping {}: too few pictures to use at least 2 (and at most 25%) for testing the model!'
                    .format(user),
                    fg='yellow'))
            continue
        k = _ask_validation_count(net_pictures)
        # 2 <= k <= 25% of the pictures, so both parts are non-empty.
        test_set[user] = images[:k]
        train_set[user] = images[k:]

    # Get the input shape of the model to retrain.
    tmp = BasicEngine(model_path)
    input_tensor = tmp.get_input_tensor_shape()
    shape = (input_tensor[2], input_tensor[1])

    # Resize pictures and create the new labels map.
    train_input = []
    labels_map = {}
    for user_id, (user, image_list) in enumerate(train_set.items()):
        image_dir = props['user'][user]['images']
        tensors = []
        for filename in image_list:
            with Image.open(os.path.join(image_dir, filename)) as img:
                resized = img.convert('RGB').resize(shape, Image.NEAREST)
                tensors.append(np.asarray(resized).flatten())
        train_input.append(np.array(tensors))
        labels_map[user_id] = user

    # Train model.
    click.echo('Start training')
    engine = ImprintingEngine(model_path, keep_classes=False)
    engine.train_all(train_input)
    click.echo(click.style('Training finished!', fg='green'))

    # Remember the old model files so they can be removed afterwards.
    old_model = props['classification']['path']
    old_labels = props['classification']['labels']
    # Save the new model.
    props['classification']['path'] = './Models/model{}.tflite'.format(''.join(
        ['_' + u for u in labels_map.values()]))
    engine.save_model(props['classification']['path'])
    # Save the labels.
    props['classification']['labels'] = props['classification'][
        'path'].replace('classification', 'labels').replace('tflite', 'json')
    with open(props['classification']['labels'], 'w') as f:
        json.dump(labels_map, f, indent=4)

    # Evaluate how well the retrained model performs.
    click.echo('Start evaluation')
    engine = ClassificationEngine(props['classification']['path'])
    top_k = 5
    # correct[i] counts images whose user is among the first i+1 candidates.
    correct = [0] * top_k
    total = 0
    for user, image_list in test_set.items():
        image_dir = props['user'][user]['images']
        for img_name in image_list:
            # Close every image after classification to avoid leaking file
            # handles.
            with Image.open(os.path.join(image_dir, img_name)) as img:
                candidates = engine.classify_with_image(img,
                                                        threshold=0.1,
                                                        top_k=top_k)
            total += 1
            hit_rank = next(
                (rank for rank, candidate in enumerate(candidates[:top_k])
                 if labels_map[candidate[0]] == user), None)
            if hit_rank is not None:
                # A hit at rank r counts for top-(r+1) and every larger k.
                for i in range(hit_rank, top_k):
                    correct[i] += 1
    # Report once, after all users have been evaluated.
    if total:
        click.echo('Evaluation Results:')
        for i in range(top_k):
            click.echo('Top {} : {:.0%}'.format(i + 1, correct[i] / total))
        # TODO: highlight with colors how well it performed.

    if (old_model != props['classification']['path']
            and old_labels != props['classification']['labels']):
        # Only files that actually exist are offered for removal, so a
        # missing model or labels file cannot raise FileNotFoundError.
        stale_files = [
            path for path in (old_model, old_labels) if os.path.exists(path)
        ]
        if stale_files and not click.confirm(
                'Do you want to keep old models?'):
            for path in stale_files:
                os.remove(path)
            click.echo(click.style('Old models removed.', fg='green'))
    # Save properties.
    save_properties(props)
    def _transfer_learn_and_evaluate(self, model_path, keep_classes,
                                     dataset_path, test_ratio, top_k_range):
        """Transfer-learns with given params and returns the evaluation result.

        Args:
          model_path: string, path of the base model.
          keep_classes: bool, whether to keep base model classes.
          dataset_path: string, path to the directory of dataset. The images
            should be put under sub-directory named by category.
          test_ratio: float, the ratio of images used for test.
          top_k_range: int, top_k range to be evaluated. The function will
            return accuracy from top 1 to top k.

        Returns:
          list of float numbers; entry i is the share of test images whose
          category is among the first i+1 candidates.

        Raises:
          ZeroDivisionError: if the test set contains no images.
        """
        print('---------------      Parsing dataset      ----------------')
        print('Dataset path:', dataset_path)

        # Train in fixed order to ensure the same evaluation result.
        train_set, test_set = test_utils.prepare_data_set_from_directory(
            dataset_path, test_ratio, True)

        print('Image list successfully parsed! Number of Categories = ',
              len(train_set))
        input_shape = self._get_input_tensor_shape(model_path)
        required_image_shape = (input_shape[2], input_shape[1]
                                )  # (width, height)
        print('---------------  Processing training data ----------------')
        print('This process may take more than 30 seconds.')
        # New class ids start after the base model's classes when those are
        # kept.
        num_classes = self._get_output_number_classes(
            model_path) if keep_classes else 0
        train_input = []
        labels_map = {}
        for class_id, (category, image_list) in enumerate(train_set.items()):
            print('Processing {} ({} images)'.format(category,
                                                     len(image_list)))
            train_input.append(
                test_utils.prepare_images(image_list,
                                          os.path.join(dataset_path, category),
                                          required_image_shape))
            labels_map[num_classes + class_id] = category

        # Train.
        print('----------------      Start training     -----------------')
        imprinting_engine = ImprintingEngine(model_path, keep_classes)
        imprinting_engine.train_all(train_input)
        print('----------------     Training finished   -----------------')

        # top_k_correct_count[i] is the number of images first recognized at
        # rank i; it is turned into a running total after the loop.
        top_k_correct_count = [0] * top_k_range
        image_num = 0
        # The context manager closes and removes the temporary model file
        # deterministically instead of relying on garbage collection.
        with tempfile.NamedTemporaryFile(suffix='.tflite') as output_model:
            imprinting_engine.save_model(output_model.name)

            # Evaluate.
            print('----------------     Start evaluating    -----------------')
            classification_engine = ClassificationEngine(output_model.name)
            for category, image_list in test_set.items():
                n = len(image_list)
                print('Evaluating {} ({} images)'.format(category, n))
                for image_name in image_list:
                    with test_image(
                            os.path.join(dataset_path, category,
                                         image_name)) as raw_image:
                        # Set threshold as a negative number to ensure we get
                        # top k candidates even if its score is 0.
                        candidates = classification_engine.classify_with_image(
                            raw_image, threshold=-0.1, top_k=top_k_range)
                        for rank, candidate in enumerate(candidates):
                            # Ids missing from labels_map are skipped rather
                            # than looked up.
                            if labels_map.get(candidate[0]) == category \
                                    and candidate[0] in labels_map:
                                top_k_correct_count[rank] += 1
                                break
                image_num += n

        # Make the counts cumulative: a hit at rank r also counts for every
        # larger k.
        for i in range(1, top_k_range):
            top_k_correct_count[i] += top_k_correct_count[i - 1]

        return [top_k_correct_count[i] / image_num for i in range(top_k_range)]